From 517a2e5e2644440fde99438e00b0a699c072b414 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Wed, 21 Dec 2022 14:18:39 -0500
Subject: [PATCH 01/79] import Lepton library with namespace and header changed
 to LMP_Lepton

---
 lib/README                                    |    2 +
 lib/lepton/Install.py                         |    1 +
 lib/lepton/LICENSE                            |   20 +
 lib/lepton/Makefile.lammps.empty              |    7 +
 lib/lepton/Makefile.mpi                       |   34 +
 lib/lepton/Makefile.serial                    |   34 +
 lib/lepton/README.md                          |   43 +
 lib/lepton/include/LMP_Lepton.h               |   43 +
 .../include/lepton/CompiledExpression.h       |  114 ++
 lib/lepton/include/lepton/CustomFunction.h    |  109 ++
 lib/lepton/include/lepton/Exception.h         |   59 +
 lib/lepton/include/lepton/ExpressionProgram.h |  103 ++
 .../include/lepton/ExpressionTreeNode.h       |  105 ++
 lib/lepton/include/lepton/Operation.h         | 1193 +++++++++++++++++
 lib/lepton/include/lepton/ParsedExpression.h  |  131 ++
 lib/lepton/include/lepton/Parser.h            |   77 ++
 lib/lepton/include/lepton/windowsIncludes.h   |   41 +
 lib/lepton/src/CompiledExpression.cpp         |  418 ++++++
 lib/lepton/src/ExpressionProgram.cpp          |  110 ++
 lib/lepton/src/ExpressionTreeNode.cpp         |  107 ++
 lib/lepton/src/MSVC_erfc.h                    |   91 ++
 lib/lepton/src/Operation.cpp                  |  345 +++++
 lib/lepton/src/ParsedExpression.cpp           |  379 ++++++
 lib/lepton/src/Parser.cpp                     |  409 ++++++
 24 files changed, 3975 insertions(+)
 create mode 120000 lib/lepton/Install.py
 create mode 100644 lib/lepton/LICENSE
 create mode 100644 lib/lepton/Makefile.lammps.empty
 create mode 100644 lib/lepton/Makefile.mpi
 create mode 100644 lib/lepton/Makefile.serial
 create mode 100644 lib/lepton/README.md
 create mode 100644 lib/lepton/include/LMP_Lepton.h
 create mode 100644 lib/lepton/include/lepton/CompiledExpression.h
 create mode 100644 lib/lepton/include/lepton/CustomFunction.h
 create mode 100644 lib/lepton/include/lepton/Exception.h
 create mode 100644 lib/lepton/include/lepton/ExpressionProgram.h
 create mode 100644 lib/lepton/include/lepton/ExpressionTreeNode.h
 create mode 100644 lib/lepton/include/lepton/Operation.h
 create mode 100644 lib/lepton/include/lepton/ParsedExpression.h
 create mode 100644 lib/lepton/include/lepton/Parser.h
 create mode 100644 lib/lepton/include/lepton/windowsIncludes.h
 create mode 100644 lib/lepton/src/CompiledExpression.cpp
 create mode 100644 lib/lepton/src/ExpressionProgram.cpp
 create mode 100644 lib/lepton/src/ExpressionTreeNode.cpp
 create mode 100644 lib/lepton/src/MSVC_erfc.h
 create mode 100644 lib/lepton/src/Operation.cpp
 create mode 100644 lib/lepton/src/ParsedExpression.cpp
 create mode 100644 lib/lepton/src/Parser.cpp

diff --git a/lib/README b/lib/README
index ab71e6763c..255077bb1b 100644
--- a/lib/README
+++ b/lib/README
@@ -33,6 +33,8 @@ kim           hooks to the KIM library, used by KIM package
                 from Ryan Elliott and Ellad Tadmor (U Minn)
 kokkos        Kokkos package for GPU and many-core acceleration
                 from Kokkos development team (Sandia)
+lepton        Lepton library for fast evaluation of mathematical
+                expressions from a string. Imported from OpenMM.
 linalg        set of BLAS and LAPACK routines needed by ATC package
                 from Axel Kohlmeyer (Temple U)
 mdi           hooks to the MDI library, used by MDI package
diff --git a/lib/lepton/Install.py b/lib/lepton/Install.py
new file mode 120000
index 0000000000..ffe709d44c
--- /dev/null
+++ b/lib/lepton/Install.py
@@ -0,0 +1 @@
+../Install.py
\ No newline at end of file
diff --git a/lib/lepton/LICENSE b/lib/lepton/LICENSE
new file mode 100644
index 0000000000..6359209705
--- /dev/null
+++ b/lib/lepton/LICENSE
@@ -0,0 +1,20 @@
+Portions copyright (c) 2009-2019 Stanford University and the Authors.
+Authors: Peter Eastman and OpenMM contributors
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/lib/lepton/Makefile.lammps.empty b/lib/lepton/Makefile.lammps.empty
new file mode 100644
index 0000000000..9e74c23b1d
--- /dev/null
+++ b/lib/lepton/Makefile.lammps.empty
@@ -0,0 +1,7 @@
+# Settings that the LAMMPS build will import when this package library is used.
+# The Lepton library is built entirely from the sources in this directory, so
+# no extra include flags, libraries, or library search paths are set here.
+
+lepton_SYSINC =
+lepton_SYSLIB =
+lepton_SYSPATH =
diff --git a/lib/lepton/Makefile.mpi b/lib/lepton/Makefile.mpi
new file mode 100644
index 0000000000..045ea61015
--- /dev/null
+++ b/lib/lepton/Makefile.mpi
@@ -0,0 +1,34 @@
+EXTRAMAKE=Makefile.lammps.empty
+# Build the static Lepton library with the MPI C++ compiler wrapper.
+CXX=mpicxx
+
+# Lepton is plain C++11 without external dependencies.  -fPIC produces
+# position-independent code so the archive can also go into shared objects.
+CXXFLAGS=-D_DEFAULT_SOURCE -O2 -Wall -fPIC -std=c++11
+INC=-I include
+AR=ar
+ARFLAGS=rc
+# name of the static library archive produced by this makefile
+LIB=liblepton.a
+SRC=src/CompiledExpression.cpp src/ExpressionProgram.cpp src/ExpressionTreeNode.cpp src/Operation.cpp src/ParsedExpression.cpp src/Parser.cpp
+OBJ=$(SRC:src/%.cpp=build/%.o)
+
+all: $(LIB) Makefile.lammps
+
+build:
+	mkdir -p build
+# order-only prerequisite: the directory must exist, but its timestamp must not trigger rebuilds
+build/%.o: src/%.cpp | build
+	$(CXX) $(INC) $(CXXFLAGS) -c $< -o $@
+
+Makefile.lammps:
+	cp $(EXTRAMAKE) $@
+
+.PHONY: all clean
+
+$(LIB) : $(OBJ)
+	$(AR) $(ARFLAGS) $@ $^
+
+clean:
+	rm -f build/*.o $(LIB) *~
+
diff --git a/lib/lepton/Makefile.serial b/lib/lepton/Makefile.serial
new file mode 100644
index 0000000000..58151e49c2
--- /dev/null
+++ b/lib/lepton/Makefile.serial
@@ -0,0 +1,34 @@
+EXTRAMAKE=Makefile.lammps.empty
+# Build the static Lepton library with the GNU C++ compiler.
+CXX=g++
+
+# Lepton is plain C++11 without external dependencies.  -fPIC produces
+# position-independent code so the archive can also go into shared objects.
+CXXFLAGS=-D_DEFAULT_SOURCE -O2 -Wall -fPIC -std=c++11
+INC=-I include
+AR=ar
+ARFLAGS=rc
+# name of the static library archive produced by this makefile
+LIB=liblepton.a
+SRC=src/CompiledExpression.cpp src/ExpressionProgram.cpp src/ExpressionTreeNode.cpp src/Operation.cpp src/ParsedExpression.cpp src/Parser.cpp
+OBJ=$(SRC:src/%.cpp=build/%.o)
+
+all: $(LIB) Makefile.lammps
+
+build:
+	mkdir -p build
+# order-only prerequisite: the directory must exist, but its timestamp must not trigger rebuilds
+build/%.o: src/%.cpp | build
+	$(CXX) $(INC) $(CXXFLAGS) -c $< -o $@
+
+Makefile.lammps:
+	cp $(EXTRAMAKE) $@
+
+.PHONY: all clean
+
+$(LIB) : $(OBJ)
+	$(AR) $(ARFLAGS) $@ $^
+
+clean:
+	rm -f build/*.o $(LIB) *~
+
diff --git a/lib/lepton/README.md b/lib/lepton/README.md
new file mode 100644
index 0000000000..d2e4240c92
--- /dev/null
+++ b/lib/lepton/README.md
@@ -0,0 +1,43 @@
+This directory contains the Lepton library from the OpenMM software,
+which efficiently evaluates mathematical expressions given as
+strings.  This library is used by the LEPTON package, which supports
+force styles within LAMMPS that make use of this library.
+
+You can type "make lib-lepton" from the src directory to see help on how
+to build this library via make commands, or you can do the same thing
+by typing "python Install.py" from within this directory, or you can
+do it manually by following the instructions below.
+
+---------------------
+
+Lepton (short for “lightweight expression parser”) is a C++ library for
+parsing, evaluating, differentiating, and analyzing mathematical
+expressions. It takes expressions in the form of text strings, then
+converts them to an internal representation suitable for evaluation or
+analysis. Here are some of its major features:
+
+- Support for a large number of standard mathematical functions and operations.
+- Support for user defined custom functions.
+- A variety of optimizations for automatically simplifying expressions.
+- Computing analytic derivatives.
+- Representing parsed expressions in two different forms (tree or program) suitable for
+  further analysis or processing.
+
+Lepton was originally created for use in the [OpenMM project](https://openmm.org).
+This copy was imported from OpenMM and is released under the MIT-style
+license that can be found in the file LICENSE.
+
+To use the LEPTON package in LAMMPS, execute
+make -f Makefile.serial
+in this directory then
+make yes-lepton
+in the src directory of LAMMPS to rebuild LAMMPS.
+
+Note that you must have a C++11 compatible compiler installed to use
+Makefile.serial or Makefile.mpi.
+
+The library has no external dependencies, so the generated Makefile.lammps normally needs no editing.
+
+Makefile.serial and Makefile.mpi build the same static library, liblepton.a;
+Makefile.serial is set up for g++ and Makefile.mpi for the mpicxx compiler wrapper.
+Pick the one that matches the compiler used to build LAMMPS itself.
diff --git a/lib/lepton/include/LMP_Lepton.h b/lib/lepton/include/LMP_Lepton.h
new file mode 100644
index 0000000000..73b6b6fa38
--- /dev/null
+++ b/lib/lepton/include/LMP_Lepton.h
@@ -0,0 +1,43 @@
+#ifndef LMP_LEPTON_H_
+#define LMP_LEPTON_H_
+
+/* -------------------------------------------------------------------------- *
+ *                                   Lepton                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the Lepton expression parser originating from              *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2009 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "lepton/CompiledExpression.h"
+#include "lepton/CustomFunction.h"
+#include "lepton/ExpressionProgram.h"
+#include "lepton/ExpressionTreeNode.h"
+#include "lepton/Operation.h"
+#include "lepton/ParsedExpression.h"
+#include "lepton/Parser.h"
+
+#endif /*LMP_LEPTON_H_*/
diff --git a/lib/lepton/include/lepton/CompiledExpression.h b/lib/lepton/include/lepton/CompiledExpression.h
new file mode 100644
index 0000000000..bf076b0d8b
--- /dev/null
+++ b/lib/lepton/include/lepton/CompiledExpression.h
@@ -0,0 +1,114 @@
+#ifndef LEPTON_COMPILED_EXPRESSION_H_
+#define LEPTON_COMPILED_EXPRESSION_H_
+
+/* -------------------------------------------------------------------------- *
+ *                                   Lepton                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the Lepton expression parser originating from              *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2013-2019 Stanford University and the Authors.      *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "ExpressionTreeNode.h"
+#include "windowsIncludes.h"
+#include <map>
+#include <set>
+#include <string>
+#include <utility>
+#include <vector>
+#ifdef LEPTON_USE_JIT
+    #include "asmjit.h"
+#endif
+
+namespace LMP_Lepton {
+
+class Operation;
+class ParsedExpression;
+
+/**
+ * A CompiledExpression is a highly optimized representation of an expression for cases when you want to evaluate
+ * it many times as quickly as possible.  You should treat it as an opaque object; none of the internal representation
+ * is visible.
+ *
+ * A CompiledExpression is created by calling createCompiledExpression() on a ParsedExpression.
+ *
+ * WARNING: CompiledExpression is NOT thread safe.  You should never access a CompiledExpression from two threads at
+ * the same time.
+ */
+
+class LEPTON_EXPORT CompiledExpression {
+public:
+    CompiledExpression();
+    CompiledExpression(const CompiledExpression& expression);
+    ~CompiledExpression();
+    CompiledExpression& operator=(const CompiledExpression& expression);
+    /**
+     * Get the names of all variables used by this expression.
+     */
+    const std::set<std::string>& getVariables() const;
+    /**
+     * Get a reference to the memory location where the value of a particular variable is stored.  This can be used
+     * to set the value of the variable before calling evaluate().
+     */
+    double& getVariableReference(const std::string& name);
+    /**
+     * You can optionally specify the memory locations from which the values of variables should be read.
+     * This is useful, for example, when several expressions all use the same variable.  You can then set
+     * the value of that variable in one place, and it will be seen by all of them.
+     */
+    void setVariableLocations(std::map<std::string, double*>& variableLocations);
+    /**
+     * Evaluate the expression.  The values of all variables should have been set before calling this.
+     */
+    double evaluate() const; // const, but the mutable members below can still be written; see the thread-safety warning above
+private:
+    friend class ParsedExpression; // grants ParsedExpression access to the private constructor below
+    CompiledExpression(const ParsedExpression& expression); // private: instances come from createCompiledExpression() on a ParsedExpression
+    void compileExpression(const ExpressionTreeNode& node, std::vector<std::pair<ExpressionTreeNode, int> >& temps);
+    int findTempIndex(const ExpressionTreeNode& node, std::vector<std::pair<ExpressionTreeNode, int> >& temps);
+    std::map<std::string, double*> variablePointers; // NOTE(review): presumably the locations passed to setVariableLocations() - confirm in the .cpp
+    std::vector<std::pair<double*, double*> > variablesToCopy;
+    std::vector<std::vector<int> > arguments;
+    std::vector<int> target;
+    std::vector<Operation*> operation; // raw pointers; ownership is not visible from this header
+    std::map<std::string, int> variableIndices;
+    std::set<std::string> variableNames;
+    mutable std::vector<double> workspace; // mutable: can be modified from const member functions
+    mutable std::vector<double> argValues; // mutable: can be modified from const member functions
+    std::map<std::string, double> dummyVariables;
+    double (*jitCode)(); // declared unconditionally, i.e. also when LEPTON_USE_JIT is not defined
+#ifdef LEPTON_USE_JIT
+    void generateJitCode();
+    void generateSingleArgCall(asmjit::X86Compiler& c, asmjit::X86Xmm& dest, asmjit::X86Xmm& arg, double (*function)(double));
+    void generateTwoArgCall(asmjit::X86Compiler& c, asmjit::X86Xmm& dest, asmjit::X86Xmm& arg1, asmjit::X86Xmm& arg2, double (*function)(double, double));
+    std::vector<double> constants;
+    asmjit::JitRuntime runtime;
+#endif
+};
+
+} // namespace LMP_Lepton
+
+#endif /*LEPTON_COMPILED_EXPRESSION_H_*/
diff --git a/lib/lepton/include/lepton/CustomFunction.h b/lib/lepton/include/lepton/CustomFunction.h
new file mode 100644
index 0000000000..b8cbee8c96
--- /dev/null
+++ b/lib/lepton/include/lepton/CustomFunction.h
@@ -0,0 +1,109 @@
+#ifndef LEPTON_CUSTOM_FUNCTION_H_
+#define LEPTON_CUSTOM_FUNCTION_H_
+
+/* -------------------------------------------------------------------------- *
+ *                                   Lepton                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the Lepton expression parser originating from              *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2009 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "windowsIncludes.h"
+
+namespace LMP_Lepton {
+
+/**
+ * This class is the interface for defining your own function that may be included in expressions.
+ * To use it, create a concrete subclass that implements all of the virtual methods for each new function
+ * you want to define.  Then when you call Parser::parse() to parse an expression, pass a map of
+ * function names to CustomFunction objects.
+ */
+
+class LEPTON_EXPORT CustomFunction {
+public:
+    virtual ~CustomFunction() { // virtual so that subclasses can be destroyed through a CustomFunction pointer
+    }
+    /**
+     * Get the number of arguments this function expects.
+     */
+    virtual int getNumArguments() const = 0;
+    /**
+     * Evaluate the function.
+     *
+     * @param arguments    the array of argument values (NOTE(review): presumably getNumArguments() entries - confirm)
+     */
+    virtual double evaluate(const double* arguments) const = 0;
+    /**
+     * Evaluate a derivative of the function.
+     *
+     * @param arguments    the array of argument values
+     * @param derivOrder   an array specifying the number of times the function has been differentiated
+     *                     with respect to each of its arguments.  For example, the array {0, 2} indicates
+     *                     a second derivative with respect to the second argument.
+     */
+    virtual double evaluateDerivative(const double* arguments, const int* derivOrder) const = 0;
+    /**
+     * Create a new duplicate of this object on the heap using the "new" operator.
+     */
+    virtual CustomFunction* clone() const = 0; // NOTE(review): the caller presumably takes ownership of the result - confirm
+};
+
+/**
+ * This class is an implementation of CustomFunction that does no computation.  It just returns
+ * 0 for the value and derivatives.  This is useful when using the parser to analyze expressions
+ * rather than to evaluate them.  You can just create PlaceholderFunctions to represent any custom
+ * functions that may appear in expressions.
+ */
+
+class LEPTON_EXPORT PlaceholderFunction : public CustomFunction {
+public:
+    /**
+     * Create a Placeholder function.
+     *
+     * @param numArgs    the number of arguments the function expects
+     */
+    PlaceholderFunction(int numArgs) : numArgs(numArgs) {
+    }
+    int getNumArguments() const override { // reports the count given to the constructor
+        return numArgs;
+    }
+    double evaluate(const double* arguments) const override { // arguments are ignored
+        return 0.0;
+    }
+    double evaluateDerivative(const double* arguments, const int* derivOrder) const override { // both arrays are ignored
+        return 0.0;
+    }
+    CustomFunction* clone() const override { // heap-allocated copy expecting the same number of arguments
+        return new PlaceholderFunction(numArgs);
+    }
+private:
+    int numArgs;
+};
+
+} // namespace LMP_Lepton
+
+#endif /*LEPTON_CUSTOM_FUNCTION_H_*/
diff --git a/lib/lepton/include/lepton/Exception.h b/lib/lepton/include/lepton/Exception.h
new file mode 100644
index 0000000000..413b08f52e
--- /dev/null
+++ b/lib/lepton/include/lepton/Exception.h
@@ -0,0 +1,59 @@
+#ifndef LEPTON_EXCEPTION_H_
+#define LEPTON_EXCEPTION_H_
+
+/* -------------------------------------------------------------------------- *
+ *                                   Lepton                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the Lepton expression parser originating from              *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2009 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include <exception>
+#include <string>
+
+namespace LMP_Lepton {
+
+/**
+ * This class is used for all exceptions thrown by Lepton.  It keeps a copy of the message and returns it from what().
+ */
+
+class Exception : public std::exception {
+public:
+    Exception(const std::string& message) : message(message) {
+    }
+    ~Exception() noexcept override { // noexcept replaces the deprecated throw() specification
+    }
+    const char* what() const noexcept override { // the pointer refers to the stored message of this object
+        return message.c_str();
+    }
+private:
+    std::string message;
+};
+
+} // namespace LMP_Lepton
+
+#endif /*LEPTON_EXCEPTION_H_*/
diff --git a/lib/lepton/include/lepton/ExpressionProgram.h b/lib/lepton/include/lepton/ExpressionProgram.h
new file mode 100644
index 0000000000..4fba4051e4
--- /dev/null
+++ b/lib/lepton/include/lepton/ExpressionProgram.h
@@ -0,0 +1,103 @@
+#ifndef LEPTON_EXPRESSION_PROGRAM_H_
+#define LEPTON_EXPRESSION_PROGRAM_H_
+
+/* -------------------------------------------------------------------------- *
+ *                                   Lepton                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the Lepton expression parser originating from              *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2009-2018 Stanford University and the Authors.      *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "ExpressionTreeNode.h"
+#include "windowsIncludes.h"
+#include <map>
+#include <string>
+#include <vector>
+
+namespace LMP_Lepton {
+
+class ParsedExpression;
+
+/**
+ * An ExpressionProgram is a linear sequence of Operations for evaluating an expression.  The evaluation
+ * is done with a stack.  The arguments to each Operation are first taken off the stack in order, then it is
+ * evaluated and the result is pushed back onto the stack.  At the end, the stack contains a single value,
+ * which is the value of the expression.
+ *
+ * An ExpressionProgram is created by calling createProgram() on a ParsedExpression.
+ */
+
+class LEPTON_EXPORT ExpressionProgram {
+public:
+    ExpressionProgram();
+    ExpressionProgram(const ExpressionProgram& program);
+    ~ExpressionProgram();
+    ExpressionProgram& operator=(const ExpressionProgram& program);
+    /**
+     * Get the number of Operations that make up this program.
+     */
+    int getNumOperations() const;
+    /**
+     * Get an Operation in this program.
+     */
+    const Operation& getOperation(int index) const; // NOTE(review): valid index range presumably 0..getNumOperations()-1 - confirm
+    /**
+     * Change an Operation in this program.
+     *
+     * The Operation must have been allocated on the heap with the "new" operator.
+     * The ExpressionProgram assumes ownership of it and will delete it when it
+     * is no longer needed.
+     */
+    void setOperation(int index, Operation* operation);
+    /**
+     * Get the size of the stack needed to execute this program.  This is the largest number of elements present
+     * on the stack at any point during evaluation.
+     */
+    int getStackSize() const;
+    /**
+     * Evaluate the expression.  If the expression involves any variables, this method will throw an exception.
+     */
+    double evaluate() const;
+    /**
+     * Evaluate the expression.
+     *
+     * @param variables    a map specifying the values of all variables that appear in the expression.  If any
+     *                     variable appears in the expression but is not included in this map, an exception
+     *                     will be thrown.
+     */
+    double evaluate(const std::map<std::string, double>& variables) const;
+private:
+    friend class ParsedExpression; // grants ParsedExpression access to the private constructor below
+    ExpressionProgram(const ParsedExpression& expression); // private: instances come from createProgram() on a ParsedExpression
+    void buildProgram(const ExpressionTreeNode& node);
+    std::vector<Operation*> operations; // raw pointers; per the setOperation() documentation the program owns its Operations
+    int maxArgs, stackSize; // NOTE(review): stackSize is presumably the value returned by getStackSize() - confirm
+};
+
+} // namespace LMP_Lepton
+
+#endif /*LEPTON_EXPRESSION_PROGRAM_H_*/
diff --git a/lib/lepton/include/lepton/ExpressionTreeNode.h b/lib/lepton/include/lepton/ExpressionTreeNode.h
new file mode 100644
index 0000000000..514cc008a9
--- /dev/null
+++ b/lib/lepton/include/lepton/ExpressionTreeNode.h
@@ -0,0 +1,105 @@
+#ifndef LEPTON_EXPRESSION_TREE_NODE_H_
+#define LEPTON_EXPRESSION_TREE_NODE_H_
+
+/* -------------------------------------------------------------------------- *
+ *                                   Lepton                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the Lepton expression parser originating from              *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2009 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "windowsIncludes.h"
+#include <string>
+#include <vector>
+
+namespace LMP_Lepton {
+
+class Operation;
+
+/**
+ * This class represents a node in the abstract syntax tree representation of an expression.
+ * Each node is defined by an Operation and a set of children.  When the expression is
+ * evaluated, each child is first evaluated in order, then the resulting values are passed
+ * as the arguments to the Operation's evaluate() method.
+ */
+
+class LEPTON_EXPORT ExpressionTreeNode {
+public:
+    /**
+     * Create a new ExpressionTreeNode.
+     *
+     * @param operation    the operation for this node.  The ExpressionTreeNode takes over ownership
+     *                     of this object, and deletes it when the node is itself deleted.
+     * @param children     the children of this node
+     */
+    ExpressionTreeNode(Operation* operation, const std::vector<ExpressionTreeNode>& children);
+    /**
+     * Create a new ExpressionTreeNode with two children.
+     *
+     * @param operation    the operation for this node.  The ExpressionTreeNode takes over ownership
+     *                     of this object, and deletes it when the node is itself deleted.
+     * @param child1       the first child of this node
+     * @param child2       the second child of this node
+     */
+    ExpressionTreeNode(Operation* operation, const ExpressionTreeNode& child1, const ExpressionTreeNode& child2);
+    /**
+     * Create a new ExpressionTreeNode with one child.
+     *
+     * @param operation    the operation for this node.  The ExpressionTreeNode takes over ownership
+     *                     of this object, and deletes it when the node is itself deleted.
+     * @param child        the child of this node
+     */
+    ExpressionTreeNode(Operation* operation, const ExpressionTreeNode& child);
+    /**
+     * Create a new ExpressionTreeNode with no children.
+     *
+     * @param operation    the operation for this node.  The ExpressionTreeNode takes over ownership
+     *                     of this object, and deletes it when the node is itself deleted.
+     */
+    ExpressionTreeNode(Operation* operation);
+    ExpressionTreeNode(const ExpressionTreeNode& node);  // copy constructor; NOTE(review): presumably clones the owned Operation rather than sharing it - confirm in the .cpp
+    ExpressionTreeNode();  // default constructor; NOTE(review): resulting operation pointer is not visible here - confirm it is NULL in the .cpp
+    ~ExpressionTreeNode();  // per the constructor docs above, deletes the owned Operation
+    bool operator==(const ExpressionTreeNode& node) const;  // comparison is defined out of line; semantics not visible in this header
+    bool operator!=(const ExpressionTreeNode& node) const;
+    ExpressionTreeNode& operator=(const ExpressionTreeNode& node);  // copy assignment, defined out of line
+    /**
+     * Get the Operation performed by this node.
+     */
+    const Operation& getOperation() const;
+    /**
+     * Get this node's child nodes.
+     */
+    const std::vector<ExpressionTreeNode>& getChildren() const;
+private:
+    Operation* operation;  // owned by this node (see constructor docs)
+    std::vector<ExpressionTreeNode> children;  // child nodes, stored by value
+};
+
+} // namespace LMP_Lepton
+
+#endif /*LEPTON_EXPRESSION_TREE_NODE_H_*/
diff --git a/lib/lepton/include/lepton/Operation.h b/lib/lepton/include/lepton/Operation.h
new file mode 100644
index 0000000000..b27b25d3d8
--- /dev/null
+++ b/lib/lepton/include/lepton/Operation.h
@@ -0,0 +1,1193 @@
+#ifndef LEPTON_OPERATION_H_
+#define LEPTON_OPERATION_H_
+
+/* -------------------------------------------------------------------------- *
+ *                                   Lepton                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the Lepton expression parser originating from              *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2009-2019 Stanford University and the Authors.      *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "windowsIncludes.h"
+#include "CustomFunction.h"
+#include "Exception.h"
+#include <cmath>
+#include <map>
+#include <string>
+#include <vector>
+#include <sstream>
+#include <algorithm>
+
+namespace LMP_Lepton {
+
+class ExpressionTreeNode;
+
+/**
+ * An Operation represents a single step in the evaluation of an expression, such as a function,
+ * an operator, or a constant value.  Each Operation takes some number of values as arguments
+ * and produces a single value.
+ *
+ * This is an abstract class with subclasses for specific operations.
+ */
+
+class LEPTON_EXPORT Operation {
+public:
+    virtual ~Operation() {  // virtual so that subclasses can be destroyed through an Operation*
+    }
+    /**
+     * This enumeration lists all Operation subclasses.  This is provided so that switch statements
+     * can be used when processing or analyzing parsed expressions.
+     */
+    enum Id {CONSTANT, VARIABLE, CUSTOM, ADD, SUBTRACT, MULTIPLY, DIVIDE, POWER, NEGATE, SQRT, EXP, LOG,
+             SIN, COS, SEC, CSC, TAN, COT, ASIN, ACOS, ATAN, ATAN2, SINH, COSH, TANH, ERF, ERFC, STEP, DELTA, SQUARE, CUBE, RECIPROCAL,
+             ADD_CONSTANT, MULTIPLY_CONSTANT, POWER_CONSTANT, MIN, MAX, ABS, FLOOR, CEIL, SELECT};
+    /**
+     * Get the name of this Operation.
+     */
+    virtual std::string getName() const = 0;
+    /**
+     * Get this Operation's ID.
+     */
+    virtual Id getId() const = 0;
+    /**
+     * Get the number of arguments this operation expects.
+     */
+    virtual int getNumArguments() const = 0;
+    /**
+     * Create a clone of this Operation.
+     */
+    virtual Operation* clone() const = 0;
+    /**
+     * Perform the computation represented by this operation.
+     *
+     * @param args        the array of arguments
+     * @param variables   a map containing the values of all variables
+     * @return the result of performing the computation.
+     */
+    virtual double evaluate(double* args, const std::map<std::string, double>& variables) const = 0;
+    /**
+     * Return an ExpressionTreeNode which represents the analytic derivative of this Operation with respect to a variable.
+     *
+     * @param children     the child nodes
+     * @param childDerivs  the derivatives of the child nodes with respect to the variable
+     * @param variable     the variable with respect to which the derivative should be taken
+     */
+    virtual ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const = 0;
+    /**
+     * Get whether this operation should be displayed with infix notation.
+     */
+    virtual bool isInfixOperator() const {
+        return false;
+    }
+    /**
+     * Get whether this is a symmetric binary operation, such that exchanging its arguments
+     * does not affect the result.
+     */
+    virtual bool isSymmetric() const {
+        return false;
+    }
+    virtual bool operator!=(const Operation& op) const {  // default: two operations differ only when their Ids differ
+        return op.getId() != getId();
+    }
+    virtual bool operator==(const Operation& op) const {  // always the negation of operator!=, so overriding operator!= alone is sufficient
+        return !(*this != op);
+    }
+    class Constant;  // forward declarations of the nested subclasses, one per Id value
+    class Variable;
+    class Custom;
+    class Add;
+    class Subtract;
+    class Multiply;
+    class Divide;
+    class Power;
+    class Negate;
+    class Sqrt;
+    class Exp;
+    class Log;
+    class Sin;
+    class Cos;
+    class Sec;
+    class Csc;
+    class Tan;
+    class Cot;
+    class Asin;
+    class Acos;
+    class Atan;
+    class Atan2;
+    class Sinh;
+    class Cosh;
+    class Tanh;
+    class Erf;
+    class Erfc;
+    class Step;
+    class Delta;
+    class Square;
+    class Cube;
+    class Reciprocal;
+    class AddConstant;
+    class MultiplyConstant;
+    class PowerConstant;
+    class Min;
+    class Max;
+    class Abs;
+    class Floor;
+    class Ceil;
+    class Select;
+};
+
+class LEPTON_EXPORT Operation::Constant : public Operation {  // A fixed numeric value; takes no arguments.
+public:
+    Constant(double value) : value(value) {
+    }
+    std::string getName() const {  // the value as formatted by a default-configured std::stringstream
+        std::stringstream name;
+        name << value;
+        return name.str();
+    }
+    Id getId() const {
+        return CONSTANT;
+    }
+    int getNumArguments() const {
+        return 0;
+    }
+    Operation* clone() const {  // caller receives a new heap-allocated copy
+        return new Constant(value);
+    }
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {  // args and variables are ignored
+        return value;
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;  // declared only; body is defined elsewhere
+    double getValue() const {
+        return value;
+    }
+    bool operator!=(const Operation& op) const {  // unequal unless op is also a Constant holding an equal value
+        const Constant* o = dynamic_cast<const Constant*>(&op);
+        return (o == NULL || o->value != value);  // NOTE: exact floating-point comparison, so a NaN constant is unequal even to itself
+    }
+private:
+    double value;  // the constant returned by evaluate()
+};
+
+class LEPTON_EXPORT Operation::Variable : public Operation {  // A named variable whose value is looked up at evaluation time; takes no arguments.
+public:
+    Variable(const std::string& name) : name(name) {
+    }
+    std::string getName() const {
+        return name;
+    }
+    Id getId() const {
+        return VARIABLE;
+    }
+    int getNumArguments() const {
+        return 0;
+    }
+    Operation* clone() const {  // caller receives a new heap-allocated copy
+        return new Variable(name);
+    }
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {  // args is ignored
+        std::map<std::string, double>::const_iterator iter = variables.find(name);
+        if (iter == variables.end())  // a variable missing from the map is an error, not an implicit zero
+            throw Exception("No value specified for variable "+name);
+        return iter->second;
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;  // declared only; body is defined elsewhere
+    bool operator!=(const Operation& op) const {  // unequal unless op is also a Variable with the same name
+        const Variable* o = dynamic_cast<const Variable*>(&op);
+        return (o == NULL || o->name != name);
+    }
+private:
+    std::string name;  // key used to look the value up in the variables map
+};
+
+class LEPTON_EXPORT Operation::Custom : public Operation {  // Evaluates a CustomFunction, or one of its partial derivatives.
+public:
+    Custom(const std::string& name, CustomFunction* function) : name(name), function(function), isDerivative(false), derivOrder(function->getNumArguments(), 0) {  // function is dereferenced here, so it must be non-null; ~Custom() deletes it
+    }
+    Custom(const std::string& name, CustomFunction* function, const std::vector<int>& derivOrder) : name(name), function(function), isDerivative(false), derivOrder(derivOrder) {  // ~Custom() deletes function
+        for (int order : derivOrder)  // any nonzero entry marks this operation as a derivative
+            if (order != 0)
+                isDerivative = true;
+    }
+    Custom(const Custom& base, int derivIndex) : name(base.name), function(base.function->clone()), isDerivative(true), derivOrder(base.derivOrder) {  // base differentiated once more with respect to argument derivIndex; holds its own clone of the function
+        derivOrder[derivIndex]++;  // derivIndex is not range-checked
+    }
+    ~Custom() {
+        delete function;  // NOTE(review): no copy constructor/assignment is declared, so an implicit copy would share this pointer and delete it twice
+    }
+    std::string getName() const {
+        return name;
+    }
+    Id getId() const {
+        return CUSTOM;
+    }
+    int getNumArguments() const {  // delegated to the wrapped function
+        return function->getNumArguments();
+    }
+    Operation* clone() const {  // deep copy: the function is cloned and the derivative state is carried over
+        Custom* clone = new Custom(name, function->clone());
+        clone->isDerivative = isDerivative;
+        clone->derivOrder = derivOrder;
+        return clone;
+    }
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {  // variables is ignored
+        if (isDerivative)
+            return function->evaluateDerivative(args, &derivOrder[0]);  // passes the per-argument derivative orders as a raw array
+        return function->evaluate(args);
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;  // declared only; body is defined elsewhere
+    const std::vector<int>& getDerivOrder() const {
+        return derivOrder;
+    }
+    bool operator!=(const Operation& op) const {  // compares name and derivative state only; the function objects themselves are not compared
+        const Custom* o = dynamic_cast<const Custom*>(&op);
+        return (o == NULL || o->name != name || o->isDerivative != isDerivative || o->derivOrder != derivOrder);
+    }
+private:
+    std::string name;  // name reported by getName()
+    CustomFunction* function;  // deleted in the destructor
+    bool isDerivative;  // true when evaluate() should call evaluateDerivative()
+    std::vector<int> derivOrder;  // derivative order for each argument of the function
+};
+
+class LEPTON_EXPORT Operation::Add : public Operation {  // Binary addition: args[0] + args[1].
+public:
+    Add() {
+    }
+    std::string getName() const {
+        return "+";
+    }
+    Id getId() const {
+        return ADD;
+    }
+    int getNumArguments() const {
+        return 2;
+    }
+    Operation* clone() const {
+        return new Add();
+    }
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {  // variables is ignored
+        return args[0]+args[1];
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;  // declared only; body is defined elsewhere
+    bool isInfixOperator() const {
+        return true;
+    }
+    bool isSymmetric() const {  // swapping the two arguments does not change the result
+        return true;
+    }
+};
+
+class LEPTON_EXPORT Operation::Subtract : public Operation {  // Binary subtraction: args[0] - args[1].
+public:
+    Subtract() {
+    }
+    std::string getName() const {  // same display name as Negate; the two differ in getId() and argument count
+        return "-";
+    }
+    Id getId() const {
+        return SUBTRACT;
+    }
+    int getNumArguments() const {
+        return 2;
+    }
+    Operation* clone() const {
+        return new Subtract();
+    }
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {  // variables is ignored
+        return args[0]-args[1];
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;  // declared only; body is defined elsewhere
+    bool isInfixOperator() const {  // isSymmetric() is not overridden, so the inherited default (false) applies
+        return true;
+    }
+};
+
+class LEPTON_EXPORT Operation::Multiply : public Operation {  // Binary multiplication: args[0] * args[1].
+public:
+    Multiply() {
+    }
+    std::string getName() const {
+        return "*";
+    }
+    Id getId() const {
+        return MULTIPLY;
+    }
+    int getNumArguments() const {
+        return 2;
+    }
+    Operation* clone() const {
+        return new Multiply();
+    }
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {  // variables is ignored
+        return args[0]*args[1];
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;  // declared only; body is defined elsewhere
+    bool isInfixOperator() const {
+        return true;
+    }
+    bool isSymmetric() const {  // swapping the two arguments does not change the result
+        return true;
+    }
+};
+
+class LEPTON_EXPORT Operation::Divide : public Operation {  // Binary division: args[0] / args[1].
+public:
+    Divide() {
+    }
+    std::string getName() const {
+        return "/";
+    }
+    Id getId() const {
+        return DIVIDE;
+    }
+    int getNumArguments() const {
+        return 2;
+    }
+    Operation* clone() const {
+        return new Divide();
+    }
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {  // variables is ignored; a zero divisor is not checked for
+        return args[0]/args[1];
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;  // declared only; body is defined elsewhere
+    bool isInfixOperator() const {  // isSymmetric() is not overridden, so the inherited default (false) applies
+        return true;
+    }
+};
+
+class LEPTON_EXPORT Operation::Power : public Operation {  // Binary exponentiation: args[0] raised to the power args[1].
+public:
+    Power() {
+    }
+    std::string getName() const {
+        return "^";
+    }
+    Id getId() const {
+        return POWER;
+    }
+    int getNumArguments() const {
+        return 2;
+    }
+    Operation* clone() const {
+        return new Power();
+    }
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {  // variables is ignored
+        return std::pow(args[0], args[1]);  // base first, exponent second
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;  // declared only; body is defined elsewhere
+    bool isInfixOperator() const {  // isSymmetric() is not overridden, so the inherited default (false) applies
+        return true;
+    }
+};
+
+class LEPTON_EXPORT Operation::Negate : public Operation {  // Unary negation: -args[0].
+public:
+    Negate() {
+    }
+    std::string getName() const {  // same display name as Subtract; the two differ in getId() and argument count
+        return "-";
+    }
+    Id getId() const {
+        return NEGATE;
+    }
+    int getNumArguments() const {
+        return 1;
+    }
+    Operation* clone() const {
+        return new Negate();
+    }
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {  // variables is ignored
+        return -args[0];
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;  // declared only; body is defined elsewhere
+};
+
+class LEPTON_EXPORT Operation::Sqrt : public Operation {  // Unary square root of args[0].
+public:
+    Sqrt() {
+    }
+    std::string getName() const {
+        return "sqrt";
+    }
+    Id getId() const {
+        return SQRT;
+    }
+    int getNumArguments() const {
+        return 1;
+    }
+    Operation* clone() const {
+        return new Sqrt();
+    }
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {  // variables is ignored; the argument is passed to std::sqrt unchecked
+        return std::sqrt(args[0]);
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;  // declared only; body is defined elsewhere
+};
+
+class LEPTON_EXPORT Operation::Exp : public Operation {  // Unary exponential: e raised to the power args[0].
+public:
+    Exp() {
+    }
+    std::string getName() const {
+        return "exp";
+    }
+    Id getId() const {
+        return EXP;
+    }
+    int getNumArguments() const {
+        return 1;
+    }
+    Operation* clone() const {
+        return new Exp();
+    }
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {  // variables is ignored
+        return std::exp(args[0]);
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;  // declared only; body is defined elsewhere
+};
+
+class LEPTON_EXPORT Operation::Log : public Operation {  // Unary natural logarithm of args[0].
+public:
+    Log() {
+    }
+    std::string getName() const {
+        return "log";
+    }
+    Id getId() const {
+        return LOG;
+    }
+    int getNumArguments() const {
+        return 1;
+    }
+    Operation* clone() const {
+        return new Log();
+    }
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {  // variables is ignored; the argument is passed to std::log unchecked
+        return std::log(args[0]);  // std::log is the base-e logarithm
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;  // declared only; body is defined elsewhere
+};
+
+class LEPTON_EXPORT Operation::Sin : public Operation {  // Unary sine of args[0].
+public:
+    Sin() {
+    }
+    std::string getName() const {
+        return "sin";
+    }
+    Id getId() const {
+        return SIN;
+    }
+    int getNumArguments() const {
+        return 1;
+    }
+    Operation* clone() const {
+        return new Sin();
+    }
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {  // variables is ignored
+        return std::sin(args[0]);
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;  // declared only; body is defined elsewhere
+};
+
+class LEPTON_EXPORT Operation::Cos : public Operation {  // Unary cosine of args[0].
+public:
+    Cos() {
+    }
+    std::string getName() const {
+        return "cos";
+    }
+    Id getId() const {
+        return COS;
+    }
+    int getNumArguments() const {
+        return 1;
+    }
+    Operation* clone() const {
+        return new Cos();
+    }
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {  // variables is ignored
+        return std::cos(args[0]);
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;  // declared only; body is defined elsewhere
+};
+
+class LEPTON_EXPORT Operation::Sec : public Operation {  // Unary secant of args[0].
+public:
+    Sec() {
+    }
+    std::string getName() const {
+        return "sec";
+    }
+    Id getId() const {
+        return SEC;
+    }
+    int getNumArguments() const {
+        return 1;
+    }
+    Operation* clone() const {
+        return new Sec();
+    }
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {  // variables is ignored
+        return 1.0/std::cos(args[0]);  // computed as the reciprocal of the cosine
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;  // declared only; body is defined elsewhere
+};
+
+class LEPTON_EXPORT Operation::Csc : public Operation {  // Unary cosecant of args[0].
+public:
+    Csc() {
+    }
+    std::string getName() const {
+        return "csc";
+    }
+    Id getId() const {
+        return CSC;
+    }
+    int getNumArguments() const {
+        return 1;
+    }
+    Operation* clone() const {
+        return new Csc();
+    }
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {  // variables is ignored
+        return 1.0/std::sin(args[0]);  // computed as the reciprocal of the sine
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;  // declared only; body is defined elsewhere
+};
+
+class LEPTON_EXPORT Operation::Tan : public Operation {  // Unary tangent of args[0].
+public:
+    Tan() {
+    }
+    std::string getName() const {
+        return "tan";
+    }
+    Id getId() const {
+        return TAN;
+    }
+    int getNumArguments() const {
+        return 1;
+    }
+    Operation* clone() const {
+        return new Tan();
+    }
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {  // variables is ignored
+        return std::tan(args[0]);
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;  // declared only; body is defined elsewhere
+};
+
+class LEPTON_EXPORT Operation::Cot : public Operation {  // Unary cotangent of args[0].
+public:
+    Cot() {
+    }
+    std::string getName() const {
+        return "cot";
+    }
+    Id getId() const {
+        return COT;
+    }
+    int getNumArguments() const {
+        return 1;
+    }
+    Operation* clone() const {
+        return new Cot();
+    }
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {  // variables is ignored
+        return 1.0/std::tan(args[0]);  // computed as the reciprocal of the tangent
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;  // declared only; body is defined elsewhere
+};
+
+class LEPTON_EXPORT Operation::Asin : public Operation {  // Unary inverse sine of args[0].
+public:
+    Asin() {
+    }
+    std::string getName() const {
+        return "asin";
+    }
+    Id getId() const {
+        return ASIN;
+    }
+    int getNumArguments() const {
+        return 1;
+    }
+    Operation* clone() const {
+        return new Asin();
+    }
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {  // variables is ignored; the argument is passed to std::asin unchecked
+        return std::asin(args[0]);
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;  // declared only; body is defined elsewhere
+};
+
+class LEPTON_EXPORT Operation::Acos : public Operation {  // Unary inverse cosine of args[0].
+public:
+    Acos() {
+    }
+    std::string getName() const {
+        return "acos";
+    }
+    Id getId() const {
+        return ACOS;
+    }
+    int getNumArguments() const {
+        return 1;
+    }
+    Operation* clone() const {
+        return new Acos();
+    }
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {  // variables is ignored; the argument is passed to std::acos unchecked
+        return std::acos(args[0]);
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;  // declared only; body is defined elsewhere
+};
+
+class LEPTON_EXPORT Operation::Atan : public Operation {  // Unary inverse tangent of args[0].
+public:
+    Atan() {
+    }
+    std::string getName() const {
+        return "atan";
+    }
+    Id getId() const {
+        return ATAN;
+    }
+    int getNumArguments() const {
+        return 1;
+    }
+    Operation* clone() const {
+        return new Atan();
+    }
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {  // variables is ignored
+        return std::atan(args[0]);
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;  // declared only; body is defined elsewhere
+};
+
+class LEPTON_EXPORT Operation::Atan2 : public Operation {  // Two-argument inverse tangent.
+public:
+    Atan2() {
+    }
+    std::string getName() const {
+        return "atan2";
+    }
+    Id getId() const {
+        return ATAN2;
+    }
+    int getNumArguments() const {
+        return 2;
+    }
+    Operation* clone() const {
+        return new Atan2();
+    }
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {  // variables is ignored
+        return std::atan2(args[0], args[1]);  // args[0] is the y (numerator) argument, args[1] the x argument
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;  // declared only; body is defined elsewhere
+};
+
+class LEPTON_EXPORT Operation::Sinh : public Operation {  // Unary hyperbolic sine of args[0].
+public:
+    Sinh() {
+    }
+    std::string getName() const {
+        return "sinh";
+    }
+    Id getId() const {
+        return SINH;
+    }
+    int getNumArguments() const {
+        return 1;
+    }
+    Operation* clone() const {
+        return new Sinh();
+    }
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {  // variables is ignored
+        return std::sinh(args[0]);
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;  // declared only; body is defined elsewhere
+};
+
+class LEPTON_EXPORT Operation::Cosh : public Operation {  // Unary hyperbolic cosine of args[0].
+public:
+    Cosh() {
+    }
+    std::string getName() const {
+        return "cosh";
+    }
+    Id getId() const {
+        return COSH;
+    }
+    int getNumArguments() const {
+        return 1;
+    }
+    Operation* clone() const {
+        return new Cosh();
+    }
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {  // variables is ignored
+        return std::cosh(args[0]);
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;  // declared only; body is defined elsewhere
+};
+
+class LEPTON_EXPORT Operation::Tanh : public Operation {  // Unary hyperbolic tangent of args[0].
+public:
+    Tanh() {
+    }
+    std::string getName() const {
+        return "tanh";
+    }
+    Id getId() const {
+        return TANH;
+    }
+    int getNumArguments() const {
+        return 1;
+    }
+    Operation* clone() const {
+        return new Tanh();
+    }
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {  // variables is ignored
+        return std::tanh(args[0]);
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;  // declared only; body is defined elsewhere
+};
+
+class LEPTON_EXPORT Operation::Erf : public Operation {  // Unary operation named "erf"; NOTE(review): presumably the error function - the evaluate() body is not in this header
+public:
+    Erf() {
+    }
+    std::string getName() const {
+        return "erf";
+    }
+    Id getId() const {
+        return ERF;
+    }
+    int getNumArguments() const {
+        return 1;
+    }
+    Operation* clone() const {
+        return new Erf();
+    }
+    double evaluate(double* args, const std::map<std::string, double>& variables) const;  // declared only; unlike most operations here, the body is not inline
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;  // declared only; body is defined elsewhere
+};
+
+class LEPTON_EXPORT Operation::Erfc : public Operation {  // Unary operation named "erfc"; NOTE(review): presumably the complementary error function - the evaluate() body is not in this header
+public:
+    Erfc() {
+    }
+    std::string getName() const {
+        return "erfc";
+    }
+    Id getId() const {
+        return ERFC;
+    }
+    int getNumArguments() const {
+        return 1;
+    }
+    Operation* clone() const {
+        return new Erfc();
+    }
+    double evaluate(double* args, const std::map<std::string, double>& variables) const;  // declared only; unlike most operations here, the body is not inline
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;  // declared only; body is defined elsewhere
+};
+
+class LEPTON_EXPORT Operation::Step : public Operation {  // unary step function: 0 below zero, 1 at and above zero
+public:
+    Step() {
+    }
+    std::string getName() const {
+        return "step";
+    }
+    Id getId() const {
+        return STEP;
+    }
+    int getNumArguments() const {
+        return 1;
+    }
+    Operation* clone() const {
+        return new Step();  // heap-allocated copy handed back as a raw pointer
+    }
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        return (args[0] >= 0.0 ? 1.0 : 0.0);  // the boundary value 0.0 maps to 1.0; a NaN argument fails the comparison and maps to 0.0
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;  // declared only; no body in this header section
+};
+
+class LEPTON_EXPORT Operation::Delta : public Operation {  // unary indicator: 1 when the argument is exactly zero, otherwise 0
+public:
+    Delta() {
+    }
+    std::string getName() const {
+        return "delta";
+    }
+    Id getId() const {
+        return DELTA;
+    }
+    int getNumArguments() const {
+        return 1;
+    }
+    Operation* clone() const {
+        return new Delta();  // heap-allocated copy handed back as a raw pointer
+    }
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        return (args[0] == 0.0 ? 1.0 : 0.0);  // exact floating-point comparison, no tolerance
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;  // declared only; no body in this header section
+};
+
+class LEPTON_EXPORT Operation::Square : public Operation {  // unary operation: argument multiplied by itself
+public:
+    Square() {
+    }
+    std::string getName() const {
+        return "square";
+    }
+    Id getId() const {
+        return SQUARE;
+    }
+    int getNumArguments() const {
+        return 1;
+    }
+    Operation* clone() const {
+        return new Square();  // heap-allocated copy handed back as a raw pointer
+    }
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        return args[0]*args[0];  // a single multiplication; variables is not used
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;  // declared only; no body in this header section
+};
+
+class LEPTON_EXPORT Operation::Cube : public Operation {  // unary operation: third power of the argument
+public:
+    Cube() {
+    }
+    std::string getName() const {
+        return "cube";
+    }
+    Id getId() const {
+        return CUBE;
+    }
+    int getNumArguments() const {
+        return 1;
+    }
+    Operation* clone() const {
+        return new Cube();  // heap-allocated copy handed back as a raw pointer
+    }
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        return args[0]*args[0]*args[0];  // two multiplications; variables is not used
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;  // declared only; no body in this header section
+};
+
+class LEPTON_EXPORT Operation::Reciprocal : public Operation {  // unary operation: one divided by the argument
+public:
+    Reciprocal() {
+    }
+    std::string getName() const {
+        return "recip";
+    }
+    Id getId() const {
+        return RECIPROCAL;
+    }
+    int getNumArguments() const {
+        return 1;
+    }
+    Operation* clone() const {
+        return new Reciprocal();  // heap-allocated copy handed back as a raw pointer
+    }
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        return 1.0/args[0];  // no check for a zero argument is made here
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;  // declared only; no body in this header section
+};
+
+class LEPTON_EXPORT Operation::AddConstant : public Operation {  // unary operation: adds a constant stored in the object to its argument
+public:
+    AddConstant(double value) : value(value) {
+    }
+    std::string getName() const {
+        std::stringstream name;
+        name << value << "+";  // the name is the stream-formatted constant followed by "+"
+        return name.str();
+    }
+    Id getId() const {
+        return ADD_CONSTANT;
+    }
+    int getNumArguments() const {
+        return 1;
+    }
+    Operation* clone() const {
+        return new AddConstant(value);  // the copy carries the same constant
+    }
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        return args[0]+value;  // variables is not used
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;  // declared only; no body in this header section
+    double getValue() const {
+        return value;
+    }
+    bool operator!=(const Operation& op) const {
+        const AddConstant* o = dynamic_cast<const AddConstant*>(&op);  // NULL when op is not an AddConstant
+        return (o == NULL || o->value != value);  // unequal if the type differs or the constants differ (exact comparison)
+    }
+private:
+    double value;  // the constant that is added
+};
+
+class LEPTON_EXPORT Operation::MultiplyConstant : public Operation {  // unary operation: multiplies its argument by a constant stored in the object
+public:
+    MultiplyConstant(double value) : value(value) {
+    }
+    std::string getName() const {
+        std::stringstream name;
+        name << value << "*";  // the name is the stream-formatted constant followed by "*"
+        return name.str();
+    }
+    Id getId() const {
+        return MULTIPLY_CONSTANT;
+    }
+    int getNumArguments() const {
+        return 1;
+    }
+    Operation* clone() const {
+        return new MultiplyConstant(value);  // the copy carries the same constant
+    }
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        return args[0]*value;  // variables is not used
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;  // declared only; no body in this header section
+    double getValue() const {
+        return value;
+    }
+    bool operator!=(const Operation& op) const {
+        const MultiplyConstant* o = dynamic_cast<const MultiplyConstant*>(&op);  // NULL when op is not a MultiplyConstant
+        return (o == NULL || o->value != value);  // unequal if the type differs or the constants differ (exact comparison)
+    }
+private:
+    double value;  // the constant multiplier
+};
+
+class LEPTON_EXPORT Operation::PowerConstant : public Operation {  // unary operation: raises its argument to a constant power stored in the object
+public:
+    PowerConstant(double value) : value(value) {
+        intValue = (value >= -2147483647.0 && value <= 2147483647.0 ? (int) value : 0);  // convert only in-range values (assumes int has at least 32 bits): casting NaN or an out-of-range double is undefined, and INT_MIN could not be negated in evaluate()
+        isIntPower = (intValue == value);  // false for fractional, NaN and out-of-range exponents, which then use std::pow
+    }
+    std::string getName() const {
+        std::stringstream name;
+        name << "^" << value;  // the name is "^" followed by the stream-formatted exponent
+        return name.str();
+    }
+    Id getId() const {
+        return POWER_CONSTANT;
+    }
+    int getNumArguments() const {
+        return 1;
+    }
+    Operation* clone() const {
+        return new PowerConstant(value);  // the copy recomputes intValue/isIntPower from the same exponent
+    }
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        if (isIntPower) {
+            // Integer powers can be computed much more quickly by repeated multiplication.
+            // A negative exponent is handled by inverting the base and using the positive exponent.
+            int exponent = intValue;
+            double base = args[0];
+            if (exponent < 0) {
+                exponent = -exponent;  // cannot overflow: the constructor never stores INT_MIN
+                base = 1.0/base;
+            }
+            double result = 1.0;
+            while (exponent != 0) {  // exponentiation by squaring: one pass per bit of the exponent
+                if ((exponent&1) == 1)
+                    result *= base;
+                base *= base;
+                exponent = exponent>>1;
+            }
+            return result;
+        }
+        else
+            return std::pow(args[0], value);
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;  // declared only; no body in this header section
+    double getValue() const {
+        return value;
+    }
+    bool operator!=(const Operation& op) const {
+        const PowerConstant* o = dynamic_cast<const PowerConstant*>(&op);  // NULL when op is not a PowerConstant
+        return (o == NULL || o->value != value);  // unequal if the type differs or the exponents differ (exact comparison)
+    }
+    bool isInfixOperator() const {
+        return true;
+    }
+private:
+    double value;  // the exponent as given
+    int intValue;  // the exponent truncated to int, or 0 when it is outside the safe conversion range
+    bool isIntPower;  // true when intValue represents value exactly
+};
+
+class LEPTON_EXPORT Operation::Min : public Operation {  // binary operation: the smaller of its two arguments
+public:
+    Min() {
+    }
+    std::string getName() const {
+        return "min";
+    }
+    Id getId() const {
+        return MIN;
+    }
+    int getNumArguments() const {
+        return 2;
+    }
+    Operation* clone() const {
+        return new Min();  // heap-allocated copy handed back as a raw pointer
+    }
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        // parens around (std::min) are workaround for horrible microsoft max/min macro trouble
+        return (std::min)(args[0], args[1]);  // reads args[0] and args[1]; variables is not used
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;  // declared only; no body in this header section
+};
+
+class LEPTON_EXPORT Operation::Max : public Operation {  // binary operation: the larger of its two arguments
+public:
+    Max() {
+    }
+    std::string getName() const {
+        return "max";
+    }
+    Id getId() const {
+        return MAX;
+    }
+    int getNumArguments() const {
+        return 2;
+    }
+    Operation* clone() const {
+        return new Max();  // heap-allocated copy handed back as a raw pointer
+    }
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        // parens around (std::max) are workaround for horrible microsoft max/min macro trouble
+        return (std::max)(args[0], args[1]);  // reads args[0] and args[1]; variables is not used
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;  // declared only; no body in this header section
+};
+
+class LEPTON_EXPORT Operation::Abs : public Operation {  // unary operation: absolute value of its argument
+public:
+    Abs() {
+    }
+    std::string getName() const {
+        return "abs";
+    }
+    Id getId() const {
+        return ABS;
+    }
+    int getNumArguments() const {
+        return 1;
+    }
+    Operation* clone() const {
+        return new Abs();  // heap-allocated copy handed back as a raw pointer
+    }
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        return std::abs(args[0]);  // only args[0] is read; variables is not used
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;  // declared only; no body in this header section
+};
+
+class LEPTON_EXPORT Operation::Floor : public Operation {  // unary operation: rounds its argument down via std::floor
+public:
+
+    Floor() {
+    }
+    std::string getName() const {
+        return "floor";
+    }
+    Id getId() const {
+        return FLOOR;
+    }
+    int getNumArguments() const {
+        return 1;
+    }
+    Operation* clone() const {
+        return new Floor();  // heap-allocated copy handed back as a raw pointer
+    }
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        return std::floor(args[0]);  // only args[0] is read; variables is not used
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;  // declared only; no body in this header section
+};
+
+class LEPTON_EXPORT Operation::Ceil : public Operation {  // unary operation: rounds its argument up via std::ceil
+public:
+    Ceil() {
+    }
+    std::string getName() const {
+        return "ceil";
+    }
+    Id getId() const {
+        return CEIL;
+    }
+    int getNumArguments() const {
+        return 1;
+    }
+    Operation* clone() const {
+        return new Ceil();  // heap-allocated copy handed back as a raw pointer
+    }
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        return std::ceil(args[0]);  // only args[0] is read; variables is not used
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;  // declared only; no body in this header section
+};
+
+class LEPTON_EXPORT Operation::Select : public Operation {  // ternary operation: picks the second or third argument depending on the first
+public:
+    Select() {
+    }
+    std::string getName() const {
+        return "select";
+    }
+    Id getId() const {
+        return SELECT;
+    }
+    int getNumArguments() const {
+        return 3;
+    }
+    Operation* clone() const {
+        return new Select();  // heap-allocated copy handed back as a raw pointer
+    }
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        return (args[0] != 0.0 ? args[1] : args[2]);  // any nonzero condition (including NaN, which compares unequal to 0.0) selects args[1]
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;  // declared only; no body in this header section
+};
+
+} // namespace LMP_Lepton
+
+#endif /*LEPTON_OPERATION_H_*/
diff --git a/lib/lepton/include/lepton/ParsedExpression.h b/lib/lepton/include/lepton/ParsedExpression.h
new file mode 100644
index 0000000000..586acb4d2c
--- /dev/null
+++ b/lib/lepton/include/lepton/ParsedExpression.h
@@ -0,0 +1,131 @@
+#ifndef LEPTON_PARSED_EXPRESSION_H_
+#define LEPTON_PARSED_EXPRESSION_H_
+
+/* -------------------------------------------------------------------------- *
+ *                                   Lepton                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the Lepton expression parser originating from              *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2009-2013 Stanford University and the Authors.      *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "ExpressionTreeNode.h"
+#include "windowsIncludes.h"
+#include <map>
+#include <string>
+
+namespace LMP_Lepton {
+
+class CompiledExpression;
+class ExpressionProgram;
+
+/**
+ * This class represents the result of parsing an expression.  It provides methods for working with the
+ * expression in various ways, such as evaluating it, getting the tree representation of the expression, etc.
+ */
+
+class LEPTON_EXPORT ParsedExpression {
+public:
+    /**
+     * Create an uninitialized ParsedExpression.  This exists so that ParsedExpressions can be put in STL containers.
+     * Doing anything with it will produce an exception.
+     */
+    ParsedExpression();
+    /**
+     * Create a ParsedExpression.  Normally you will not call this directly.  Instead, use the Parser class
+     * to parse expressions.
+     */
+    ParsedExpression(const ExpressionTreeNode& rootNode);
+    /**
+     * Get the root node of the expression's abstract syntax tree.
+     */
+    const ExpressionTreeNode& getRootNode() const;
+    /**
+     * Evaluate the expression.  If the expression involves any variables, this method will throw an exception.
+     */
+    double evaluate() const;
+    /**
+     * Evaluate the expression.
+     *
+     * @param variables    a map specifying the values of all variables that appear in the expression.  If any
+     *                     variable appears in the expression but is not included in this map, an exception
+     *                     will be thrown.
+     */
+    double evaluate(const std::map<std::string, double>& variables) const;
+    /**
+     * Create a new ParsedExpression which produces the same result as this one, but is faster to evaluate.
+     */
+    ParsedExpression optimize() const;
+    /**
+     * Create a new ParsedExpression which produces the same result as this one, but is faster to evaluate.
+     *
+     * @param variables    a map specifying values for a subset of variables that appear in the expression.
+     *                     All occurrences of these variables in the expression are replaced with the values
+     *                     specified.
+     */
+    ParsedExpression optimize(const std::map<std::string, double>& variables) const;
+    /**
+     * Create a new ParsedExpression which is the analytic derivative of this expression with respect to a
+     * particular variable.
+     *
+     * @param variable     the variable with respect to which the derivative should be taken
+     */
+    ParsedExpression differentiate(const std::string& variable) const;
+    /**
+     * Create an ExpressionProgram that represents the same calculation as this expression.
+     */
+    ExpressionProgram createProgram() const;
+    /**
+     * Create a CompiledExpression that represents the same calculation as this expression.
+     */
+    CompiledExpression createCompiledExpression() const;
+    /**
+     * Create a new ParsedExpression which is identical to this one, except that the names of some
+     * variables have been changed.
+     *
+     * @param replacements    a map whose keys are the names of variables, and whose values are the
+     *                        new names to replace them with
+     */
+    ParsedExpression renameVariables(const std::map<std::string, std::string>& replacements) const;
+private:  // static helpers that take a tree node explicitly; only their declarations appear in this header
+    static double evaluate(const ExpressionTreeNode& node, const std::map<std::string, double>& variables);
+    static ExpressionTreeNode preevaluateVariables(const ExpressionTreeNode& node, const std::map<std::string, double>& variables);
+    static ExpressionTreeNode precalculateConstantSubexpressions(const ExpressionTreeNode& node);
+    static ExpressionTreeNode substituteSimplerExpression(const ExpressionTreeNode& node);
+    static ExpressionTreeNode differentiate(const ExpressionTreeNode& node, const std::string& variable);
+    static bool isConstant(const ExpressionTreeNode& node);
+    static double getConstantValue(const ExpressionTreeNode& node);
+    static ExpressionTreeNode renameNodeVariables(const ExpressionTreeNode& node, const std::map<std::string, std::string>& replacements);
+    ExpressionTreeNode rootNode;  // stored by value; exposed read-only through getRootNode()
+};
+
+LEPTON_EXPORT std::ostream& operator<<(std::ostream& out, const ExpressionTreeNode& node);  // stream output for a tree node; NOTE(review): std::ostream is not declared by any header this file includes directly — presumably it arrives via ExpressionTreeNode.h, confirm
+
+LEPTON_EXPORT std::ostream& operator<<(std::ostream& out, const ParsedExpression& exp);  // stream output for a whole parsed expression; declared only, no body in this header
+
+} // namespace LMP_Lepton
+
+#endif /*LEPTON_PARSED_EXPRESSION_H_*/
diff --git a/lib/lepton/include/lepton/Parser.h b/lib/lepton/include/lepton/Parser.h
new file mode 100644
index 0000000000..9eefe3f59e
--- /dev/null
+++ b/lib/lepton/include/lepton/Parser.h
@@ -0,0 +1,77 @@
+#ifndef LEPTON_PARSER_H_
+#define LEPTON_PARSER_H_
+
+/* -------------------------------------------------------------------------- *
+ *                                   Lepton                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the Lepton expression parser originating from              *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2009 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "windowsIncludes.h"
+#include <map>
+#include <string>
+#include <vector>
+
+namespace LMP_Lepton {
+
+class CustomFunction;
+class ExpressionTreeNode;
+class Operation;
+class ParsedExpression;
+class ParseToken;
+
+/**
+ * This class provides the main interface for parsing expressions.
+ */
+
+class LEPTON_EXPORT Parser {  // every member is static, so no Parser instance is needed
+public:
+    /**
+     * Parse a mathematical expression and return a representation of it as an abstract syntax tree.
+     */
+    static ParsedExpression parse(const std::string& expression);
+    /**
+     * Parse a mathematical expression and return a representation of it as an abstract syntax tree.
+     *
+     * @param customFunctions   a map specifying user defined functions that may appear in the expression.
+     *                          The keys are function names, and the values are corresponding CustomFunction objects.
+     */
+    static ParsedExpression parse(const std::string& expression, const std::map<std::string, CustomFunction*>& customFunctions);
+private:  // internal parsing helpers; only their declarations appear in this header
+    static std::string trim(const std::string& expression);
+    static std::vector<ParseToken> tokenize(const std::string& expression);
+    static ParseToken getNextToken(const std::string& expression, int start);
+    static ExpressionTreeNode parsePrecedence(const std::vector<ParseToken>& tokens, int& pos, const std::map<std::string, CustomFunction*>& customFunctions,
+            const std::map<std::string, ExpressionTreeNode>& subexpressionDefs, int precedence);  // pos is a non-const reference, so the callee can update the caller's position
+    static Operation* getOperatorOperation(const std::string& name);  // returns a raw Operation pointer; NOTE(review): ownership is not stated here — confirm in Parser.cpp
+    static Operation* getFunctionOperation(const std::string& name, const std::map<std::string, CustomFunction*>& customFunctions);  // returns a raw Operation pointer; NOTE(review): ownership is not stated here — confirm in Parser.cpp
+};
+
+} // namespace LMP_Lepton
+
+#endif /*LEPTON_PARSER_H_*/
diff --git a/lib/lepton/include/lepton/windowsIncludes.h b/lib/lepton/include/lepton/windowsIncludes.h
new file mode 100644
index 0000000000..798229850e
--- /dev/null
+++ b/lib/lepton/include/lepton/windowsIncludes.h
@@ -0,0 +1,41 @@
+#ifndef LEPTON_WINDOW_INCLUDE_H_
+#define LEPTON_WINDOW_INCLUDE_H_
+
+/*
+ * Shared libraries are messy in Visual Studio. We have to distinguish three
+ * cases:
+ *   (1) this header is being used to build the Lepton shared library
+ *       (dllexport)
+ *   (2) this header is being used by a *client* of the Lepton shared
+ *       library (dllimport)
+ *   (3) we are building the Lepton static library, or the client is
+ *       being compiled with the expectation of linking with the
+ *       Lepton static library (nothing special needed)
+ * In the CMake script for building this library, we define one of the symbols
+ *     LEPTON_BUILDING_{SHARED|STATIC}_LIBRARY
+ * Client code normally has no special symbol defined, in which case we'll
+ * assume it wants to use the shared library. However, if the client defines
+ * the symbol LEPTON_USE_STATIC_LIBRARIES we'll suppress the dllimport so
+ * that the client code can be linked with static libraries. Note that
+ * the client symbol is not library dependent, while the library symbols
+ * affect only the Lepton library, meaning that other libraries can
+ * be clients of this one. However, we are assuming all-static or all-shared.
+ */
+
+#ifdef _MSC_VER  // Visual C++ only: silence two warnings and choose the DLL linkage attribute
+    // We don't want to hear about how sprintf is "unsafe".
+    #pragma warning(disable:4996)
+    // Keep MS VC++ quiet about lack of dll export of private members.
+    #pragma warning(disable:4251)
+    #if defined(LEPTON_BUILDING_SHARED_LIBRARY)  // case (1): building the shared library itself
+        #define LEPTON_EXPORT __declspec(dllexport)
+    #elif defined(LEPTON_BUILDING_STATIC_LIBRARY) || defined(LEPTON_USE_STATIC_LIBRARIES)  // case (3): static build or static client
+        #define LEPTON_EXPORT
+    #else  // case (2): no symbol defined, so assume a client of the shared library
+        #define LEPTON_EXPORT __declspec(dllimport)   // i.e., a client of a shared library
+    #endif
+#else  // any other compiler: the macro expands to nothing
+    #define LEPTON_EXPORT // Linux, Mac
+#endif
+
+#endif // LEPTON_WINDOW_INCLUDE_H_
diff --git a/lib/lepton/src/CompiledExpression.cpp b/lib/lepton/src/CompiledExpression.cpp
new file mode 100644
index 0000000000..7805c4674d
--- /dev/null
+++ b/lib/lepton/src/CompiledExpression.cpp
@@ -0,0 +1,418 @@
+/* -------------------------------------------------------------------------- *
+ *                                   Lepton                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the Lepton expression parser originating from              *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2013-2019 Stanford University and the Authors.      *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "lepton/CompiledExpression.h"
+#include "lepton/Operation.h"
+#include "lepton/ParsedExpression.h"
+#include <utility>
+
+using namespace LMP_Lepton;
+using namespace std;
+#ifdef LEPTON_USE_JIT
+    using namespace asmjit;
+#endif
+
+CompiledExpression::CompiledExpression() : jitCode(NULL) { // Creates an empty expression: no steps and no JIT code.
+}
+
+CompiledExpression::CompiledExpression(const ParsedExpression& expression) : jitCode(NULL) {
+    // Optimize defensively (the input may not have been), then flatten the tree into evaluation steps.
+    ParsedExpression optimized = expression.optimize();
+    vector<pair<ExpressionTreeNode, int> > temps;
+    compileExpression(optimized.getRootNode(), temps);
+    int widest = 1; // argValues must hold the arguments of the widest operation (at least one slot).
+    for (size_t step = 0; step < operation.size(); ++step)
+        widest = (operation[step]->getNumArguments() > widest ? operation[step]->getNumArguments() : widest);
+    argValues.resize(widest);
+#ifdef LEPTON_USE_JIT
+    generateJitCode();
+#endif
+}
+
+CompiledExpression::~CompiledExpression() {
+    // This object owns every entry of 'operation'; 'delete' on a null pointer is a no-op.
+    for (size_t step = 0; step < operation.size(); ++step)
+        delete operation[step];
+}
+
+CompiledExpression::CompiledExpression(const CompiledExpression& expression) : jitCode(NULL) {
+    *this = expression; // Copying is delegated to operator=, which clones every operation.
+}
+
+CompiledExpression& CompiledExpression::operator=(const CompiledExpression& expression) {
+    if (this == &expression) return *this; // Self-assignment: nothing to copy, and freeing our operations first would be fatal.
+    for (int i = 0; i < (int) operation.size(); i++)
+        delete operation[i]; // Free the operations owned so far; they used to leak when overwritten.
+    operation.assign(expression.operation.size(), (Operation*) NULL); // Null slots keep the destructor safe if clone() throws.
+    for (int i = 0; i < (int) operation.size(); i++)
+        operation[i] = expression.operation[i]->clone();
+    arguments = expression.arguments; target = expression.target;
+    variableIndices = expression.variableIndices; variableNames = expression.variableNames;
+    workspace.resize(expression.workspace.size()); argValues.resize(expression.argValues.size());
+    setVariableLocations(variablePointers); // Re-derive workspace bindings from this object's own variable pointers.
+    return *this;
+}
+
+void CompiledExpression::compileExpression(const ExpressionTreeNode& node, vector<pair<ExpressionTreeNode, int> >& temps) {
+    // Common subexpressions are compiled only once: nothing to do if an equal node is already in 'temps'.
+    if (findTempIndex(node, temps) != -1)
+        return;
+
+    // Compile the children first, remembering which slot holds the value of each one.
+    const vector<ExpressionTreeNode>& children = node.getChildren();
+    vector<int> childSlots;
+    for (int c = 0; c < (int) children.size(); c++) {
+        compileExpression(children[c], temps);
+        childSlots.push_back(findTempIndex(children[c], temps));
+    }
+
+    // This node's value goes into the next free workspace slot.
+    const int slot = (int) workspace.size();
+    const Operation& op = node.getOperation();
+    if (op.getId() == Operation::VARIABLE) {
+        // A variable needs no evaluation step; only remember where its value is stored.
+        variableIndices[op.getName()] = slot;
+        variableNames.insert(op.getName());
+    }
+    else {
+        const int stepIndex = (int) arguments.size();
+        arguments.push_back(vector<int>());
+        target.push_back(slot);
+        operation.push_back(op.clone());
+        // Consecutive child slots can be passed as a pointer to the first one, so a single
+        // entry is stored for them; otherwise the complete list of slots is kept.
+        bool consecutive = true;
+        for (int c = 1; c < (int) childSlots.size(); c++)
+            consecutive = consecutive && (childSlots[c] == childSlots[c-1]+1);
+        if (childSlots.empty())
+            arguments[stepIndex].push_back(0); // Placeholder: never read, but every step needs one entry.
+        else if (consecutive)
+            arguments[stepIndex].push_back(childSlots[0]);
+        else
+            arguments[stepIndex] = childSlots;
+    }
+    temps.push_back(make_pair(node, slot));
+    workspace.push_back(0.0);
+}
+
+int CompiledExpression::findTempIndex(const ExpressionTreeNode& node, vector<pair<ExpressionTreeNode, int> >& temps) {
+    // Linear search for an already compiled node equal to 'node'; yields its position in 'temps', or -1.
+    int found = -1;
+    for (int i = 0; found == -1 && i < (int) temps.size(); i++)
+        found = (temps[i].first == node ? i : -1);
+    return found;
+}
+
+const set<string>& CompiledExpression::getVariables() const {
+    return variableNames; // Names collected by compileExpression() for every VARIABLE node.
+}
+
+double& CompiledExpression::getVariableReference(const string& name) {
+    map<string, double*>::iterator bound = variablePointers.find(name); // Externally bound locations take precedence.
+    if (bound != variablePointers.end())
+        return *bound->second;
+    map<string, int>::iterator slot = variableIndices.find(name);
+    if (slot != variableIndices.end())
+        return workspace[slot->second]; // Otherwise the value lives in the variable's workspace slot.
+    throw Exception("getVariableReference: Unknown variable '"+name+"'");
+}
+
+void CompiledExpression::setVariableLocations(map<string, double*>& variableLocations) {
+    variablePointers = variableLocations;
+#ifdef LEPTON_USE_JIT
+    // The generated code embeds the variable addresses, so rebuild it (unless nothing has been compiled yet).
+    if (!workspace.empty())
+        generateJitCode();
+#else
+    // Pair each externally stored variable with its workspace slot; evaluate() copies the values in.
+    variablesToCopy.clear();
+    map<string, int>::const_iterator var = variableIndices.begin();
+    for (; var != variableIndices.end(); ++var) {
+        map<string, double*>::iterator location = variablePointers.find(var->first);
+        if (location == variablePointers.end())
+            continue;
+        variablesToCopy.push_back(make_pair(&workspace[var->second], location->second));
+    }
+#endif
+}
+
+double CompiledExpression::evaluate() const {
+#ifdef LEPTON_USE_JIT
+    return jitCode();
+#else
+    // Refresh the workspace copies of externally stored variables.
+    for (int v = 0; v < (int) variablesToCopy.size(); v++)
+        *variablesToCopy[v].first = *variablesToCopy[v].second;
+    for (int step = 0; step < (int) operation.size(); step++) {
+        const vector<int>& slots = arguments[step];
+        double* input;
+        if (slots.size() == 1)
+            input = &workspace[slots[0]]; // A single entry: the arguments are consecutive workspace slots.
+        else {
+            for (int a = 0; a < (int) slots.size(); a++)
+                argValues[a] = workspace[slots[a]]; // Gather scattered arguments into the scratch buffer.
+            input = &argValues[0];
+        }
+        workspace[target[step]] = operation[step]->evaluate(input, dummyVariables);
+    }
+    return workspace[workspace.size()-1];
+#endif
+}
+
+#ifdef LEPTON_USE_JIT
+static double evaluateOperation(Operation* op, double* args) { // Called from generated code for operations without inline code.
+    static map<string, double> dummyVariables; // Never populated here.
+    return op->evaluate(args, dummyVariables);
+}
+
+void CompiledExpression::generateJitCode() { // Builds a function that evaluates all steps and stores it in jitCode.
+    CodeHolder code;
+    code.init(runtime.getCodeInfo());
+    X86Compiler c(&code);
+    c.addFunc(FuncSignature0<double>());
+    vector<X86Xmm> workspaceVar(workspace.size()); // One virtual register per workspace slot.
+    for (int i = 0; i < (int) workspaceVar.size(); i++)
+        workspaceVar[i] = c.newXmmSd();
+    X86Gp argsPointer = c.newIntPtr();
+    c.mov(argsPointer, imm_ptr(&argValues[0])); // Scratch buffer used to pass arguments to evaluateOperation().
+    
+    // Load the arguments into variables.
+    
+    for (set<string>::const_iterator iter = variableNames.begin(); iter != variableNames.end(); ++iter) {
+        map<string, int>::iterator index = variableIndices.find(*iter);
+        X86Gp variablePointer = c.newIntPtr();
+        c.mov(variablePointer, imm_ptr(&getVariableReference(index->first))); // The variable's address is embedded in the code.
+        c.movsd(workspaceVar[index->second], x86::ptr(variablePointer, 0, 0));
+    }
+
+    // Make a list of all constants that will be needed for evaluation.
+    
+    vector<int> operationConstantIndex(operation.size(), -1); // -1: the step uses no constant.
+    for (int step = 0; step < (int) operation.size(); step++) {
+        // Find the constant value (if any) used by this operation.
+        
+        Operation& op = *operation[step];
+        double value;
+        if (op.getId() == Operation::CONSTANT)
+            value = dynamic_cast<Operation::Constant&>(op).getValue();
+        else if (op.getId() == Operation::ADD_CONSTANT)
+            value = dynamic_cast<Operation::AddConstant&>(op).getValue();
+        else if (op.getId() == Operation::MULTIPLY_CONSTANT)
+            value = dynamic_cast<Operation::MultiplyConstant&>(op).getValue();
+        else if (op.getId() == Operation::RECIPROCAL)
+            value = 1.0;
+        else if (op.getId() == Operation::STEP)
+            value = 1.0;
+        else if (op.getId() == Operation::DELTA)
+            value = 1.0;
+        else
+            continue;
+        
+        // See if we already have a variable for this constant.
+        
+        for (int i = 0; i < (int) constants.size(); i++)
+            if (value == constants[i]) {
+                operationConstantIndex[step] = i;
+                break;
+            }
+        if (operationConstantIndex[step] == -1) {
+            operationConstantIndex[step] = constants.size();
+            constants.push_back(value);
+        }
+    }
+    
+    // Load constants into variables.
+    
+    vector<X86Xmm> constantVar(constants.size());
+    if (constants.size() > 0) {
+        X86Gp constantsPointer = c.newIntPtr();
+        c.mov(constantsPointer, imm_ptr(&constants[0]));
+        for (int i = 0; i < (int) constants.size(); i++) {
+            constantVar[i] = c.newXmmSd();
+            c.movsd(constantVar[i], x86::ptr(constantsPointer, 8*i, 0)); // 8*i: byte offset of entry i (assumes 8-byte doubles).
+        }
+    }
+    
+    // Evaluate the operations.
+    
+    for (int step = 0; step < (int) operation.size(); step++) {
+        Operation& op = *operation[step];
+        vector<int> args = arguments[step]; // A copy, because the list may be extended below.
+        if (args.size() == 1) {
+            // One or more sequential arguments.  Fill out the list.
+            
+            for (int i = 1; i < op.getNumArguments(); i++)
+                args.push_back(args[0]+i);
+        }
+        
+        // Generate instructions to execute this operation.
+        
+        switch (op.getId()) {
+            case Operation::CONSTANT:
+                c.movsd(workspaceVar[target[step]], constantVar[operationConstantIndex[step]]);
+                break;
+            case Operation::ADD:
+                c.movsd(workspaceVar[target[step]], workspaceVar[args[0]]);
+                c.addsd(workspaceVar[target[step]], workspaceVar[args[1]]);
+                break;
+            case Operation::SUBTRACT:
+                c.movsd(workspaceVar[target[step]], workspaceVar[args[0]]);
+                c.subsd(workspaceVar[target[step]], workspaceVar[args[1]]);
+                break;
+            case Operation::MULTIPLY:
+                c.movsd(workspaceVar[target[step]], workspaceVar[args[0]]);
+                c.mulsd(workspaceVar[target[step]], workspaceVar[args[1]]);
+                break;
+            case Operation::DIVIDE:
+                c.movsd(workspaceVar[target[step]], workspaceVar[args[0]]);
+                c.divsd(workspaceVar[target[step]], workspaceVar[args[1]]);
+                break;
+            case Operation::POWER:
+                generateTwoArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]], pow);
+                break;
+            case Operation::NEGATE:
+                c.xorps(workspaceVar[target[step]], workspaceVar[target[step]]); // Zero the target, then compute 0 - x.
+                c.subsd(workspaceVar[target[step]], workspaceVar[args[0]]);
+                break;
+            case Operation::SQRT:
+                c.sqrtsd(workspaceVar[target[step]], workspaceVar[args[0]]);
+                break;
+            case Operation::EXP:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], exp);
+                break;
+            case Operation::LOG:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], log);
+                break;
+            case Operation::SIN:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], sin);
+                break;
+            case Operation::COS:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], cos);
+                break;
+            case Operation::TAN:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], tan);
+                break;
+            case Operation::ASIN:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], asin);
+                break;
+            case Operation::ACOS:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], acos);
+                break;
+            case Operation::ATAN:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], atan);
+                break;
+            case Operation::ATAN2:
+                generateTwoArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]], atan2);
+                break;
+            case Operation::SINH:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], sinh);
+                break;
+            case Operation::COSH:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], cosh);
+                break;
+            case Operation::TANH:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], tanh);
+                break;
+            case Operation::STEP:
+                c.xorps(workspaceVar[target[step]], workspaceVar[target[step]]);
+                c.cmpsd(workspaceVar[target[step]], workspaceVar[args[0]], imm(18)); // Comparison mode is _CMP_LE_OQ = 18
+                c.andps(workspaceVar[target[step]], constantVar[operationConstantIndex[step]]); // Mask AND 1.0 gives 1.0 or 0.0.
+                break;
+            case Operation::DELTA:
+                c.xorps(workspaceVar[target[step]], workspaceVar[target[step]]);
+                c.cmpsd(workspaceVar[target[step]], workspaceVar[args[0]], imm(16)); // Comparison mode is _CMP_EQ_OS = 16
+                c.andps(workspaceVar[target[step]], constantVar[operationConstantIndex[step]]); // Mask AND 1.0 gives 1.0 or 0.0.
+                break;
+            case Operation::SQUARE:
+                c.movsd(workspaceVar[target[step]], workspaceVar[args[0]]);
+                c.mulsd(workspaceVar[target[step]], workspaceVar[args[0]]);
+                break;
+            case Operation::CUBE:
+                c.movsd(workspaceVar[target[step]], workspaceVar[args[0]]);
+                c.mulsd(workspaceVar[target[step]], workspaceVar[args[0]]);
+                c.mulsd(workspaceVar[target[step]], workspaceVar[args[0]]);
+                break;
+            case Operation::RECIPROCAL:
+                c.movsd(workspaceVar[target[step]], constantVar[operationConstantIndex[step]]);
+                c.divsd(workspaceVar[target[step]], workspaceVar[args[0]]);
+                break;
+            case Operation::ADD_CONSTANT:
+                c.movsd(workspaceVar[target[step]], workspaceVar[args[0]]);
+                c.addsd(workspaceVar[target[step]], constantVar[operationConstantIndex[step]]);
+                break;
+            case Operation::MULTIPLY_CONSTANT:
+                c.movsd(workspaceVar[target[step]], workspaceVar[args[0]]);
+                c.mulsd(workspaceVar[target[step]], constantVar[operationConstantIndex[step]]);
+                break;
+            case Operation::ABS:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], fabs);
+                break;
+            case Operation::FLOOR:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], floor);
+                break;
+            case Operation::CEIL:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], ceil);
+                break;
+            default:
+                // Just invoke evaluateOperation().
+                
+                for (int i = 0; i < (int) args.size(); i++)
+                    c.movsd(x86::ptr(argsPointer, 8*i, 0), workspaceVar[args[i]]); // Store the arguments in argValues.
+                X86Gp fn = c.newIntPtr();
+                c.mov(fn, imm_ptr((void*) evaluateOperation));
+                CCFuncCall* call = c.call(fn, FuncSignature2<double, Operation*, double*>());
+                call->setArg(0, imm_ptr(&op));
+                call->setArg(1, imm_ptr(&argValues[0]));
+                call->setRet(0, workspaceVar[target[step]]);
+        }
+    }
+    c.ret(workspaceVar[workspace.size()-1]); // The result is the value of the last workspace slot.
+    c.endFunc();
+    c.finalize();
+    runtime.add(&jitCode, &code);
+}
+
+void CompiledExpression::generateSingleArgCall(X86Compiler& c, X86Xmm& dest, X86Xmm& arg, double (*function)(double)) {
+    X86Gp address = c.newIntPtr(); // Receives the address of the function to call.
+    c.mov(address, imm_ptr((void*) function));
+    CCFuncCall* invocation = c.call(address, FuncSignature1<double, double>());
+    invocation->setArg(0, arg);
+    invocation->setRet(0, dest);
+}
+
+void CompiledExpression::generateTwoArgCall(X86Compiler& c, X86Xmm& dest, X86Xmm& arg1, X86Xmm& arg2, double (*function)(double, double)) {
+    X86Gp address = c.newIntPtr(); // Receives the address of the function to call.
+    c.mov(address, imm_ptr((void*) function));
+    CCFuncCall* invocation = c.call(address, FuncSignature2<double, double, double>());
+    invocation->setArg(0, arg1);
+    invocation->setArg(1, arg2);
+    invocation->setRet(0, dest);
+}
+#endif
diff --git a/lib/lepton/src/ExpressionProgram.cpp b/lib/lepton/src/ExpressionProgram.cpp
new file mode 100644
index 0000000000..74c545287b
--- /dev/null
+++ b/lib/lepton/src/ExpressionProgram.cpp
@@ -0,0 +1,110 @@
+/* -------------------------------------------------------------------------- *
+ *                                   Lepton                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the Lepton expression parser originating from              *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2009-2018 Stanford University and the Authors.      *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "lepton/ExpressionProgram.h"
+#include "lepton/Operation.h"
+#include "lepton/ParsedExpression.h"
+
+using namespace LMP_Lepton;
+using namespace std;
+
+ExpressionProgram::ExpressionProgram() : maxArgs(0), stackSize(0) { // Creates a program without operations.
+}
+
+ExpressionProgram::ExpressionProgram(const ParsedExpression& expression) : maxArgs(0), stackSize(0) {
+    buildProgram(expression.getRootNode());
+    // Simulate the stack: every operation pops its arguments and pushes one result.
+    // Record the deepest stack and the largest argument count seen along the way.
+    int depth = 0;
+    for (size_t i = 0; i < operations.size(); ++i) {
+        const int consumed = operations[i]->getNumArguments();
+        maxArgs = (consumed > maxArgs ? consumed : maxArgs);
+        depth -= consumed-1;
+        stackSize = (depth > stackSize ? depth : stackSize);
+    }
+}
+
+ExpressionProgram::~ExpressionProgram() {
+    for (size_t op = 0; op < operations.size(); ++op)
+        delete operations[op]; // The program owns its operations.
+}
+
+ExpressionProgram::ExpressionProgram(const ExpressionProgram& program) {
+    *this = program; // operator= sets maxArgs and stackSize and clones every operation.
+}
+
+ExpressionProgram& ExpressionProgram::operator=(const ExpressionProgram& program) {
+    if (this == &program) return *this; // Self-assignment: deleting our operations before cloning them would be fatal.
+    for (int i = 0; i < (int) operations.size(); i++) delete operations[i]; // Previously these were overwritten and leaked.
+    operations.assign(program.operations.size(), (Operation*) NULL); // Null slots keep the destructor safe if clone() throws.
+    for (int i = 0; i < (int) operations.size(); i++) operations[i] = program.operations[i]->clone();
+    maxArgs = program.maxArgs; stackSize = program.stackSize;
+    return *this;
+}
+
+void ExpressionProgram::buildProgram(const ExpressionTreeNode& node) {
+    const vector<ExpressionTreeNode>& children = node.getChildren();
+    for (size_t remaining = children.size(); remaining > 0; remaining--)
+        buildProgram(children[remaining-1]); // Children are emitted last to first, followed by this node's operation.
+    operations.push_back(node.getOperation().clone());
+}
+
+int ExpressionProgram::getNumOperations() const {
+    return (int) operations.size(); // Number of operations in the program.
+}
+
+const Operation& ExpressionProgram::getOperation(int index) const {
+    return *operations[index]; // No bounds check: 'index' must be in [0, getNumOperations()).
+}
+
+void ExpressionProgram::setOperation(int index, Operation* operation) {
+    delete operations[index]; // The replaced operation is destroyed.
+    operations[index] = operation; // The program now owns 'operation' (it is deleted in the destructor).
+}
+
+int ExpressionProgram::getStackSize() const {
+    return stackSize; // Maximum stack depth computed when the program was built.
+}
+
+double ExpressionProgram::evaluate() const {
+    return evaluate(map<string, double>()); // Evaluate with an empty set of variable values.
+}
+
+double ExpressionProgram::evaluate(const std::map<std::string, double>& variables) const {
+    vector<double> stack(stackSize+1); // The stack grows downward from the end of this buffer.
+    int top = stackSize;
+    for (size_t i = 0; i < operations.size(); ++i) {
+        const int consumed = operations[i]->getNumArguments();
+        const double value = operations[i]->evaluate(&stack[top], variables); // Arguments start at the current top.
+        top += consumed-1; // Pop the arguments and make room for the result.
+        stack[top] = value;
+    }
+    return stack[stackSize-1];
+}
diff --git a/lib/lepton/src/ExpressionTreeNode.cpp b/lib/lepton/src/ExpressionTreeNode.cpp
new file mode 100644
index 0000000000..e4fbbc6f50
--- /dev/null
+++ b/lib/lepton/src/ExpressionTreeNode.cpp
@@ -0,0 +1,107 @@
+/* -------------------------------------------------------------------------- *
+ *                                   Lepton                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the Lepton expression parser originating from              *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2009-2015 Stanford University and the Authors.      *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "lepton/ExpressionTreeNode.h"
+#include "lepton/Exception.h"
+#include "lepton/Operation.h"
+
+using namespace LMP_Lepton;
+using namespace std;
+
+ExpressionTreeNode::ExpressionTreeNode(Operation* operation, const vector<ExpressionTreeNode>& children) : operation(operation), children(children) {
+    if (operation->getNumArguments() != children.size()) // The operation must take exactly as many arguments as there are children.
+        throw Exception("wrong number of arguments to function: "+operation->getName());
+}
+
+ExpressionTreeNode::ExpressionTreeNode(Operation* operation, const ExpressionTreeNode& child1, const ExpressionTreeNode& child2) : operation(operation) {
+    children.push_back(child1); // The children are stored as copies.
+    children.push_back(child2);
+    if (operation->getNumArguments() != children.size()) // A two-child node requires a two-argument operation.
+        throw Exception("wrong number of arguments to function: "+operation->getName());
+}
+
+ExpressionTreeNode::ExpressionTreeNode(Operation* operation, const ExpressionTreeNode& child) : operation(operation) {
+    children.push_back(child); // The child is stored as a copy.
+    if (operation->getNumArguments() != children.size()) // A one-child node requires a one-argument operation.
+        throw Exception("wrong number of arguments to function: "+operation->getName());
+}
+
+ExpressionTreeNode::ExpressionTreeNode(Operation* operation) : operation(operation) {
+    if (operation->getNumArguments() != children.size()) // A leaf node requires an operation without arguments.
+        throw Exception("wrong number of arguments to function: "+operation->getName());
+}
+
+ExpressionTreeNode::ExpressionTreeNode(const ExpressionTreeNode& node) : operation(node.operation == NULL ? NULL : node.operation->clone()), children(node.getChildren()) {
+} // Deep copy: the operation is cloned (or stays NULL) and the children are copied.
+
+ExpressionTreeNode::ExpressionTreeNode() : operation(NULL) { // Empty node: getOperation() must not be called on it.
+}
+
+ExpressionTreeNode::~ExpressionTreeNode() {
+    // The node owns its operation; 'delete' on a null pointer is a no-op.
+    delete operation;
+}
+
+bool ExpressionTreeNode::operator!=(const ExpressionTreeNode& node) const {
+    if (node.getOperation() != getOperation())
+        return true;
+    const vector<ExpressionTreeNode>& mine = getChildren();
+    const vector<ExpressionTreeNode>& theirs = node.getChildren();
+    if (getOperation().isSymmetric() && mine.size() == 2) { // Operands of a symmetric operation may match in either order.
+        if (mine[0] == theirs[0] && mine[1] == theirs[1])
+            return false;
+        return !(mine[0] == theirs[1] && mine[1] == theirs[0]);
+    }
+    for (size_t i = 0; i < mine.size(); ++i)
+        if (mine[i] != theirs[i])
+            return true;
+    return false;
+}
+
+bool ExpressionTreeNode::operator==(const ExpressionTreeNode& node) const {
+    return !(*this != node); // Equality is defined as the negation of operator!=.
+}
+
+ExpressionTreeNode& ExpressionTreeNode::operator=(const ExpressionTreeNode& node) {
+    Operation* replacement = (node.operation == NULL ? NULL : node.operation->clone()); // Clone first: safe for self-assignment and for empty nodes.
+    delete operation; // Deleting a null pointer is a no-op.
+    operation = replacement;
+    children = node.getChildren();
+    return *this;
+}
+
+const Operation& ExpressionTreeNode::getOperation() const {
+    return *operation; // Dereferences without a check: undefined for a node whose operation is NULL.
+}
+
+const vector<ExpressionTreeNode>& ExpressionTreeNode::getChildren() const {
+    return children; // Child nodes in argument order.
+}
diff --git a/lib/lepton/src/MSVC_erfc.h b/lib/lepton/src/MSVC_erfc.h
new file mode 100644
index 0000000000..2c6b619e89
--- /dev/null
+++ b/lib/lepton/src/MSVC_erfc.h
@@ -0,0 +1,91 @@
+#ifndef LEPTON_MSVC_ERFC_H_
+#define LEPTON_MSVC_ERFC_H_
+
+/*
+ * Up to version 11 (VC++ 2012), Microsoft does not support the
+ * standard C99 erf() and erfc() functions so we have to fake them here. 
+ * These were added in version 12 (VC++ 2013), which sets _MSC_VER=1800
+ * (VC11 has _MSC_VER=1700).
+ */
+
+#if defined(_MSC_VER) || defined(__MINGW32__)
+#if !defined(M_PI)
+#define M_PI 3.14159265358979323846264338327950288
+#endif
+#endif
+
+#if defined(_MSC_VER)
+#if _MSC_VER <= 1700 // 1700 is VC11, 1800 is VC12 
+/***************************
+*   erf.cpp
+*   author:  Steve Strand
+*   written: 29-Jan-04
+***************************/
+
+#include <cmath>
+
+static const double rel_error= 1E-12;        //calculate 12 significant figures
+//you can adjust rel_error to trade off between accuracy and speed
+//but don't ask for > 15 figures (assuming usual 52 bit mantissa in a double)
+
+static double erfc(double x);
+
+static double erf(double x)
+//erf(x) = 2/sqrt(pi)*integral(exp(-t^2),t,0,x)
+//       = 2/sqrt(pi)*[x - x^3/(3*1!) + x^5/(5*2!) - x^7/(7*3!) + ...]
+//       = 1-erfc(x)
+{
+    static const double two_sqrtpi=  1.128379167095512574;        // 2/sqrt(pi)
+    if (fabs(x) > 2.2) {
+        return 1.0 - erfc(x);        //use continued fraction when fabs(x) > 2.2
+    }
+    double sum= x, term= x, xsqr= x*x;
+    int j= 1;
+    do {                             //each pass adds one negative and one positive series term
+        term*= xsqr/j;
+        sum-= term/(2*j+1);
+        ++j;
+        term*= xsqr/j;
+        sum+= term/(2*j+1);
+        ++j;
+    } while (fabs(term/sum) > rel_error);   //take fabs of the ratio: sum is negative for x < 0, which used to end the loop after one pass
+    return two_sqrtpi*sum;
+}
+
+
+static double erfc(double x)
+//erfc(x) = 2/sqrt(pi)*integral(exp(-t^2),t,x,inf)
+//        = exp(-x^2)/sqrt(pi) * [1/x+ (1/2)/x+ (2/2)/x+ (3/2)/x+ (4/2)/x+ ...]
+//        = 1-erf(x)
+//expression inside [] is a continued fraction so '+' means add to denominator only
+{
+    static const double one_sqrtpi=  0.564189583547756287;        // 1/sqrt(pi)
+    if (fabs(x) < 2.2) {
+        return 1.0 - erf(x);        //use series when fabs(x) < 2.2
+    }
+    // Don't look for x==0 here!
+    if (x < 0) {               //continued fraction only valid for x>0
+        return 2.0 - erfc(-x);      //uses erfc(-x) = 2 - erfc(x)
+    }
+    double a=1, b=x;                //last two convergent numerators
+    double c=x, d=x*x+0.5;          //last two convergent denominators
+    double q1, q2= b/d;             //last two convergents (a/c and b/d)
+    double n= 1.0, t;
+    do {
+        t= a*n+b*x;                 //next numerator from the previous two
+        a= b;
+        b= t;
+        t= c*n+d*x;                 //next denominator from the previous two
+        c= d;
+        d= t;
+        n+= 0.5;
+        q1= q2;
+        q2= b/d;
+      } while (fabs(q1-q2)/q2 > rel_error);   //stop when successive convergents agree to rel_error
+    return one_sqrtpi*exp(-x*x)*q2;
+}
+
+#endif // _MSC_VER <= 1700
+#endif // _MSC_VER
+
+#endif // LEPTON_MSVC_ERFC_H_
diff --git a/lib/lepton/src/Operation.cpp b/lib/lepton/src/Operation.cpp
new file mode 100644
index 0000000000..512f5db321
--- /dev/null
+++ b/lib/lepton/src/Operation.cpp
@@ -0,0 +1,345 @@
+
+/* -------------------------------------------------------------------------- *
+ *                                   Lepton                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the Lepton expression parser originating from              *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2009-2019 Stanford University and the Authors.      *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "lepton/Operation.h"
+#include "lepton/ExpressionTreeNode.h"
+#include "MSVC_erfc.h"
+
+using namespace LMP_Lepton;
+using namespace std;
+
+double Operation::Erf::evaluate(double* args, const map<string, double>& variables) const {   // erf of the single argument args[0]; `variables` is not used
+    return erf(args[0]);   // NOTE(review): presumably the MSVC_erfc.h fallback on old MSVC and the <cmath> function elsewhere - confirm
+}
+
+double Operation::Erfc::evaluate(double* args, const map<string, double>& variables) const {   // erfc of the single argument args[0]; `variables` is not used
+    return erfc(args[0]);   // NOTE(review): presumably the MSVC_erfc.h fallback on old MSVC and the <cmath> function elsewhere - confirm
+}
+
+ExpressionTreeNode Operation::Constant::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {   // the derivative of a constant is the constant 0
+    return ExpressionTreeNode(new Operation::Constant(0.0));
+}
+
+ExpressionTreeNode Operation::Variable::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d(name)/d(variable) is 1 for this very variable and 0 for any other one.
+    const double slope = (variable == name) ? 1.0 : 0.0;
+    return ExpressionTreeNode(new Operation::Constant(slope));
+}
+
+ExpressionTreeNode Operation::Custom::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {   // chain rule over all arguments of the custom function
+    if (function->getNumArguments() == 0)   // no arguments: nothing to differentiate, result is the constant 0
+        return ExpressionTreeNode(new Operation::Constant(0.0));
+    ExpressionTreeNode result = ExpressionTreeNode(new Operation::Multiply(), ExpressionTreeNode(new Operation::Custom(*this, 0), children), childDerivs[0]);   // term for argument 0: Custom(*this, 0)(children) * childDerivs[0]
+    for (int i = 1; i < getNumArguments(); i++) {   // accumulate one product term per remaining argument
+        result = ExpressionTreeNode(new Operation::Add(),
+                                    result,
+                                    ExpressionTreeNode(new Operation::Multiply(), ExpressionTreeNode(new Operation::Custom(*this, i), children), childDerivs[i]));   // NOTE(review): Custom(*this, i) presumably denotes the partial derivative w.r.t. argument i - confirm in Operation.h
+    }
+    return result;
+}
+
+ExpressionTreeNode Operation::Add::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {   // sum rule: (u + v)' = u' + v'
+    return ExpressionTreeNode(new Operation::Add(), childDerivs[0], childDerivs[1]);
+}
+
+ExpressionTreeNode Operation::Subtract::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {   // difference rule: (u - v)' = u' - v'
+    return ExpressionTreeNode(new Operation::Subtract(), childDerivs[0], childDerivs[1]);
+}
+
+ExpressionTreeNode Operation::Multiply::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    ExpressionTreeNode uDv(new Operation::Multiply(), children[0], childDerivs[1]);   // u * v'
+    ExpressionTreeNode vDu(new Operation::Multiply(), children[1], childDerivs[0]);   // v * u'
+    return ExpressionTreeNode(new Operation::Add(), uDv, vDu);   // product rule: (u*v)' = u*v' + v*u'
+}
+
+ExpressionTreeNode Operation::Divide::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // Quotient rule: (u/v)' = (v*u' - u*v') / v^2
+    ExpressionTreeNode vDu(new Operation::Multiply(), children[1], childDerivs[0]);
+    ExpressionTreeNode uDv(new Operation::Multiply(), children[0], childDerivs[1]);
+    ExpressionTreeNode numerator(new Operation::Subtract(), vDu, uDv);
+    return ExpressionTreeNode(new Operation::Divide(), numerator, ExpressionTreeNode(new Operation::Square(), children[1]));
+}
+
+ExpressionTreeNode Operation::Power::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // General power rule: (u^v)' = v*u^(v-1)*u' + log(u)*u^v*v'
+    const ExpressionTreeNode& base = children[0];
+    const ExpressionTreeNode& exponent = children[1];
+    ExpressionTreeNode exponentMinusOne(new Operation::AddConstant(-1.0), exponent);
+    ExpressionTreeNode reducedPower(new Operation::Power(), base, exponentMinusOne);
+    ExpressionTreeNode baseFactor(new Operation::Multiply(), exponent, reducedPower);
+    ExpressionTreeNode baseTerm(new Operation::Multiply(), baseFactor, childDerivs[0]);
+    ExpressionTreeNode logBase(new Operation::Log(), base);
+    ExpressionTreeNode fullPower(new Operation::Power(), base, exponent);
+    ExpressionTreeNode exponentFactor(new Operation::Multiply(), logBase, fullPower);
+    ExpressionTreeNode exponentTerm(new Operation::Multiply(), exponentFactor, childDerivs[1]);
+    return ExpressionTreeNode(new Operation::Add(), baseTerm, exponentTerm);
+}
+
+ExpressionTreeNode Operation::Negate::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {   // (-u)' = -(u')
+    return ExpressionTreeNode(new Operation::Negate(), childDerivs[0]);
+}
+
+ExpressionTreeNode Operation::Sqrt::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dx sqrt(u) = 0.5 * (1/sqrt(u)) * u'
+    ExpressionTreeNode root(new Operation::Sqrt(), children[0]);
+    ExpressionTreeNode inverseRoot(new Operation::Reciprocal(), root);
+    ExpressionTreeNode halfInverseRoot(new Operation::MultiplyConstant(0.5), inverseRoot);
+    return ExpressionTreeNode(new Operation::Multiply(), halfInverseRoot, childDerivs[0]);
+}
+
+ExpressionTreeNode Operation::Exp::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dx exp(u) = exp(u) * u'
+    ExpressionTreeNode expU(new Operation::Exp(), children[0]);
+    return ExpressionTreeNode(new Operation::Multiply(), expU, childDerivs[0]);
+}
+
+ExpressionTreeNode Operation::Log::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dx log(u) = (1/u) * u'
+    ExpressionTreeNode inverseU(new Operation::Reciprocal(), children[0]);
+    return ExpressionTreeNode(new Operation::Multiply(), inverseU, childDerivs[0]);
+}
+
+ExpressionTreeNode Operation::Sin::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dx sin(u) = cos(u) * u'
+    ExpressionTreeNode cosU(new Operation::Cos(), children[0]);
+    return ExpressionTreeNode(new Operation::Multiply(), cosU, childDerivs[0]);
+}
+
+ExpressionTreeNode Operation::Cos::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dx cos(u) = -sin(u) * u'
+    ExpressionTreeNode sinU(new Operation::Sin(), children[0]);
+    ExpressionTreeNode minusSinU(new Operation::Negate(), sinU);
+    return ExpressionTreeNode(new Operation::Multiply(), minusSinU, childDerivs[0]);
+}
+
+ExpressionTreeNode Operation::Sec::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dx sec(u) = sec(u) * tan(u) * u'
+    ExpressionTreeNode secU(new Operation::Sec(), children[0]);
+    ExpressionTreeNode tanU(new Operation::Tan(), children[0]);
+    ExpressionTreeNode secTanU(new Operation::Multiply(), secU, tanU);
+    return ExpressionTreeNode(new Operation::Multiply(), secTanU, childDerivs[0]);
+}
+
+ExpressionTreeNode Operation::Csc::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dx csc(u) = -(csc(u) * cot(u)) * u'
+    ExpressionTreeNode cscU(new Operation::Csc(), children[0]);
+    ExpressionTreeNode cotU(new Operation::Cot(), children[0]);
+    ExpressionTreeNode cscCotU(new Operation::Multiply(), cscU, cotU);
+    ExpressionTreeNode minusCscCotU(new Operation::Negate(), cscCotU);
+    return ExpressionTreeNode(new Operation::Multiply(), minusCscCotU, childDerivs[0]);
+}
+
+ExpressionTreeNode Operation::Tan::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dx tan(u) = sec(u)^2 * u'
+    ExpressionTreeNode secU(new Operation::Sec(), children[0]);
+    ExpressionTreeNode secSquared(new Operation::Square(), secU);
+    return ExpressionTreeNode(new Operation::Multiply(), secSquared, childDerivs[0]);
+}
+
+ExpressionTreeNode Operation::Cot::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dx cot(u) = -csc(u)^2 * u'
+    ExpressionTreeNode cscU(new Operation::Csc(), children[0]);
+    ExpressionTreeNode cscSquared(new Operation::Square(), cscU);
+    ExpressionTreeNode minusCscSquared(new Operation::Negate(), cscSquared);
+    return ExpressionTreeNode(new Operation::Multiply(), minusCscSquared, childDerivs[0]);
+}
+
+ExpressionTreeNode Operation::Asin::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dx asin(u) = (1 / sqrt(1 - u^2)) * u'
+    ExpressionTreeNode one(new Operation::Constant(1.0));
+    ExpressionTreeNode uSquared(new Operation::Square(), children[0]);
+    ExpressionTreeNode oneMinusUSquared(new Operation::Subtract(), one, uSquared);
+    ExpressionTreeNode root(new Operation::Sqrt(), oneMinusUSquared);
+    ExpressionTreeNode inverseRoot(new Operation::Reciprocal(), root);
+    return ExpressionTreeNode(new Operation::Multiply(), inverseRoot, childDerivs[0]);
+}
+
+ExpressionTreeNode Operation::Acos::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dx acos(u) = -(1 / sqrt(1 - u^2)) * u'
+    ExpressionTreeNode one(new Operation::Constant(1.0));
+    ExpressionTreeNode uSquared(new Operation::Square(), children[0]);
+    ExpressionTreeNode oneMinusUSquared(new Operation::Subtract(), one, uSquared);
+    ExpressionTreeNode root(new Operation::Sqrt(), oneMinusUSquared);
+    ExpressionTreeNode inverseRoot(new Operation::Reciprocal(), root);
+    ExpressionTreeNode minusInverseRoot(new Operation::Negate(), inverseRoot);
+    return ExpressionTreeNode(new Operation::Multiply(), minusInverseRoot, childDerivs[0]);
+}
+
+ExpressionTreeNode Operation::Atan::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dx atan(u) = (1 / (1 + u^2)) * u'
+    ExpressionTreeNode uSquared(new Operation::Square(), children[0]);
+    ExpressionTreeNode onePlusUSquared(new Operation::AddConstant(1.0), uSquared);
+    ExpressionTreeNode inverse(new Operation::Reciprocal(), onePlusUSquared);
+    return ExpressionTreeNode(new Operation::Multiply(), inverse, childDerivs[0]);
+}
+
+ExpressionTreeNode Operation::Atan2::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // atan2(u, v)' = (v*u' - u*v') / (u^2 + v^2), with u = children[0] and v = children[1]
+    ExpressionTreeNode vDu(new Operation::Multiply(), children[1], childDerivs[0]);
+    ExpressionTreeNode uDv(new Operation::Multiply(), children[0], childDerivs[1]);
+    ExpressionTreeNode numerator(new Operation::Subtract(), vDu, uDv);
+    ExpressionTreeNode uSquared(new Operation::Square(), children[0]);
+    ExpressionTreeNode vSquared(new Operation::Square(), children[1]);
+    return ExpressionTreeNode(new Operation::Divide(), numerator, ExpressionTreeNode(new Operation::Add(), uSquared, vSquared));
+}
+
+ExpressionTreeNode Operation::Sinh::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dx sinh(u) = cosh(u) * u'
+    ExpressionTreeNode coshU(new Operation::Cosh(), children[0]);
+    const ExpressionTreeNode& du = childDerivs[0];
+    return ExpressionTreeNode(new Operation::Multiply(), coshU, du);
+}
+
+ExpressionTreeNode Operation::Cosh::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dx cosh(u) = sinh(u) * u'
+    ExpressionTreeNode sinhU(new Operation::Sinh(), children[0]);
+    const ExpressionTreeNode& du = childDerivs[0];
+    return ExpressionTreeNode(new Operation::Multiply(), sinhU, du);
+}
+
+ExpressionTreeNode Operation::Tanh::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dx tanh(u) = (1 - tanh(u)^2) * u'
+    ExpressionTreeNode one(new Operation::Constant(1.0));
+    ExpressionTreeNode tanhU(new Operation::Tanh(), children[0]);
+    ExpressionTreeNode tanhSquared(new Operation::Square(), tanhU);
+    ExpressionTreeNode oneMinusTanhSquared(new Operation::Subtract(), one, tanhSquared);
+    return ExpressionTreeNode(new Operation::Multiply(), oneMinusTanhSquared, childDerivs[0]);
+}
+
+ExpressionTreeNode Operation::Erf::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dx erf(u) = (2/sqrt(pi)) * exp(-u^2) * u'
+    ExpressionTreeNode prefactor(new Operation::Constant(2.0/sqrt(M_PI)));
+    ExpressionTreeNode uSquared(new Operation::Square(), children[0]);
+    ExpressionTreeNode minusUSquared(new Operation::Negate(), uSquared);
+    ExpressionTreeNode gaussian(new Operation::Exp(), minusUSquared);
+    ExpressionTreeNode scaledGaussian(new Operation::Multiply(), prefactor, gaussian);
+    return ExpressionTreeNode(new Operation::Multiply(), scaledGaussian, childDerivs[0]);
+}
+
+ExpressionTreeNode Operation::Erfc::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dx erfc(u) = (-2/sqrt(pi)) * exp(-u^2) * u'
+    ExpressionTreeNode prefactor(new Operation::Constant(-2.0/sqrt(M_PI)));
+    ExpressionTreeNode uSquared(new Operation::Square(), children[0]);
+    ExpressionTreeNode minusUSquared(new Operation::Negate(), uSquared);
+    ExpressionTreeNode gaussian(new Operation::Exp(), minusUSquared);
+    ExpressionTreeNode scaledGaussian(new Operation::Multiply(), prefactor, gaussian);
+    return ExpressionTreeNode(new Operation::Multiply(), scaledGaussian, childDerivs[0]);
+}
+
+ExpressionTreeNode Operation::Step::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {   // always yields the constant 0, regardless of the argument or its derivative
+    return ExpressionTreeNode(new Operation::Constant(0.0));
+}
+
+ExpressionTreeNode Operation::Delta::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {   // always yields the constant 0, regardless of the argument or its derivative
+    return ExpressionTreeNode(new Operation::Constant(0.0));
+}
+
+ExpressionTreeNode Operation::Square::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dx u^2 = (2*u) * u'
+    ExpressionTreeNode twoU(new Operation::MultiplyConstant(2.0), children[0]);
+    const ExpressionTreeNode& du = childDerivs[0];
+    return ExpressionTreeNode(new Operation::Multiply(), twoU, du);
+}
+
+ExpressionTreeNode Operation::Cube::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dx u^3 = (3*u^2) * u'
+    ExpressionTreeNode uSquared(new Operation::Square(), children[0]);
+    ExpressionTreeNode threeUSquared(new Operation::MultiplyConstant(3.0), uSquared);
+    return ExpressionTreeNode(new Operation::Multiply(), threeUSquared, childDerivs[0]);
+}
+
+ExpressionTreeNode Operation::Reciprocal::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dx (1/u) = -(1/u^2) * u'
+    ExpressionTreeNode uSquared(new Operation::Square(), children[0]);
+    ExpressionTreeNode inverseSquare(new Operation::Reciprocal(), uSquared);
+    ExpressionTreeNode minusInverseSquare(new Operation::Negate(), inverseSquare);
+    return ExpressionTreeNode(new Operation::Multiply(), minusInverseSquare, childDerivs[0]);
+}
+
+ExpressionTreeNode Operation::AddConstant::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {   // (u + c)' = u': the additive constant drops out
+    return childDerivs[0];
+}
+
+ExpressionTreeNode Operation::MultiplyConstant::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    const ExpressionTreeNode& du = childDerivs[0];   // (c*u)' = c * u'
+    return ExpressionTreeNode(new Operation::MultiplyConstant(value), du);
+}
+
+ExpressionTreeNode Operation::PowerConstant::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dx u^c = (c * u^(c-1)) * u'
+    ExpressionTreeNode reducedPower(new Operation::PowerConstant(value-1), children[0]);
+    ExpressionTreeNode scaledPower(new Operation::MultiplyConstant(value), reducedPower);
+    const ExpressionTreeNode& du = childDerivs[0];
+    return ExpressionTreeNode(new Operation::Multiply(), scaledPower, du);
+}
+
+ExpressionTreeNode Operation::Min::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // min(u, v)' = v'*s - u'*(s - 1), where s = step(u - v)
+    ExpressionTreeNode difference(new Operation::Subtract(), children[0], children[1]);
+    ExpressionTreeNode selector(new Operation::Step(), difference);
+    ExpressionTreeNode fromSecond(new Operation::Multiply(), childDerivs[1], selector);
+    ExpressionTreeNode fromFirst(new Operation::Multiply(), childDerivs[0], ExpressionTreeNode(new Operation::AddConstant(-1), selector));
+    return ExpressionTreeNode(new Operation::Subtract(), fromSecond, fromFirst);
+}
+
+ExpressionTreeNode Operation::Max::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // max(u, v)' = u'*s - v'*(s - 1), where s = step(u - v)
+    ExpressionTreeNode difference(new Operation::Subtract(), children[0], children[1]);
+    ExpressionTreeNode selector(new Operation::Step(), difference);
+    ExpressionTreeNode fromFirst(new Operation::Multiply(), childDerivs[0], selector);
+    ExpressionTreeNode fromSecond(new Operation::Multiply(), childDerivs[1], ExpressionTreeNode(new Operation::AddConstant(-1), selector));
+    return ExpressionTreeNode(new Operation::Subtract(), fromFirst, fromSecond);
+}
+
+ExpressionTreeNode Operation::Abs::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dx |u| = u' * (2*step(u) - 1)
+    ExpressionTreeNode stepU(new Operation::Step(), children[0]);
+    ExpressionTreeNode twoStepU(new Operation::MultiplyConstant(2), stepU);
+    ExpressionTreeNode signU(new Operation::AddConstant(-1), twoStepU);
+    return ExpressionTreeNode(new Operation::Multiply(), childDerivs[0], signU);
+}
+
+ExpressionTreeNode Operation::Floor::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {   // always yields the constant 0, regardless of the argument or its derivative
+    return ExpressionTreeNode(new Operation::Constant(0.0));
+}
+
+ExpressionTreeNode Operation::Ceil::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {   // always yields the constant 0, regardless of the argument or its derivative
+    return ExpressionTreeNode(new Operation::Constant(0.0));
+}
+
+ExpressionTreeNode Operation::Select::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // Keep the condition (children[0]) as is and select between the derivatives of the two branches.
+    vector<ExpressionTreeNode> branches(1, children[0]);
+    branches.push_back(childDerivs[1]);
+    branches.push_back(childDerivs[2]);
+    return ExpressionTreeNode(new Operation::Select(), branches);
+}
diff --git a/lib/lepton/src/ParsedExpression.cpp b/lib/lepton/src/ParsedExpression.cpp
new file mode 100644
index 0000000000..13ebbf2dc2
--- /dev/null
+++ b/lib/lepton/src/ParsedExpression.cpp
@@ -0,0 +1,379 @@
+/* -------------------------------------------------------------------------- *
+ *                                   Lepton                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the Lepton expression parser originating from              *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2009 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "lepton/ParsedExpression.h"
+#include "lepton/CompiledExpression.h"
+#include "lepton/ExpressionProgram.h"
+#include "lepton/Operation.h"
+#include <limits>
+#include <vector>
+
+using namespace LMP_Lepton;
+using namespace std;
+
+ParsedExpression::ParsedExpression() : rootNode(ExpressionTreeNode()) { // Default constructor: the root is a default-constructed ExpressionTreeNode; getRootNode() is meant to reject this state.
+}
+
+ParsedExpression::ParsedExpression(const ExpressionTreeNode& rootNode) : rootNode(rootNode) { // Stores the given node as the root of this expression.
+}
+
+const ExpressionTreeNode& ParsedExpression::getRootNode() const { // Returns the root of the expression tree; throws Exception if the root has no operation.
+    if (&rootNode.getOperation() == NULL) // NOTE(review): this compares the address of the object returned by getOperation() with NULL; compilers may assume that address is never NULL — confirm the guard is effective
+        throw Exception("Illegal call to an uninitialized ParsedExpression");
+    return rootNode;
+}
+
+double ParsedExpression::evaluate() const { // Evaluates the whole expression with an empty variable map.
+    return evaluate(getRootNode(), map<string, double>());
+}
+
+double ParsedExpression::evaluate(const map<string, double>& variables) const { // Evaluates the whole expression using the supplied variable name -> value map.
+    return evaluate(getRootNode(), variables);
+}
+
+double ParsedExpression::evaluate(const ExpressionTreeNode& node, const map<string, double>& variables) { // Recursively evaluates the subtree rooted at node and returns its value.
+    int numArgs = (int) node.getChildren().size();
+    vector<double> args(max(numArgs, 1)); // at least one slot so that &args[0] below is valid even for nodes without children
+    for (int i = 0; i < numArgs; i++)
+        args[i] = evaluate(node.getChildren()[i], variables); // children are evaluated first, in order
+    return node.getOperation().evaluate(&args[0], variables); // the operation receives the child values as a raw array
+}
+
+ParsedExpression ParsedExpression::optimize() const { // Returns a simplified expression; this object is not modified.
+    ExpressionTreeNode result = precalculateConstantSubexpressions(getRootNode()); // fold constant subtrees once up front
+    while (true) { // repeat the pattern substitutions until the tree stops changing
+        ExpressionTreeNode simplified = substituteSimplerExpression(result);
+        if (simplified == result)
+            break;
+        result = simplified;
+    }
+    return ParsedExpression(result);
+}
+
+ParsedExpression ParsedExpression::optimize(const map<string, double>& variables) const { // Like optimize(), but first replaces every variable found in variables by its value.
+    ExpressionTreeNode result = preevaluateVariables(getRootNode(), variables);
+    result = precalculateConstantSubexpressions(result); // the substituted values may have created new constant subtrees
+    while (true) { // repeat the pattern substitutions until the tree stops changing
+        ExpressionTreeNode simplified = substituteSimplerExpression(result);
+        if (simplified == result)
+            break;
+        result = simplified;
+    }
+    return ParsedExpression(result);
+}
+
+ExpressionTreeNode ParsedExpression::preevaluateVariables(const ExpressionTreeNode& node, const map<string, double>& variables) { // Returns a copy of the subtree in which every variable listed in variables is replaced by a Constant.
+    if (node.getOperation().getId() == Operation::VARIABLE) {
+        const Operation::Variable& var = dynamic_cast<const Operation::Variable&>(node.getOperation());
+        map<string, double>::const_iterator iter = variables.find(var.getName());
+        if (iter == variables.end())
+            return node; // no value supplied: the variable stays symbolic
+        return ExpressionTreeNode(new Operation::Constant(iter->second));
+    }
+    vector<ExpressionTreeNode> children(node.getChildren().size());
+    for (int i = 0; i < (int) children.size(); i++)
+        children[i] = preevaluateVariables(node.getChildren()[i], variables); // recurse into every child
+    return ExpressionTreeNode(node.getOperation().clone(), children); // rebuild this node around the processed children
+}
+
+ExpressionTreeNode ParsedExpression::precalculateConstantSubexpressions(const ExpressionTreeNode& node) { // Returns a copy of the subtree in which every part free of variables and custom functions is folded into a Constant.
+    vector<ExpressionTreeNode> children(node.getChildren().size());
+    for (int i = 0; i < (int) children.size(); i++)
+        children[i] = precalculateConstantSubexpressions(node.getChildren()[i]); // fold the children first (bottom-up)
+    ExpressionTreeNode result = ExpressionTreeNode(node.getOperation().clone(), children);
+    if (node.getOperation().getId() == Operation::VARIABLE || node.getOperation().getId() == Operation::CUSTOM)
+        return result; // variables and custom functions are never evaluated here
+    for (int i = 0; i < (int) children.size(); i++)
+        if (children[i].getOperation().getId() != Operation::CONSTANT)
+            return result; // at least one child is not a constant, so this node cannot be folded
+    return ExpressionTreeNode(new Operation::Constant(evaluate(result, map<string, double>()))); // all children (if any) are constants: evaluate once and keep only the value
+}
+
+ExpressionTreeNode ParsedExpression::substituteSimplerExpression(const ExpressionTreeNode& node) { // Returns a copy of the subtree with recognized patterns replaced by simpler forms; a single bottom-up pass, which optimize() repeats until nothing changes.
+    vector<ExpressionTreeNode> children(node.getChildren().size());
+    for (int i = 0; i < (int) children.size(); i++)
+        children[i] = substituteSimplerExpression(node.getChildren()[i]);
+
+    // Collect some info on constant expressions in children
+    bool first_const = children.size() > 0 && isConstant(children[0]); // is first child constant?
+    bool second_const = children.size() > 1 && isConstant(children[1]); // is second child constant?
+    double first = 0.0, second = 0.0; // if yes, value of first and second child (only meaningful when the matching *_const flag is set)
+    if (first_const)
+        first = getConstantValue(children[0]);
+    if (second_const)
+        second = getConstantValue(children[1]);
+
+    switch (node.getOperation().getId()) {
+        case Operation::ADD:
+        {
+            if (first_const) {
+                if (first == 0.0) { // Add 0
+                    return children[1];
+                } else { // Add a constant
+                    return ExpressionTreeNode(new Operation::AddConstant(first), children[1]);
+                }
+            }
+            if (second_const) {
+                if (second == 0.0) { // Add 0
+                    return children[0];
+                } else { // Add a constant
+                    return ExpressionTreeNode(new Operation::AddConstant(second), children[0]);
+                }
+            }
+            if (children[1].getOperation().getId() == Operation::NEGATE) // a+(-b) = a-b
+                return ExpressionTreeNode(new Operation::Subtract(), children[0], children[1].getChildren()[0]);
+            if (children[0].getOperation().getId() == Operation::NEGATE) // (-a)+b = b-a
+                return ExpressionTreeNode(new Operation::Subtract(), children[1], children[0].getChildren()[0]);
+            break;
+        }
+        case Operation::SUBTRACT:
+        {
+            if (children[0] == children[1])
+                return ExpressionTreeNode(new Operation::Constant(0.0)); // Subtracting anything from itself is 0
+            if (first_const) {
+                if (first == 0.0) // Subtract from 0
+                    return ExpressionTreeNode(new Operation::Negate(), children[1]);
+            }
+            if (second_const) {
+                if (second == 0.0) { // Subtract 0
+                    return children[0];
+                } else { // Subtract a constant
+                    return ExpressionTreeNode(new Operation::AddConstant(-second), children[0]);
+                }
+            }
+            if (children[1].getOperation().getId() == Operation::NEGATE) // a-(-b) = a+b
+                return ExpressionTreeNode(new Operation::Add(), children[0], children[1].getChildren()[0]);
+            break;
+        }
+        case Operation::MULTIPLY:
+        {
+            if ((first_const && first == 0.0) || (second_const && second == 0.0)) // Multiply by 0
+                return ExpressionTreeNode(new Operation::Constant(0.0));
+            if (first_const && first == 1.0) // Multiply by 1
+                return children[1];
+            if (second_const && second == 1.0) // Multiply by 1
+                return children[0];
+            if (first_const) { // Multiply by a constant
+                if (children[1].getOperation().getId() == Operation::MULTIPLY_CONSTANT) // Combine two multiplies into a single one
+                    return ExpressionTreeNode(new Operation::MultiplyConstant(first*dynamic_cast<const Operation::MultiplyConstant*>(&children[1].getOperation())->getValue()), children[1].getChildren()[0]);
+                return ExpressionTreeNode(new Operation::MultiplyConstant(first), children[1]);
+            }
+            if (second_const) { // Multiply by a constant
+                if (children[0].getOperation().getId() == Operation::MULTIPLY_CONSTANT) // Combine two multiplies into a single one
+                    return ExpressionTreeNode(new Operation::MultiplyConstant(second*dynamic_cast<const Operation::MultiplyConstant*>(&children[0].getOperation())->getValue()), children[0].getChildren()[0]);
+                return ExpressionTreeNode(new Operation::MultiplyConstant(second), children[0]);
+            }
+            if (children[0].getOperation().getId() == Operation::NEGATE && children[1].getOperation().getId() == Operation::NEGATE) // The two negations cancel
+                return ExpressionTreeNode(new Operation::Multiply(), children[0].getChildren()[0], children[1].getChildren()[0]);
+            if (children[0].getOperation().getId() == Operation::NEGATE && children[1].getOperation().getId() == Operation::MULTIPLY_CONSTANT) // Negate the constant
+                return ExpressionTreeNode(new Operation::Multiply(), children[0].getChildren()[0], ExpressionTreeNode(new Operation::MultiplyConstant(-dynamic_cast<const Operation::MultiplyConstant*>(&children[1].getOperation())->getValue()), children[1].getChildren()[0]));
+            if (children[1].getOperation().getId() == Operation::NEGATE && children[0].getOperation().getId() == Operation::MULTIPLY_CONSTANT) // Negate the constant
+                return ExpressionTreeNode(new Operation::Multiply(), ExpressionTreeNode(new Operation::MultiplyConstant(-dynamic_cast<const Operation::MultiplyConstant*>(&children[0].getOperation())->getValue()), children[0].getChildren()[0]), children[1].getChildren()[0]);
+            if (children[0].getOperation().getId() == Operation::NEGATE) // Pull the negation out so it can possibly be optimized further
+                return ExpressionTreeNode(new Operation::Negate(), ExpressionTreeNode(new Operation::Multiply(), children[0].getChildren()[0], children[1]));
+            if (children[1].getOperation().getId() == Operation::NEGATE) // Pull the negation out so it can possibly be optimized further
+                return ExpressionTreeNode(new Operation::Negate(), ExpressionTreeNode(new Operation::Multiply(), children[0], children[1].getChildren()[0]));
+            if (children[1].getOperation().getId() == Operation::RECIPROCAL) // a*(1/b) = a/b
+                return ExpressionTreeNode(new Operation::Divide(), children[0], children[1].getChildren()[0]);
+            if (children[0].getOperation().getId() == Operation::RECIPROCAL) // (1/a)*b = b/a
+                return ExpressionTreeNode(new Operation::Divide(), children[1], children[0].getChildren()[0]);
+            if (children[0] == children[1])
+                return ExpressionTreeNode(new Operation::Square(), children[0]); // x*x = square(x)
+            if (children[0].getOperation().getId() == Operation::SQUARE && children[0].getChildren()[0] == children[1])
+                return ExpressionTreeNode(new Operation::Cube(), children[1]); // x*x*x = cube(x)
+            if (children[1].getOperation().getId() == Operation::SQUARE && children[1].getChildren()[0] == children[0])
+                return ExpressionTreeNode(new Operation::Cube(), children[0]); // x*x*x = cube(x)
+            break;
+        }
+        case Operation::DIVIDE:
+        {
+            if (children[0] == children[1])
+                return ExpressionTreeNode(new Operation::Constant(1.0)); // Dividing anything by itself is 1
+            if (first_const && first == 0.0) // 0 divided by something
+                return ExpressionTreeNode(new Operation::Constant(0.0));
+            if (first_const && first == 1.0) // 1 divided by something
+                return ExpressionTreeNode(new Operation::Reciprocal(), children[1]);
+            if (second_const && second == 1.0) // Divide by 1
+                return children[0];
+            if (second_const) {
+                if (children[0].getOperation().getId() == Operation::MULTIPLY_CONSTANT) // Combine a multiply and a divide into one multiply
+                    return ExpressionTreeNode(new Operation::MultiplyConstant(dynamic_cast<const Operation::MultiplyConstant*>(&children[0].getOperation())->getValue()/second), children[0].getChildren()[0]);
+                return ExpressionTreeNode(new Operation::MultiplyConstant(1.0/second), children[0]); // Replace a divide with a multiply
+            }
+            if (children[0].getOperation().getId() == Operation::NEGATE && children[1].getOperation().getId() == Operation::NEGATE) // The two negations cancel
+                return ExpressionTreeNode(new Operation::Divide(), children[0].getChildren()[0], children[1].getChildren()[0]);
+            if (children[1].getOperation().getId() == Operation::NEGATE && children[0].getOperation().getId() == Operation::MULTIPLY_CONSTANT) // Negate the constant
+                return ExpressionTreeNode(new Operation::Divide(), ExpressionTreeNode(new Operation::MultiplyConstant(-dynamic_cast<const Operation::MultiplyConstant*>(&children[0].getOperation())->getValue()), children[0].getChildren()[0]), children[1].getChildren()[0]);
+            if (children[0].getOperation().getId() == Operation::NEGATE) // Pull the negation out so it can possibly be optimized further
+                return ExpressionTreeNode(new Operation::Negate(), ExpressionTreeNode(new Operation::Divide(), children[0].getChildren()[0], children[1]));
+            if (children[1].getOperation().getId() == Operation::NEGATE) // Pull the negation out so it can possibly be optimized further
+                return ExpressionTreeNode(new Operation::Negate(), ExpressionTreeNode(new Operation::Divide(), children[0], children[1].getChildren()[0]));
+            if (children[1].getOperation().getId() == Operation::RECIPROCAL) // a/(1/b) = a*b
+                return ExpressionTreeNode(new Operation::Multiply(), children[0], children[1].getChildren()[0]);
+            break;
+        }
+        case Operation::POWER:
+        {
+            if (first_const && first == 0.0) // 0 to any power is 0
+                return ExpressionTreeNode(new Operation::Constant(0.0));
+            if (first_const && first == 1.0) // 1 to any power is 1
+                return ExpressionTreeNode(new Operation::Constant(1.0));
+            if (second_const) { // Constant exponent
+                if (second == 0.0) // x^0 = 1
+                    return ExpressionTreeNode(new Operation::Constant(1.0));
+                if (second == 1.0) // x^1 = x
+                    return children[0];
+                if (second == -1.0) // x^-1 = recip(x)
+                    return ExpressionTreeNode(new Operation::Reciprocal(), children[0]);
+                if (second == 2.0) // x^2 = square(x)
+                    return ExpressionTreeNode(new Operation::Square(), children[0]);
+                if (second == 3.0) // x^3 = cube(x)
+                    return ExpressionTreeNode(new Operation::Cube(), children[0]);
+                if (second == 0.5) // x^0.5 = sqrt(x)
+                    return ExpressionTreeNode(new Operation::Sqrt(), children[0]);
+                // Constant power
+                return ExpressionTreeNode(new Operation::PowerConstant(second), children[0]);
+            }
+            break;
+        }
+        case Operation::NEGATE:
+        {
+            if (children[0].getOperation().getId() == Operation::MULTIPLY_CONSTANT) // Combine a multiply and a negate into a single multiply
+                return ExpressionTreeNode(new Operation::MultiplyConstant(-dynamic_cast<const Operation::MultiplyConstant*>(&children[0].getOperation())->getValue()), children[0].getChildren()[0]);
+            if (first_const) // Negate a constant
+                return ExpressionTreeNode(new Operation::Constant(-first));
+            if (children[0].getOperation().getId() == Operation::NEGATE) // The two negations cancel
+                return children[0].getChildren()[0];
+            break;
+        }
+        case Operation::MULTIPLY_CONSTANT:
+        {
+            if (children[0].getOperation().getId() == Operation::MULTIPLY_CONSTANT) // Combine two multiplies into a single one
+                return ExpressionTreeNode(new Operation::MultiplyConstant(dynamic_cast<const Operation::MultiplyConstant*>(&node.getOperation())->getValue()*dynamic_cast<const Operation::MultiplyConstant*>(&children[0].getOperation())->getValue()), children[0].getChildren()[0]);
+            if (first_const) // Multiply two constants
+                return ExpressionTreeNode(new Operation::Constant(dynamic_cast<const Operation::MultiplyConstant*>(&node.getOperation())->getValue()*getConstantValue(children[0])));
+            if (children[0].getOperation().getId() == Operation::NEGATE) // Combine a multiply and a negate into a single multiply
+                return ExpressionTreeNode(new Operation::MultiplyConstant(-dynamic_cast<const Operation::MultiplyConstant*>(&node.getOperation())->getValue()), children[0].getChildren()[0]);
+            break;
+        }
+        case Operation::SQRT:
+        {
+            if (children[0].getOperation().getId() == Operation::SQUARE) // sqrt(square(x)) = abs(x)
+                return ExpressionTreeNode(new Operation::Abs(), children[0].getChildren()[0]);
+            break; // must not fall through: the SQUARE rule below would otherwise rewrite sqrt(sqrt(x)) as x
+        }
+        case Operation::SQUARE:
+        {
+            if (children[0].getOperation().getId() == Operation::SQRT) // square(sqrt(x)) = x
+                return children[0].getChildren()[0];
+            break;
+        }
+        default:
+        {
+            // If operation ID is not one of the above, we don't substitute a simpler expression.
+            break;
+        }
+    }
+    return ExpressionTreeNode(node.getOperation().clone(), children);
+}
+
+ParsedExpression ParsedExpression::differentiate(const string& variable) const { // Returns the derivative with respect to variable; the ExpressionTreeNode result is converted to a ParsedExpression on return.
+    return differentiate(getRootNode(), variable);
+}
+
+ExpressionTreeNode ParsedExpression::differentiate(const ExpressionTreeNode& node, const string& variable) { // Recursively differentiates the subtree rooted at node with respect to variable.
+    vector<ExpressionTreeNode> childDerivs(node.getChildren().size());
+    for (int i = 0; i < (int) childDerivs.size(); i++)
+        childDerivs[i] = differentiate(node.getChildren()[i], variable); // derivatives of the children are computed first
+    return node.getOperation().differentiate(node.getChildren(),childDerivs, variable); // the operation combines its children and their derivatives into its own derivative
+}
+
+bool ParsedExpression::isConstant(const ExpressionTreeNode& node) { // True if the node's operation has id Operation::CONSTANT.
+    return (node.getOperation().getId() == Operation::CONSTANT);
+}
+
+double ParsedExpression::getConstantValue(const ExpressionTreeNode& node) { // Returns the value held by a constant node; throws Exception for any other kind of node.
+    if (node.getOperation().getId() != Operation::CONSTANT) {
+        throw Exception("getConstantValue called on a non-constant ExpressionNode");
+    }
+    return dynamic_cast<const Operation::Constant&>(node.getOperation()).getValue(); // the id check above is what justifies the cast to Operation::Constant
+}
+
+ExpressionProgram ParsedExpression::createProgram() const { // Constructs an ExpressionProgram from this expression.
+    return ExpressionProgram(*this);
+}
+
+CompiledExpression ParsedExpression::createCompiledExpression() const { // Constructs a CompiledExpression from this expression.
+    return CompiledExpression(*this);
+}
+
+ParsedExpression ParsedExpression::renameVariables(const map<string, string>& replacements) const { // Returns a new expression in which variables are renamed according to replacements (old name -> new name).
+    return ParsedExpression(renameNodeVariables(getRootNode(), replacements));
+}
+
+ExpressionTreeNode ParsedExpression::renameNodeVariables(const ExpressionTreeNode& node, const map<string, string>& replacements) { // Returns a copy of the subtree in which variables named in replacements carry their mapped names.
+    if (node.getOperation().getId() == Operation::VARIABLE) {
+        map<string, string>::const_iterator replace = replacements.find(node.getOperation().getName());
+        if (replace != replacements.end())
+            return ExpressionTreeNode(new Operation::Variable(replace->second));
+    }
+    vector<ExpressionTreeNode> children; // variables without a replacement also reach this point and are cloned unchanged
+    for (int i = 0; i < (int) node.getChildren().size(); i++)
+        children.push_back(renameNodeVariables(node.getChildren()[i], replacements));
+    return ExpressionTreeNode(node.getOperation().clone(), children);
+}
+
+ostream& LMP_Lepton::operator<<(ostream& out, const ExpressionTreeNode& node) { // Writes a textual form of the subtree rooted at node to out and returns out.
+    if (node.getOperation().isInfixOperator() && node.getChildren().size() == 2) {
+        out << "(" << node.getChildren()[0] << ")" << node.getOperation().getName() << "(" << node.getChildren()[1] << ")"; // infix with two children: (a)op(b)
+    }
+    else if (node.getOperation().isInfixOperator() && node.getChildren().size() == 1) {
+        out << "(" << node.getChildren()[0] << ")" << node.getOperation().getName(); // infix with one child: (a)op
+    }
+    else {
+        out << node.getOperation().getName(); // everything else: name, followed by (arg, arg, ...) when there are children
+        if (node.getChildren().size() > 0) {
+            out << "(";
+            for (int i = 0; i < (int) node.getChildren().size(); i++) {
+                if (i > 0)
+                    out << ", ";
+                out << node.getChildren()[i]; // children are printed recursively through this same operator
+            }
+            out << ")";
+        }
+    }
+    return out;
+}
+
+ostream& LMP_Lepton::operator<<(ostream& out, const ParsedExpression& exp) { // Writes the expression by printing its root node; getRootNode() may throw for an expression without a root operation.
+    out << exp.getRootNode();
+    return out;
+}
diff --git a/lib/lepton/src/Parser.cpp b/lib/lepton/src/Parser.cpp
new file mode 100644
index 0000000000..c0b4c185e8
--- /dev/null
+++ b/lib/lepton/src/Parser.cpp
@@ -0,0 +1,409 @@
+/* -------------------------------------------------------------------------- *
+ *                                   Lepton                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the Lepton expression parser originating from              *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2009-2019 Stanford University and the Authors.      *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "lepton/Parser.h"
+#include "lepton/CustomFunction.h"
+#include "lepton/Exception.h"
+#include "lepton/ExpressionTreeNode.h"
+#include "lepton/Operation.h"
+#include "lepton/ParsedExpression.h"
+#include <cctype>
+#include <iostream>
+
+using namespace LMP_Lepton;
+using namespace std;
+
+static const string Digits = "0123456789"; // characters accepted as digits of a numeric literal by getNextToken()
+static const string Operators = "+-*/^"; // single-character operators recognized by getNextToken()
+static const bool LeftAssociative[] = {true, true, true, true, false}; // five entries; presumably indexed by the operator's position in Operators (only the last, '^', is false) — confirm in parsePrecedence()
+static const int Precedence[] = {0, 0, 1, 1, 3}; // five entries; presumably indexed like LeftAssociative, with a larger value binding more tightly — confirm in parsePrecedence()
+static const Operation::Id OperationId[] = {Operation::ADD, Operation::SUBTRACT, Operation::MULTIPLY, Operation::DIVIDE, Operation::POWER}; // operation ids listed in the same order as the characters of Operators
+
+class LMP_Lepton::ParseToken { // A single lexical token: its text plus its classification.
+public:
+    enum Type {Number, Operator, Variable, Function, LeftParen, RightParen, Comma, Whitespace}; // Whitespace tokens are produced by getNextToken() but dropped by tokenize()
+
+    ParseToken(string text, Type type) : text(text), type(type) {
+    }
+    const string& getText() const { // the token's characters; tokenize() advances by the length of this text
+        return text;
+    }
+    Type getType() const { // the token's classification
+        return type;
+    }
+private:
+    string text;
+    Type type;
+};
+
+string Parser::trim(const string& expression) {
+    // Return a copy of expression without leading and trailing whitespace ("" if nothing else remains).
+    // Characters are cast to unsigned char because isspace() is undefined for negative char values.
+    int start, end;
+    for (start = 0; start < (int) expression.size() && isspace((unsigned char) expression[start]); start++)
+        ;
+    for (end = (int) expression.size()-1; end > start && isspace((unsigned char) expression[end]); end--)
+        ;
+    if (start == end && isspace((unsigned char) expression[end]))
+        return "";
+    return expression.substr(start, end-start+1); // for empty or all-whitespace input the requested length is 0
+}
+
+ParseToken Parser::getNextToken(const string& expression, int start) { // Returns the token that begins at expression[start]; start must be a valid index (tokenize() only calls this while pos < expression.size()).
+    char c = expression[start];
+    if (c == '(')
+        return ParseToken("(", ParseToken::LeftParen);
+    if (c == ')')
+        return ParseToken(")", ParseToken::RightParen);
+    if (c == ',')
+        return ParseToken(",", ParseToken::Comma);
+    if (Operators.find(c) != string::npos)
+        return ParseToken(string(1, c), ParseToken::Operator);
+    if (isspace((unsigned char) c)) { // cast: isspace() is undefined for negative char values (e.g. bytes of non-ASCII text)
+        // White space
+
+        for (int pos = start+1; pos < (int) expression.size(); pos++) {
+            if (!isspace((unsigned char) expression[pos]))
+                return ParseToken(expression.substr(start, pos-start), ParseToken::Whitespace);
+        }
+        return ParseToken(expression.substr(start, string::npos), ParseToken::Whitespace); // whitespace runs to the end of the string
+    }
+    if (c == '.' || Digits.find(c) != string::npos) {
+        // A number
+
+        bool foundDecimal = (c == '.');
+        bool foundExp = false;
+        int pos;
+        for (pos = start+1; pos < (int) expression.size(); pos++) {
+            c = expression[pos];
+            if (Digits.find(c) != string::npos)
+                continue;
+            if (c == '.' && !foundDecimal) { // at most one decimal point
+                foundDecimal = true;
+                continue;
+            }
+            if ((c == 'e' || c == 'E') && !foundExp) { // at most one exponent marker
+                foundExp = true;
+                if (pos < (int) expression.size()-1 && (expression[pos+1] == '-' || expression[pos+1] == '+'))
+                    pos++; // the sign directly after the exponent marker belongs to the number
+                continue;
+            }
+            break;
+        }
+        return ParseToken(expression.substr(start, pos-start), ParseToken::Number);
+    }
+
+    // A variable, function, or left parenthesis
+
+    for (int pos = start; pos < (int) expression.size(); pos++) {
+        c = expression[pos];
+        if (c == '(')
+            return ParseToken(expression.substr(start, pos-start+1), ParseToken::Function); // the token text includes the opening parenthesis
+        if (Operators.find(c) != string::npos || c == ',' || c == ')' || isspace((unsigned char) c))
+            return ParseToken(expression.substr(start, pos-start), ParseToken::Variable);
+    }
+    return ParseToken(expression.substr(start, string::npos), ParseToken::Variable); // name runs to the end of the string
+}
+
+vector<ParseToken> Parser::tokenize(const string& expression) { // Splits expression into tokens, in order, leaving out whitespace tokens.
+    vector<ParseToken> tokens;
+    int pos = 0;
+    while (pos < (int) expression.size()) {
+        ParseToken token = getNextToken(expression, pos);
+        if (token.getType() != ParseToken::Whitespace)
+            tokens.push_back(token);
+        pos += (int) token.getText().size(); // advance by the token's length; whitespace is skipped but still consumed
+    }
+    return tokens;
+}
+
+ParsedExpression Parser::parse(const string& expression) { // Parses expression with an empty custom-function map.
+    return parse(expression, map<string, CustomFunction*>());
+}
+
+ParsedExpression Parser::parse(const string& expression, const map<string, CustomFunction*>& customFunctions) { // Parses text of the form "primary; name1 = def1; name2 = def2; ..." into a ParsedExpression; throws Exception on errors.
+    try {
+        // First split the expression into subexpressions.
+
+        string primaryExpression = expression;
+        vector<string> subexpressions;
+        while (true) {
+            string::size_type pos = primaryExpression.find_last_of(';'); // peel definitions off from the right
+            if (pos == string::npos)
+                break;
+            string sub = trim(primaryExpression.substr(pos+1));
+            if (sub.size() > 0) // empty segments (e.g. after a trailing ';') are ignored
+                subexpressions.push_back(sub);
+            primaryExpression = primaryExpression.substr(0, pos);
+        }
+
+        // Parse the subexpressions.
+
+        map<string, ExpressionTreeNode> subexpDefs;
+        for (int i = 0; i < (int) subexpressions.size(); i++) { // subexpressions[0] is the rightmost definition, so definitions are parsed from last to first
+            string::size_type equalsPos = subexpressions[i].find('=');
+            if (equalsPos == string::npos)
+                throw Exception("subexpression does not specify a name");
+            string name = trim(subexpressions[i].substr(0, equalsPos));
+            if (name.size() == 0)
+                throw Exception("subexpression does not specify a name");
+            vector<ParseToken> tokens = tokenize(subexpressions[i].substr(equalsPos+1));
+            int pos = 0;
+            subexpDefs[name] = parsePrecedence(tokens, pos, customFunctions, subexpDefs, 0); // definitions parsed so far are passed in, so this one may refer to them
+            if (pos != tokens.size()) // tokens left over after a complete expression
+                throw Exception("unexpected text at end of subexpression: "+tokens[pos].getText());
+        }
+
+        // Now parse the primary expression.
+
+        vector<ParseToken> tokens = tokenize(primaryExpression);
+        int pos = 0;
+        ExpressionTreeNode result = parsePrecedence(tokens, pos, customFunctions, subexpDefs, 0);
+        if (pos != tokens.size()) // tokens left over after a complete expression
+            throw Exception("unexpected text at end of expression: "+tokens[pos].getText());
+        return ParsedExpression(result);
+    }
+    catch (Exception& ex) {
+        throw Exception("Parse error in expression \""+expression+"\": "+ex.what()); // rethrown with the full input text prepended for context
+    }
+}
+
+ExpressionTreeNode Parser::parsePrecedence(const vector<ParseToken>& tokens, int& pos, const map<string, CustomFunction*>& customFunctions,
+            const map<string, ExpressionTreeNode>& subexpressionDefs, int precedence) {
+    // Parse one operand at tokens[pos], then fold in following binary operators of at least the given precedence; advances pos.
+    if (pos == (int) tokens.size())
+        throw Exception("unexpected end of expression");
+
+    // Parse the next value (number, variable, function, parenthesized expression)
+    ParseToken token = tokens[pos];
+    ExpressionTreeNode result;
+    if (token.getType() == ParseToken::Number) {
+        double value;
+        stringstream(token.getText()) >> value;
+        result = ExpressionTreeNode(new Operation::Constant(value));
+        pos++;
+    }
+    else if (token.getType() == ParseToken::Variable) {
+        map<string, ExpressionTreeNode>::const_iterator subexp = subexpressionDefs.find(token.getText());
+        if (subexp == subexpressionDefs.end()) {
+            Operation* op = new Operation::Variable(token.getText());
+            result = ExpressionTreeNode(op);
+        }
+        else
+            result = subexp->second;
+        pos++;
+    }
+    else if (token.getType() == ParseToken::LeftParen) {
+        pos++;
+        result = parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, 0);
+        if (pos == (int) tokens.size() || tokens[pos].getType() != ParseToken::RightParen)
+            throw Exception("unbalanced parentheses");
+        pos++;
+    }
+    else if (token.getType() == ParseToken::Function) {
+        pos++;
+        vector<ExpressionTreeNode> args;
+        bool moreArgs;
+        do {
+            args.push_back(parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, 0));
+            moreArgs = (pos < (int) tokens.size() && tokens[pos].getType() == ParseToken::Comma);
+            if (moreArgs)
+                pos++;
+        } while (moreArgs);
+        if (pos == (int) tokens.size() || tokens[pos].getType() != ParseToken::RightParen)
+            throw Exception("unbalanced parentheses");
+        pos++;
+        Operation* op = getFunctionOperation(token.getText(), customFunctions);
+        try {
+            result = ExpressionTreeNode(op, args);
+        }
+        catch (...) {
+            delete op;
+            throw;
+        }
+    }
+    else if (token.getType() == ParseToken::Operator && token.getText() == "-") {
+        pos++;
+        ExpressionTreeNode toNegate = parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, 2);
+        result = ExpressionTreeNode(new Operation::Negate(), toNegate);
+    }
+    else
+        throw Exception("unexpected token: "+token.getText());
+
+    // Now deal with the next binary operator.
+
+    while (pos < (int) tokens.size() && tokens[pos].getType() == ParseToken::Operator) {
+        token = tokens[pos];
+        int opIndex = (int) Operators.find(token.getText());
+        int opPrecedence = Precedence[opIndex];
+        if (opPrecedence < precedence)
+            return result;
+        pos++;
+        ExpressionTreeNode arg = parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, LeftAssociative[opIndex] ? opPrecedence+1 : opPrecedence);
+        Operation* op = getOperatorOperation(token.getText());
+        try {
+            result = ExpressionTreeNode(op, result, arg);
+        }
+        catch (...) {
+            delete op;
+            throw;
+        }
+    }
+    return result;
+}
+
+Operation* Parser::getOperatorOperation(const std::string& name) {    // returns a newly allocated Operation for a binary operator
+    switch (OperationId[Operators.find(name)]) {    // NOTE(review): assumes name occurs in Operators; a failed find() would index out of range
+        case Operation::ADD:
+            return new Operation::Add();
+        case Operation::SUBTRACT:
+            return new Operation::Subtract();
+        case Operation::MULTIPLY:
+            return new Operation::Multiply();
+        case Operation::DIVIDE:
+            return new Operation::Divide();
+        case Operation::POWER:
+            return new Operation::Power();
+        default:
+            throw Exception("unknown operator");
+    }
+}
+
+Operation* Parser::getFunctionOperation(const std::string& name, const map<string, CustomFunction*>& customFunctions) {
+    // Returns a newly allocated Operation for a function token; custom functions take priority over the built-in ones.
+    // The name table is an immutable static: it is built exactly once and its initialization is thread-safe in C++11.
+    static const map<string, Operation::Id> opMap = {
+        {"sqrt", Operation::SQRT},
+        {"exp", Operation::EXP},
+        {"log", Operation::LOG},
+        {"sin", Operation::SIN},
+        {"cos", Operation::COS},
+        {"sec", Operation::SEC},
+        {"csc", Operation::CSC},
+        {"tan", Operation::TAN},
+        {"cot", Operation::COT},
+        {"asin", Operation::ASIN},
+        {"acos", Operation::ACOS},
+        {"atan", Operation::ATAN},
+        {"atan2", Operation::ATAN2},
+        {"sinh", Operation::SINH},
+        {"cosh", Operation::COSH},
+        {"tanh", Operation::TANH},
+        {"erf", Operation::ERF},
+        {"erfc", Operation::ERFC},
+        {"step", Operation::STEP},
+        {"delta", Operation::DELTA},
+        {"square", Operation::SQUARE},
+        {"cube", Operation::CUBE},
+        {"recip", Operation::RECIPROCAL},
+        {"min", Operation::MIN},
+        {"max", Operation::MAX},
+        {"abs", Operation::ABS},
+        {"floor", Operation::FLOOR},
+        {"ceil", Operation::CEIL},
+        {"select", Operation::SELECT},
+    };
+    string trimmed = name.substr(0, name.size()-1);    // drop the last character of the token text (presumably the opening parenthesis - confirm in tokenize)
+
+    // First check custom functions.
+
+    map<string, CustomFunction*>::const_iterator custom = customFunctions.find(trimmed);
+    if (custom != customFunctions.end())
+        return new Operation::Custom(trimmed, custom->second->clone());
+
+    // Now try standard functions.
+
+    map<string, Operation::Id>::const_iterator iter = opMap.find(trimmed);
+    if (iter == opMap.end())
+        throw Exception("unknown function: "+trimmed);
+    switch (iter->second) {
+        case Operation::SQRT:
+            return new Operation::Sqrt();
+        case Operation::EXP:
+            return new Operation::Exp();
+        case Operation::LOG:
+            return new Operation::Log();
+        case Operation::SIN:
+            return new Operation::Sin();
+        case Operation::COS:
+            return new Operation::Cos();
+        case Operation::SEC:
+            return new Operation::Sec();
+        case Operation::CSC:
+            return new Operation::Csc();
+        case Operation::TAN:
+            return new Operation::Tan();
+        case Operation::COT:
+            return new Operation::Cot();
+        case Operation::ASIN:
+            return new Operation::Asin();
+        case Operation::ACOS:
+            return new Operation::Acos();
+        case Operation::ATAN:
+            return new Operation::Atan();
+        case Operation::ATAN2:
+            return new Operation::Atan2();
+        case Operation::SINH:
+            return new Operation::Sinh();
+        case Operation::COSH:
+            return new Operation::Cosh();
+        case Operation::TANH:
+            return new Operation::Tanh();
+        case Operation::ERF:
+            return new Operation::Erf();
+        case Operation::ERFC:
+            return new Operation::Erfc();
+        case Operation::STEP:
+            return new Operation::Step();
+        case Operation::DELTA:
+            return new Operation::Delta();
+        case Operation::SQUARE:
+            return new Operation::Square();
+        case Operation::CUBE:
+            return new Operation::Cube();
+        case Operation::RECIPROCAL:
+            return new Operation::Reciprocal();
+        case Operation::MIN:
+            return new Operation::Min();
+        case Operation::MAX:
+            return new Operation::Max();
+        case Operation::ABS:
+            return new Operation::Abs();
+        case Operation::FLOOR:
+            return new Operation::Floor();
+        case Operation::CEIL:
+            return new Operation::Ceil();
+        case Operation::SELECT:
+            return new Operation::Select();
+        default:
+            throw Exception("unknown function");
+    }
+}

From c44e87d87a35116100713d6e4612af750cdb5cd9 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Wed, 21 Dec 2022 18:28:04 -0500
Subject: [PATCH 02/79] avoid name conflict with COLVARS package

---
 lib/lepton/Makefile.mpi    | 6 +-----
 lib/lepton/Makefile.serial | 6 +-----
 2 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/lib/lepton/Makefile.mpi b/lib/lepton/Makefile.mpi
index 045ea61015..934188ba50 100644
--- a/lib/lepton/Makefile.mpi
+++ b/lib/lepton/Makefile.mpi
@@ -1,15 +1,11 @@
 EXTRAMAKE=Makefile.lammps.empty
 
 CC=mpicxx
-
-# -DH5_NO_DEPRECATED_SYMBOLS is required here to ensure we are using
-# the v1.8 API when HDF5 is configured to default to using the v1.6 API.
 CXXFLAGS=-D_DEFAULT_SOURCE -O2 -Wall -fPIC -std=c++11
 INC=-I include
 AR=ar
 ARFLAGS=rc
-# need to build two libraries to not break compatibility and to support Install.py
-LIB=liblepton.a
+LIB=liblmplepton.a
 SRC=src/CompiledExpression.cpp src/ExpressionProgram.cpp src/ExpressionTreeNode.cpp src/Operation.cpp src/ParsedExpression.cpp src/Parser.cpp
 OBJ=$(SRC:src/%.cpp=build/%.o)
 
diff --git a/lib/lepton/Makefile.serial b/lib/lepton/Makefile.serial
index 58151e49c2..23d9b3dd57 100644
--- a/lib/lepton/Makefile.serial
+++ b/lib/lepton/Makefile.serial
@@ -1,15 +1,11 @@
 EXTRAMAKE=Makefile.lammps.empty
 
 CC=g++
-
-# -DH5_NO_DEPRECATED_SYMBOLS is required here to ensure we are using
-# the v1.8 API when HDF5 is configured to default to using the v1.6 API.
 CXXFLAGS=-D_DEFAULT_SOURCE -O2 -Wall -fPIC -std=c++11
 INC=-I include
 AR=ar
 ARFLAGS=rc
-# need to build two libraries to not break compatibility and to support Install.py
-LIB=liblepton.a
+LIB=liblmplepton.a
 SRC=src/CompiledExpression.cpp src/ExpressionProgram.cpp src/ExpressionTreeNode.cpp src/Operation.cpp src/ParsedExpression.cpp src/Parser.cpp
 OBJ=$(SRC:src/%.cpp=build/%.o)
 

From 5f934e3eae9a53f51c4eb0b79f6476c437cefde6 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Wed, 21 Dec 2022 18:28:35 -0500
Subject: [PATCH 03/79] add LEPTON package build system support for CMake

---
 cmake/CMakeLists.txt                | 5 +++--
 cmake/Modules/Packages/LEPTON.cmake | 8 ++++++++
 2 files changed, 11 insertions(+), 2 deletions(-)
 create mode 100644 cmake/Modules/Packages/LEPTON.cmake

diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
index ec142af426..bea9a31197 100644
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@@ -251,6 +251,7 @@ set(STANDARD_PACKAGES
   KSPACE
   LATBOLTZ
   LATTE
+  LEPTON
   MACHDYN
   MANIFOLD
   MANYBODY
@@ -266,7 +267,7 @@ set(STANDARD_PACKAGES
   ML-QUIP
   ML-RANN
   ML-SNAP
-  ML-POD  
+  ML-POD
   MOFFF
   MOLECULE
   MOLFILE
@@ -517,7 +518,7 @@ else()
 endif()
 
 foreach(PKG_WITH_INCL KSPACE PYTHON ML-IAP VORONOI COLVARS ML-HDNNP MDI MOLFILE NETCDF
-        PLUMED QMMM ML-QUIP SCAFACOS MACHDYN VTK KIM LATTE MSCG COMPRESS ML-PACE)
+        PLUMED QMMM ML-QUIP SCAFACOS MACHDYN VTK KIM LATTE MSCG COMPRESS ML-PACE LEPTON)
   if(PKG_${PKG_WITH_INCL})
     include(Packages/${PKG_WITH_INCL})
   endif()
diff --git a/cmake/Modules/Packages/LEPTON.cmake b/cmake/Modules/Packages/LEPTON.cmake
new file mode 100644
index 0000000000..05241de592
--- /dev/null
+++ b/cmake/Modules/Packages/LEPTON.cmake
@@ -0,0 +1,8 @@
+set(LEPTON_SOURCE_DIR ${LAMMPS_LIB_SOURCE_DIR}/lepton)
+# Build the bundled Lepton sources as a static library; the define is PUBLIC so targets including its headers see it too.
+file(GLOB LEPTON_SOURCES ${LEPTON_SOURCE_DIR}/src/[^.]*.cpp)
+add_library(lmplepton STATIC ${LEPTON_SOURCES})
+target_compile_definitions(lmplepton PUBLIC -DLEPTON_BUILDING_STATIC_LIBRARY)  # NOTE(review): assumed to select the non-dllimport export macro in windowsIncludes.h - confirm
+set_target_properties(lmplepton PROPERTIES OUTPUT_NAME lammps_lmplepton${LAMMPS_MACHINE})
+target_include_directories(lmplepton PUBLIC ${LEPTON_SOURCE_DIR}/include)
+target_link_libraries(lammps PRIVATE lmplepton)

From 76a84d7865d2724dc75781514d317632a9de7812 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Wed, 21 Dec 2022 18:28:57 -0500
Subject: [PATCH 04/79] add pair style lepton

---
 src/LEPTON/pair_lepton.cpp | 273 +++++++++++++++++++++++++++++++++++++
 src/LEPTON/pair_lepton.h   |  61 +++++++++
 2 files changed, 334 insertions(+)
 create mode 100644 src/LEPTON/pair_lepton.cpp
 create mode 100644 src/LEPTON/pair_lepton.h

diff --git a/src/LEPTON/pair_lepton.cpp b/src/LEPTON/pair_lepton.cpp
new file mode 100644
index 0000000000..7ec12bc988
--- /dev/null
+++ b/src/LEPTON/pair_lepton.cpp
@@ -0,0 +1,273 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "pair_lepton.h"
+
+#include "atom.h"
+#include "error.h"
+#include "force.h"
+#include "memory.h"
+#include "neigh_list.h"
+#include "update.h"
+
+#include <LMP_Lepton.h>
+#include <cstring>
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairLepton::PairLepton(LAMMPS *lmp) : Pair(lmp), cut(nullptr), type2expression(nullptr)
+{
+  respa_enable = 0;
+  single_enable = 1;    // this class implements single()
+  writedata = 1;        // this class implements write_data() and write_data_all()
+  restartinfo = 0;      // NOTE(review): presumably means no restart file support - confirm against Pair
+  reinitflag = 0;
+  cut_global = 0.0;     // overwritten with the pair_style argument in settings()
+  centroidstressflag = CENTROID_SAME;
+}
+
+/* ---------------------------------------------------------------------- */
+
+PairLepton::~PairLepton()
+{
+  // the per-type arrays only exist once allocate() has been called
+  if (!allocated) return;
+  memory->destroy(cut);
+  memory->destroy(cutsq);
+  memory->destroy(setflag);
+  memory->destroy(type2expression);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairLepton::compute(int eflag, int vflag)
+{
+  int i, j, ii, jj, inum, jnum, itype, jtype;
+  double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
+  double rsq, factor_lj;
+  int *ilist, *jlist, *numneigh, **firstneigh;
+
+  evdwl = 0.0;
+  ev_init(eflag, vflag);
+
+  double **x = atom->x;
+  double **f = atom->f;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+  double *special_lj = force->special_lj;
+  int newton_pair = force->newton_pair;
+
+  inum = list->inum;
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  std::vector<LMP_Lepton::CompiledExpression> force;
+  std::vector<LMP_Lepton::CompiledExpression> epot;
+  for (const auto &expr : expressions) {
+    force.emplace_back(
+        LMP_Lepton::Parser::parse(expr).differentiate("r").createCompiledExpression());
+    if (eflag) epot.emplace_back(LMP_Lepton::Parser::parse(expr).createCompiledExpression());
+  }
+
+  // loop over neighbors of my atoms
+
+  for (ii = 0; ii < inum; ii++) {
+    i = ilist[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      const double factor_lj = special_lj[sbmask(j)];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx * delx + dely * dely + delz * delz;
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+        const double r = sqrt(rsq);
+        const int idx = type2expression[itype][jtype];
+        double &r_for = force[idx].getVariableReference("r");
+        r_for = r;
+        fpair = -force[idx].evaluate() / r;
+        fpair *= factor_lj;
+
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;
+        if (newton_pair || j < nlocal) {
+          f[j][0] -= delx * fpair;
+          f[j][1] -= dely * fpair;
+          f[j][2] -= delz * fpair;
+        }
+
+        if (eflag) {
+          double &r_pot = epot[idx].getVariableReference("r");
+          r_pot = r;
+          evdwl = factor_lj * epot[idx].evaluate();
+        } else
+          evdwl = 0.0;
+
+        if (evflag) ev_tally(i, j, nlocal, newton_pair, evdwl, 0.0, fpair, delx, dely, delz);
+      }
+    }
+  }
+
+  if (vflag_fdotr) virial_fdotr_compute();
+}
+
+/* ----------------------------------------------------------------------
+   allocate all arrays
+------------------------------------------------------------------------- */
+
+void PairLepton::allocate()
+{
+  // atom types are addressed starting from 1, so every per-type table gets ntypes+1 rows and columns
+  const int np1 = atom->ntypes + 1;
+  allocated = 1;
+
+  memory->create(setflag, np1, np1, "pair:setflag");
+  memory->create(cut, np1, np1, "pair:cut");
+  memory->create(cutsq, np1, np1, "pair:cutsq");
+  memory->create(type2expression, np1, np1, "pair:type2expression");
+  for (int i = 1; i < np1; i++)
+    for (int j = i; j < np1; j++) setflag[i][j] = 0;
+}
+
+/* ----------------------------------------------------------------------
+   global settings
+------------------------------------------------------------------------- */
+
+void PairLepton::settings(int narg, char **arg)
+{
+  if (narg != 1) error->all(FLERR, "Illegal pair_style command");
+  // the single argument is the global cutoff; coeff() uses it when no per-pair cutoff is given
+  cut_global = utils::numeric(FLERR, arg[0], false, lmp);
+}
+
+/* ----------------------------------------------------------------------
+   set coeffs for all type pairs
+------------------------------------------------------------------------- */
+
+void PairLepton::coeff(int narg, char **arg)
+{
+  if (narg < 3 || narg > 4) error->all(FLERR, "Incorrect number of args for pair coefficients");
+  if (!allocated) allocate();
+
+  // arguments: type range I, type range J, expression, optional cutoff
+  int ilo, ihi, jlo, jhi;
+  utils::bounds(FLERR, arg[0], 1, atom->ntypes, ilo, ihi, error);
+  utils::bounds(FLERR, arg[1], 1, atom->ntypes, jlo, jhi, error);
+
+  const std::string exp_one = arg[2];
+  const double cut_one = (narg == 4) ? utils::numeric(FLERR, arg[3], false, lmp) : cut_global;
+
+  // dry run: parse, differentiate, and evaluate the expression at r = 1.0
+  // so that an unusable expression is reported here and not during a run
+  try {
+    auto epot = LMP_Lepton::Parser::parse(exp_one).createCompiledExpression();
+    auto force = LMP_Lepton::Parser::parse(exp_one).differentiate("r").createCompiledExpression();
+    epot.getVariableReference("r") = 1.0;
+    force.getVariableReference("r") = 1.0;
+    epot.evaluate();
+    force.evaluate();
+  } catch (std::exception &e) {
+    error->all(FLERR, e.what());
+  }
+
+  // reuse the index of an identical expression string, otherwise append a new entry
+  std::size_t idx = 0;
+  for (; idx < expressions.size(); ++idx) {
+    if (expressions[idx] == exp_one) break;
+  }
+  if (idx == expressions.size()) expressions.push_back(exp_one);
+
+  // only entries with i <= j are assigned here;
+  // count stays zero when the two type ranges select no such pair
+  int count = 0;
+  for (int i = ilo; i <= ihi; i++) {
+    for (int j = MAX(jlo, i); j <= jhi; j++) {
+      cut[i][j] = cut_one;
+      setflag[i][j] = 1;
+      type2expression[i][j] = idx;
+      count++;
+    }
+  }
+
+  if (count == 0) error->all(FLERR, "Incorrect args for pair coefficients");
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairLepton::init_one(int i, int j)
+{
+  if (setflag[i][j] == 0) error->all(FLERR, "All pair coeffs are not set");
+  // coeff() only assigns entries with i <= j; mirror them so that lookups with swapped types work
+  cut[j][i] = cut[i][j];
+  type2expression[j][i] = type2expression[i][j];
+  // the return value is the cutoff of this type pair
+  return cut[i][j];
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes to data file
+------------------------------------------------------------------------- */
+
+void PairLepton::write_data(FILE *fp)
+{
+  for (int i = 1; i <= atom->ntypes; i++)    // one line per type: type, quoted expression, and cutoff of the i-i pair
+    fprintf(fp, "%d '%s' %g\n", i, expressions[type2expression[i][i]].c_str(), cut[i][i]);
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes all pairs to data file
+------------------------------------------------------------------------- */
+
+void PairLepton::write_data_all(FILE *fp)
+{
+  for (int i = 1; i <= atom->ntypes; i++)
+    for (int j = i; j <= atom->ntypes; j++)    // one line per pair with i <= j: both types, quoted expression, cutoff
+      fprintf(fp, "%d %d '%s' %g\n", i, j, expressions[type2expression[i][j]].c_str(), cut[i][j]);
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairLepton::single(int /* i */, int /* j */, int itype, int jtype, double rsq,
+                          double /* factor_coul */, double factor_lj, double &fforce)
+{
+  // returns the pair energy and sets fforce to force/r; both are scaled by factor_lj like in compute()
+  const auto &expr = expressions[type2expression[itype][jtype]];
+  auto epot = LMP_Lepton::Parser::parse(expr).createCompiledExpression();
+  auto force = LMP_Lepton::Parser::parse(expr).differentiate("r").createCompiledExpression();
+
+  const double r = sqrt(rsq);
+  epot.getVariableReference("r") = r;
+  force.getVariableReference("r") = r;
+
+  fforce = -force.evaluate() / r * factor_lj;
+  return factor_lj * epot.evaluate();
+}
diff --git a/src/LEPTON/pair_lepton.h b/src/LEPTON/pair_lepton.h
new file mode 100644
index 0000000000..a4f3b0c084
--- /dev/null
+++ b/src/LEPTON/pair_lepton.h
@@ -0,0 +1,61 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+
+   Pair style lepton evaluates a pairwise potential that is given as
+   a text expression in the distance r as part of the pair_coeff
+   command. The Lepton library is used to parse the expression,
+   to differentiate it with respect to r to obtain the force, and
+   to evaluate both.
+
+   Different type pairs may use different expressions and cutoffs;
+   identical expression strings are stored only once.
+
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+// clang-format off
+PairStyle(lepton,PairLepton);
+// clang-format on
+#else
+
+#ifndef LMP_PAIR_LEPTON_H
+#define LMP_PAIR_LEPTON_H
+
+#include "pair.h"
+
+namespace LAMMPS_NS {
+
+class PairLepton : public Pair {
+ public:
+  PairLepton(class LAMMPS *);
+  ~PairLepton() override;
+  void compute(int, int) override;
+  void settings(int, char **) override;
+  void coeff(int, char **) override;
+  double init_one(int, int) override;
+  void write_data(FILE *) override;
+  void write_data_all(FILE *) override;  
+  double single(int, int, int, int, double, double, double, double &) override;
+
+ protected:
+  std::vector<std::string> expressions;
+  double **cut;
+  int **type2expression;
+  double cut_global;
+
+  virtual void allocate();
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif

From 2cf1793a93996576e84e4668abc9e27b332db112 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Wed, 21 Dec 2022 18:29:11 -0500
Subject: [PATCH 05/79] add unit test for pair style lepton

---
 .../force-styles/tests/mol-pair-lepton.yaml   | 98 +++++++++++++++++++
 1 file changed, 98 insertions(+)
 create mode 100644 unittest/force-styles/tests/mol-pair-lepton.yaml

diff --git a/unittest/force-styles/tests/mol-pair-lepton.yaml b/unittest/force-styles/tests/mol-pair-lepton.yaml
new file mode 100644
index 0000000000..eaed580b02
--- /dev/null
+++ b/unittest/force-styles/tests/mol-pair-lepton.yaml
@@ -0,0 +1,98 @@
+---
+lammps_version: 3 Nov 2022
+tags: generated
+date_generated: Wed Dec 21 18:26:23 2022
+epsilon: 5e-14
+skip_tests:
+prerequisites: ! |
+  atom full
+  pair lepton
+pre_commands: ! |
+  variable write_data_pair index ij
+post_commands: ! ""
+input_file: in.fourmol
+pair_style: lepton 8.0
+pair_coeff: ! |
+  * *    "4.0*eps*((sig/r)^12 - (sig/r)^6);eps=0.015;sig=3.1"
+  1 1    "4.0*eps*((sig/r)^12 - (sig/r)^6);eps=0.02;sig=2.5"
+  1 2    "4.0*eps*((sig/r)^12 - (sig/r)^6);eps=0.01;sig=1.75"
+  1 3    "4.0*eps*((sig/r)^12 - (sig/r)^6);eps=0.02;sig=2.85"
+  1 4*5  "4.0*eps*((sig/r)^12 - (sig/r)^6);eps=0.0173205;sig=2.8"
+  2 2    "4.0*eps*((sig/r)^12 - (sig/r)^6);eps=0.005;sig=1.0"
+  2 3    "4.0*eps*((sig/r)^12 - (sig/r)^6);eps=0.01;sig=2.1"
+  2 4    "4.0*eps*((sig/r)^12 - (sig/r)^6);eps=0.005;sig=0.5"
+  2 5    "4.0*eps*((sig/r)^12 - (sig/r)^6);eps=0.00866025;sig=2.05"
+  3 3    "4.0*eps*((sig/r)^12 - (sig/r)^6);eps=0.02;sig=3.2"
+  3 4    "4.0*eps*((sig/r)^12 - (sig/r)^6);eps=0.0173205;sig=3.15"
+  3 5    "4.0*eps*((sig/r)^12 - (sig/r)^6);eps=0.0173205;sig=3.15"
+extract: ! ""
+natoms: 29
+init_vdwl: 749.2370315373564
+init_coul: 0
+init_stress: ! |2-
+   2.1793853434038242e+03  2.1988955172192768e+03  4.6653977523326257e+03 -7.5956547636050584e+02  2.4751536734032868e+01  6.6652028436400667e+02
+init_forces: ! |2
+    1 -2.3333390280895909e+01  2.6994567613322647e+02  3.3272827850356805e+02
+    2  1.5828554630414899e+02  1.3025008843535872e+02 -1.8629682358935722e+02
+    3 -1.3528903738169063e+02 -3.8704313358319996e+02 -1.4568978437133106e+02
+    4 -7.8711096705893366e+00  2.1350518625373534e+00 -5.5954532185548143e+00
+    5 -2.5176757268228540e+00 -4.0521510681020230e+00  1.2152704057877019e+01
+    6 -8.3190662465252137e+02  9.6394149462625592e+02  1.1509093566509246e+03
+    7  5.8203388932513604e+01 -3.3608997951626793e+02 -1.7179617996573040e+03
+    8  1.4451392284291526e+02 -1.0927475861088995e+02  3.9990593492420442e+02
+    9  7.9156945283097443e+01  8.5273009783986538e+01  3.5032175698445189e+02
+   10  5.3118875219105371e+02 -6.1040990859419412e+02 -1.8355872642619292e+02
+   11 -2.3530157267965524e+00 -5.9077640073819726e+00 -9.6590723955414290e+00
+   12  1.7527155146800425e+01  1.0633119523437513e+01 -7.9254398064483160e+00
+   13  8.0986409579532950e+00 -3.2098088264781550e+00 -1.4896399843793842e-01
+   14 -3.3852721292265158e+00  6.8636181241903627e-01 -8.7507190862499868e+00
+   15 -2.0454999188605297e-01  8.4846165523049901e+00  3.0131615419406712e+00
+   16  4.6326310311812108e+02 -3.3087715736498188e+02 -1.1893024561782554e+03
+   17 -4.5334300923766733e+02  3.1554283255882575e+02  1.2058417793481203e+03
+   18 -1.8862623280672661e-02 -3.3402010907951661e-02  3.1000479299095263e-02
+   19  3.1843079640570047e-04 -2.3918627818763423e-04  1.7427252638513439e-03
+   20 -9.9760831209706009e-04 -1.0209184826753086e-03  3.6910972636601454e-04
+   21 -7.1566125273265186e+01 -8.1615678329920655e+01  2.2589561408339890e+02
+   22 -1.0808835729977497e+02 -2.6193787235943894e+01 -1.6957904943161401e+02
+   23  1.7964455474779490e+02  1.0782097695276948e+02 -5.6305786479140629e+01
+   24  3.6591406576584546e+01 -2.1181587621785579e+02  1.1218301872572377e+02
+   25 -1.4851489147738798e+02  2.3907118122949061e+01 -1.2485634873166291e+02
+   26  1.1191129453598218e+02  1.8789774664223384e+02  1.2650137204319906e+01
+   27  5.1810388677546001e+01 -2.2705458321213797e+02  9.0849111082069669e+01
+   28 -1.8041307121444069e+02  7.7534042932772905e+01 -1.2206956760706598e+02
+   29  1.2861057254925012e+02  1.4952711274394568e+02  3.1216025556267880e+01
+run_vdwl: 719.4432816774653
+run_coul: 0
+run_stress: ! |2-
+   2.1330153957371017e+03  2.1547728168285516e+03  4.3976497417710116e+03 -7.3873328448298525e+02  4.1743821105368760e+01  6.2788012209191072e+02
+run_forces: ! |2
+    1 -2.0299419751359164e+01  2.6686193378823020e+02  3.2358785870694010e+02
+    2  1.5298617928491225e+02  1.2596516341409203e+02 -1.7961292655338619e+02
+    3 -1.3353630652439830e+02 -3.7923748696131315e+02 -1.4291839793625815e+02
+    4 -7.8374717836161762e+00  2.1276610789823409e+00 -5.5845014473820616e+00
+    5 -2.5014258630866721e+00 -4.0250131424704412e+00  1.2103512372025637e+01
+    6 -8.0681462887292457e+02  9.2165637136761688e+02  1.0270795806932783e+03
+    7  5.5780279349903516e+01 -3.1117530951561662e+02 -1.5746991292869018e+03
+    8  1.3452983055535049e+02 -1.0064659350255911e+02  3.8851791558207651e+02
+    9  7.6746213883425980e+01  8.2501469877402130e+01  3.3944351200617888e+02
+   10  5.2128033527695595e+02 -5.9920098848285863e+02 -1.8126029815043339e+02
+   11 -2.3573118090915250e+00 -5.8616944550888368e+00 -9.6049808811326240e+00
+   12  1.7503975847822900e+01  1.0626930310560816e+01 -8.0603160272054986e+00
+   13  8.0530313322973104e+00 -3.1756495170399108e+00 -1.4618315664740528e-01
+   14 -3.3416065168069768e+00  6.6492606336082150e-01 -8.6345131440469700e+00
+   15 -2.2253843262374914e-01  8.5025661635348762e+00  3.0369735873081622e+00
+   16  4.3476311264989465e+02 -3.1171086735551415e+02 -1.1135217194927448e+03
+   17 -4.2469846140777139e+02  2.9615411776780593e+02  1.1302573488400665e+03
+   18 -1.8849981672825911e-02 -3.3371636477421307e-02  3.0986293443778734e-02
+   19  3.0940277774414016e-04 -2.4634536455373038e-04  1.7433360008861014e-03
+   20 -9.8648131277150747e-04 -1.0112587134526948e-03  3.6932948773965417e-04
+   21 -7.0490745283106378e+01 -7.9749153581142139e+01  2.2171003384646431e+02
+   22 -1.0638717908920071e+02 -2.5949502163177975e+01 -1.6645589526812273e+02
+   23  1.7686797710735033e+02  1.0571018898885515e+02 -5.5243337084099373e+01
+   24  3.8206017656281375e+01 -2.1022820141992960e+02  1.1260711266189014e+02
+   25 -1.4918881473530880e+02  2.3762151395876508e+01 -1.2549188139143085e+02
+   26  1.1097059498808308e+02  1.8645503634228518e+02  1.2861559677865248e+01
+   27  5.0800844984832125e+01 -2.2296588090685469e+02  8.8607367716323253e+01
+   28 -1.7694190504288886e+02  7.6029945485182026e+01 -1.1950518150242071e+02
+   29  1.2614894925528141e+02  1.4694250820033548e+02  3.0893386672863034e+01
+...

From 6c5a698be48fa4f8d53b864485ee21273cc338be Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Wed, 21 Dec 2022 19:24:28 -0500
Subject: [PATCH 06/79] try to speed up compute kernel

---
 src/LEPTON/pair_lepton.cpp | 106 ++++++++++++++++++++++---------------
 src/LEPTON/pair_lepton.h   |   5 +-
 2 files changed, 66 insertions(+), 45 deletions(-)

diff --git a/src/LEPTON/pair_lepton.cpp b/src/LEPTON/pair_lepton.cpp
index 7ec12bc988..46377a341a 100644
--- a/src/LEPTON/pair_lepton.cpp
+++ b/src/LEPTON/pair_lepton.cpp
@@ -58,86 +58,104 @@ PairLepton::~PairLepton()
 
 void PairLepton::compute(int eflag, int vflag)
 {
-  int i, j, ii, jj, inum, jnum, itype, jtype;
-  double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
-  double rsq, factor_lj;
-  int *ilist, *jlist, *numneigh, **firstneigh;
-
-  evdwl = 0.0;
   ev_init(eflag, vflag);
+  if (evflag) {
+    if (eflag) {
+      if (force->newton_pair)
+        eval<1, 1, 1>();
+      else
+        eval<1, 1, 0>();
+    } else {
+      if (force->newton_pair)
+        eval<1, 0, 1>();
+      else
+        eval<1, 0, 0>();
+    }
+  } else {
+    if (force->newton_pair)
+      eval<0, 0, 1>();
+    else
+      eval<0, 0, 0>();
+  }
+  if (vflag_fdotr) virial_fdotr_compute();
+}
 
-  double **x = atom->x;
-  double **f = atom->f;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
-  int newton_pair = force->newton_pair;
+/* ---------------------------------------------------------------------- */
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR> void PairLepton::eval()
+{
+  const double *const *const x = atom->x;
+  double *const *const f = atom->f;
+  const int *const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double *const special_lj = force->special_lj;
 
-  inum = list->inum;
-  ilist = list->ilist;
-  numneigh = list->numneigh;
-  firstneigh = list->firstneigh;
+  const int inum = list->inum;
+  const int *const ilist = list->ilist;
+  const int *const numneigh = list->numneigh;
+  const int *const *const firstneigh = list->firstneigh;
 
   std::vector<LMP_Lepton::CompiledExpression> force;
   std::vector<LMP_Lepton::CompiledExpression> epot;
   for (const auto &expr : expressions) {
     force.emplace_back(
         LMP_Lepton::Parser::parse(expr).differentiate("r").createCompiledExpression());
-    if (eflag) epot.emplace_back(LMP_Lepton::Parser::parse(expr).createCompiledExpression());
+    if (EFLAG) epot.emplace_back(LMP_Lepton::Parser::parse(expr).createCompiledExpression());
   }
 
   // loop over neighbors of my atoms
 
-  for (ii = 0; ii < inum; ii++) {
-    i = ilist[ii];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    itype = type[i];
-    jlist = firstneigh[i];
-    jnum = numneigh[i];
+  for (int ii = 0; ii < inum; ii++) {
+    const int i = ilist[ii];
+    const double xtmp = x[i][0];
+    const double ytmp = x[i][1];
+    const double ztmp = x[i][2];
+    const int itype = type[i];
+    const int *jlist = firstneigh[i];
+    const int jnum = numneigh[i];
+    double fxtmp, fytmp, fztmp;
+    fxtmp = fytmp = fztmp = 0.0;
 
-    for (jj = 0; jj < jnum; jj++) {
-      j = jlist[jj];
+    for (int jj = 0; jj < jnum; jj++) {
+      int j = jlist[jj];
       const double factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
+      const int jtype = type[j];
 
-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx * delx + dely * dely + delz * delz;
-      jtype = type[j];
+      const double delx = xtmp - x[j][0];
+      const double dely = ytmp - x[j][1];
+      const double delz = ztmp - x[j][2];
+      const double rsq = delx * delx + dely * dely + delz * delz;
 
       if (rsq < cutsq[itype][jtype]) {
         const double r = sqrt(rsq);
         const int idx = type2expression[itype][jtype];
         double &r_for = force[idx].getVariableReference("r");
         r_for = r;
-        fpair = -force[idx].evaluate() / r;
-        fpair *= factor_lj;
+        const double fpair = -force[idx].evaluate() / r * factor_lj;
 
-        f[i][0] += delx * fpair;
-        f[i][1] += dely * fpair;
-        f[i][2] += delz * fpair;
-        if (newton_pair || j < nlocal) {
+        fxtmp += delx * fpair;
+        fytmp += dely * fpair;
+        fztmp += delz * fpair;
+        if (NEWTON_PAIR || (j < nlocal)) {
           f[j][0] -= delx * fpair;
           f[j][1] -= dely * fpair;
           f[j][2] -= delz * fpair;
         }
 
-        if (eflag) {
+        double evdwl = 0.0;
+        if (EFLAG) {
           double &r_pot = epot[idx].getVariableReference("r");
           r_pot = r;
           evdwl = factor_lj * epot[idx].evaluate();
-        } else
-          evdwl = 0.0;
+        }
 
-        if (evflag) ev_tally(i, j, nlocal, newton_pair, evdwl, 0.0, fpair, delx, dely, delz);
+        if (EVFLAG) ev_tally(i, j, nlocal, NEWTON_PAIR, evdwl, 0.0, fpair, delx, dely, delz);
       }
     }
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
   }
-
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
diff --git a/src/LEPTON/pair_lepton.h b/src/LEPTON/pair_lepton.h
index a4f3b0c084..dd1f40d1d3 100644
--- a/src/LEPTON/pair_lepton.h
+++ b/src/LEPTON/pair_lepton.h
@@ -43,7 +43,7 @@ class PairLepton : public Pair {
   void coeff(int, char **) override;
   double init_one(int, int) override;
   void write_data(FILE *) override;
-  void write_data_all(FILE *) override;  
+  void write_data_all(FILE *) override;
   double single(int, int, int, int, double, double, double, double &) override;
 
  protected:
@@ -52,6 +52,9 @@ class PairLepton : public Pair {
   int **type2expression;
   double cut_global;
 
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR> void eval();
+
   virtual void allocate();
 };
 

From 969ac572562396b9840a1fce1d1df2ccd9564065 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Wed, 21 Dec 2022 21:16:23 -0500
Subject: [PATCH 07/79] make expression string compact and easier to restart
 by removing quotes and whitespace

---
 src/LEPTON/pair_lepton.cpp                    | 12 +++++++----
 .../force-styles/tests/mol-pair-lepton.yaml   | 20 +++++++++----------
 2 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/src/LEPTON/pair_lepton.cpp b/src/LEPTON/pair_lepton.cpp
index 46377a341a..f596a901f0 100644
--- a/src/LEPTON/pair_lepton.cpp
+++ b/src/LEPTON/pair_lepton.cpp
@@ -200,11 +200,15 @@ void PairLepton::coeff(int narg, char **arg)
   utils::bounds(FLERR, arg[0], 1, atom->ntypes, ilo, ihi, error);
   utils::bounds(FLERR, arg[1], 1, atom->ntypes, jlo, jhi, error);
 
-  std::string exp_one = arg[2];
   double cut_one = cut_global;
   if (narg == 4) cut_one = utils::numeric(FLERR, arg[3], false, lmp);
 
-  // check if the expression can be parsed and evaluated as needed without error
+  // remove whitespace and quotes from expression string and then
+  // check if the expression can be parsed and evaluated without error
+  std::string exp_one;
+  for (const auto &c : std::string(arg[2]))
+    if (!isspace(c) && (c != '"') && (c != '\'')) exp_one.push_back(c);
+
   try {
     auto epot = LMP_Lepton::Parser::parse(exp_one).createCompiledExpression();
     auto force = LMP_Lepton::Parser::parse(exp_one).differentiate("r").createCompiledExpression();
@@ -258,7 +262,7 @@ double PairLepton::init_one(int i, int j)
 void PairLepton::write_data(FILE *fp)
 {
   for (int i = 1; i <= atom->ntypes; i++)
-    fprintf(fp, "%d '%s' %g\n", i, expressions[type2expression[i][i]].c_str(), cut[i][i]);
+    fprintf(fp, "%d %s %g\n", i, expressions[type2expression[i][i]].c_str(), cut[i][i]);
 }
 
 /* ----------------------------------------------------------------------
@@ -269,7 +273,7 @@ void PairLepton::write_data_all(FILE *fp)
 {
   for (int i = 1; i <= atom->ntypes; i++)
     for (int j = i; j <= atom->ntypes; j++)
-      fprintf(fp, "%d %d '%s' %g\n", i, j, expressions[type2expression[i][j]].c_str(), cut[i][j]);
+      fprintf(fp, "%d %d %s %g\n", i, j, expressions[type2expression[i][j]].c_str(), cut[i][j]);
 }
 
 /* ---------------------------------------------------------------------- */
diff --git a/unittest/force-styles/tests/mol-pair-lepton.yaml b/unittest/force-styles/tests/mol-pair-lepton.yaml
index eaed580b02..56640e3834 100644
--- a/unittest/force-styles/tests/mol-pair-lepton.yaml
+++ b/unittest/force-styles/tests/mol-pair-lepton.yaml
@@ -14,17 +14,17 @@ input_file: in.fourmol
 pair_style: lepton 8.0
 pair_coeff: ! |
   * *    "4.0*eps*((sig/r)^12 - (sig/r)^6);eps=0.015;sig=3.1"
-  1 1    "4.0*eps*((sig/r)^12 - (sig/r)^6);eps=0.02;sig=2.5"
+  1 1    '4.0*eps*((sig/r)^12 - (sig/r)^6);eps=0.02;sig=2.5'
   1 2    "4.0*eps*((sig/r)^12 - (sig/r)^6);eps=0.01;sig=1.75"
-  1 3    "4.0*eps*((sig/r)^12 - (sig/r)^6);eps=0.02;sig=2.85"
-  1 4*5  "4.0*eps*((sig/r)^12 - (sig/r)^6);eps=0.0173205;sig=2.8"
-  2 2    "4.0*eps*((sig/r)^12 - (sig/r)^6);eps=0.005;sig=1.0"
-  2 3    "4.0*eps*((sig/r)^12 - (sig/r)^6);eps=0.01;sig=2.1"
-  2 4    "4.0*eps*((sig/r)^12 - (sig/r)^6);eps=0.005;sig=0.5"
-  2 5    "4.0*eps*((sig/r)^12 - (sig/r)^6);eps=0.00866025;sig=2.05"
-  3 3    "4.0*eps*((sig/r)^12 - (sig/r)^6);eps=0.02;sig=3.2"
-  3 4    "4.0*eps*((sig/r)^12 - (sig/r)^6);eps=0.0173205;sig=3.15"
-  3 5    "4.0*eps*((sig/r)^12 - (sig/r)^6);eps=0.0173205;sig=3.15"
+  1 3    '4.0*eps*((sig/r)^12-(sig/r)^6);  eps=0.02;sig=2.85'
+  1 4*5  "4.0*eps*((sig/r)^12-(sig/r)^6);eps=0.0173205; 	sig=2.8"
+  2 2    "4.0*eps*((sig/r)^12-(sig/r)^6);eps=0.005;sig=1.0"
+  2 3    "4.0*eps*((sig/r)^12-(sig/r)^6);eps=0.01;sig=2.1"
+  2 4    "4.0*eps*((sig/r)^12-(sig/r)^6);eps=0.005;sig=0.5"
+  2 5    "4.0*eps*((sig/r)^12-(sig/r)^6);eps=0.00866025;sig=2.05"
+  3 3    "4.0*eps*((sig/r)^12-(sig/r)^6);eps=0.02;sig=3.2"
+  3 4    "4.0*eps*((sig/r)^12-(sig/r)^6);eps=0.0173205;sig=3.15"
+  3 5    "4.0*eps*((sig/r)^12-(sig/r)^6);eps=0.0173205;sig=3.15"
 extract: ! ""
 natoms: 29
 init_vdwl: 749.2370315373564

From c64066eb21c0c21d4ac8b230f4347c7626bb59ff Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Wed, 21 Dec 2022 21:16:59 -0500
Subject: [PATCH 08/79] simplify processing of expressions

---
 src/LEPTON/pair_lepton.cpp                    | 27 ++++---
 .../force-styles/tests/mol-pair-lepton.yaml   | 78 +++++++++----------
 2 files changed, 54 insertions(+), 51 deletions(-)

diff --git a/src/LEPTON/pair_lepton.cpp b/src/LEPTON/pair_lepton.cpp
index f596a901f0..11587d637c 100644
--- a/src/LEPTON/pair_lepton.cpp
+++ b/src/LEPTON/pair_lepton.cpp
@@ -24,9 +24,11 @@
 #include "neigh_list.h"
 #include "update.h"
 
-#include <LMP_Lepton.h>
+#include <cctype>
 #include <cstring>
 
+#include "LMP_Lepton.h"
+
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
@@ -93,13 +95,14 @@ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> void PairLepton::eval()
   const int *const ilist = list->ilist;
   const int *const numneigh = list->numneigh;
   const int *const *const firstneigh = list->firstneigh;
+  double fxtmp, fytmp, fztmp;
 
   std::vector<LMP_Lepton::CompiledExpression> force;
   std::vector<LMP_Lepton::CompiledExpression> epot;
   for (const auto &expr : expressions) {
-    force.emplace_back(
-        LMP_Lepton::Parser::parse(expr).differentiate("r").createCompiledExpression());
-    if (EFLAG) epot.emplace_back(LMP_Lepton::Parser::parse(expr).createCompiledExpression());
+    auto parsed = LMP_Lepton::Parser::parse(expr);
+    force.emplace_back(parsed.differentiate("r").createCompiledExpression());
+    if (EFLAG) epot.emplace_back(parsed.createCompiledExpression());
   }
 
   // loop over neighbors of my atoms
@@ -112,7 +115,6 @@ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> void PairLepton::eval()
     const int itype = type[i];
     const int *jlist = firstneigh[i];
     const int jnum = numneigh[i];
-    double fxtmp, fytmp, fztmp;
     fxtmp = fytmp = fztmp = 0.0;
 
     for (int jj = 0; jj < jnum; jj++) {
@@ -210,8 +212,9 @@ void PairLepton::coeff(int narg, char **arg)
     if (!isspace(c) && (c != '"') && (c != '\'')) exp_one.push_back(c);
 
   try {
-    auto epot = LMP_Lepton::Parser::parse(exp_one).createCompiledExpression();
-    auto force = LMP_Lepton::Parser::parse(exp_one).differentiate("r").createCompiledExpression();
+    auto parsed = LMP_Lepton::Parser::parse(exp_one);
+    auto epot = parsed.createCompiledExpression();
+    auto force = parsed.differentiate("r").createCompiledExpression();
     double &r_pot = epot.getVariableReference("r");
     double &r_for = force.getVariableReference("r");
     r_for = r_pot = 1.0;
@@ -281,15 +284,15 @@ void PairLepton::write_data_all(FILE *fp)
 double PairLepton::single(int /* i */, int /* j */, int itype, int jtype, double rsq,
                           double /* factor_coul */, double factor_lj, double &fforce)
 {
-  auto expr = expressions[type2expression[itype][jtype]];
-  auto epot = LMP_Lepton::Parser::parse(expr).createCompiledExpression();
-  auto force = LMP_Lepton::Parser::parse(expr).differentiate("r").createCompiledExpression();
+  auto parsed = LMP_Lepton::Parser::parse(expressions[type2expression[itype][jtype]]);
+  auto epot = parsed.createCompiledExpression();
+  auto force = parsed.differentiate("r").createCompiledExpression();
 
   double r = sqrt(rsq);
   double &r_pot = epot.getVariableReference("r");
   double &r_for = force.getVariableReference("r");
 
   r_pot = r_for = r;
-  fforce = -force.evaluate() / r;
-  return epot.evaluate();
+  fforce = -force.evaluate() / r * factor_lj;
+  return epot.evaluate() * factor_lj;
 }
diff --git a/unittest/force-styles/tests/mol-pair-lepton.yaml b/unittest/force-styles/tests/mol-pair-lepton.yaml
index 56640e3834..d58780b1fe 100644
--- a/unittest/force-styles/tests/mol-pair-lepton.yaml
+++ b/unittest/force-styles/tests/mol-pair-lepton.yaml
@@ -1,7 +1,7 @@
 ---
 lammps_version: 3 Nov 2022
 tags: generated
-date_generated: Wed Dec 21 18:26:23 2022
+date_generated: Wed Dec 21 20:29:34 2022
 epsilon: 5e-14
 skip_tests:
 prerequisites: ! |
@@ -30,65 +30,65 @@ natoms: 29
 init_vdwl: 749.2370315373564
 init_coul: 0
 init_stress: ! |2-
-   2.1793853434038242e+03  2.1988955172192768e+03  4.6653977523326257e+03 -7.5956547636050584e+02  2.4751536734032868e+01  6.6652028436400667e+02
+   2.1793853434038242e+03  2.1988955172192768e+03  4.6653977523326257e+03 -7.5956547636050584e+02  2.4751536734032861e+01  6.6652028436400667e+02
 init_forces: ! |2
-    1 -2.3333390280895909e+01  2.6994567613322647e+02  3.3272827850356805e+02
+    1 -2.3333390280895912e+01  2.6994567613322641e+02  3.3272827850356805e+02
     2  1.5828554630414899e+02  1.3025008843535872e+02 -1.8629682358935722e+02
-    3 -1.3528903738169063e+02 -3.8704313358319996e+02 -1.4568978437133106e+02
-    4 -7.8711096705893366e+00  2.1350518625373534e+00 -5.5954532185548143e+00
-    5 -2.5176757268228540e+00 -4.0521510681020230e+00  1.2152704057877019e+01
-    6 -8.3190662465252137e+02  9.6394149462625592e+02  1.1509093566509246e+03
-    7  5.8203388932513604e+01 -3.3608997951626793e+02 -1.7179617996573040e+03
-    8  1.4451392284291526e+02 -1.0927475861088995e+02  3.9990593492420442e+02
+    3 -1.3528903738169066e+02 -3.8704313358319990e+02 -1.4568978437133106e+02
+    4 -7.8711096705893366e+00  2.1350518625373538e+00 -5.5954532185548134e+00
+    5 -2.5176757268228540e+00 -4.0521510681020239e+00  1.2152704057877019e+01
+    6 -8.3190662465252137e+02  9.6394149462625603e+02  1.1509093566509248e+03
+    7  5.8203388932513583e+01 -3.3608997951626793e+02 -1.7179617996573040e+03
+    8  1.4451392284291535e+02 -1.0927475861088995e+02  3.9990593492420442e+02
     9  7.9156945283097443e+01  8.5273009783986538e+01  3.5032175698445189e+02
-   10  5.3118875219105371e+02 -6.1040990859419412e+02 -1.8355872642619292e+02
-   11 -2.3530157267965524e+00 -5.9077640073819726e+00 -9.6590723955414290e+00
-   12  1.7527155146800425e+01  1.0633119523437513e+01 -7.9254398064483160e+00
-   13  8.0986409579532950e+00 -3.2098088264781550e+00 -1.4896399843793842e-01
-   14 -3.3852721292265158e+00  6.8636181241903627e-01 -8.7507190862499868e+00
-   15 -2.0454999188605297e-01  8.4846165523049901e+00  3.0131615419406712e+00
+   10  5.3118875219105360e+02 -6.1040990859419412e+02 -1.8355872642619292e+02
+   11 -2.3530157267965532e+00 -5.9077640073819717e+00 -9.6590723955414290e+00
+   12  1.7527155146800425e+01  1.0633119523437511e+01 -7.9254398064483169e+00
+   13  8.0986409579532967e+00 -3.2098088264781546e+00 -1.4896399843793839e-01
+   14 -3.3852721292265153e+00  6.8636181241903649e-01 -8.7507190862499868e+00
+   15 -2.0454999188605300e-01  8.4846165523049883e+00  3.0131615419406712e+00
    16  4.6326310311812108e+02 -3.3087715736498188e+02 -1.1893024561782554e+03
-   17 -4.5334300923766733e+02  3.1554283255882575e+02  1.2058417793481203e+03
-   18 -1.8862623280672661e-02 -3.3402010907951661e-02  3.1000479299095263e-02
-   19  3.1843079640570047e-04 -2.3918627818763423e-04  1.7427252638513439e-03
-   20 -9.9760831209706009e-04 -1.0209184826753086e-03  3.6910972636601454e-04
+   17 -4.5334300923766727e+02  3.1554283255882569e+02  1.2058417793481203e+03
+   18 -1.8862623280672661e-02 -3.3402010907951661e-02  3.1000479299095260e-02
+   19  3.1843079640570047e-04 -2.3918627818763426e-04  1.7427252638513439e-03
+   20 -9.9760831209706009e-04 -1.0209184826753090e-03  3.6910972636601454e-04
    21 -7.1566125273265186e+01 -8.1615678329920655e+01  2.2589561408339890e+02
-   22 -1.0808835729977497e+02 -2.6193787235943894e+01 -1.6957904943161401e+02
-   23  1.7964455474779490e+02  1.0782097695276948e+02 -5.6305786479140629e+01
+   22 -1.0808835729977498e+02 -2.6193787235943887e+01 -1.6957904943161401e+02
+   23  1.7964455474779487e+02  1.0782097695276950e+02 -5.6305786479140636e+01
    24  3.6591406576584546e+01 -2.1181587621785579e+02  1.1218301872572377e+02
    25 -1.4851489147738798e+02  2.3907118122949061e+01 -1.2485634873166291e+02
-   26  1.1191129453598218e+02  1.8789774664223384e+02  1.2650137204319906e+01
+   26  1.1191129453598219e+02  1.8789774664223384e+02  1.2650137204319904e+01
    27  5.1810388677546001e+01 -2.2705458321213797e+02  9.0849111082069669e+01
    28 -1.8041307121444069e+02  7.7534042932772905e+01 -1.2206956760706598e+02
    29  1.2861057254925012e+02  1.4952711274394568e+02  3.1216025556267880e+01
 run_vdwl: 719.4432816774653
 run_coul: 0
 run_stress: ! |2-
-   2.1330153957371017e+03  2.1547728168285516e+03  4.3976497417710116e+03 -7.3873328448298525e+02  4.1743821105368760e+01  6.2788012209191072e+02
+   2.1330153957371017e+03  2.1547728168285516e+03  4.3976497417710125e+03 -7.3873328448298525e+02  4.1743821105370067e+01  6.2788012209191027e+02
 run_forces: ! |2
-    1 -2.0299419751359164e+01  2.6686193378823020e+02  3.2358785870694010e+02
+    1 -2.0299419751359164e+01  2.6686193378823020e+02  3.2358785870694015e+02
     2  1.5298617928491225e+02  1.2596516341409203e+02 -1.7961292655338619e+02
-    3 -1.3353630652439830e+02 -3.7923748696131315e+02 -1.4291839793625815e+02
+    3 -1.3353630652439830e+02 -3.7923748696131315e+02 -1.4291839793625817e+02
     4 -7.8374717836161762e+00  2.1276610789823409e+00 -5.5845014473820616e+00
-    5 -2.5014258630866721e+00 -4.0250131424704412e+00  1.2103512372025637e+01
+    5 -2.5014258630866735e+00 -4.0250131424704412e+00  1.2103512372025639e+01
     6 -8.0681462887292457e+02  9.2165637136761688e+02  1.0270795806932783e+03
-    7  5.5780279349903516e+01 -3.1117530951561662e+02 -1.5746991292869018e+03
+    7  5.5780279349903523e+01 -3.1117530951561656e+02 -1.5746991292869018e+03
     8  1.3452983055535049e+02 -1.0064659350255911e+02  3.8851791558207651e+02
-    9  7.6746213883425980e+01  8.2501469877402130e+01  3.3944351200617888e+02
+    9  7.6746213883425980e+01  8.2501469877402130e+01  3.3944351200617882e+02
    10  5.2128033527695595e+02 -5.9920098848285863e+02 -1.8126029815043339e+02
-   11 -2.3573118090915250e+00 -5.8616944550888368e+00 -9.6049808811326240e+00
-   12  1.7503975847822900e+01  1.0626930310560816e+01 -8.0603160272054986e+00
-   13  8.0530313322973104e+00 -3.1756495170399108e+00 -1.4618315664740528e-01
-   14 -3.3416065168069768e+00  6.6492606336082150e-01 -8.6345131440469700e+00
-   15 -2.2253843262374914e-01  8.5025661635348762e+00  3.0369735873081622e+00
+   11 -2.3573118090915246e+00 -5.8616944550888359e+00 -9.6049808811326205e+00
+   12  1.7503975847822900e+01  1.0626930310560814e+01 -8.0603160272054968e+00
+   13  8.0530313322973104e+00 -3.1756495170399117e+00 -1.4618315664740528e-01
+   14 -3.3416065168069773e+00  6.6492606336082150e-01 -8.6345131440469700e+00
+   15 -2.2253843262374914e-01  8.5025661635348779e+00  3.0369735873081622e+00
    16  4.3476311264989465e+02 -3.1171086735551415e+02 -1.1135217194927448e+03
-   17 -4.2469846140777139e+02  2.9615411776780593e+02  1.1302573488400665e+03
-   18 -1.8849981672825911e-02 -3.3371636477421307e-02  3.0986293443778734e-02
-   19  3.0940277774414016e-04 -2.4634536455373038e-04  1.7433360008861014e-03
-   20 -9.8648131277150747e-04 -1.0112587134526948e-03  3.6932948773965417e-04
+   17 -4.2469846140777133e+02  2.9615411776780593e+02  1.1302573488400669e+03
+   18 -1.8849981672825908e-02 -3.3371636477421307e-02  3.0986293443778727e-02
+   19  3.0940277774414027e-04 -2.4634536455373044e-04  1.7433360008861016e-03
+   20 -9.8648131277150790e-04 -1.0112587134526946e-03  3.6932948773965417e-04
    21 -7.0490745283106378e+01 -7.9749153581142139e+01  2.2171003384646431e+02
-   22 -1.0638717908920071e+02 -2.5949502163177975e+01 -1.6645589526812273e+02
-   23  1.7686797710735033e+02  1.0571018898885515e+02 -5.5243337084099373e+01
+   22 -1.0638717908920071e+02 -2.5949502163177968e+01 -1.6645589526812276e+02
+   23  1.7686797710735027e+02  1.0571018898885514e+02 -5.5243337084099387e+01
    24  3.8206017656281375e+01 -2.1022820141992960e+02  1.1260711266189014e+02
    25 -1.4918881473530880e+02  2.3762151395876508e+01 -1.2549188139143085e+02
    26  1.1097059498808308e+02  1.8645503634228518e+02  1.2861559677865248e+01

From 8511aae2110870d9a45ffc58d5bb965e523ef6f0 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Wed, 21 Dec 2022 21:18:20 -0500
Subject: [PATCH 09/79] add OPENMP package version of pair style lepton

---
 src/OPENMP/pair_lepton_omp.cpp | 168 +++++++++++++++++++++++++++++++++
 src/OPENMP/pair_lepton_omp.h   |  48 ++++++++++
 2 files changed, 216 insertions(+)
 create mode 100644 src/OPENMP/pair_lepton_omp.cpp
 create mode 100644 src/OPENMP/pair_lepton_omp.h

diff --git a/src/OPENMP/pair_lepton_omp.cpp b/src/OPENMP/pair_lepton_omp.cpp
new file mode 100644
index 0000000000..35f8e2c89d
--- /dev/null
+++ b/src/OPENMP/pair_lepton_omp.cpp
@@ -0,0 +1,168 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "pair_lepton_omp.h"
+
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neigh_list.h"
+#include "suffix.h"
+
+#include <cmath>
+
+#include "LMP_Lepton.h"
+#include "omp_compat.h"
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairLeptonOMP::PairLeptonOMP(LAMMPS *lmp) : PairLepton(lmp), ThrOMP(lmp, THR_PAIR)
+{
+  suffix_flag |= Suffix::OMP;
+  respa_enable = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairLeptonOMP::compute(int eflag, int vflag)
+{
+  ev_init(eflag, vflag);
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag, vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, nullptr, thr);
+
+    if (evflag) {
+      if (eflag) {
+        if (force->newton_pair)
+          eval<1, 1, 1>(ifrom, ito, thr);
+        else
+          eval<1, 1, 0>(ifrom, ito, thr);
+      } else {
+        if (force->newton_pair)
+          eval<1, 0, 1>(ifrom, ito, thr);
+        else
+          eval<1, 0, 0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_pair)
+        eval<0, 0, 1>(ifrom, ito, thr);
+      else
+        eval<0, 0, 0>(ifrom, ito, thr);
+    }
+
+    thr->timer(Timer::PAIR);
+    reduce_thr(this, eflag, vflag, thr);
+  }    // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairLeptonOMP::eval(int iifrom, int iito, ThrData *const thr)
+{
+  const auto *_noalias const x = (dbl3_t *) atom->x[0];
+  auto *_noalias const f = (dbl3_t *) thr->get_f()[0];
+  const int *_noalias const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double *_noalias const special_lj = force->special_lj;
+
+  const int *const ilist = list->ilist;
+  const int *const numneigh = list->numneigh;
+  const int *const *const firstneigh = list->firstneigh;
+  double fxtmp, fytmp, fztmp;
+
+  std::vector<LMP_Lepton::CompiledExpression> force;
+  std::vector<LMP_Lepton::CompiledExpression> epot;
+  for (const auto &expr : expressions) {
+    auto parsed = LMP_Lepton::Parser::parse(expr);
+    force.emplace_back(parsed.differentiate("r").createCompiledExpression());
+    if (EFLAG) epot.emplace_back(parsed.createCompiledExpression());
+  }
+
+  // loop over neighbors of my atoms
+
+  for (int ii = iifrom; ii < iito; ++ii) {
+    const int i = ilist[ii];
+    const double xtmp = x[i].x;
+    const double ytmp = x[i].y;
+    const double ztmp = x[i].z;
+    const int itype = type[i];
+    const int *jlist = firstneigh[i];
+    const int jnum = numneigh[i];
+    fxtmp = fytmp = fztmp = 0.0;
+
+    for (int jj = 0; jj < jnum; jj++) {
+      int j = jlist[jj];
+      const double factor_lj = special_lj[sbmask(j)];
+      j &= NEIGHMASK;
+      const int jtype = type[j];
+
+      const double delx = xtmp - x[j].x;
+      const double dely = ytmp - x[j].y;
+      const double delz = ztmp - x[j].z;
+      const double rsq = delx * delx + dely * dely + delz * delz;
+
+      if (rsq < cutsq[itype][jtype]) {
+        const double r = sqrt(rsq);
+        const int idx = type2expression[itype][jtype];
+        double &r_for = force[idx].getVariableReference("r");
+        r_for = r;
+        const double fpair = -force[idx].evaluate() / r * factor_lj;
+
+        fxtmp += delx * fpair;
+        fytmp += dely * fpair;
+        fztmp += delz * fpair;
+        if (NEWTON_PAIR || j < nlocal) {
+          f[j].x -= delx * fpair;
+          f[j].y -= dely * fpair;
+          f[j].z -= delz * fpair;
+        }
+
+        double evdwl = 0.0;
+        if (EFLAG) {
+          double &r_pot = epot[idx].getVariableReference("r");
+          r_pot = r;
+          evdwl = factor_lj * epot[idx].evaluate();
+        }
+
+        if (EVFLAG)
+          ev_tally_thr(this, i, j, nlocal, NEWTON_PAIR, evdwl, 0.0, fpair, delx, dely, delz, thr);
+      }
+    }
+    f[i].x += fxtmp;
+    f[i].y += fytmp;
+    f[i].z += fztmp;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairLeptonOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairLepton::memory_usage();
+
+  return bytes;
+}
diff --git a/src/OPENMP/pair_lepton_omp.h b/src/OPENMP/pair_lepton_omp.h
new file mode 100644
index 0000000000..7d658dba1c
--- /dev/null
+++ b/src/OPENMP/pair_lepton_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+// clang-format off
+PairStyle(lepton/omp,PairLeptonOMP);
+// clang-format on
+#else
+
+#ifndef LMP_PAIR_LEPTON_OMP_H
+#define LMP_PAIR_LEPTON_OMP_H
+
+#include "pair_lepton.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairLeptonOMP : public PairLepton, public ThrOMP {
+
+ public:
+  PairLeptonOMP(class LAMMPS *);
+
+  void compute(int, int) override;
+  double memory_usage() override;
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(int ifrom, int ito, ThrData *const thr);
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif

From cf0fb7f5dff01191f590eac84e18519cc93e1027 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Wed, 21 Dec 2022 21:25:41 -0500
Subject: [PATCH 10/79] build system updates for presets and dependencies

---
 cmake/presets/all_off.cmake     |  1 +
 cmake/presets/all_on.cmake      |  1 +
 cmake/presets/mingw-cross.cmake |  1 +
 cmake/presets/most.cmake        |  1 +
 cmake/presets/nolib.cmake       |  1 +
 cmake/presets/windows.cmake     |  1 +
 src/.gitignore                  |  3 ++
 src/Depend.sh                   |  4 ++
 src/LEPTON/Install.sh           | 68 +++++++++++++++++++++++++++++++++
 src/Makefile                    |  5 ++-
 10 files changed, 85 insertions(+), 1 deletion(-)
 create mode 100755 src/LEPTON/Install.sh

diff --git a/cmake/presets/all_off.cmake b/cmake/presets/all_off.cmake
index 9127305528..3d5ee95b3d 100644
--- a/cmake/presets/all_off.cmake
+++ b/cmake/presets/all_off.cmake
@@ -44,6 +44,7 @@ set(ALL_PACKAGES
   KSPACE
   LATBOLTZ
   LATTE
+  LEPTON
   MACHDYN
   MANIFOLD
   MANYBODY
diff --git a/cmake/presets/all_on.cmake b/cmake/presets/all_on.cmake
index 0a001bdc56..474051f6ec 100644
--- a/cmake/presets/all_on.cmake
+++ b/cmake/presets/all_on.cmake
@@ -46,6 +46,7 @@ set(ALL_PACKAGES
   KSPACE
   LATBOLTZ
   LATTE
+  LEPTON
   MACHDYN
   MANIFOLD
   MANYBODY
diff --git a/cmake/presets/mingw-cross.cmake b/cmake/presets/mingw-cross.cmake
index 2d74657394..6c6170acd3 100644
--- a/cmake/presets/mingw-cross.cmake
+++ b/cmake/presets/mingw-cross.cmake
@@ -36,6 +36,7 @@ set(WIN_PACKAGES
   INTERLAYER
   KSPACE
   LATTE
+  LEPTON
   MACHDYN
   MANIFOLD
   MANYBODY
diff --git a/cmake/presets/most.cmake b/cmake/presets/most.cmake
index 5dd9a2b78b..0d63140506 100644
--- a/cmake/presets/most.cmake
+++ b/cmake/presets/most.cmake
@@ -35,6 +35,7 @@ set(ALL_PACKAGES
   GRANULAR
   INTERLAYER
   KSPACE
+  LEPTON
   MACHDYN
   MANYBODY
   MC
diff --git a/cmake/presets/nolib.cmake b/cmake/presets/nolib.cmake
index b6567ad617..b022d4bb55 100644
--- a/cmake/presets/nolib.cmake
+++ b/cmake/presets/nolib.cmake
@@ -13,6 +13,7 @@ set(PACKAGES_WITH_LIB
   KOKKOS
   LATBOLTZ
   LATTE
+  LEPTON
   MACHDYN
   MDI
   MESONT
diff --git a/cmake/presets/windows.cmake b/cmake/presets/windows.cmake
index 21be0efefb..68bc5c4335 100644
--- a/cmake/presets/windows.cmake
+++ b/cmake/presets/windows.cmake
@@ -29,6 +29,7 @@ set(WIN_PACKAGES
   GRANULAR
   INTERLAYER
   KSPACE
+  LEPTON
   MANIFOLD
   MANYBODY
   MC
diff --git a/src/.gitignore b/src/.gitignore
index 19f4d924b0..bc77a647a2 100644
--- a/src/.gitignore
+++ b/src/.gitignore
@@ -54,6 +54,9 @@
 /pair_kim.cpp
 /pair_kim.h
 
+/pair_lepton.cpp
+/pair_lepton.h
+
 /pair_pace.cpp
 /pair_pace.h
 
diff --git a/src/Depend.sh b/src/Depend.sh
index 90dfdbba7a..50b34db899 100755
--- a/src/Depend.sh
+++ b/src/Depend.sh
@@ -118,6 +118,10 @@ if (test $1 = "KSPACE") then
   depend FEP
 fi
 
+if (test $1 = "LEPTON") then
+  depend OPENMP
+fi
+
 if (test $1 = "MANYBODY") then
   depend ATC
   depend GPU
diff --git a/src/LEPTON/Install.sh b/src/LEPTON/Install.sh
new file mode 100755
index 0000000000..937ddf28bc
--- /dev/null
+++ b/src/LEPTON/Install.sh
@@ -0,0 +1,68 @@
+# Install/unInstall package files in LAMMPS
+# mode = 0/1/2 for uninstall/install/update
+
+mode=$1
+
+# action() arguments: arg1 = file, arg2 = file it depends on
+
+# enforce using portable C locale
+LC_ALL=C
+export LC_ALL
+
+action () {  # install, update, or remove package file $1 in ../ ; $2 = optional file it depends on
+  if (test $mode = 0) then  # mode 0 = uninstall
+    rm -f ../$1  # drop the copy in the parent directory
+  elif (! cmp -s $1 ../$1) then  # cmp fails: copy in ../ differs from the package file (or cannot be read)
+    if (test -z "$2" || test -e ../$2) then  # no dependency given, or the dependency exists in ../
+      cp $1 ..
+      if (test $mode = 2) then  # mode 2 = update: report each refreshed file
+        echo "  updating src/$1"
+      fi
+    fi
+  elif (test -n "$2") then  # copy in ../ is already identical and a dependency was given
+    if (test ! -e ../$2) then  # dependency is no longer present in ../
+      rm -f ../$1  # so remove the dependent file as well
+    fi
+  fi
+}
+
+# all package files with no dependencies
+
+for file in *.cpp *.h; do
+  test -f ${file} && action $file
+done
+
+# edit 2 Makefile.package files to include/exclude package info
+
+if (test $1 = 1) then
+
+  if (test -e ../Makefile.package) then
+    sed -i -e 's/[^ \t]*lepton[^ \t]* //g' ../Makefile.package
+    sed -i -e 's|^PKG_INC =[ \t]*|&-I..\/..\/lib\/lepton\/include |' ../Makefile.package
+    sed -i -e 's|^PKG_PATH =[ \t]*|&-L..\/..\/lib\/lepton$(LIBOBJDIR) |' ../Makefile.package
+    sed -i -e 's|^PKG_LIB =[ \t]*|&-llmplepton |' ../Makefile.package
+    sed -i -e 's|^PKG_SYSINC =[ \t]*|&$(lepton_SYSINC) |' ../Makefile.package
+    sed -i -e 's|^PKG_SYSLIB =[ \t]*|&$(lepton_SYSLIB) |' ../Makefile.package
+    sed -i -e 's|^PKG_SYSPATH =[ \t]*|&$(lepton_SYSPATH) |' ../Makefile.package
+  fi
+
+  if (test -e ../Makefile.package.settings) then
+    sed -i -e '/^[ \t]*include.*lepton.*$/d' ../Makefile.package.settings
+    # multiline form needed for BSD sed on Macs
+    sed -i -e '4 i \
+include ..\/..\/lib\/lepton\/Makefile.lammps
+' ../Makefile.package.settings
+
+  fi
+
+elif (test $1 = 0) then
+
+  if (test -e ../Makefile.package) then
+    sed -i -e 's/[^ \t]*lepton[^ \t]* //g' ../Makefile.package
+  fi
+
+  if (test -e ../Makefile.package.settings) then
+    sed -i -e '/^[ \t]*include.*lepton.*$/d' ../Makefile.package.settings
+  fi
+
+fi
diff --git a/src/Makefile b/src/Makefile
index 6f3ece5376..494c64699e 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -88,6 +88,7 @@ PACKAGE = \
 	kspace \
 	latboltz \
 	latte \
+	lepton \
 	machdyn \
 	manifold \
 	manybody \
@@ -212,6 +213,7 @@ PACKLIB = \
 	kim \
 	kokkos \
 	latte \
+	lepton \
 	mpiio \
 	mscg \
 	poems \
@@ -224,6 +226,7 @@ PACKLIB = \
 	h5md \
 	ml-hdnnp \
 	latboltz \
+	lepton \
 	mdi \
 	mesont \
 	molfile \
@@ -240,7 +243,7 @@ PACKLIB = \
 
 PACKSYS = compress latboltz mpiio python
 
-PACKINT = atc awpmd colvars electrode gpu kokkos mesont ml-pod poems
+PACKINT = atc awpmd colvars electrode gpu kokkos lepton mesont ml-pod poems
 
 PACKEXT = \
 	adios \

From e2f9d594840d8c7cec0e354135a797350ac4a8c2 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Wed, 21 Dec 2022 21:34:56 -0500
Subject: [PATCH 11/79] whitespace fixes

---
 .../include/lepton/CompiledExpression.h       |  4 +-
 lib/lepton/include/lepton/CustomFunction.h    |  2 +-
 lib/lepton/include/lepton/ExpressionProgram.h |  2 +-
 lib/lepton/include/lepton/Operation.h         |  2 +-
 lib/lepton/src/CompiledExpression.cpp         | 44 +++++++++----------
 lib/lepton/src/MSVC_erfc.h                    |  4 +-
 lib/lepton/src/ParsedExpression.cpp           |  6 +--
 lib/lepton/src/Parser.cpp                     |  2 +-
 8 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/lib/lepton/include/lepton/CompiledExpression.h b/lib/lepton/include/lepton/CompiledExpression.h
index bf076b0d8b..8ead5ce96f 100644
--- a/lib/lepton/include/lepton/CompiledExpression.h
+++ b/lib/lepton/include/lepton/CompiledExpression.h
@@ -52,9 +52,9 @@ class ParsedExpression;
  * A CompiledExpression is a highly optimized representation of an expression for cases when you want to evaluate
  * it many times as quickly as possible.  You should treat it as an opaque object; none of the internal representation
  * is visible.
- * 
+ *
  * A CompiledExpression is created by calling createCompiledExpression() on a ParsedExpression.
- * 
+ *
  * WARNING: CompiledExpression is NOT thread safe.  You should never access a CompiledExpression from two threads at
  * the same time.
  */
diff --git a/lib/lepton/include/lepton/CustomFunction.h b/lib/lepton/include/lepton/CustomFunction.h
index b8cbee8c96..c4b9932cd8 100644
--- a/lib/lepton/include/lepton/CustomFunction.h
+++ b/lib/lepton/include/lepton/CustomFunction.h
@@ -83,7 +83,7 @@ class LEPTON_EXPORT PlaceholderFunction : public CustomFunction {
 public:
     /**
      * Create a Placeholder function.
-     * 
+     *
      * @param numArgs    the number of arguments the function expects
      */
     PlaceholderFunction(int numArgs) : numArgs(numArgs) {
diff --git a/lib/lepton/include/lepton/ExpressionProgram.h b/lib/lepton/include/lepton/ExpressionProgram.h
index 4fba4051e4..3737cf8082 100644
--- a/lib/lepton/include/lepton/ExpressionProgram.h
+++ b/lib/lepton/include/lepton/ExpressionProgram.h
@@ -67,7 +67,7 @@ public:
     const Operation& getOperation(int index) const;
     /**
      * Change an Operation in this program.
-     * 
+     *
      * The Operation must have been allocated on the heap with the "new" operator.
      * The ExpressionProgram assumes ownership of it and will delete it when it
      * is no longer needed.
diff --git a/lib/lepton/include/lepton/Operation.h b/lib/lepton/include/lepton/Operation.h
index b27b25d3d8..848910f6e4 100644
--- a/lib/lepton/include/lepton/Operation.h
+++ b/lib/lepton/include/lepton/Operation.h
@@ -1017,7 +1017,7 @@ public:
     double evaluate(double* args, const std::map<std::string, double>& variables) const {
         if (isIntPower) {
             // Integer powers can be computed much more quickly by repeated multiplication.
-            
+
             int exponent = intValue;
             double base = args[0];
             if (exponent < 0) {
diff --git a/lib/lepton/src/CompiledExpression.cpp b/lib/lepton/src/CompiledExpression.cpp
index 7805c4674d..67cf196ebb 100644
--- a/lib/lepton/src/CompiledExpression.cpp
+++ b/lib/lepton/src/CompiledExpression.cpp
@@ -84,17 +84,17 @@ CompiledExpression& CompiledExpression::operator=(const CompiledExpression& expr
 void CompiledExpression::compileExpression(const ExpressionTreeNode& node, vector<pair<ExpressionTreeNode, int> >& temps) {
     if (findTempIndex(node, temps) != -1)
         return; // We have already processed a node identical to this one.
-    
+
     // Process the child nodes.
-    
+
     vector<int> args;
     for (int i = 0; i < node.getChildren().size(); i++) {
         compileExpression(node.getChildren()[i], temps);
         args.push_back(findTempIndex(node.getChildren()[i], temps));
     }
-    
+
     // Process this node.
-    
+
     if (node.getOperation().getId() == Operation::VARIABLE) {
         variableIndices[node.getOperation().getName()] = (int) workspace.size();
         variableNames.insert(node.getOperation().getName());
@@ -108,7 +108,7 @@ void CompiledExpression::compileExpression(const ExpressionTreeNode& node, vecto
             arguments[stepIndex].push_back(0); // The value won't actually be used.  We just need something there.
         else {
             // If the arguments are sequential, we can just pass a pointer to the first one.
-            
+
             bool sequential = true;
             for (int i = 1; i < args.size(); i++)
                 if (args[i] != args[i-1]+1)
@@ -148,12 +148,12 @@ void CompiledExpression::setVariableLocations(map<string, double*>& variableLoca
     variablePointers = variableLocations;
 #ifdef LEPTON_USE_JIT
     // Rebuild the JIT code.
-    
+
     if (workspace.size() > 0)
         generateJitCode();
 #else
     // Make a list of all variables we will need to copy before evaluating the expression.
-    
+
     variablesToCopy.clear();
     for (map<string, int>::const_iterator iter = variableIndices.begin(); iter != variableIndices.end(); ++iter) {
         map<string, double*>::iterator pointer = variablePointers.find(iter->first);
@@ -171,7 +171,7 @@ double CompiledExpression::evaluate() const {
         *variablesToCopy[i].first = *variablesToCopy[i].second;
 
     // Loop over the operations and evaluate each one.
-    
+
     for (int step = 0; step < operation.size(); step++) {
         const vector<int>& args = arguments[step];
         if (args.size() == 1)
@@ -202,9 +202,9 @@ void CompiledExpression::generateJitCode() {
         workspaceVar[i] = c.newXmmSd();
     X86Gp argsPointer = c.newIntPtr();
     c.mov(argsPointer, imm_ptr(&argValues[0]));
-    
+
     // Load the arguments into variables.
-    
+
     for (set<string>::const_iterator iter = variableNames.begin(); iter != variableNames.end(); ++iter) {
         map<string, int>::iterator index = variableIndices.find(*iter);
         X86Gp variablePointer = c.newIntPtr();
@@ -213,11 +213,11 @@ void CompiledExpression::generateJitCode() {
     }
 
     // Make a list of all constants that will be needed for evaluation.
-    
+
     vector<int> operationConstantIndex(operation.size(), -1);
     for (int step = 0; step < (int) operation.size(); step++) {
         // Find the constant value (if any) used by this operation.
-        
+
         Operation& op = *operation[step];
         double value;
         if (op.getId() == Operation::CONSTANT)
@@ -234,9 +234,9 @@ void CompiledExpression::generateJitCode() {
             value = 1.0;
         else
             continue;
-        
+
         // See if we already have a variable for this constant.
-        
+
         for (int i = 0; i < (int) constants.size(); i++)
             if (value == constants[i]) {
                 operationConstantIndex[step] = i;
@@ -247,9 +247,9 @@ void CompiledExpression::generateJitCode() {
             constants.push_back(value);
         }
     }
-    
+
     // Load constants into variables.
-    
+
     vector<X86Xmm> constantVar(constants.size());
     if (constants.size() > 0) {
         X86Gp constantsPointer = c.newIntPtr();
@@ -259,21 +259,21 @@ void CompiledExpression::generateJitCode() {
             c.movsd(constantVar[i], x86::ptr(constantsPointer, 8*i, 0));
         }
     }
-    
+
     // Evaluate the operations.
-    
+
     for (int step = 0; step < (int) operation.size(); step++) {
         Operation& op = *operation[step];
         vector<int> args = arguments[step];
         if (args.size() == 1) {
             // One or more sequential arguments.  Fill out the list.
-            
+
             for (int i = 1; i < op.getNumArguments(); i++)
                 args.push_back(args[0]+i);
         }
-        
+
         // Generate instructions to execute this operation.
-        
+
         switch (op.getId()) {
             case Operation::CONSTANT:
                 c.movsd(workspaceVar[target[step]], constantVar[operationConstantIndex[step]]);
@@ -382,7 +382,7 @@ void CompiledExpression::generateJitCode() {
                 break;
             default:
                 // Just invoke evaluateOperation().
-                
+
                 for (int i = 0; i < (int) args.size(); i++)
                     c.movsd(x86::ptr(argsPointer, 8*i, 0), workspaceVar[args[i]]);
                 X86Gp fn = c.newIntPtr();
diff --git a/lib/lepton/src/MSVC_erfc.h b/lib/lepton/src/MSVC_erfc.h
index 2c6b619e89..b1cd87a289 100644
--- a/lib/lepton/src/MSVC_erfc.h
+++ b/lib/lepton/src/MSVC_erfc.h
@@ -3,7 +3,7 @@
 
 /*
  * Up to version 11 (VC++ 2012), Microsoft does not support the
- * standard C99 erf() and erfc() functions so we have to fake them here. 
+ * standard C99 erf() and erfc() functions so we have to fake them here.
  * These were added in version 12 (VC++ 2013), which sets _MSC_VER=1800
  * (VC11 has _MSC_VER=1700).
  */
@@ -15,7 +15,7 @@
 #endif
 
 #if defined(_MSC_VER)
-#if _MSC_VER <= 1700 // 1700 is VC11, 1800 is VC12 
+#if _MSC_VER <= 1700 // 1700 is VC11, 1800 is VC12
 /***************************
 *   erf.cpp
 *   author:  Steve Strand
diff --git a/lib/lepton/src/ParsedExpression.cpp b/lib/lepton/src/ParsedExpression.cpp
index 13ebbf2dc2..c6092a2dc2 100644
--- a/lib/lepton/src/ParsedExpression.cpp
+++ b/lib/lepton/src/ParsedExpression.cpp
@@ -121,10 +121,10 @@ ExpressionTreeNode ParsedExpression::substituteSimplerExpression(const Expressio
     vector<ExpressionTreeNode> children(node.getChildren().size());
     for (int i = 0; i < (int) children.size(); i++)
         children[i] = substituteSimplerExpression(node.getChildren()[i]);
-    
+
     // Collect some info on constant expressions in children
     bool first_const = children.size() > 0 && isConstant(children[0]); // is first child constant?
-    bool second_const = children.size() > 1 && isConstant(children[1]); ; // is second child constant?   
+    bool second_const = children.size() > 1 && isConstant(children[1]); ; // is second child constant?
     double first, second; // if yes, value of first and second child
     if (first_const)
         first = getConstantValue(children[0]);
@@ -174,7 +174,7 @@ ExpressionTreeNode ParsedExpression::substituteSimplerExpression(const Expressio
             break;
         }
         case Operation::MULTIPLY:
-        {   
+        {
             if ((first_const && first == 0.0) || (second_const && second == 0.0)) // Multiply by 0
                 return ExpressionTreeNode(new Operation::Constant(0.0));
             if (first_const && first == 1.0) // Multiply by 1
diff --git a/lib/lepton/src/Parser.cpp b/lib/lepton/src/Parser.cpp
index c0b4c185e8..e7d87ba289 100644
--- a/lib/lepton/src/Parser.cpp
+++ b/lib/lepton/src/Parser.cpp
@@ -66,7 +66,7 @@ private:
 
 string Parser::trim(const string& expression) {
     // Remove leading and trailing spaces.
-    
+
     int start, end;
     for (start = 0; start < (int) expression.size() && isspace(expression[start]); start++)
         ;

From 4293771ae8cb5e406f67007ea4e2ad378a7c8718 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Wed, 21 Dec 2022 21:53:25 -0500
Subject: [PATCH 12/79] silence compiler warnings

---
 lib/lepton/include/lepton/CustomFunction.h |  4 +-
 lib/lepton/include/lepton/Operation.h      | 78 ++++++++++----------
 lib/lepton/src/CompiledExpression.cpp      | 10 +--
 lib/lepton/src/ExpressionTreeNode.cpp      |  8 +-
 lib/lepton/src/Operation.cpp               | 86 +++++++++++-----------
 lib/lepton/src/ParsedExpression.cpp        |  2 +
 lib/lepton/src/Parser.cpp                  | 10 +--
 7 files changed, 100 insertions(+), 98 deletions(-)

diff --git a/lib/lepton/include/lepton/CustomFunction.h b/lib/lepton/include/lepton/CustomFunction.h
index c4b9932cd8..4b8121a87f 100644
--- a/lib/lepton/include/lepton/CustomFunction.h
+++ b/lib/lepton/include/lepton/CustomFunction.h
@@ -91,10 +91,10 @@ public:
     int getNumArguments() const {
         return numArgs;
     }
-    double evaluate(const double* arguments) const {
+    double evaluate(const double* ) const {
         return 0.0;
     }
-    double evaluateDerivative(const double* arguments, const int* derivOrder) const {
+    double evaluateDerivative(const double* , const int* ) const {
         return 0.0;
     }
     CustomFunction* clone() const {
diff --git a/lib/lepton/include/lepton/Operation.h b/lib/lepton/include/lepton/Operation.h
index 848910f6e4..bde9cfe37f 100644
--- a/lib/lepton/include/lepton/Operation.h
+++ b/lib/lepton/include/lepton/Operation.h
@@ -177,7 +177,7 @@ public:
     Operation* clone() const {
         return new Constant(value);
     }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+    double evaluate(double* , const std::map<std::string, double>& ) const {
         return value;
     }
     ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
@@ -208,7 +208,7 @@ public:
     Operation* clone() const {
         return new Variable(name);
     }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+    double evaluate(double* , const std::map<std::string, double>& variables) const {
         std::map<std::string, double>::const_iterator iter = variables.find(name);
         if (iter == variables.end())
             throw Exception("No value specified for variable "+name);
@@ -253,7 +253,7 @@ public:
         clone->derivOrder = derivOrder;
         return clone;
     }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+    double evaluate(double* args, const std::map<std::string, double>& ) const {
         if (isDerivative)
             return function->evaluateDerivative(args, &derivOrder[0]);
         return function->evaluate(args);
@@ -289,7 +289,7 @@ public:
     Operation* clone() const {
         return new Add();
     }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+    double evaluate(double* args, const std::map<std::string, double>& ) const {
         return args[0]+args[1];
     }
     ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
@@ -317,7 +317,7 @@ public:
     Operation* clone() const {
         return new Subtract();
     }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+    double evaluate(double* args, const std::map<std::string, double>& ) const {
         return args[0]-args[1];
     }
     ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
@@ -342,7 +342,7 @@ public:
     Operation* clone() const {
         return new Multiply();
     }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+    double evaluate(double* args, const std::map<std::string, double>& ) const {
         return args[0]*args[1];
     }
     ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
@@ -370,7 +370,7 @@ public:
     Operation* clone() const {
         return new Divide();
     }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+    double evaluate(double* args, const std::map<std::string, double>& ) const {
         return args[0]/args[1];
     }
     ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
@@ -395,7 +395,7 @@ public:
     Operation* clone() const {
         return new Power();
     }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+    double evaluate(double* args, const std::map<std::string, double>& ) const {
         return std::pow(args[0], args[1]);
     }
     ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
@@ -420,7 +420,7 @@ public:
     Operation* clone() const {
         return new Negate();
     }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+    double evaluate(double* args, const std::map<std::string, double>& ) const {
         return -args[0];
     }
     ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
@@ -442,7 +442,7 @@ public:
     Operation* clone() const {
         return new Sqrt();
     }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+    double evaluate(double* args, const std::map<std::string, double>& ) const {
         return std::sqrt(args[0]);
     }
     ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
@@ -464,7 +464,7 @@ public:
     Operation* clone() const {
         return new Exp();
     }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+    double evaluate(double* args, const std::map<std::string, double>& ) const {
         return std::exp(args[0]);
     }
     ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
@@ -486,7 +486,7 @@ public:
     Operation* clone() const {
         return new Log();
     }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+    double evaluate(double* args, const std::map<std::string, double>& ) const {
         return std::log(args[0]);
     }
     ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
@@ -508,7 +508,7 @@ public:
     Operation* clone() const {
         return new Sin();
     }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+    double evaluate(double* args, const std::map<std::string, double>& ) const {
         return std::sin(args[0]);
     }
     ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
@@ -530,7 +530,7 @@ public:
     Operation* clone() const {
         return new Cos();
     }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+    double evaluate(double* args, const std::map<std::string, double>& ) const {
         return std::cos(args[0]);
     }
     ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
@@ -552,7 +552,7 @@ public:
     Operation* clone() const {
         return new Sec();
     }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+    double evaluate(double* args, const std::map<std::string, double>& ) const {
         return 1.0/std::cos(args[0]);
     }
     ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
@@ -574,7 +574,7 @@ public:
     Operation* clone() const {
         return new Csc();
     }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+    double evaluate(double* args, const std::map<std::string, double>& ) const {
         return 1.0/std::sin(args[0]);
     }
     ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
@@ -596,7 +596,7 @@ public:
     Operation* clone() const {
         return new Tan();
     }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+    double evaluate(double* args, const std::map<std::string, double>& ) const {
         return std::tan(args[0]);
     }
     ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
@@ -618,7 +618,7 @@ public:
     Operation* clone() const {
         return new Cot();
     }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+    double evaluate(double* args, const std::map<std::string, double>& ) const {
         return 1.0/std::tan(args[0]);
     }
     ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
@@ -640,7 +640,7 @@ public:
     Operation* clone() const {
         return new Asin();
     }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+    double evaluate(double* args, const std::map<std::string, double>& ) const {
         return std::asin(args[0]);
     }
     ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
@@ -662,7 +662,7 @@ public:
     Operation* clone() const {
         return new Acos();
     }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+    double evaluate(double* args, const std::map<std::string, double>& ) const {
         return std::acos(args[0]);
     }
     ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
@@ -684,7 +684,7 @@ public:
     Operation* clone() const {
         return new Atan();
     }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+    double evaluate(double* args, const std::map<std::string, double>& ) const {
         return std::atan(args[0]);
     }
     ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
@@ -706,7 +706,7 @@ public:
     Operation* clone() const {
         return new Atan2();
     }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+    double evaluate(double* args, const std::map<std::string, double>& ) const {
         return std::atan2(args[0], args[1]);
     }
     ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
@@ -728,7 +728,7 @@ public:
     Operation* clone() const {
         return new Sinh();
     }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+    double evaluate(double* args, const std::map<std::string, double>& ) const {
         return std::sinh(args[0]);
     }
     ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
@@ -750,7 +750,7 @@ public:
     Operation* clone() const {
         return new Cosh();
     }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+    double evaluate(double* args, const std::map<std::string, double>& ) const {
         return std::cosh(args[0]);
     }
     ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
@@ -772,7 +772,7 @@ public:
     Operation* clone() const {
         return new Tanh();
     }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+    double evaluate(double* args, const std::map<std::string, double>& ) const {
         return std::tanh(args[0]);
     }
     ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
@@ -834,7 +834,7 @@ public:
     Operation* clone() const {
         return new Step();
     }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+    double evaluate(double* args, const std::map<std::string, double>& ) const {
         return (args[0] >= 0.0 ? 1.0 : 0.0);
     }
     ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
@@ -856,7 +856,7 @@ public:
     Operation* clone() const {
         return new Delta();
     }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+    double evaluate(double* args, const std::map<std::string, double>& ) const {
         return (args[0] == 0.0 ? 1.0 : 0.0);
     }
     ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
@@ -878,7 +878,7 @@ public:
     Operation* clone() const {
         return new Square();
     }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+    double evaluate(double* args, const std::map<std::string, double>& ) const {
         return args[0]*args[0];
     }
     ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
@@ -900,7 +900,7 @@ public:
     Operation* clone() const {
         return new Cube();
     }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+    double evaluate(double* args, const std::map<std::string, double>& ) const {
         return args[0]*args[0]*args[0];
     }
     ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
@@ -922,7 +922,7 @@ public:
     Operation* clone() const {
         return new Reciprocal();
     }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+    double evaluate(double* args, const std::map<std::string, double>& ) const {
         return 1.0/args[0];
     }
     ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
@@ -946,7 +946,7 @@ public:
     Operation* clone() const {
         return new AddConstant(value);
     }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+    double evaluate(double* args, const std::map<std::string, double>& ) const {
         return args[0]+value;
     }
     ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
@@ -979,7 +979,7 @@ public:
     Operation* clone() const {
         return new MultiplyConstant(value);
     }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+    double evaluate(double* args, const std::map<std::string, double>& ) const {
         return args[0]*value;
     }
     ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
@@ -1014,7 +1014,7 @@ public:
     Operation* clone() const {
         return new PowerConstant(value);
     }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+    double evaluate(double* args, const std::map<std::string, double>& ) const {
         if (isIntPower) {
             // Integer powers can be computed much more quickly by repeated multiplication.
 
@@ -1069,7 +1069,7 @@ public:
     Operation* clone() const {
         return new Min();
     }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+    double evaluate(double* args, const std::map<std::string, double>& ) const {
         // parens around (std::min) are workaround for horrible microsoft max/min macro trouble
         return (std::min)(args[0], args[1]);
     }
@@ -1092,7 +1092,7 @@ public:
     Operation* clone() const {
         return new Max();
     }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+    double evaluate(double* args, const std::map<std::string, double>& ) const {
         // parens around (std::min) are workaround for horrible microsoft max/min macro trouble
         return (std::max)(args[0], args[1]);
     }
@@ -1115,7 +1115,7 @@ public:
     Operation* clone() const {
         return new Abs();
     }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+    double evaluate(double* args, const std::map<std::string, double>& ) const {
         return std::abs(args[0]);
     }
     ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
@@ -1138,7 +1138,7 @@ public:
     Operation* clone() const {
         return new Floor();
     }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+    double evaluate(double* args, const std::map<std::string, double>& ) const {
         return std::floor(args[0]);
     }
     ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
@@ -1160,7 +1160,7 @@ public:
     Operation* clone() const {
         return new Ceil();
     }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+    double evaluate(double* args, const std::map<std::string, double>& ) const {
         return std::ceil(args[0]);
     }
     ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
@@ -1182,7 +1182,7 @@ public:
     Operation* clone() const {
         return new Select();
     }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+    double evaluate(double* args, const std::map<std::string, double>&) const {
         return (args[0] != 0.0 ? args[1] : args[2]);
     }
     ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
diff --git a/lib/lepton/src/CompiledExpression.cpp b/lib/lepton/src/CompiledExpression.cpp
index 67cf196ebb..c6c1543ce4 100644
--- a/lib/lepton/src/CompiledExpression.cpp
+++ b/lib/lepton/src/CompiledExpression.cpp
@@ -88,7 +88,7 @@ void CompiledExpression::compileExpression(const ExpressionTreeNode& node, vecto
     // Process the child nodes.
 
     vector<int> args;
-    for (int i = 0; i < node.getChildren().size(); i++) {
+    for (int i = 0; i < (int)node.getChildren().size(); i++) {
         compileExpression(node.getChildren()[i], temps);
         args.push_back(findTempIndex(node.getChildren()[i], temps));
     }
@@ -110,7 +110,7 @@ void CompiledExpression::compileExpression(const ExpressionTreeNode& node, vecto
             // If the arguments are sequential, we can just pass a pointer to the first one.
 
             bool sequential = true;
-            for (int i = 1; i < args.size(); i++)
+            for (int i = 1; i < (int)args.size(); i++)
                 if (args[i] != args[i-1]+1)
                     sequential = false;
             if (sequential)
@@ -167,17 +167,17 @@ double CompiledExpression::evaluate() const {
 #ifdef LEPTON_USE_JIT
     return jitCode();
 #else
-    for (int i = 0; i < variablesToCopy.size(); i++)
+    for (int i = 0; i < (int)variablesToCopy.size(); i++)
         *variablesToCopy[i].first = *variablesToCopy[i].second;
 
     // Loop over the operations and evaluate each one.
 
-    for (int step = 0; step < operation.size(); step++) {
+    for (int step = 0; step < (int)operation.size(); step++) {
         const vector<int>& args = arguments[step];
         if (args.size() == 1)
             workspace[target[step]] = operation[step]->evaluate(&workspace[args[0]], dummyVariables);
         else {
-            for (int i = 0; i < args.size(); i++)
+            for (int i = 0; i < (int)args.size(); i++)
                 argValues[i] = workspace[args[i]];
             workspace[target[step]] = operation[step]->evaluate(&argValues[0], dummyVariables);
         }
diff --git a/lib/lepton/src/ExpressionTreeNode.cpp b/lib/lepton/src/ExpressionTreeNode.cpp
index e4fbbc6f50..90020aa373 100644
--- a/lib/lepton/src/ExpressionTreeNode.cpp
+++ b/lib/lepton/src/ExpressionTreeNode.cpp
@@ -37,25 +37,25 @@ using namespace LMP_Lepton;
 using namespace std;
 
 ExpressionTreeNode::ExpressionTreeNode(Operation* operation, const vector<ExpressionTreeNode>& children) : operation(operation), children(children) {
-    if (operation->getNumArguments() != children.size())
+    if (operation->getNumArguments() != (int)children.size())
         throw Exception("wrong number of arguments to function: "+operation->getName());
 }
 
 ExpressionTreeNode::ExpressionTreeNode(Operation* operation, const ExpressionTreeNode& child1, const ExpressionTreeNode& child2) : operation(operation) {
     children.push_back(child1);
     children.push_back(child2);
-    if (operation->getNumArguments() != children.size())
+    if (operation->getNumArguments() != (int)children.size())
         throw Exception("wrong number of arguments to function: "+operation->getName());
 }
 
 ExpressionTreeNode::ExpressionTreeNode(Operation* operation, const ExpressionTreeNode& child) : operation(operation) {
     children.push_back(child);
-    if (operation->getNumArguments() != children.size())
+    if (operation->getNumArguments() != (int)children.size())
         throw Exception("wrong number of arguments to function: "+operation->getName());
 }
 
 ExpressionTreeNode::ExpressionTreeNode(Operation* operation) : operation(operation) {
-    if (operation->getNumArguments() != children.size())
+    if (operation->getNumArguments() != (int)children.size())
         throw Exception("wrong number of arguments to function: "+operation->getName());
 }
 
diff --git a/lib/lepton/src/Operation.cpp b/lib/lepton/src/Operation.cpp
index 512f5db321..bec5686a74 100644
--- a/lib/lepton/src/Operation.cpp
+++ b/lib/lepton/src/Operation.cpp
@@ -37,25 +37,25 @@
 using namespace LMP_Lepton;
 using namespace std;
 
-double Operation::Erf::evaluate(double* args, const map<string, double>& variables) const {
+double Operation::Erf::evaluate(double* args, const map<string, double>& ) const {
     return erf(args[0]);
 }
 
-double Operation::Erfc::evaluate(double* args, const map<string, double>& variables) const {
+double Operation::Erfc::evaluate(double* args, const map<string, double>& ) const {
     return erfc(args[0]);
 }
 
-ExpressionTreeNode Operation::Constant::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+ExpressionTreeNode Operation::Constant::differentiate(const std::vector<ExpressionTreeNode>& , const std::vector<ExpressionTreeNode>& , const std::string& ) const {
     return ExpressionTreeNode(new Operation::Constant(0.0));
 }
 
-ExpressionTreeNode Operation::Variable::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+ExpressionTreeNode Operation::Variable::differentiate(const std::vector<ExpressionTreeNode>& , const std::vector<ExpressionTreeNode>& , const std::string& variable) const {
     if (variable == name)
         return ExpressionTreeNode(new Operation::Constant(1.0));
     return ExpressionTreeNode(new Operation::Constant(0.0));
 }
 
-ExpressionTreeNode Operation::Custom::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+ExpressionTreeNode Operation::Custom::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
     if (function->getNumArguments() == 0)
         return ExpressionTreeNode(new Operation::Constant(0.0));
     ExpressionTreeNode result = ExpressionTreeNode(new Operation::Multiply(), ExpressionTreeNode(new Operation::Custom(*this, 0), children), childDerivs[0]);
@@ -67,21 +67,21 @@ ExpressionTreeNode Operation::Custom::differentiate(const std::vector<Expression
     return result;
 }
 
-ExpressionTreeNode Operation::Add::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+ExpressionTreeNode Operation::Add::differentiate(const std::vector<ExpressionTreeNode>& , const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
     return ExpressionTreeNode(new Operation::Add(), childDerivs[0], childDerivs[1]);
 }
 
-ExpressionTreeNode Operation::Subtract::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+ExpressionTreeNode Operation::Subtract::differentiate(const std::vector<ExpressionTreeNode>& , const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
     return ExpressionTreeNode(new Operation::Subtract(), childDerivs[0], childDerivs[1]);
 }
 
-ExpressionTreeNode Operation::Multiply::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+ExpressionTreeNode Operation::Multiply::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
     return ExpressionTreeNode(new Operation::Add(),
                               ExpressionTreeNode(new Operation::Multiply(), children[0], childDerivs[1]),
                               ExpressionTreeNode(new Operation::Multiply(), children[1], childDerivs[0]));
 }
 
-ExpressionTreeNode Operation::Divide::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+ExpressionTreeNode Operation::Divide::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
     return ExpressionTreeNode(new Operation::Divide(),
                               ExpressionTreeNode(new Operation::Subtract(),
                                                  ExpressionTreeNode(new Operation::Multiply(), children[1], childDerivs[0]),
@@ -89,7 +89,7 @@ ExpressionTreeNode Operation::Divide::differentiate(const std::vector<Expression
                               ExpressionTreeNode(new Operation::Square(), children[1]));
 }
 
-ExpressionTreeNode Operation::Power::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+ExpressionTreeNode Operation::Power::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
     return ExpressionTreeNode(new Operation::Add(),
                               ExpressionTreeNode(new Operation::Multiply(),
                                                  ExpressionTreeNode(new Operation::Multiply(),
@@ -104,11 +104,11 @@ ExpressionTreeNode Operation::Power::differentiate(const std::vector<ExpressionT
                                                  childDerivs[1]));
 }
 
-ExpressionTreeNode Operation::Negate::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+ExpressionTreeNode Operation::Negate::differentiate(const std::vector<ExpressionTreeNode>& , const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
     return ExpressionTreeNode(new Operation::Negate(), childDerivs[0]);
 }
 
-ExpressionTreeNode Operation::Sqrt::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+ExpressionTreeNode Operation::Sqrt::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::MultiplyConstant(0.5),
                                                  ExpressionTreeNode(new Operation::Reciprocal(),
@@ -116,32 +116,32 @@ ExpressionTreeNode Operation::Sqrt::differentiate(const std::vector<ExpressionTr
                               childDerivs[0]);
 }
 
-ExpressionTreeNode Operation::Exp::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+ExpressionTreeNode Operation::Exp::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::Exp(), children[0]),
                               childDerivs[0]);
 }
 
-ExpressionTreeNode Operation::Log::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+ExpressionTreeNode Operation::Log::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::Reciprocal(), children[0]),
                               childDerivs[0]);
 }
 
-ExpressionTreeNode Operation::Sin::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+ExpressionTreeNode Operation::Sin::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::Cos(), children[0]),
                               childDerivs[0]);
 }
 
-ExpressionTreeNode Operation::Cos::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+ExpressionTreeNode Operation::Cos::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::Negate(),
                                                  ExpressionTreeNode(new Operation::Sin(), children[0])),
                               childDerivs[0]);
 }
 
-ExpressionTreeNode Operation::Sec::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+ExpressionTreeNode Operation::Sec::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::Multiply(),
                                                  ExpressionTreeNode(new Operation::Sec(), children[0]),
@@ -149,7 +149,7 @@ ExpressionTreeNode Operation::Sec::differentiate(const std::vector<ExpressionTre
                               childDerivs[0]);
 }
 
-ExpressionTreeNode Operation::Csc::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+ExpressionTreeNode Operation::Csc::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::Negate(),
                                                  ExpressionTreeNode(new Operation::Multiply(),
@@ -158,14 +158,14 @@ ExpressionTreeNode Operation::Csc::differentiate(const std::vector<ExpressionTre
                               childDerivs[0]);
 }
 
-ExpressionTreeNode Operation::Tan::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+ExpressionTreeNode Operation::Tan::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::Square(),
                                                  ExpressionTreeNode(new Operation::Sec(), children[0])),
                               childDerivs[0]);
 }
 
-ExpressionTreeNode Operation::Cot::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+ExpressionTreeNode Operation::Cot::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::Negate(),
                                                  ExpressionTreeNode(new Operation::Square(),
@@ -173,7 +173,7 @@ ExpressionTreeNode Operation::Cot::differentiate(const std::vector<ExpressionTre
                               childDerivs[0]);
 }
 
-ExpressionTreeNode Operation::Asin::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+ExpressionTreeNode Operation::Asin::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::Reciprocal(),
                                                  ExpressionTreeNode(new Operation::Sqrt(),
@@ -183,7 +183,7 @@ ExpressionTreeNode Operation::Asin::differentiate(const std::vector<ExpressionTr
                               childDerivs[0]);
 }
 
-ExpressionTreeNode Operation::Acos::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+ExpressionTreeNode Operation::Acos::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::Negate(),
                                                  ExpressionTreeNode(new Operation::Reciprocal(),
@@ -194,7 +194,7 @@ ExpressionTreeNode Operation::Acos::differentiate(const std::vector<ExpressionTr
                               childDerivs[0]);
 }
 
-ExpressionTreeNode Operation::Atan::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+ExpressionTreeNode Operation::Atan::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::Reciprocal(),
                                                  ExpressionTreeNode(new Operation::AddConstant(1.0),
@@ -202,7 +202,7 @@ ExpressionTreeNode Operation::Atan::differentiate(const std::vector<ExpressionTr
                               childDerivs[0]);
 }
 
-ExpressionTreeNode Operation::Atan2::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+ExpressionTreeNode Operation::Atan2::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
     return ExpressionTreeNode(new Operation::Divide(),
                               ExpressionTreeNode(new Operation::Subtract(),
                                                  ExpressionTreeNode(new Operation::Multiply(), children[1], childDerivs[0]),
@@ -212,21 +212,21 @@ ExpressionTreeNode Operation::Atan2::differentiate(const std::vector<ExpressionT
                                                  ExpressionTreeNode(new Operation::Square(), children[1])));
 }
 
-ExpressionTreeNode Operation::Sinh::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+ExpressionTreeNode Operation::Sinh::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::Cosh(),
                                                  children[0]),
                               childDerivs[0]);
 }
 
-ExpressionTreeNode Operation::Cosh::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+ExpressionTreeNode Operation::Cosh::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::Sinh(),
                                                  children[0]),
                               childDerivs[0]);
 }
 
-ExpressionTreeNode Operation::Tanh::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+ExpressionTreeNode Operation::Tanh::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::Subtract(),
                                                  ExpressionTreeNode(new Operation::Constant(1.0)),
@@ -235,7 +235,7 @@ ExpressionTreeNode Operation::Tanh::differentiate(const std::vector<ExpressionTr
                               childDerivs[0]);
 }
 
-ExpressionTreeNode Operation::Erf::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+ExpressionTreeNode Operation::Erf::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::Multiply(),
                                                  ExpressionTreeNode(new Operation::Constant(2.0/sqrt(M_PI))),
@@ -245,7 +245,7 @@ ExpressionTreeNode Operation::Erf::differentiate(const std::vector<ExpressionTre
                               childDerivs[0]);
 }
 
-ExpressionTreeNode Operation::Erfc::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+ExpressionTreeNode Operation::Erfc::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::Multiply(),
                                                  ExpressionTreeNode(new Operation::Constant(-2.0/sqrt(M_PI))),
@@ -255,29 +255,29 @@ ExpressionTreeNode Operation::Erfc::differentiate(const std::vector<ExpressionTr
                               childDerivs[0]);
 }
 
-ExpressionTreeNode Operation::Step::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+ExpressionTreeNode Operation::Step::differentiate(const std::vector<ExpressionTreeNode>& , const std::vector<ExpressionTreeNode>& , const std::string& ) const {
     return ExpressionTreeNode(new Operation::Constant(0.0));
 }
 
-ExpressionTreeNode Operation::Delta::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+ExpressionTreeNode Operation::Delta::differentiate(const std::vector<ExpressionTreeNode>& , const std::vector<ExpressionTreeNode>& , const std::string& ) const {
     return ExpressionTreeNode(new Operation::Constant(0.0));
 }
 
-ExpressionTreeNode Operation::Square::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+ExpressionTreeNode Operation::Square::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::MultiplyConstant(2.0),
                                                  children[0]),
                               childDerivs[0]);
 }
 
-ExpressionTreeNode Operation::Cube::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+ExpressionTreeNode Operation::Cube::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::MultiplyConstant(3.0),
                                                  ExpressionTreeNode(new Operation::Square(), children[0])),
                               childDerivs[0]);
 }
 
-ExpressionTreeNode Operation::Reciprocal::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+ExpressionTreeNode Operation::Reciprocal::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::Negate(),
                                                  ExpressionTreeNode(new Operation::Reciprocal(),
@@ -285,16 +285,16 @@ ExpressionTreeNode Operation::Reciprocal::differentiate(const std::vector<Expres
                               childDerivs[0]);
 }
 
-ExpressionTreeNode Operation::AddConstant::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+ExpressionTreeNode Operation::AddConstant::differentiate(const std::vector<ExpressionTreeNode>& , const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
     return childDerivs[0];
 }
 
-ExpressionTreeNode Operation::MultiplyConstant::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+ExpressionTreeNode Operation::MultiplyConstant::differentiate(const std::vector<ExpressionTreeNode>& , const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
     return ExpressionTreeNode(new Operation::MultiplyConstant(value),
                               childDerivs[0]);
 }
 
-ExpressionTreeNode Operation::PowerConstant::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+ExpressionTreeNode Operation::PowerConstant::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::MultiplyConstant(value),
                                                  ExpressionTreeNode(new Operation::PowerConstant(value-1),
@@ -302,7 +302,7 @@ ExpressionTreeNode Operation::PowerConstant::differentiate(const std::vector<Exp
                               childDerivs[0]);
 }
 
-ExpressionTreeNode Operation::Min::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+ExpressionTreeNode Operation::Min::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
     ExpressionTreeNode step(new Operation::Step(),
                             ExpressionTreeNode(new Operation::Subtract(), children[0], children[1]));
     return ExpressionTreeNode(new Operation::Subtract(),
@@ -311,7 +311,7 @@ ExpressionTreeNode Operation::Min::differentiate(const std::vector<ExpressionTre
                                                  ExpressionTreeNode(new Operation::AddConstant(-1), step)));
 }
 
-ExpressionTreeNode Operation::Max::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+ExpressionTreeNode Operation::Max::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
     ExpressionTreeNode step(new Operation::Step(),
                             ExpressionTreeNode(new Operation::Subtract(), children[0], children[1]));
     return ExpressionTreeNode(new Operation::Subtract(),
@@ -320,7 +320,7 @@ ExpressionTreeNode Operation::Max::differentiate(const std::vector<ExpressionTre
                                                  ExpressionTreeNode(new Operation::AddConstant(-1), step)));
 }
 
-ExpressionTreeNode Operation::Abs::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+ExpressionTreeNode Operation::Abs::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
     ExpressionTreeNode step(new Operation::Step(), children[0]);
     return ExpressionTreeNode(new Operation::Multiply(),
                               childDerivs[0],
@@ -328,15 +328,15 @@ ExpressionTreeNode Operation::Abs::differentiate(const std::vector<ExpressionTre
                                                  ExpressionTreeNode(new Operation::MultiplyConstant(2), step)));
 }
 
-ExpressionTreeNode Operation::Floor::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+ExpressionTreeNode Operation::Floor::differentiate(const std::vector<ExpressionTreeNode>& , const std::vector<ExpressionTreeNode>& , const std::string& ) const {
     return ExpressionTreeNode(new Operation::Constant(0.0));
 }
 
-ExpressionTreeNode Operation::Ceil::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+ExpressionTreeNode Operation::Ceil::differentiate(const std::vector<ExpressionTreeNode>& , const std::vector<ExpressionTreeNode>& , const std::string& ) const {
     return ExpressionTreeNode(new Operation::Constant(0.0));
 }
 
-ExpressionTreeNode Operation::Select::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+ExpressionTreeNode Operation::Select::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
     vector<ExpressionTreeNode> derivChildren;
     derivChildren.push_back(children[0]);
     derivChildren.push_back(childDerivs[1]);
diff --git a/lib/lepton/src/ParsedExpression.cpp b/lib/lepton/src/ParsedExpression.cpp
index c6092a2dc2..1417551011 100644
--- a/lib/lepton/src/ParsedExpression.cpp
+++ b/lib/lepton/src/ParsedExpression.cpp
@@ -288,11 +288,13 @@ ExpressionTreeNode ParsedExpression::substituteSimplerExpression(const Expressio
         {
             if (children[0].getOperation().getId() == Operation::SQUARE) // sqrt(square(x)) = abs(x)
                 return ExpressionTreeNode(new Operation::Abs(), children[0].getChildren()[0]);
+            break;
         }
         case Operation::SQUARE:
         {
             if (children[0].getOperation().getId() == Operation::SQRT) // square(sqrt(x)) = x
                 return children[0].getChildren()[0];
+            break;
         }
         default:
         {
diff --git a/lib/lepton/src/Parser.cpp b/lib/lepton/src/Parser.cpp
index e7d87ba289..d094b8e4e4 100644
--- a/lib/lepton/src/Parser.cpp
+++ b/lib/lepton/src/Parser.cpp
@@ -178,7 +178,7 @@ ParsedExpression Parser::parse(const string& expression, const map<string, Custo
             vector<ParseToken> tokens = tokenize(subexpressions[i].substr(equalsPos+1));
             int pos = 0;
             subexpDefs[name] = parsePrecedence(tokens, pos, customFunctions, subexpDefs, 0);
-            if (pos != tokens.size())
+            if (pos != (int)tokens.size())
                 throw Exception("unexpected text at end of subexpression: "+tokens[pos].getText());
         }
 
@@ -187,7 +187,7 @@ ParsedExpression Parser::parse(const string& expression, const map<string, Custo
         vector<ParseToken> tokens = tokenize(primaryExpression);
         int pos = 0;
         ExpressionTreeNode result = parsePrecedence(tokens, pos, customFunctions, subexpDefs, 0);
-        if (pos != tokens.size())
+        if (pos != (int)tokens.size())
             throw Exception("unexpected text at end of expression: "+tokens[pos].getText());
         return ParsedExpression(result);
     }
@@ -198,7 +198,7 @@ ParsedExpression Parser::parse(const string& expression, const map<string, Custo
 
 ExpressionTreeNode Parser::parsePrecedence(const vector<ParseToken>& tokens, int& pos, const map<string, CustomFunction*>& customFunctions,
             const map<string, ExpressionTreeNode>& subexpressionDefs, int precedence) {
-    if (pos == tokens.size())
+  if (pos == (int)tokens.size())
         throw Exception("unexpected end of expression");
 
     // Parse the next value (number, variable, function, parenthesized expression)
@@ -224,7 +224,7 @@ ExpressionTreeNode Parser::parsePrecedence(const vector<ParseToken>& tokens, int
     else if (token.getType() == ParseToken::LeftParen) {
         pos++;
         result = parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, 0);
-        if (pos == tokens.size() || tokens[pos].getType() != ParseToken::RightParen)
+        if (pos == (int)tokens.size() || tokens[pos].getType() != ParseToken::RightParen)
             throw Exception("unbalanced parentheses");
         pos++;
     }
@@ -238,7 +238,7 @@ ExpressionTreeNode Parser::parsePrecedence(const vector<ParseToken>& tokens, int
             if (moreArgs)
                 pos++;
         } while (moreArgs);
-        if (pos == tokens.size() || tokens[pos].getType() != ParseToken::RightParen)
+        if (pos == (int)tokens.size() || tokens[pos].getType() != ParseToken::RightParen)
             throw Exception("unbalanced parentheses");
         pos++;
         Operation* op = getFunctionOperation(token.getText(), customFunctions);

From 46f514d2cabb20150502520546843d106ce98b58 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Wed, 21 Dec 2022 22:30:05 -0500
Subject: [PATCH 13/79] add support for writing binary restart files

---
 src/LEPTON/pair_lepton.cpp | 100 ++++++++++++++++++++++++++++++++++++-
 src/LEPTON/pair_lepton.h   |   4 ++
 2 files changed, 103 insertions(+), 1 deletion(-)

diff --git a/src/LEPTON/pair_lepton.cpp b/src/LEPTON/pair_lepton.cpp
index 11587d637c..35264962e2 100644
--- a/src/LEPTON/pair_lepton.cpp
+++ b/src/LEPTON/pair_lepton.cpp
@@ -18,6 +18,7 @@
 #include "pair_lepton.h"
 
 #include "atom.h"
+#include "comm.h"
 #include "error.h"
 #include "force.h"
 #include "memory.h"
@@ -38,7 +39,7 @@ PairLepton::PairLepton(LAMMPS *lmp) : Pair(lmp), cut(nullptr), type2expression(n
   respa_enable = 0;
   single_enable = 1;
   writedata = 1;
-  restartinfo = 0;
+  restartinfo = 1;
   reinitflag = 0;
   cut_global = 0.0;
   centroidstressflag = CENTROID_SAME;
@@ -258,6 +259,103 @@ double PairLepton::init_one(int i, int j)
   return cut[i][j];
 }
 
+/* ----------------------------------------------------------------------
+   proc 0 writes to restart file: global cutoff, per-pair settings, then the expression list
+------------------------------------------------------------------------- */
+
+void PairLepton::write_restart(FILE *fp)
+{
+  write_restart_settings(fp);
+  // for each type pair with j >= i: the set flag, then cutoff and expression index if it is set
+  for (int i = 1; i <= atom->ntypes; i++)
+    for (int j = i; j <= atom->ntypes; j++) {
+      fwrite(&setflag[i][j], sizeof(int), 1, fp);
+      if (setflag[i][j]) {
+        fwrite(&cut[i][j], sizeof(double), 1, fp);
+        fwrite(&type2expression[i][j], sizeof(int), 1, fp);
+      }
+    }
+  // maxlen is the longest expression length plus one for the terminating null byte
+  int num = expressions.size();
+  int maxlen = 0;
+  for (const auto &exp : expressions) maxlen = MAX(maxlen, (int) exp.size());
+  ++maxlen;
+  // list header (num, maxlen), then each expression as its length followed by its characters
+  fwrite(&num, sizeof(int), 1, fp);
+  fwrite(&maxlen, sizeof(int), 1, fp);
+  for (const auto &exp : expressions) {
+    int n = exp.size() + 1;
+    fwrite(&n, sizeof(int), 1, fp);
+    fwrite(exp.c_str(), sizeof(char), n, fp);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 reads from restart file, bcasts
+------------------------------------------------------------------------- */
+
+void PairLepton::read_restart(FILE *fp)
+{
+  read_restart_settings(fp);
+  // the expression list is emptied here and refilled from the file further below
+  allocate();
+  expressions.clear();
+  // only rank 0 reads from fp; every value read is then broadcast to all ranks
+  const int me = comm->me;
+  for (int i = 1; i <= atom->ntypes; i++)
+    for (int j = i; j <= atom->ntypes; j++) {
+      if (me == 0) utils::sfread(FLERR, &setflag[i][j], sizeof(int), 1, fp, nullptr, error);
+      MPI_Bcast(&setflag[i][j], 1, MPI_INT, 0, world);
+      if (setflag[i][j]) {
+        if (me == 0) {
+          utils::sfread(FLERR, &cut[i][j], sizeof(double), 1, fp, nullptr, error);
+          utils::sfread(FLERR, &type2expression[i][j], sizeof(int), 1, fp, nullptr, error);
+        }
+        MPI_Bcast(&cut[i][j], 1, MPI_DOUBLE, 0, world);
+        MPI_Bcast(&type2expression[i][j], 1, MPI_INT, 0, world);
+      }
+    }
+  // expression list: count and buffer size first, then one length-prefixed string per entry
+  int num, maxlen, len;
+  if (me == 0) {
+    utils::sfread(FLERR, &num, sizeof(int), 1, fp, nullptr, error);
+    utils::sfread(FLERR, &maxlen, sizeof(int), 1, fp, nullptr, error);
+  }
+  MPI_Bcast(&num, 1, MPI_INT, 0, world);
+  MPI_Bcast(&maxlen, 1, MPI_INT, 0, world);
+  char *buf = new char[maxlen];
+
+  for (int i = 0; i < num; ++i) {
+    if (me == 0) {
+      utils::sfread(FLERR, &len, sizeof(int), 1, fp, nullptr, error);
+      utils::sfread(FLERR, buf, sizeof(char), len, fp, nullptr, error);
+    }
+    MPI_Bcast(buf, maxlen, MPI_CHAR, 0, world);
+    expressions.push_back(buf);
+  }
+  // NOTE(review): len is never checked against maxlen before filling buf - confirm this is safe
+  delete[] buf;
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes to restart file: the only global setting stored is cut_global
+------------------------------------------------------------------------- */
+
+void PairLepton::write_restart_settings(FILE *fp)
+{
+  fwrite(&cut_global, sizeof(double), 1, fp);
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 reads cut_global from restart file, bcasts it to all other ranks
+------------------------------------------------------------------------- */
+
+void PairLepton::read_restart_settings(FILE *fp)
+{
+  if (comm->me == 0) { utils::sfread(FLERR, &cut_global, sizeof(double), 1, fp, nullptr, error); }
+  MPI_Bcast(&cut_global, 1, MPI_DOUBLE, 0, world);
+}
+
 /* ----------------------------------------------------------------------
    proc 0 writes to data file
 ------------------------------------------------------------------------- */
diff --git a/src/LEPTON/pair_lepton.h b/src/LEPTON/pair_lepton.h
index dd1f40d1d3..c6d62c1adc 100644
--- a/src/LEPTON/pair_lepton.h
+++ b/src/LEPTON/pair_lepton.h
@@ -42,6 +42,10 @@ class PairLepton : public Pair {
   void settings(int, char **) override;
   void coeff(int, char **) override;
   double init_one(int, int) override;
+  void write_restart(FILE *) override;
+  void read_restart(FILE *) override;
+  void write_restart_settings(FILE *) override;
+  void read_restart_settings(FILE *) override;
   void write_data(FILE *) override;
   void write_data_all(FILE *) override;
   double single(int, int, int, int, double, double, double, double &) override;

From 966211bb53ccd5abe91de6e01cba00e6414fe74e Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Thu, 22 Dec 2022 02:13:28 -0500
Subject: [PATCH 14/79] avoid conflicting names

---
 src/LEPTON/pair_lepton.cpp     | 42 ++++++++++++++++++----------------
 src/LEPTON/pair_lepton.h       |  6 ++---
 src/OPENMP/pair_lepton_omp.cpp | 16 ++++++-------
 3 files changed, 32 insertions(+), 32 deletions(-)

diff --git a/src/LEPTON/pair_lepton.cpp b/src/LEPTON/pair_lepton.cpp
index 35264962e2..c2f8b35b9f 100644
--- a/src/LEPTON/pair_lepton.cpp
+++ b/src/LEPTON/pair_lepton.cpp
@@ -26,6 +26,7 @@
 #include "update.h"
 
 #include <cctype>
+#include <cmath>
 #include <cstring>
 
 #include "LMP_Lepton.h"
@@ -98,12 +99,12 @@ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> void PairLepton::eval()
   const int *const *const firstneigh = list->firstneigh;
   double fxtmp, fytmp, fztmp;
 
-  std::vector<LMP_Lepton::CompiledExpression> force;
-  std::vector<LMP_Lepton::CompiledExpression> epot;
+  std::vector<LMP_Lepton::CompiledExpression> pairforce;
+  std::vector<LMP_Lepton::CompiledExpression> pairpot;
   for (const auto &expr : expressions) {
     auto parsed = LMP_Lepton::Parser::parse(expr);
-    force.emplace_back(parsed.differentiate("r").createCompiledExpression());
-    if (EFLAG) epot.emplace_back(parsed.createCompiledExpression());
+    pairforce.emplace_back(parsed.differentiate("r").createCompiledExpression());
+    if (EFLAG) pairpot.emplace_back(parsed.createCompiledExpression());
   }
 
   // loop over neighbors of my atoms
@@ -132,9 +133,9 @@ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> void PairLepton::eval()
       if (rsq < cutsq[itype][jtype]) {
         const double r = sqrt(rsq);
         const int idx = type2expression[itype][jtype];
-        double &r_for = force[idx].getVariableReference("r");
+        double &r_for = pairforce[idx].getVariableReference("r");
         r_for = r;
-        const double fpair = -force[idx].evaluate() / r * factor_lj;
+        const double fpair = -pairforce[idx].evaluate() / r * factor_lj;
 
         fxtmp += delx * fpair;
         fytmp += dely * fpair;
@@ -147,9 +148,9 @@ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> void PairLepton::eval()
 
         double evdwl = 0.0;
         if (EFLAG) {
-          double &r_pot = epot[idx].getVariableReference("r");
+          double &r_pot = pairpot[idx].getVariableReference("r");
           r_pot = r;
-          evdwl = factor_lj * epot[idx].evaluate();
+          evdwl = factor_lj * pairpot[idx].evaluate();
         }
 
         if (EVFLAG) ev_tally(i, j, nlocal, NEWTON_PAIR, evdwl, 0.0, fpair, delx, dely, delz);
@@ -214,13 +215,13 @@ void PairLepton::coeff(int narg, char **arg)
 
   try {
     auto parsed = LMP_Lepton::Parser::parse(exp_one);
-    auto epot = parsed.createCompiledExpression();
-    auto force = parsed.differentiate("r").createCompiledExpression();
-    double &r_pot = epot.getVariableReference("r");
-    double &r_for = force.getVariableReference("r");
+    auto pairpot = parsed.createCompiledExpression();
+    auto pairforce = parsed.differentiate("r").createCompiledExpression();
+    double &r_pot = pairpot.getVariableReference("r");
+    double &r_for = pairforce.getVariableReference("r");
     r_for = r_pot = 1.0;
-    epot.evaluate();
-    force.evaluate();
+    pairpot.evaluate();
+    pairforce.evaluate();
   } catch (std::exception &e) {
     error->all(FLERR, e.what());
   }
@@ -323,6 +324,7 @@ void PairLepton::read_restart(FILE *fp)
   }
   MPI_Bcast(&num, 1, MPI_INT, 0, world);
   MPI_Bcast(&maxlen, 1, MPI_INT, 0, world);
+
   char *buf = new char[maxlen];
 
   for (int i = 0; i < num; ++i) {
@@ -383,14 +385,14 @@ double PairLepton::single(int /* i */, int /* j */, int itype, int jtype, double
                           double /* factor_coul */, double factor_lj, double &fforce)
 {
   auto parsed = LMP_Lepton::Parser::parse(expressions[type2expression[itype][jtype]]);
-  auto epot = parsed.createCompiledExpression();
-  auto force = parsed.differentiate("r").createCompiledExpression();
+  auto pairpot = parsed.createCompiledExpression();
+  auto pairforce = parsed.differentiate("r").createCompiledExpression();
 
   double r = sqrt(rsq);
-  double &r_pot = epot.getVariableReference("r");
-  double &r_for = force.getVariableReference("r");
+  double &r_pot = pairpot.getVariableReference("r");
+  double &r_for = pairforce.getVariableReference("r");
 
   r_pot = r_for = r;
-  fforce = -force.evaluate() / r * factor_lj;
-  return epot.evaluate() * factor_lj;
+  fforce = -pairforce.evaluate() / r * factor_lj;
+  return pairpot.evaluate() * factor_lj;
 }
diff --git a/src/LEPTON/pair_lepton.h b/src/LEPTON/pair_lepton.h
index c6d62c1adc..6a55695bc3 100644
--- a/src/LEPTON/pair_lepton.h
+++ b/src/LEPTON/pair_lepton.h
@@ -56,13 +56,11 @@ class PairLepton : public Pair {
   int **type2expression;
   double cut_global;
 
+  virtual void allocate();
+
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR> void eval();
-
-  virtual void allocate();
 };
-
 }    // namespace LAMMPS_NS
-
 #endif
 #endif
diff --git a/src/OPENMP/pair_lepton_omp.cpp b/src/OPENMP/pair_lepton_omp.cpp
index 35f8e2c89d..2acb9e8b9f 100644
--- a/src/OPENMP/pair_lepton_omp.cpp
+++ b/src/OPENMP/pair_lepton_omp.cpp
@@ -93,12 +93,12 @@ void PairLeptonOMP::eval(int iifrom, int iito, ThrData *const thr)
   const int *const *const firstneigh = list->firstneigh;
   double fxtmp, fytmp, fztmp;
 
-  std::vector<LMP_Lepton::CompiledExpression> force;
-  std::vector<LMP_Lepton::CompiledExpression> epot;
+  std::vector<LMP_Lepton::CompiledExpression> pairforce;
+  std::vector<LMP_Lepton::CompiledExpression> pairpot;
   for (const auto &expr : expressions) {
     auto parsed = LMP_Lepton::Parser::parse(expr);
-    force.emplace_back(parsed.differentiate("r").createCompiledExpression());
-    if (EFLAG) epot.emplace_back(parsed.createCompiledExpression());
+    pairforce.emplace_back(parsed.differentiate("r").createCompiledExpression());
+    if (EFLAG) pairpot.emplace_back(parsed.createCompiledExpression());
   }
 
   // loop over neighbors of my atoms
@@ -127,9 +127,9 @@ void PairLeptonOMP::eval(int iifrom, int iito, ThrData *const thr)
       if (rsq < cutsq[itype][jtype]) {
         const double r = sqrt(rsq);
         const int idx = type2expression[itype][jtype];
-        double &r_for = force[idx].getVariableReference("r");
+        double &r_for = pairforce[idx].getVariableReference("r");
         r_for = r;
-        const double fpair = -force[idx].evaluate() / r * factor_lj;
+        const double fpair = -pairforce[idx].evaluate() / r * factor_lj;
 
         fxtmp += delx * fpair;
         fytmp += dely * fpair;
@@ -142,9 +142,9 @@ void PairLeptonOMP::eval(int iifrom, int iito, ThrData *const thr)
 
         double evdwl = 0.0;
         if (EFLAG) {
-          double &r_pot = epot[idx].getVariableReference("r");
+          double &r_pot = pairpot[idx].getVariableReference("r");
           r_pot = r;
-          evdwl = factor_lj * epot[idx].evaluate();
+          evdwl = factor_lj * pairpot[idx].evaluate();
         }
 
         if (EVFLAG)

From 5da8242690f837f8380c691b87cb05b70fe4df3e Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Thu, 22 Dec 2022 02:13:51 -0500
Subject: [PATCH 15/79] add bond style lepton

---
 src/LEPTON/bond_lepton.cpp                   | 322 +++++++++++++++++++
 src/LEPTON/bond_lepton.h                     |  52 +++
 src/OPENMP/bond_lepton_omp.cpp               | 148 +++++++++
 src/OPENMP/bond_lepton_omp.h                 |  44 +++
 unittest/force-styles/tests/bond-lepton.yaml |  90 ++++++
 5 files changed, 656 insertions(+)
 create mode 100644 src/LEPTON/bond_lepton.cpp
 create mode 100644 src/LEPTON/bond_lepton.h
 create mode 100644 src/OPENMP/bond_lepton_omp.cpp
 create mode 100644 src/OPENMP/bond_lepton_omp.h
 create mode 100644 unittest/force-styles/tests/bond-lepton.yaml

diff --git a/src/LEPTON/bond_lepton.cpp b/src/LEPTON/bond_lepton.cpp
new file mode 100644
index 0000000000..ad2ac0af12
--- /dev/null
+++ b/src/LEPTON/bond_lepton.cpp
@@ -0,0 +1,322 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "bond_lepton.h"
+
+#include "atom.h"
+#include "comm.h"
+#include "error.h"
+#include "force.h"
+#include "memory.h"
+#include "neighbor.h"
+
+#include <cctype>
+#include <cmath>
+#include <cstring>
+
+#include "LMP_Lepton.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+BondLepton::BondLepton(LAMMPS *_lmp) : Bond(_lmp), r0(nullptr), type2expression(nullptr)
+{
+  writedata = 1;     // this style implements write_data()
+  reinitflag = 0;    // NOTE(review): presumably opts out of Bond::reinit() - confirm in bond.h
+}
+
+/* ---------------------------------------------------------------------- */
+
+BondLepton::~BondLepton()
+{
+  if (allocated) {    // the arrays below only exist once allocate() has been called
+    memory->destroy(setflag);
+    memory->destroy(r0);
+    memory->destroy(type2expression);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void BondLepton::compute(int eflag, int vflag)
+{
+  ev_init(eflag, vflag);
+  // select the eval<EVFLAG, EFLAG, NEWTON_BOND>() instantiation matching the current settings
+  if (evflag) {
+    if (eflag) {
+      if (force->newton_bond)
+        eval<1, 1, 1>();
+      else
+        eval<1, 1, 0>();
+    } else {
+      if (force->newton_bond)
+        eval<1, 0, 1>();
+      else
+        eval<1, 0, 0>();
+    }
+  } else {
+    if (force->newton_bond)
+      eval<0, 0, 1>();
+    else
+      eval<0, 0, 0>();
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+template <int EVFLAG, int EFLAG, int NEWTON_BOND> void BondLepton::eval()
+{
+  std::vector<LMP_Lepton::CompiledExpression> bondforce;
+  std::vector<LMP_Lepton::CompiledExpression> bondpot;
+  for (const auto &expr : expressions) {
+    auto parsed = LMP_Lepton::Parser::parse(expr);
+    bondforce.emplace_back(parsed.differentiate("r").createCompiledExpression());
+    if (EFLAG) bondpot.emplace_back(parsed.createCompiledExpression());
+  }
+  // above: the derivative w.r.t. "r" (and with EFLAG the expression itself) is compiled per call
+  const double *const *const x = atom->x;
+  double *const *const f = atom->f;
+  const int *const *const bondlist = neighbor->bondlist;
+  const int nbondlist = neighbor->nbondlist;
+  const int nlocal = atom->nlocal;
+  // each bond list entry provides two atom indices and the bond type
+  for (int n = 0; n < nbondlist; n++) {
+    const int i1 = bondlist[n][0];
+    const int i2 = bondlist[n][1];
+    const int type = bondlist[n][2];
+
+    const double delx = x[i1][0] - x[i2][0];
+    const double dely = x[i1][1] - x[i2][1];
+    const double delz = x[i1][2] - x[i2][2];
+
+    const double rsq = delx * delx + dely * dely + delz * delz;
+    const double r = sqrt(rsq);
+    const double dr = r - r0[type];
+    const int idx = type2expression[type];
+
+    // force: the expression variable "r" is set to dr = r - r0[type], not to the distance r
+    // fbond is -dE/d(dr) divided by r and stays zero unless r > 0 (no division by zero)
+    double fbond = 0.0;
+    if (r > 0.0) {
+      double &r_for = bondforce[idx].getVariableReference("r");
+      r_for = dr;
+      fbond = -bondforce[idx].evaluate() / r;
+    }
+
+    // apply force to each of 2 atoms; without NEWTON_BOND only to atoms with index < nlocal
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += delx * fbond;
+      f[i1][1] += dely * fbond;
+      f[i1][2] += delz * fbond;
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= delx * fbond;
+      f[i2][1] -= dely * fbond;
+      f[i2][2] -= delz * fbond;
+    }
+    // the energy is only evaluated with EFLAG; tallying only happens with EVFLAG
+    double ebond = 0.0;
+    if (EFLAG) {
+      double &r_pot = bondpot[idx].getVariableReference("r");
+      r_pot = dr;
+      ebond = bondpot[idx].evaluate();
+    }
+    if (EVFLAG) ev_tally(i1, i2, nlocal, NEWTON_BOND, ebond, fbond, delx, dely, delz);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void BondLepton::allocate()
+{
+  allocated = 1;
+  const int np1 = atom->nbondtypes + 1;
+  // bond types are numbered from 1, so every array gets nbondtypes + 1 elements
+  memory->create(r0, np1, "bond:r0");
+  memory->create(type2expression, np1, "bond:type2expression");
+  memory->create(setflag, np1, "bond:setflag");
+  for (int i = 1; i < np1; i++) setflag[i] = 0;
+}
+
+/* ----------------------------------------------------------------------
+   set coeffs for one or more bond types: args = type or type range, r0, expression
+------------------------------------------------------------------------- */
+
+void BondLepton::coeff(int narg, char **arg)
+{
+  if (narg != 3) error->all(FLERR, "Incorrect number of args for bond coefficients");
+  if (!allocated) allocate();
+
+  int ilo, ihi;
+  utils::bounds(FLERR, arg[0], 1, atom->nbondtypes, ilo, ihi, error);
+
+  double r0_one = utils::numeric(FLERR, arg[1], false, lmp);
+
+  // remove whitespace and quotes from expression string and then
+  // check if the expression can be parsed and evaluated without error
+  std::string exp_one;
+  for (const auto &c : std::string(arg[2]))
+    if (!isspace(c) && (c != '"') && (c != '\'')) exp_one.push_back(c);
+  // trial evaluation at r = 1.0; a std::exception thrown on the way becomes a LAMMPS error
+  try {
+    auto parsed = LMP_Lepton::Parser::parse(exp_one);
+    auto bondpot = parsed.createCompiledExpression();
+    auto bondforce = parsed.differentiate("r").createCompiledExpression();
+    double &r_pot = bondpot.getVariableReference("r");
+    double &r_for = bondforce.getVariableReference("r");
+    r_for = r_pot = 1.0;
+    bondpot.evaluate();
+    bondforce.evaluate();
+  } catch (std::exception &e) {
+    error->all(FLERR, e.what());
+  }
+  // search for an identical string so that equal expressions share a single list entry
+  std::size_t idx = 0;
+  for (const auto &exp : expressions) {
+    if (exp == exp_one) break;
+    ++idx;
+  }
+
+  // not found, add to list
+  if ((expressions.size() == 0) || (idx == expressions.size())) expressions.push_back(exp_one);
+
+  int count = 0;
+  for (int i = ilo; i <= ihi; i++) {
+    r0[i] = r0_one;
+    type2expression[i] = idx;
+    setflag[i] = 1;
+    count++;
+  }
+
+  if (count == 0) error->all(FLERR, "Incorrect args for bond coefficients");
+}
+
+/* ----------------------------------------------------------------------
+   return an equilibrium bond length (the r0 value stored for bond type i)
+------------------------------------------------------------------------- */
+
+double BondLepton::equilibrium_distance(int i)
+{
+  return r0[i];
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes to restart file: r0 and expression index per type, then the expression list
+------------------------------------------------------------------------- */
+
+void BondLepton::write_restart(FILE *fp)
+{
+  fwrite(&r0[1], sizeof(double), atom->nbondtypes, fp);
+  fwrite(&type2expression[1], sizeof(int), atom->nbondtypes, fp);
+  // maxlen is the longest expression length plus one for the terminating null byte
+  int num = expressions.size();
+  int maxlen = 0;
+  for (const auto &exp : expressions) maxlen = MAX(maxlen, (int) exp.size());
+  ++maxlen;
+  // list header (num, maxlen), then each expression as its length followed by its characters
+  fwrite(&num, sizeof(int), 1, fp);
+  fwrite(&maxlen, sizeof(int), 1, fp);
+  for (const auto &exp : expressions) {
+    int n = exp.size() + 1;
+    fwrite(&n, sizeof(int), 1, fp);
+    fwrite(exp.c_str(), sizeof(char), n, fp);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 reads from restart file, bcasts
+------------------------------------------------------------------------- */
+
+void BondLepton::read_restart(FILE *fp)
+{
+  allocate();
+  expressions.clear();    // drop previously stored expressions before refilling the list
+  if (comm->me == 0) {
+    utils::sfread(FLERR, &r0[1], sizeof(double), atom->nbondtypes, fp, nullptr, error);
+    utils::sfread(FLERR, &type2expression[1], sizeof(int), atom->nbondtypes, fp, nullptr, error);
+  }
+  MPI_Bcast(&r0[1], atom->nbondtypes, MPI_DOUBLE, 0, world);
+  MPI_Bcast(&type2expression[1], atom->nbondtypes, MPI_INT, 0, world);
+  for (int i = 1; i <= atom->nbondtypes; i++) setflag[i] = 1;
+  // expression list: count and buffer size first, then one length-prefixed string per entry
+  int num, maxlen, len;
+  if (comm->me == 0) {
+    utils::sfread(FLERR, &num, sizeof(int), 1, fp, nullptr, error);
+    utils::sfread(FLERR, &maxlen, sizeof(int), 1, fp, nullptr, error);
+  }
+  MPI_Bcast(&num, 1, MPI_INT, 0, world);
+  MPI_Bcast(&maxlen, 1, MPI_INT, 0, world);
+
+  char *buf = new char[maxlen];
+
+  for (int i = 0; i < num; ++i) {
+    if (comm->me == 0) {
+      utils::sfread(FLERR, &len, sizeof(int), 1, fp, nullptr, error);
+      utils::sfread(FLERR, buf, sizeof(char), len, fp, nullptr, error);
+    }
+    MPI_Bcast(buf, maxlen, MPI_CHAR, 0, world);
+    expressions.push_back(buf);
+  }
+  // NOTE(review): len is never checked against maxlen before filling buf - confirm this is safe
+  delete[] buf;
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes to data file: one line per bond type with type index, r0, and expression
+------------------------------------------------------------------------- */
+
+void BondLepton::write_data(FILE *fp)
+{
+  for (int i = 1; i <= atom->nbondtypes; i++)
+    fprintf(fp, "%d %g %s\n", i, r0[i], expressions[type2expression[i]].c_str());
+}
+
+/* ---------------------------------------------------------------------- */
+
+double BondLepton::single(int type, double rsq, int /*i*/, int /*j*/, double &fforce)
+{
+  const double r = sqrt(rsq);
+  const double dr = r - r0[type];
+  // the expression is parsed and compiled on every call; its variable "r" is set to dr
+  auto parsed = LMP_Lepton::Parser::parse(expressions[type2expression[type]]);
+  auto bondpot = parsed.createCompiledExpression();
+  auto bondforce = parsed.differentiate("r").createCompiledExpression();
+  double &r_for = bondforce.getVariableReference("r");
+  double &r_pot = bondpot.getVariableReference("r");
+  r_for = r_pot = dr;
+
+  // fforce is -dE/d(dr) divided by r; fforce and the returned energy stay zero unless r > 0
+
+  fforce = 0.0;
+  double ebond = 0.0;
+  if (r > 0.0) {
+    fforce = -bondforce.evaluate() / r;
+    ebond = bondpot.evaluate();
+  }
+  return ebond;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void *BondLepton::extract(const char *str, int &dim)
+{
+  dim = 1;    // set to 1 regardless of whether str is recognized
+  if (strcmp(str, "r0") == 0) return (void *) r0;
+  return nullptr;    // "r0" is the only name available
+}
diff --git a/src/LEPTON/bond_lepton.h b/src/LEPTON/bond_lepton.h
new file mode 100644
index 0000000000..5c430b7e63
--- /dev/null
+++ b/src/LEPTON/bond_lepton.h
@@ -0,0 +1,52 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef BOND_CLASS
+// clang-format off
+BondStyle(lepton,BondLepton);
+// clang-format on
+#else
+
+#ifndef LMP_BOND_LEPTON_H
+#define LMP_BOND_LEPTON_H
+
+#include "bond.h"
+
+namespace LAMMPS_NS {
+
+// Bond style that evaluates a Lepton expression string as function of r - r0 for each bond type
+class BondLepton : public Bond {
+ public:
+  BondLepton(class LAMMPS *);
+  ~BondLepton() override;
+  void compute(int, int) override;
+  void coeff(int, char **) override;
+  double equilibrium_distance(int) override;
+  void write_restart(FILE *) override;
+  void read_restart(FILE *) override;
+  void write_data(FILE *) override;
+  double single(int, double, int, int, double &) override;
+  void *extract(const char *, int &) override;
+
+ protected:
+  std::vector<std::string> expressions;    // list of expression strings
+  double *r0;                              // per bond type value subtracted from the distance
+  int *type2expression;                    // per bond type index into expressions
+  virtual void allocate();
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND> void eval();
+};
+}    // namespace LAMMPS_NS
+#endif
+#endif
diff --git a/src/OPENMP/bond_lepton_omp.cpp b/src/OPENMP/bond_lepton_omp.cpp
new file mode 100644
index 0000000000..3171aaa51c
--- /dev/null
+++ b/src/OPENMP/bond_lepton_omp.cpp
@@ -0,0 +1,148 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "bond_lepton_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "omp_compat.h"
+
+#include <cmath>
+
+#include "LMP_Lepton.h"
+#include "suffix.h"
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+BondLeptonOMP::BondLeptonOMP(class LAMMPS *lmp) : BondLepton(lmp), ThrOMP(lmp, THR_BOND)
+{
+  suffix_flag |= Suffix::OMP;    // add the OMP bit to the inherited suffix flags
+}
+
+/* ---------------------------------------------------------------------- */
+
+void BondLeptonOMP::compute(int eflag, int vflag)    // threaded driver around eval<>()
+{
+  ev_init(eflag, vflag);
+
+  const int nall = atom->nlocal + atom->nghost;    // local plus ghost atoms
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nbondlist;    // number of entries in the bond list
+
+#if defined(_OPENMP)
+#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag, vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);    // sets ifrom, ito, tid for this thread
+    ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, nullptr, thr);
+
+    if (inum > 0) {    // pick the eval<> instantiation matching the runtime flags
+      if (evflag) {
+        if (eflag) {
+          if (force->newton_bond)
+            eval<1, 1, 1>(ifrom, ito, thr);
+          else
+            eval<1, 1, 0>(ifrom, ito, thr);
+        } else {
+          if (force->newton_bond)
+            eval<1, 0, 1>(ifrom, ito, thr);
+          else
+            eval<1, 0, 0>(ifrom, ito, thr);
+        }
+      } else {    // no tally requested, so EVFLAG and EFLAG are both 0
+        if (force->newton_bond)
+          eval<0, 0, 1>(ifrom, ito, thr);
+        else
+          eval<0, 0, 0>(ifrom, ito, thr);
+      }
+    }
+    thr->timer(Timer::BOND);
+    reduce_thr(this, eflag, vflag, thr);    // presumably merges per-thread results; see ThrOMP
+  }    // end of omp parallel region
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void BondLeptonOMP::eval(int nfrom, int nto, ThrData *const thr)    // handles bonds nfrom..nto-1
+{
+  std::vector<LMP_Lepton::CompiledExpression> bondforce;    // derivative w.r.t. "r" per expression
+  std::vector<LMP_Lepton::CompiledExpression> bondpot;    // expression itself, only if EFLAG
+  for (const auto &expr : expressions) {    // compiled into locals on every call
+    auto parsed = LMP_Lepton::Parser::parse(expr);
+    bondforce.emplace_back(parsed.differentiate("r").createCompiledExpression());
+    if (EFLAG) bondpot.emplace_back(parsed.createCompiledExpression());
+  }
+
+  const auto *_noalias const x = (dbl3_t *) atom->x[0];
+  auto *_noalias const f = (dbl3_t *) thr->get_f()[0];    // force array obtained from thr
+  const int3_t *_noalias const bondlist = (int3_t *) neighbor->bondlist[0];
+  const int nlocal = atom->nlocal;
+
+  for (int n = nfrom; n < nto; n++) {
+    const int i1 = bondlist[n].a;
+    const int i2 = bondlist[n].b;
+    const int type = bondlist[n].t;
+
+    const double delx = x[i1].x - x[i2].x;
+    const double dely = x[i1].y - x[i2].y;
+    const double delz = x[i1].z - x[i2].z;
+
+    const double rsq = delx * delx + dely * dely + delz * delz;
+    const double r = sqrt(rsq);
+    const double dr = r - r0[type];    // expression variable "r" is set to this, not to r
+    const int idx = type2expression[type];    // index into bondforce / bondpot
+
+    // force and energy
+
+    double fbond = 0.0;
+    if (r > 0.0) {    // leave fbond at zero when r == 0 to avoid dividing by zero
+      double &r_for = bondforce[idx].getVariableReference("r");
+      r_for = dr;
+      fbond = -bondforce[idx].evaluate() / r;    // divided by r: it multiplies delx/dely/delz
+    }
+
+    // apply force to each of 2 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {    // without NEWTON_BOND only i1 < nlocal is updated
+      f[i1].x += delx * fbond;
+      f[i1].y += dely * fbond;
+      f[i1].z += delz * fbond;
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {    // same rule for the second atom, opposite sign
+      f[i2].x -= delx * fbond;
+      f[i2].y -= dely * fbond;
+      f[i2].z -= delz * fbond;
+    }
+
+    double ebond = 0.0;    // stays zero unless EFLAG is set
+    if (EFLAG) {
+      double &r_pot = bondpot[idx].getVariableReference("r");
+      r_pot = dr;
+      ebond = bondpot[idx].evaluate();
+    }
+    if (EVFLAG)
+      ev_tally_thr(this, i1, i2, nlocal, NEWTON_BOND, ebond, fbond, delx, dely, delz, thr);
+  }
+}
diff --git a/src/OPENMP/bond_lepton_omp.h b/src/OPENMP/bond_lepton_omp.h
new file mode 100644
index 0000000000..bdcc36434e
--- /dev/null
+++ b/src/OPENMP/bond_lepton_omp.h
@@ -0,0 +1,44 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef BOND_CLASS
+// clang-format off
+BondStyle(lepton/omp,BondLeptonOMP);
+// clang-format on
+#else
+
+#ifndef LMP_BOND_LEPTON_OMP_H
+#define LMP_BOND_LEPTON_OMP_H
+
+#include "bond_lepton.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class BondLeptonOMP : public BondLepton, public ThrOMP {    // bond style lepton/omp
+
+ public:
+  BondLeptonOMP(class LAMMPS *lmp);
+  void compute(int, int) override;    // (eflag, vflag); runs eval<>() in an OpenMP region
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData *const thr);    // bonds ifrom..ito-1 using data in thr
+};
+}    // namespace LAMMPS_NS
+#endif
+#endif
diff --git a/unittest/force-styles/tests/bond-lepton.yaml b/unittest/force-styles/tests/bond-lepton.yaml
new file mode 100644
index 0000000000..a220dd8b0c
--- /dev/null
+++ b/unittest/force-styles/tests/bond-lepton.yaml
@@ -0,0 +1,90 @@
+---
+lammps_version: 3 Nov 2022
+tags: generated
+date_generated: Thu Dec 22 02:03:59 2022
+epsilon: 2.5e-13
+skip_tests:
+prerequisites: ! |
+  atom full
+  bond lepton
+pre_commands: ! ""
+post_commands: ! ""
+input_file: in.fourmol
+bond_style: lepton
+bond_coeff: ! |
+  1 1.5 "k*r^2; k=250.0"
+  2 1.1 "k2*r^2 + k3*r^3 + k4*r^4; k2=300.0; k3=-100.0; k4=50.0"
+  3 1.3 "k*r^2; k=350.0"
+  4 1.2 "k*(r-0.2)^2; k=500.0"
+  5 1.0 "k*r^2; k=450.0"
+equilibrium: 5 1.5 1.1 1.3 1.2 1
+extract: ! |
+  r0 1
+natoms: 29
+init_energy: 38.295825321689215
+init_stress: ! |-
+  -4.7778964706834920e+01 -9.3066674567350432e+01  3.4789470658440035e+02 -3.0023920169312170e+01 -8.0421418879842847e+01  5.8592449335969732e+01
+init_forces: ! |2
+    1 -5.9149914305071416e+00 -3.7728809612345245e+01 -2.7769433362963369e+01
+    2 -9.4281609567839944e+00 -7.7586487054273015e+00  1.1096676787527940e+01
+    3  3.2211742366572125e+01  2.7682361264425523e+01 -7.0109911672970497e+00
+    4  4.9260777576375503e+00 -1.3809750102765932e+00  3.4951785613141868e+00
+    5 -1.2606902198593501e+00 -1.9373397933007170e+00  6.4372463095041841e+00
+    6 -3.8858476307965482e+01  6.8567296300319640e+01  1.9889888806614337e+02
+    7  7.5297927100028144e+00 -3.8622600737556944e+01 -1.9268793182212875e+02
+    8  1.3018665172824681e+01 -1.2902789438539877e+01  3.2406676637830003e+00
+    9  7.4343536239661590e-01  8.0072549738604493e-01  3.2899591078538779e+00
+   10  6.1558871886113291e+00 -2.2419470219698296e+00  1.0080175092279852e+01
+   11 -3.7020922615305768e-01 -9.1704102274126453e-01 -1.5046795827370363e+00
+   12  5.2437190958790678e+00  3.4225915524442998e+00 -2.5523597276998897e+00
+   13 -1.1277007635800260e+01  4.4610677459696646e+00  2.1195215396108269e-01
+   14  2.9813926585641828e+00 -6.0667387499775116e-01  7.7317115100728788e+00
+   15  2.5872825164662799e-01 -9.9415365173790704e+00 -3.5428115826174169e+00
+   16  5.2775953236493464e+01 -3.1855535724919463e+01 -1.6524229620195118e+02
+   17 -5.8735858023559175e+01  4.0959855098908882e+01  1.5582804819495431e+02
+   18 -9.0963607969319646e+00 -4.3343406270234155e+00 -1.7623055551859267e+01
+   19  1.2597490501067170e+01  8.0591915019111742e+00  1.5261489294231819e+01
+   20 -3.5011297041352050e+00 -3.7248508748877587e+00  2.3615662576274494e+00
+   21 -1.5332952658285048e+00  5.9630208068632040e-01 -7.4967230017303281e+00
+   22  4.2380253233105529e+00  1.0270453290850614e+00  6.6489894421385651e+00
+   23 -2.7047300574820481e+00 -1.6233474097713818e+00  8.4773355959176278e-01
+   24 -6.6588083188726532e+00  3.5110922792825918e+00 -6.5625174267043489e+00
+   25  7.9844426562464141e+00 -1.2853795683286129e+00  6.7123710742192300e+00
+   26 -1.3256343373737607e+00 -2.2257127109539789e+00 -1.4985364751488087e-01
+   27  6.6999960289138851e+00  6.3808952243186141e+00  2.0100808779497248e+00
+   28 -8.8466157439236681e-01  3.8018717064230995e-01 -5.9857060538593476e-01
+   29 -5.8153344545215182e+00 -6.7610823949609244e+00 -1.4115102725637900e+00
+run_energy: 37.78424389351509
+run_stress: ! |-
+  -4.6127506998693484e+01 -9.2129732247211749e+01  3.4548310342284810e+02 -2.9841348469661163e+01 -7.8434962689387717e+01  5.9253167412123155e+01
+run_forces: ! |2
+    1 -5.8451208652159004e+00 -3.7483084455000643e+01 -2.7706576989352534e+01
+    2 -9.4646964278974774e+00 -7.8058897724822449e+00  1.1098831256058579e+01
+    3  3.1827086102630346e+01  2.7573911030624821e+01 -6.9576662575837211e+00
+    4  5.1502169659901655e+00 -1.4367546726785101e+00  3.6631301025186187e+00
+    5 -1.2208420775139264e+00 -1.8781699435112362e+00  6.2332639085051911e+00
+    6 -3.8491523409043303e+01  6.8063273218541468e+01  1.9723141045830272e+02
+    7  7.4838209349394775e+00 -3.8394258853636330e+01 -1.9092625515909930e+02
+    8  1.2676329319901857e+01 -1.2475162287097550e+01  3.3659783337736577e+00
+    9  6.8845241565874460e-01  7.3814593866184031e-01  3.0434095400342533e+00
+   10  6.2545583994797553e+00 -2.9600470917047201e+00  9.4247125735981765e+00
+   11 -1.9554747834212524e-01 -4.8434314068172696e-01 -7.9452259566032057e-01
+   12  5.2092795750960841e+00  3.1431929551776721e+00 -3.1346654851373348e+00
+   13 -1.1496483840617872e+01  4.5245217971580018e+00  2.1348220240918236e-01
+   14  3.1913399826660909e+00 -6.3760720126489068e-01  8.2740980433927742e+00
+   15  2.7338564489784484e-01 -9.7206665011069671e+00 -3.4841809697094543e+00
+   16  5.2461611410918316e+01 -3.1639255494702798e+01 -1.6483607587596811e+02
+   17 -5.8501866653548078e+01  4.0872194473703807e+01  1.5529162691391761e+02
+   18 -7.0990354207248405e+00 -2.4743922643289666e+00 -1.7824398936159682e+01
+   19  1.2019842510974870e+01  7.7105128268768715e+00  1.4523712108141252e+01
+   20 -4.9208070902500296e+00 -5.2361205625479048e+00  3.3006868280184283e+00
+   21 -1.8548628650934149e+00  2.7467524264262122e-01 -6.7601469408617412e+00
+   22  3.9136757840663186e+00  9.5561415744904055e-01  6.1181929861632272e+00
+   23 -2.0588129189729036e+00 -1.2302894000916618e+00  6.4195395469851357e-01
+   24 -5.7681973234153086e+00  2.0209144998436366e+00 -5.2864044021513967e+00
+   25  6.3696975292216704e+00 -1.0109756418053095e+00  5.3564043759405795e+00
+   26 -6.0150020580636188e-01 -1.0099388580383271e+00 -6.9999973789182365e-02
+   27  6.8467535469188450e+00  5.7500299184200578e+00  2.2775780974490298e+00
+   28 -1.3929430925479587e+00  5.9772788540443345e-01 -9.4056106886485980e-01
+   29 -5.4538104543708865e+00 -6.3477578038244911e+00 -1.3370170285841700e+00
+...

From 4cbe8b353be849f1b62302ac6698f81cdce8a406 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Thu, 22 Dec 2022 05:37:59 -0500
Subject: [PATCH 16/79] move shared functionality to utility function added to
 Lepton library

---
 lib/lepton/include/LMP_Lepton.h |  8 ++++++++
 lib/lepton/src/Utils.cpp        | 31 +++++++++++++++++++++++++++++++
 src/LEPTON/bond_lepton.cpp      | 12 +++++-------
 src/LEPTON/pair_lepton.cpp      |  7 +------
 4 files changed, 45 insertions(+), 13 deletions(-)
 create mode 100644 lib/lepton/src/Utils.cpp

diff --git a/lib/lepton/include/LMP_Lepton.h b/lib/lepton/include/LMP_Lepton.h
index 73b6b6fa38..d277bd2761 100644
--- a/lib/lepton/include/LMP_Lepton.h
+++ b/lib/lepton/include/LMP_Lepton.h
@@ -32,6 +32,8 @@
  * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
  * -------------------------------------------------------------------------- */
 
+#include <string>
+
 #include "lepton/CompiledExpression.h"
 #include "lepton/CustomFunction.h"
 #include "lepton/ExpressionProgram.h"
@@ -40,4 +42,10 @@
 #include "lepton/ParsedExpression.h"
 #include "lepton/Parser.h"
 
+// utility functions
+namespace LMP_Lepton
+{
+  /// return copy of expression string with all whitespace and quote characters (" and ') removed
+  std::string condense(const std::string &);
+}
 #endif /*LMP_LEPTON_H_*/
diff --git a/lib/lepton/src/Utils.cpp b/lib/lepton/src/Utils.cpp
new file mode 100644
index 0000000000..839da6cda2
--- /dev/null
+++ b/lib/lepton/src/Utils.cpp
@@ -0,0 +1,31 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "LMP_Lepton.h"
+
+#include <cctype>
+
+/// return copy of expression string with all whitespace and quote characters (" and ') removed
+std::string LMP_Lepton::condense(const std::string &in)
+{
+  std::string out;
+  for (const char c : in)    // cast below: isspace() is undefined for negative char values
+    if (!isspace(static_cast<unsigned char>(c)) && (c != '"') && (c != '\'')) out.push_back(c);
+  return out;
+}
+
+
diff --git a/src/LEPTON/bond_lepton.cpp b/src/LEPTON/bond_lepton.cpp
index ad2ac0af12..b19a7ae50c 100644
--- a/src/LEPTON/bond_lepton.cpp
+++ b/src/LEPTON/bond_lepton.cpp
@@ -24,9 +24,7 @@
 #include "memory.h"
 #include "neighbor.h"
 
-#include <cctype>
 #include <cmath>
-#include <cstring>
 
 #include "LMP_Lepton.h"
 
@@ -170,10 +168,7 @@ void BondLepton::coeff(int narg, char **arg)
 
   // remove whitespace and quotes from expression string and then
   // check if the expression can be parsed and evaluated without error
-  std::string exp_one;
-  for (const auto &c : std::string(arg[2]))
-    if (!isspace(c) && (c != '"') && (c != '\'')) exp_one.push_back(c);
-
+  std::string exp_one = LMP_Lepton::condense(arg[2]);
   try {
     auto parsed = LMP_Lepton::Parser::parse(exp_one);
     auto bondpot = parsed.createCompiledExpression();
@@ -317,6 +312,9 @@ double BondLepton::single(int type, double rsq, int /*i*/, int /*j*/, double &ff
 void *BondLepton::extract(const char *str, int &dim)
 {
   dim = 1;
-  if (strcmp(str, "r0") == 0) return (void *) r0;
+  if (str) {
+    std::string keyword(str);
+    if (keyword == "r0") return (void *) r0;
+  }
   return nullptr;
 }
diff --git a/src/LEPTON/pair_lepton.cpp b/src/LEPTON/pair_lepton.cpp
index c2f8b35b9f..4a53e6985c 100644
--- a/src/LEPTON/pair_lepton.cpp
+++ b/src/LEPTON/pair_lepton.cpp
@@ -25,9 +25,7 @@
 #include "neigh_list.h"
 #include "update.h"
 
-#include <cctype>
 #include <cmath>
-#include <cstring>
 
 #include "LMP_Lepton.h"
 
@@ -209,10 +207,7 @@ void PairLepton::coeff(int narg, char **arg)
 
   // remove whitespace and quotes from expression string and then
   // check if the expression can be parsed and evaluated without error
-  std::string exp_one;
-  for (const auto &c : std::string(arg[2]))
-    if (!isspace(c) && (c != '"') && (c != '\'')) exp_one.push_back(c);
-
+  std::string exp_one = LMP_Lepton::condense(arg[2]);
   try {
     auto parsed = LMP_Lepton::Parser::parse(exp_one);
     auto pairpot = parsed.createCompiledExpression();

From 48c23788f2d0ae8b697c7aa6e1f25720ce14e583 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Thu, 22 Dec 2022 07:10:48 -0500
Subject: [PATCH 17/79] handle pair_modify shift and enforce that bond style
 lepton has zero energy at r0

---
 src/LEPTON/bond_lepton.cpp     | 15 ++++++++++-----
 src/LEPTON/bond_lepton.h       |  1 +
 src/LEPTON/pair_lepton.cpp     | 27 +++++++++++++++++++++++----
 src/LEPTON/pair_lepton.h       |  1 +
 src/OPENMP/bond_lepton_omp.cpp |  2 +-
 src/OPENMP/pair_lepton_omp.cpp |  3 ++-
 6 files changed, 38 insertions(+), 11 deletions(-)

diff --git a/src/LEPTON/bond_lepton.cpp b/src/LEPTON/bond_lepton.cpp
index b19a7ae50c..7ebca00012 100644
--- a/src/LEPTON/bond_lepton.cpp
+++ b/src/LEPTON/bond_lepton.cpp
@@ -32,7 +32,8 @@ using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-BondLepton::BondLepton(LAMMPS *_lmp) : Bond(_lmp), r0(nullptr), type2expression(nullptr)
+BondLepton::BondLepton(LAMMPS *_lmp) :
+    Bond(_lmp), r0(nullptr), type2expression(nullptr), offset(nullptr)
 {
   writedata = 1;
   reinitflag = 0;
@@ -46,6 +47,7 @@ BondLepton::~BondLepton()
     memory->destroy(setflag);
     memory->destroy(r0);
     memory->destroy(type2expression);
+    memory->destroy(offset);
   }
 }
 
@@ -133,7 +135,7 @@ template <int EVFLAG, int EFLAG, int NEWTON_BOND> void BondLepton::eval()
     if (EFLAG) {
       double &r_pot = bondpot[idx].getVariableReference("r");
       r_pot = dr;
-      ebond = bondpot[idx].evaluate();
+      ebond = bondpot[idx].evaluate() - offset[type];
     }
     if (EVFLAG) ev_tally(i1, i2, nlocal, NEWTON_BOND, ebond, fbond, delx, dely, delz);
   }
@@ -148,6 +150,7 @@ void BondLepton::allocate()
 
   memory->create(r0, np1, "bond:r0");
   memory->create(type2expression, np1, "bond:type2expression");
+  memory->create(offset, np1, "bond:offset");
   memory->create(setflag, np1, "bond:setflag");
   for (int i = 1; i < np1; i++) setflag[i] = 0;
 }
@@ -169,14 +172,15 @@ void BondLepton::coeff(int narg, char **arg)
   // remove whitespace and quotes from expression string and then
   // check if the expression can be parsed and evaluated without error
   std::string exp_one = LMP_Lepton::condense(arg[2]);
+  double offset_one = 0.0;
   try {
     auto parsed = LMP_Lepton::Parser::parse(exp_one);
     auto bondpot = parsed.createCompiledExpression();
     auto bondforce = parsed.differentiate("r").createCompiledExpression();
     double &r_pot = bondpot.getVariableReference("r");
     double &r_for = bondforce.getVariableReference("r");
-    r_for = r_pot = 1.0;
-    bondpot.evaluate();
+    r_for = r_pot = r0_one;
+    offset_one = bondpot.evaluate();
     bondforce.evaluate();
   } catch (std::exception &e) {
     error->all(FLERR, e.what());
@@ -195,6 +199,7 @@ void BondLepton::coeff(int narg, char **arg)
   for (int i = ilo; i <= ihi; i++) {
     r0[i] = r0_one;
     type2expression[i] = idx;
+    offset[i] = offset_one;
     setflag[i] = 1;
     count++;
   }
@@ -302,7 +307,7 @@ double BondLepton::single(int type, double rsq, int /*i*/, int /*j*/, double &ff
   double ebond = 0.0;
   if (r > 0.0) {
     fforce = -bondforce.evaluate() / r;
-    ebond = bondpot.evaluate();
+    ebond = bondpot.evaluate() - offset[type];
   }
   return ebond;
 }
diff --git a/src/LEPTON/bond_lepton.h b/src/LEPTON/bond_lepton.h
index 5c430b7e63..e91dda3187 100644
--- a/src/LEPTON/bond_lepton.h
+++ b/src/LEPTON/bond_lepton.h
@@ -41,6 +41,7 @@ class BondLepton : public Bond {
   std::vector<std::string> expressions;
   double *r0;
   int *type2expression;
+  double *offset;
 
   virtual void allocate();
 
diff --git a/src/LEPTON/pair_lepton.cpp b/src/LEPTON/pair_lepton.cpp
index 4a53e6985c..c159db6388 100644
--- a/src/LEPTON/pair_lepton.cpp
+++ b/src/LEPTON/pair_lepton.cpp
@@ -33,7 +33,8 @@ using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-PairLepton::PairLepton(LAMMPS *lmp) : Pair(lmp), cut(nullptr), type2expression(nullptr)
+PairLepton::PairLepton(LAMMPS *lmp) :
+    Pair(lmp), cut(nullptr), type2expression(nullptr), offset(nullptr)
 {
   respa_enable = 0;
   single_enable = 1;
@@ -53,6 +54,7 @@ PairLepton::~PairLepton()
     memory->destroy(cutsq);
     memory->destroy(setflag);
     memory->destroy(type2expression);
+    memory->destroy(offset);
   }
 }
 
@@ -148,7 +150,8 @@ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> void PairLepton::eval()
         if (EFLAG) {
           double &r_pot = pairpot[idx].getVariableReference("r");
           r_pot = r;
-          evdwl = factor_lj * pairpot[idx].evaluate();
+          evdwl = pairpot[idx].evaluate() - offset[itype][jtype];
+          evdwl *= factor_lj;
         }
 
         if (EVFLAG) ev_tally(i, j, nlocal, NEWTON_PAIR, evdwl, 0.0, fpair, delx, dely, delz);
@@ -176,6 +179,7 @@ void PairLepton::allocate()
   memory->create(cut, np1, np1, "pair:cut");
   memory->create(cutsq, np1, np1, "pair:cutsq");
   memory->create(type2expression, np1, np1, "pair:type2expression");
+  memory->create(offset, np1, np1, "pair:offset");
 }
 
 /* ----------------------------------------------------------------------
@@ -249,8 +253,18 @@ double PairLepton::init_one(int i, int j)
 {
   if (setflag[i][j] == 0) error->all(FLERR, "All pair coeffs are not set");
 
+  if (offset_flag) {
+    auto parsed = LMP_Lepton::Parser::parse(expressions[type2expression[i][j]]);
+    auto pairpot = parsed.createCompiledExpression();
+    double &r_pot = pairpot.getVariableReference("r");
+    r_pot = 1.0;
+    offset[i][j] = pairpot.evaluate();
+  } else
+    offset[i][j] = 0.0;
+
   cut[j][i] = cut[i][j];
   type2expression[j][i] = type2expression[i][j];
+  offset[j][i] = offset[i][j];
 
   return cut[i][j];
 }
@@ -341,6 +355,7 @@ void PairLepton::read_restart(FILE *fp)
 void PairLepton::write_restart_settings(FILE *fp)
 {
   fwrite(&cut_global, sizeof(double), 1, fp);
+  fwrite(&offset_flag, sizeof(int), 1, fp);
 }
 
 /* ----------------------------------------------------------------------
@@ -349,8 +364,12 @@ void PairLepton::write_restart_settings(FILE *fp)
 
 void PairLepton::read_restart_settings(FILE *fp)
 {
-  if (comm->me == 0) { utils::sfread(FLERR, &cut_global, sizeof(double), 1, fp, nullptr, error); }
+  if (comm->me == 0) {
+    utils::sfread(FLERR, &cut_global, sizeof(double), 1, fp, nullptr, error);
+    utils::sfread(FLERR, &offset_flag, sizeof(int), 1, fp, nullptr, error);
+  }
   MPI_Bcast(&cut_global, 1, MPI_DOUBLE, 0, world);
+  MPI_Bcast(&offset_flag, 1, MPI_INT, 0, world);
 }
 
 /* ----------------------------------------------------------------------
@@ -389,5 +408,5 @@ double PairLepton::single(int /* i */, int /* j */, int itype, int jtype, double
 
   r_pot = r_for = r;
   fforce = -pairforce.evaluate() / r * factor_lj;
-  return pairpot.evaluate() * factor_lj;
+  return (pairpot.evaluate() - offset[itype][jtype]) * factor_lj;
 }
diff --git a/src/LEPTON/pair_lepton.h b/src/LEPTON/pair_lepton.h
index 6a55695bc3..02105d1f27 100644
--- a/src/LEPTON/pair_lepton.h
+++ b/src/LEPTON/pair_lepton.h
@@ -54,6 +54,7 @@ class PairLepton : public Pair {
   std::vector<std::string> expressions;
   double **cut;
   int **type2expression;
+  double **offset;
   double cut_global;
 
   virtual void allocate();
diff --git a/src/OPENMP/bond_lepton_omp.cpp b/src/OPENMP/bond_lepton_omp.cpp
index 3171aaa51c..560256076f 100644
--- a/src/OPENMP/bond_lepton_omp.cpp
+++ b/src/OPENMP/bond_lepton_omp.cpp
@@ -140,7 +140,7 @@ void BondLeptonOMP::eval(int nfrom, int nto, ThrData *const thr)
     if (EFLAG) {
       double &r_pot = bondpot[idx].getVariableReference("r");
       r_pot = dr;
-      ebond = bondpot[idx].evaluate();
+      ebond = bondpot[idx].evaluate() - offset[type];
     }
     if (EVFLAG)
       ev_tally_thr(this, i1, i2, nlocal, NEWTON_BOND, ebond, fbond, delx, dely, delz, thr);
diff --git a/src/OPENMP/pair_lepton_omp.cpp b/src/OPENMP/pair_lepton_omp.cpp
index 2acb9e8b9f..b3abfdd34b 100644
--- a/src/OPENMP/pair_lepton_omp.cpp
+++ b/src/OPENMP/pair_lepton_omp.cpp
@@ -144,7 +144,8 @@ void PairLeptonOMP::eval(int iifrom, int iito, ThrData *const thr)
         if (EFLAG) {
           double &r_pot = pairpot[idx].getVariableReference("r");
           r_pot = r;
-          evdwl = factor_lj * pairpot[idx].evaluate();
+          evdwl = pairpot[idx].evaluate() - offset[itype][jtype];
+          evdwl *= factor_lj;
         }
 
         if (EVFLAG)

From 28659295580f4ddc6dd980c62e8e99e451562412 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Thu, 22 Dec 2022 07:11:04 -0500
Subject: [PATCH 18/79] update for added source

---
 lib/lepton/Makefile.mpi    | 9 ++++++++-
 lib/lepton/Makefile.serial | 9 ++++++++-
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/lib/lepton/Makefile.mpi b/lib/lepton/Makefile.mpi
index 934188ba50..3d9cc49310 100644
--- a/lib/lepton/Makefile.mpi
+++ b/lib/lepton/Makefile.mpi
@@ -6,7 +6,14 @@ INC=-I include
 AR=ar
 ARFLAGS=rc
 LIB=liblmplepton.a
-SRC=src/CompiledExpression.cpp src/ExpressionProgram.cpp src/ExpressionTreeNode.cpp src/Operation.cpp src/ParsedExpression.cpp src/Parser.cpp
+SRC=src/CompiledExpression.cpp \
+    src/ExpressionProgram.cpp \
+    src/ExpressionTreeNode.cpp \
+    src/Operation.cpp \
+    src/ParsedExpression.cpp \
+    src/Parser.cpp \
+    src/Utils.cpp
+
 OBJ=$(SRC:src/%.cpp=build/%.o)
 
 all: $(LIB) Makefile.lammps
diff --git a/lib/lepton/Makefile.serial b/lib/lepton/Makefile.serial
index 23d9b3dd57..c83951774e 100644
--- a/lib/lepton/Makefile.serial
+++ b/lib/lepton/Makefile.serial
@@ -6,7 +6,14 @@ INC=-I include
 AR=ar
 ARFLAGS=rc
 LIB=liblmplepton.a
-SRC=src/CompiledExpression.cpp src/ExpressionProgram.cpp src/ExpressionTreeNode.cpp src/Operation.cpp src/ParsedExpression.cpp src/Parser.cpp
+SRC=src/CompiledExpression.cpp \
+    src/ExpressionProgram.cpp \
+    src/ExpressionTreeNode.cpp \
+    src/Operation.cpp \
+    src/ParsedExpression.cpp \
+    src/Parser.cpp \
+    src/Utils.cpp
+
 OBJ=$(SRC:src/%.cpp=build/%.o)
 
 all: $(LIB) Makefile.lammps

From ab72e95d0a5ce7bfa8057d8b6cc0b7c426ae728b Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Thu, 22 Dec 2022 07:26:00 -0500
Subject: [PATCH 19/79] restart offset for bond style lepton

---
 src/LEPTON/bond_lepton.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/LEPTON/bond_lepton.cpp b/src/LEPTON/bond_lepton.cpp
index 7ebca00012..3f764e4456 100644
--- a/src/LEPTON/bond_lepton.cpp
+++ b/src/LEPTON/bond_lepton.cpp
@@ -224,6 +224,7 @@ void BondLepton::write_restart(FILE *fp)
 {
   fwrite(&r0[1], sizeof(double), atom->nbondtypes, fp);
   fwrite(&type2expression[1], sizeof(int), atom->nbondtypes, fp);
+  fwrite(&offset[1], sizeof(double), atom->nbondtypes, fp);
 
   int num = expressions.size();
   int maxlen = 0;
@@ -250,9 +251,11 @@ void BondLepton::read_restart(FILE *fp)
   if (comm->me == 0) {
     utils::sfread(FLERR, &r0[1], sizeof(double), atom->nbondtypes, fp, nullptr, error);
     utils::sfread(FLERR, &type2expression[1], sizeof(int), atom->nbondtypes, fp, nullptr, error);
+    utils::sfread(FLERR, &offset[1], sizeof(double), atom->nbondtypes, fp, nullptr, error);
   }
   MPI_Bcast(&r0[1], atom->nbondtypes, MPI_DOUBLE, 0, world);
   MPI_Bcast(&type2expression[1], atom->nbondtypes, MPI_INT, 0, world);
+  MPI_Bcast(&offset[1], atom->nbondtypes, MPI_DOUBLE, 0, world);
   for (int i = 1; i <= atom->nbondtypes; i++) setflag[i] = 1;
 
   int num, maxlen, len;

From a5ecef708f6cf85c4ac8cf6c27dcc7502334b49a Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Thu, 22 Dec 2022 09:59:55 -0500
Subject: [PATCH 20/79] correctly compute offsets. update unit test files.

---
 src/LEPTON/bond_lepton.cpp                       |  2 +-
 src/LEPTON/pair_lepton.cpp                       |  2 +-
 unittest/force-styles/tests/bond-lepton.yaml     |  9 ++++-----
 unittest/force-styles/tests/mol-pair-lepton.yaml | 12 ++++++------
 unittest/force-styles/tests/mol-pair-lj_cut.yaml |  9 +++++----
 5 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/src/LEPTON/bond_lepton.cpp b/src/LEPTON/bond_lepton.cpp
index 3f764e4456..fb55571ca1 100644
--- a/src/LEPTON/bond_lepton.cpp
+++ b/src/LEPTON/bond_lepton.cpp
@@ -179,7 +179,7 @@ void BondLepton::coeff(int narg, char **arg)
     auto bondforce = parsed.differentiate("r").createCompiledExpression();
     double &r_pot = bondpot.getVariableReference("r");
     double &r_for = bondforce.getVariableReference("r");
-    r_for = r_pot = r0_one;
+    r_for = r_pot = 0.0;
     offset_one = bondpot.evaluate();
     bondforce.evaluate();
   } catch (std::exception &e) {
diff --git a/src/LEPTON/pair_lepton.cpp b/src/LEPTON/pair_lepton.cpp
index c159db6388..39b5ade806 100644
--- a/src/LEPTON/pair_lepton.cpp
+++ b/src/LEPTON/pair_lepton.cpp
@@ -257,7 +257,7 @@ double PairLepton::init_one(int i, int j)
     auto parsed = LMP_Lepton::Parser::parse(expressions[type2expression[i][j]]);
     auto pairpot = parsed.createCompiledExpression();
     double &r_pot = pairpot.getVariableReference("r");
-    r_pot = 1.0;
+    r_pot = cut[i][j];
     offset[i][j] = pairpot.evaluate();
   } else
     offset[i][j] = 0.0;
diff --git a/unittest/force-styles/tests/bond-lepton.yaml b/unittest/force-styles/tests/bond-lepton.yaml
index a220dd8b0c..32d0d1453c 100644
--- a/unittest/force-styles/tests/bond-lepton.yaml
+++ b/unittest/force-styles/tests/bond-lepton.yaml
@@ -1,7 +1,6 @@
 ---
-lammps_version: 3 Nov 2022
-tags: generated
-date_generated: Thu Dec 22 02:03:59 2022
+lammps_version: 22 Dec 2022
+date_generated: Thu Dec 22 09:47:41 2022
 epsilon: 2.5e-13
 skip_tests:
 prerequisites: ! |
@@ -21,7 +20,7 @@ equilibrium: 5 1.5 1.1 1.3 1.2 1
 extract: ! |
   r0 1
 natoms: 29
-init_energy: 38.295825321689215
+init_energy: -1.7041746783107878
 init_stress: ! |-
   -4.7778964706834920e+01 -9.3066674567350432e+01  3.4789470658440035e+02 -3.0023920169312170e+01 -8.0421418879842847e+01  5.8592449335969732e+01
 init_forces: ! |2
@@ -54,7 +53,7 @@ init_forces: ! |2
    27  6.6999960289138851e+00  6.3808952243186141e+00  2.0100808779497248e+00
    28 -8.8466157439236681e-01  3.8018717064230995e-01 -5.9857060538593476e-01
    29 -5.8153344545215182e+00 -6.7610823949609244e+00 -1.4115102725637900e+00
-run_energy: 37.78424389351509
+run_energy: -2.215756106484914
 run_stress: ! |-
   -4.6127506998693484e+01 -9.2129732247211749e+01  3.4548310342284810e+02 -2.9841348469661163e+01 -7.8434962689387717e+01  5.9253167412123155e+01
 run_forces: ! |2
diff --git a/unittest/force-styles/tests/mol-pair-lepton.yaml b/unittest/force-styles/tests/mol-pair-lepton.yaml
index d58780b1fe..81d8286588 100644
--- a/unittest/force-styles/tests/mol-pair-lepton.yaml
+++ b/unittest/force-styles/tests/mol-pair-lepton.yaml
@@ -1,7 +1,6 @@
 ---
-lammps_version: 3 Nov 2022
-tags: generated
-date_generated: Wed Dec 21 20:29:34 2022
+lammps_version: 22 Dec 2022
+date_generated: Thu Dec 22 09:57:30 2022
 epsilon: 5e-14
 skip_tests:
 prerequisites: ! |
@@ -9,7 +8,8 @@ prerequisites: ! |
   pair lepton
 pre_commands: ! |
   variable write_data_pair index ij
-post_commands: ! ""
+post_commands: ! |
+  pair_modify shift yes
 input_file: in.fourmol
 pair_style: lepton 8.0
 pair_coeff: ! |
@@ -27,7 +27,7 @@ pair_coeff: ! |
   3 5    "4.0*eps*((sig/r)^12-(sig/r)^6);eps=0.0173205;sig=3.15"
 extract: ! ""
 natoms: 29
-init_vdwl: 749.2370315373564
+init_vdwl: 749.2468149791969
 init_coul: 0
 init_stress: ! |2-
    2.1793853434038242e+03  2.1988955172192768e+03  4.6653977523326257e+03 -7.5956547636050584e+02  2.4751536734032861e+01  6.6652028436400667e+02
@@ -61,7 +61,7 @@ init_forces: ! |2
    27  5.1810388677546001e+01 -2.2705458321213797e+02  9.0849111082069669e+01
    28 -1.8041307121444069e+02  7.7534042932772905e+01 -1.2206956760706598e+02
    29  1.2861057254925012e+02  1.4952711274394568e+02  3.1216025556267880e+01
-run_vdwl: 719.4432816774653
+run_vdwl: 719.4530651193046
 run_coul: 0
 run_stress: ! |2-
    2.1330153957371017e+03  2.1547728168285516e+03  4.3976497417710125e+03 -7.3873328448298525e+02  4.1743821105370067e+01  6.2788012209191027e+02
diff --git a/unittest/force-styles/tests/mol-pair-lj_cut.yaml b/unittest/force-styles/tests/mol-pair-lj_cut.yaml
index 58bb0abf08..68bba170fe 100644
--- a/unittest/force-styles/tests/mol-pair-lj_cut.yaml
+++ b/unittest/force-styles/tests/mol-pair-lj_cut.yaml
@@ -1,6 +1,6 @@
 ---
-lammps_version: 17 Feb 2022
-date_generated: Fri Mar 18 22:17:31 2022
+lammps_version: 22 Dec 2022
+date_generated: Thu Dec 22 09:53:54 2022
 epsilon: 5e-14
 skip_tests:
 prerequisites: ! |
@@ -9,6 +9,7 @@ prerequisites: ! |
 pre_commands: ! ""
 post_commands: ! |
   pair_modify mix arithmetic
+  pair_modify shift yes
 input_file: in.fourmol
 pair_style: lj/cut 8.0
 pair_coeff: ! |
@@ -22,7 +23,7 @@ extract: ! |
   epsilon 2
   sigma 2
 natoms: 29
-init_vdwl: 749.2372261744105
+init_vdwl: 749.2470096189502
 init_coul: 0
 init_stress: ! |2-
    2.1793857186503233e+03  2.1988957679770601e+03  4.6653994738862330e+03 -7.5956544622684294e+02  2.4751393539192360e+01  6.6652061873806701e+02
@@ -56,7 +57,7 @@ init_forces: ! |2
    27  5.1810412832327984e+01 -2.2705468907750401e+02  9.0849153441059272e+01
    28 -1.8041315533250560e+02  7.7534079082878250e+01 -1.2206962452216491e+02
    29  1.2861063251415729e+02  1.4952718246094855e+02  3.1216040111076961e+01
-run_vdwl: 719.4434555542921
+run_vdwl: 719.4532389988314
 run_coul: 0
 run_stress: ! |2-
    2.1330157554553721e+03  2.1547730555430498e+03  4.3976512412988704e+03 -7.3873325485023690e+02  4.1743707190786367e+01  6.2788040986774604e+02

From d9b1e318e88e05d801dbfdf6e0f0ccbb4b411e5d Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Thu, 22 Dec 2022 15:48:24 -0500
Subject: [PATCH 21/79] add documentation for LEPTON package and lepton pair
 and bond style

---
 doc/src/Build_extras.rst                    | 45 +++++++++++++++++++++
 doc/src/Commands_bond.rst                   |  1 +
 doc/src/Commands_pair.rst                   |  1 +
 doc/src/Packages_details.rst                | 38 +++++++++++++++++
 doc/src/Packages_list.rst                   |  5 +++
 doc/src/bond_style.rst                      |  3 +-
 doc/src/pair_style.rst                      |  1 +
 doc/utils/sphinx-config/false_positives.txt |  3 ++
 8 files changed, 96 insertions(+), 1 deletion(-)

diff --git a/doc/src/Build_extras.rst b/doc/src/Build_extras.rst
index 910dccbb8a..976e6e723d 100644
--- a/doc/src/Build_extras.rst
+++ b/doc/src/Build_extras.rst
@@ -43,6 +43,7 @@ This is the list of packages that may require additional steps.
    * :ref:`KIM <kim>`
    * :ref:`KOKKOS <kokkos>`
    * :ref:`LATTE <latte>`
+   * :ref:`LEPTON <lepton>`
    * :ref:`MACHDYN <machdyn>`
    * :ref:`MDI <mdi>`
    * :ref:`MESONT <mesont>`
@@ -873,6 +874,50 @@ library.
 
 ----------
 
+.. _lepton:
+
+LEPTON package
+--------------
+
+To build with this package, you must build the Lepton library which is
+included in the LAMMPS source distribution in the ``lib/lepton`` folder.
+
+.. tabs::
+
+   .. tab:: CMake build
+
+      This is the recommended build procedure for using Lepton in
+      LAMMPS. No additional settings are normally needed besides
+      ``-D PKG_LEPTON=yes``.
+
+   .. tab:: Traditional make
+
+      Before building LAMMPS, one must build the Lepton library in lib/lepton.
+
+      This can be done manually in the same folder by using or adapting
+      one of the provided Makefiles: for example, ``Makefile.serial`` for
+      the GNU C++ compiler, or ``Makefile.mpi`` for the MPI compiler wrapper.
+      The Lepton library is written in C++11 and thus the C++ compiler
+      may need to be instructed to enable support for that.
+
+      In general, it is safer to use build settings consistent with the
+      rest of LAMMPS.  This is best carried out from the LAMMPS src
+      directory using commands like these, which simply invoke the
+      ``lib/lepton/Install.py`` script with the specified args:
+
+      .. code-block:: bash
+
+         $ make lib-lepton                      # print help message
+         $ make lib-lepton args="-m serial"     # build with GNU g++ compiler (settings as with "make serial")
+         $ make lib-lepton args="-m mpi"        # build with default MPI compiler (settings as with "make mpi")
+
+      The "machine" argument of the "-m" flag is used to find a
+      Makefile.machine to use as build recipe.
+
+      The build should produce a ``build`` folder and the library ``lib/lepton/liblmplepton.a``.
+
+----------
+
 .. _mliap:
 
 ML-IAP package
diff --git a/doc/src/Commands_bond.rst b/doc/src/Commands_bond.rst
index ac2d5882fb..f5e5edcc5a 100644
--- a/doc/src/Commands_bond.rst
+++ b/doc/src/Commands_bond.rst
@@ -44,6 +44,7 @@ OPT.
    * :doc:`harmonic (iko) <bond_harmonic>`
    * :doc:`harmonic/shift (o) <bond_harmonic_shift>`
    * :doc:`harmonic/shift/cut (o) <bond_harmonic_shift_cut>`
+   * :doc:`lepton (o) <bond_lepton>`
    * :doc:`mesocnt <bond_mesocnt>`
    * :doc:`mm3 <bond_mm3>`
    * :doc:`morse (o) <bond_morse>`
diff --git a/doc/src/Commands_pair.rst b/doc/src/Commands_pair.rst
index c6d54f0683..8a5f05d095 100644
--- a/doc/src/Commands_pair.rst
+++ b/doc/src/Commands_pair.rst
@@ -134,6 +134,7 @@ OPT.
    * :doc:`lcbop <pair_lcbop>`
    * :doc:`lebedeva/z <pair_lebedeva_z>`
    * :doc:`lennard/mdf <pair_mdf>`
+   * :doc:`lepton (o) <pair_lepton>`
    * :doc:`line/lj <pair_line_lj>`
    * :doc:`lj/charmm/coul/charmm (giko) <pair_charmm>`
    * :doc:`lj/charmm/coul/charmm/implicit (ko) <pair_charmm>`
diff --git a/doc/src/Packages_details.rst b/doc/src/Packages_details.rst
index 74ddb066c6..96ab174a10 100644
--- a/doc/src/Packages_details.rst
+++ b/doc/src/Packages_details.rst
@@ -68,6 +68,7 @@ page gives those details.
    * :ref:`KSPACE <PKG-KSPACE>`
    * :ref:`LATBOLTZ <PKG-LATBOLTZ>`
    * :ref:`LATTE <PKG-LATTE>`
+   * :ref:`LEPTON <PKG-LEPTON>`
    * :ref:`MACHDYN <PKG-MACHDYN>`
    * :ref:`MANIFOLD <PKG-MANIFOLD>`
    * :ref:`MANYBODY <PKG-MANYBODY>`
@@ -1384,6 +1385,43 @@ This package has :ref:`specific installation instructions <latte>` on the :doc:`
 
 ----------
 
+.. _PKG-LEPTON:
+
+LEPTON package
+--------------
+
+**Contents:**
+
+Styles for pair and bond forces that evaluate the potential function from a
+string using the `Lepton mathematical expression parser
+<https://simtk.org/projects/lepton>`_.  Lepton is a C++ library that is
+bundled with `OpenMM <https://openmm.org/>`_ and can be used for
+parsing, evaluating, differentiating, and analyzing mathematical
+expressions.  This is a more lightweight and efficient way to
+evaluate custom potential functions than an embedded Python
+interpreter as used in the :ref:`PYTHON package <PKG-PYTHON>`.
+On the other hand, since the potentials are evaluated from analytical
+expressions, they are more accurate than what can be done with
+:ref:`tabulated potentials <tabulate>`.  Evaluating the expressions
+at runtime, however, comes with a significant increase in runtime.
+
+**Authors:** Axel Kohlmeyer (Temple U).  Lepton itself is developed
+by Peter Eastman at Stanford University.
+
+**Install:**
+
+This package has :ref:`specific installation instructions <lepton>` on
+the :doc:`Build extras <Build_extras>` page.
+
+**Supporting info:**
+
+* src/LEPTON: filenames -> commands
+* lib/lepton/README.md
+* :doc:`pair_style lepton <pair_lepton>`
+* :doc:`bond_style lepton <bond_lepton>`
+
+----------
+
 .. _PKG-MACHDYN:
 
 MACHDYN package
diff --git a/doc/src/Packages_list.rst b/doc/src/Packages_list.rst
index ac0ba7728e..b1483bd954 100644
--- a/doc/src/Packages_list.rst
+++ b/doc/src/Packages_list.rst
@@ -238,6 +238,11 @@ whether an extra library is needed to build and use the package:
      - :doc:`fix latte <fix_latte>`
      - latte
      - ext
+   * - :ref:`LEPTON <PKG-LEPTON>`
+     - evaluate strings as potential function
+     - :doc:`pair_style lepton <pair_lepton>`
+     - PACKAGES/lepton
+     - int
    * - :ref:`MACHDYN <PKG-MACHDYN>`
      - smoothed Mach dynamics
      - `SMD User Guide <PDF/MACHDYN_LAMMPS_userguide.pdf>`_
diff --git a/doc/src/bond_style.rst b/doc/src/bond_style.rst
index 9197e6c4eb..23b89d00a2 100644
--- a/doc/src/bond_style.rst
+++ b/doc/src/bond_style.rst
@@ -10,7 +10,7 @@ Syntax
 
    bond_style style args
 
-* style = *none* or *zero* or *hybrid* or *bpm/rotational* or *bpm/spring* or *class2* or *fene* or *fene/expand* or *fene/nm* or *gaussian* or *gromos* or *harmonic* or *harmonic/shift* or *harmonic/shift/cut* or *morse* or *nonlinear* or *oxdna/fene* or *oxdena2/fene* or *oxrna2/fene* or *quartic* or *special* or *table*
+* style = *none* or *zero* or *hybrid* or *bpm/rotational* or *bpm/spring* or *class2* or *fene* or *fene/expand* or *fene/nm* or *gaussian* or *gromos* or *harmonic* or *harmonic/shift* or *harmonic/shift/cut* or *lepton* or *morse* or *nonlinear* or *oxdna/fene* or *oxdna2/fene* or *oxrna2/fene* or *quartic* or *special* or *table*
 
 * args = none for any style except *hybrid*
 
@@ -95,6 +95,7 @@ accelerated styles exist.
 * :doc:`harmonic <bond_harmonic>` - harmonic bond
 * :doc:`harmonic/shift <bond_harmonic_shift>` - shifted harmonic bond
 * :doc:`harmonic/shift/cut <bond_harmonic_shift_cut>` - shifted harmonic bond with a cutoff
+* :doc:`lepton <bond_lepton>` - bond potential from evaluating a string
 * :doc:`mesocnt <bond_mesocnt>` - Harmonic bond wrapper with parameterization presets for nanotubes
 * :doc:`mm3 <bond_mm3>` - MM3 anharmonic bond
 * :doc:`morse <bond_morse>` - Morse bond
diff --git a/doc/src/pair_style.rst b/doc/src/pair_style.rst
index 48daf34f17..ac8888f8ad 100644
--- a/doc/src/pair_style.rst
+++ b/doc/src/pair_style.rst
@@ -212,6 +212,7 @@ accelerated styles exist.
 * :doc:`lcbop <pair_lcbop>` - long-range bond-order potential (LCBOP)
 * :doc:`lebedeva/z <pair_lebedeva_z>` - Lebedeva interlayer potential for graphene with normals along z-axis
 * :doc:`lennard/mdf <pair_mdf>` - LJ potential in A/B form with a taper function
+* :doc:`lepton <pair_lepton>` - pair potential from evaluating a string
 * :doc:`line/lj <pair_line_lj>` - LJ potential between line segments
 * :doc:`list <pair_list>` - potential between pairs of atoms explicitly listed in an input file
 * :doc:`lj/charmm/coul/charmm <pair_charmm>` - CHARMM potential with cutoff Coulomb
diff --git a/doc/utils/sphinx-config/false_positives.txt b/doc/utils/sphinx-config/false_positives.txt
index 7cb409d040..c879bdb244 100644
--- a/doc/utils/sphinx-config/false_positives.txt
+++ b/doc/utils/sphinx-config/false_positives.txt
@@ -553,6 +553,7 @@ corotate
 corotation
 corotational
 correlator
+Cosecant
 cosineshifted
 cossq
 costheta
@@ -585,6 +586,7 @@ Crozier
 Cryst
 Crystallogr
 Csanyi
+csc
 csg
 csh
 cshrc
@@ -3274,6 +3276,7 @@ Simul
 simulations
 Sinkovits
 Sinnott
+sinh
 sinusoid
 sinusoidally
 SiO

From 91c498c4130109c5c5b9b179e1710da5f616d4e1 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Thu, 22 Dec 2022 16:32:17 -0500
Subject: [PATCH 22/79] suppres explicit exports/import in Lepton lib

---
 cmake/Modules/Packages/LEPTON.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmake/Modules/Packages/LEPTON.cmake b/cmake/Modules/Packages/LEPTON.cmake
index 05241de592..a1a74f3aa9 100644
--- a/cmake/Modules/Packages/LEPTON.cmake
+++ b/cmake/Modules/Packages/LEPTON.cmake
@@ -2,7 +2,7 @@ set(LEPTON_SOURCE_DIR ${LAMMPS_LIB_SOURCE_DIR}/lepton)
 
 file(GLOB LEPTON_SOURCES ${LEPTON_SOURCE_DIR}/src/[^.]*.cpp)
 add_library(lmplepton STATIC ${LEPTON_SOURCES})
-target_compile_definitions(lmplepton PRIVATE -DLEPTON_BUILDING_STATIC_LIBRARY)
+target_compile_definitions(lmplepton PUBLIC -DLEPTON_BUILDING_STATIC_LIBRARY=1)
 set_target_properties(lmplepton PROPERTIES OUTPUT_NAME lammps_lmplepton${LAMMPS_MACHINE})
 target_include_directories(lmplepton PUBLIC ${LEPTON_SOURCE_DIR}/include)
 target_link_libraries(lammps PRIVATE lmplepton)

From ca27fb3a981f59a3fb68c151a2113992f007bdb9 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Thu, 22 Dec 2022 22:16:17 -0500
Subject: [PATCH 23/79] update Lepton to current master branch

---
 .../include/lepton/CompiledExpression.h       |  18 +-
 .../include/lepton/CompiledVectorExpression.h | 145 +++
 .../include/lepton/ExpressionTreeNode.h       |   8 +-
 lib/lepton/include/lepton/ParsedExpression.h  |  19 +-
 lib/lepton/src/CompiledExpression.cpp         | 542 ++++++++--
 lib/lepton/src/CompiledVectorExpression.cpp   | 933 ++++++++++++++++++
 lib/lepton/src/ExpressionTreeNode.cpp         |  48 +-
 lib/lepton/src/Operation.cpp                  | 130 ++-
 lib/lepton/src/ParsedExpression.cpp           |  85 +-
 9 files changed, 1801 insertions(+), 127 deletions(-)
 create mode 100644 lib/lepton/include/lepton/CompiledVectorExpression.h
 create mode 100644 lib/lepton/src/CompiledVectorExpression.cpp

diff --git a/lib/lepton/include/lepton/CompiledExpression.h b/lib/lepton/include/lepton/CompiledExpression.h
index 8ead5ce96f..6c940e081c 100644
--- a/lib/lepton/include/lepton/CompiledExpression.h
+++ b/lib/lepton/include/lepton/CompiledExpression.h
@@ -9,7 +9,7 @@
  * Biological Structures at Stanford, funded under the NIH Roadmap for        *
  * Medical Research, grant U54 GM072970. See https://simtk.org.               *
  *                                                                            *
- * Portions copyright (c) 2013-2019 Stanford University and the Authors.      *
+ * Portions copyright (c) 2013-2022 Stanford University and the Authors.      *
  * Authors: Peter Eastman                                                     *
  * Contributors:                                                              *
  *                                                                            *
@@ -40,7 +40,11 @@
 #include <utility>
 #include <vector>
 #ifdef LEPTON_USE_JIT
-    #include "asmjit.h"
+#if defined(__ARM__) || defined(__ARM64__)
+#include "asmjit/a64.h"
+#else
+#include "asmjit/x86.h"
+#endif
 #endif
 
 namespace LMP_Lepton {
@@ -101,9 +105,15 @@ private:
     std::map<std::string, double> dummyVariables;
     double (*jitCode)();
 #ifdef LEPTON_USE_JIT
+    void findPowerGroups(std::vector<std::vector<int> >& groups, std::vector<std::vector<int> >& groupPowers, std::vector<int>& stepGroup);
     void generateJitCode();
-    void generateSingleArgCall(asmjit::X86Compiler& c, asmjit::X86Xmm& dest, asmjit::X86Xmm& arg, double (*function)(double));
-    void generateTwoArgCall(asmjit::X86Compiler& c, asmjit::X86Xmm& dest, asmjit::X86Xmm& arg1, asmjit::X86Xmm& arg2, double (*function)(double, double));
+#if defined(__ARM__) || defined(__ARM64__)
+    void generateSingleArgCall(asmjit::a64::Compiler& c, asmjit::arm::Vec& dest, asmjit::arm::Vec& arg, double (*function)(double));
+    void generateTwoArgCall(asmjit::a64::Compiler& c, asmjit::arm::Vec& dest, asmjit::arm::Vec& arg1, asmjit::arm::Vec& arg2, double (*function)(double, double));
+#else
+    void generateSingleArgCall(asmjit::x86::Compiler& c, asmjit::x86::Xmm& dest, asmjit::x86::Xmm& arg, double (*function)(double));
+    void generateTwoArgCall(asmjit::x86::Compiler& c, asmjit::x86::Xmm& dest, asmjit::x86::Xmm& arg1, asmjit::x86::Xmm& arg2, double (*function)(double, double));
+#endif
     std::vector<double> constants;
     asmjit::JitRuntime runtime;
 #endif
diff --git a/lib/lepton/include/lepton/CompiledVectorExpression.h b/lib/lepton/include/lepton/CompiledVectorExpression.h
new file mode 100644
index 0000000000..e097e3eae1
--- /dev/null
+++ b/lib/lepton/include/lepton/CompiledVectorExpression.h
@@ -0,0 +1,145 @@
+#ifndef LEPTON_VECTOR_EXPRESSION_H_
+#define LEPTON_VECTOR_EXPRESSION_H_
+
+/* -------------------------------------------------------------------------- *
+ *                                   Lepton                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the Lepton expression parser originating from              *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2013-2022 Stanford University and the Authors.      *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "ExpressionTreeNode.h"
+#include "windowsIncludes.h"
+#include <array>
+#include <map>
+#include <set>
+#include <string>
+#include <utility>
+#include <vector>
+#ifdef LEPTON_USE_JIT
+#if defined(__ARM__) || defined(__ARM64__)
+#include "asmjit/a64.h"
+#else
+#include "asmjit/x86.h"
+#endif
+#endif
+
+namespace LMP_Lepton {
+
+class Operation;
+class ParsedExpression;
+
+/**
+ * A CompiledVectorExpression is a highly optimized representation of an expression for cases when you want to evaluate
+ * it many times as quickly as possible.  It is similar to CompiledExpression, with the extra feature that it uses the CPU's
+ * vector unit (AVX on x86, NEON on ARM) to evaluate the expression for multiple sets of arguments at once.  It also differs
+ * from CompiledExpression and ParsedExpression in using single precision rather than double precision to evaluate the expression.
+ * You should treat it as an opaque object; none of the internal representation is visible.
+ *
+ * A CompiledVectorExpression is created by calling createCompiledVectorExpression() on a ParsedExpression.  When you create
+ * it, you must specify the width of the vectors on which to compute the expression.  The allowed widths depend on the type of
+ * CPU it is running on.  4 is always allowed, and 8 is allowed on x86 processors with AVX.  Call getAllowedWidths() to query
+ * the allowed values.
+ *
+ * WARNING: CompiledVectorExpression is NOT thread safe.  You should never access a CompiledVectorExpression from two threads at
+ * the same time.
+ */
+
+class LEPTON_EXPORT CompiledVectorExpression {
+public:
+    CompiledVectorExpression();
+    CompiledVectorExpression(const CompiledVectorExpression& expression);
+    ~CompiledVectorExpression();
+    CompiledVectorExpression& operator=(const CompiledVectorExpression& expression);
+    /**
+     * Get the width of the vectors on which the expression is computed.
+     */
+    int getWidth() const;
+    /**
+     * Get the names of all variables used by this expression.
+     */
+    const std::set<std::string>& getVariables() const;
+    /**
+     * Get a pointer to the memory location where the value of a particular variable is stored.  This can be used
+     * to set the value of the variable before calling evaluate().
+     *
+     * @param name    the name of the variable to query
+     * @return a pointer to N floating point values, where N is the vector width
+     */
+    float* getVariablePointer(const std::string& name);
+    /**
+     * You can optionally specify the memory locations from which the values of variables should be read.
+     * This is useful, for example, when several expressions all use the same variable.  You can then set
+     * the value of that variable in one place, and it will be seen by all of them.  The location should
+     * be a pointer to N floating point values, where N is the vector width.
+     */
+    void setVariableLocations(std::map<std::string, float*>& variableLocations);
+    /**
+     * Evaluate the expression.  The values of all variables should have been set before calling this.
+     *
+     * @return a pointer to N floating point values, where N is the vector width
+     */
+    const float* evaluate() const;
+    /**
+     * Get the list of vector widths that are supported on the current processor.
+     */
+    static const std::vector<int>& getAllowedWidths();
+private:
+    friend class ParsedExpression;  // grants ParsedExpression access to the private constructor below
+    CompiledVectorExpression(const ParsedExpression& expression, int width);  // private: per the class description, instances come from ParsedExpression::createCompiledVectorExpression()
+    void compileExpression(const ExpressionTreeNode& node, std::vector<std::pair<ExpressionTreeNode, int> >& temps, int& workspaceSize);
+    int findTempIndex(const ExpressionTreeNode& node, std::vector<std::pair<ExpressionTreeNode, int> >& temps);
+    int width;  // NOTE(review): presumably the vector width reported by getWidth() -- confirm in the .cpp
+    std::map<std::string, float*> variablePointers;
+    std::vector<std::pair<float*, float*> > variablesToCopy;
+    std::vector<std::vector<int> > arguments;
+    std::vector<int> target;
+    std::vector<Operation*> operation;
+    std::map<std::string, int> variableIndices;
+    std::set<std::string> variableNames;
+    mutable std::vector<float> workspace;  // mutable: may be modified from const member functions (evaluate() is const)
+    mutable std::vector<double> argValues;  // mutable: may be modified from const member functions (evaluate() is const)
+    std::map<std::string, double> dummyVariables;
+    void (*jitCode)();  // NOTE(review): presumably null unless JIT code was generated -- confirm in the .cpp
+#ifdef LEPTON_USE_JIT
+    void findPowerGroups(std::vector<std::vector<int> >& groups, std::vector<std::vector<int> >& groupPowers, std::vector<int>& stepGroup);
+    void generateJitCode();
+#if defined(__ARM__) || defined(__ARM64__)
+    void generateSingleArgCall(asmjit::a64::Compiler& c, asmjit::arm::Vec& dest, asmjit::arm::Vec& arg, float (*function)(float));
+    void generateTwoArgCall(asmjit::a64::Compiler& c, asmjit::arm::Vec& dest, asmjit::arm::Vec& arg1, asmjit::arm::Vec& arg2, float (*function)(float, float));
+#else
+    void generateSingleArgCall(asmjit::x86::Compiler& c, asmjit::x86::Ymm& dest, asmjit::x86::Ymm& arg, float (*function)(float));
+    void generateTwoArgCall(asmjit::x86::Compiler& c, asmjit::x86::Ymm& dest, asmjit::x86::Ymm& arg1, asmjit::x86::Ymm& arg2, float (*function)(float, float));
+#endif
+    std::vector<float> constants;
+    asmjit::JitRuntime runtime;
+#endif
+};
+
+} // namespace LMP_Lepton
+
+#endif /*LEPTON_VECTOR_EXPRESSION_H_*/
diff --git a/lib/lepton/include/lepton/ExpressionTreeNode.h b/lib/lepton/include/lepton/ExpressionTreeNode.h
index 514cc008a9..eba791fbaa 100644
--- a/lib/lepton/include/lepton/ExpressionTreeNode.h
+++ b/lib/lepton/include/lepton/ExpressionTreeNode.h
@@ -9,7 +9,7 @@
  * Biological Structures at Stanford, funded under the NIH Roadmap for        *
  * Medical Research, grant U54 GM072970. See https://simtk.org.               *
  *                                                                            *
- * Portions copyright (c) 2009 Stanford University and the Authors.           *
+ * Portions copyright (c) 2009-2021 Stanford University and the Authors.      *
  * Authors: Peter Eastman                                                     *
  * Contributors:                                                              *
  *                                                                            *
@@ -39,6 +39,7 @@
 namespace LMP_Lepton {
 
 class Operation;
+class ParsedExpression;
 
 /**
  * This class represents a node in the abstract syntax tree representation of an expression.
@@ -82,11 +83,13 @@ public:
      */
     ExpressionTreeNode(Operation* operation);
     ExpressionTreeNode(const ExpressionTreeNode& node);
+    ExpressionTreeNode(ExpressionTreeNode&& node);
     ExpressionTreeNode();
     ~ExpressionTreeNode();
     bool operator==(const ExpressionTreeNode& node) const;
     bool operator!=(const ExpressionTreeNode& node) const;
     ExpressionTreeNode& operator=(const ExpressionTreeNode& node);
+    ExpressionTreeNode& operator=(ExpressionTreeNode&& node);
     /**
      * Get the Operation performed by this node.
      */
@@ -96,8 +99,11 @@ public:
      */
     const std::vector<ExpressionTreeNode>& getChildren() const;
 private:
+    friend class ParsedExpression;
+    void assignTags(std::vector<const ExpressionTreeNode*>& examples) const;
     Operation* operation;
     std::vector<ExpressionTreeNode> children;
+    mutable int tag;
 };
 
 } // namespace LMP_Lepton
diff --git a/lib/lepton/include/lepton/ParsedExpression.h b/lib/lepton/include/lepton/ParsedExpression.h
index 586acb4d2c..05081f677c 100644
--- a/lib/lepton/include/lepton/ParsedExpression.h
+++ b/lib/lepton/include/lepton/ParsedExpression.h
@@ -9,7 +9,7 @@
  * Biological Structures at Stanford, funded under the NIH Roadmap for        *
  * Medical Research, grant U54 GM072970. See https://simtk.org.               *
  *                                                                            *
- * Portions copyright (c) 2009=2013 Stanford University and the Authors.      *
+ * Portions copyright (c) 2009-2022 Stanford University and the Authors.      *
  * Authors: Peter Eastman                                                     *
  * Contributors:                                                              *
  *                                                                            *
@@ -41,6 +41,7 @@ namespace LMP_Lepton {
 
 class CompiledExpression;
 class ExpressionProgram;
+class CompiledVectorExpression;
 
 /**
  * This class represents the result of parsing an expression.  It provides methods for working with the
@@ -102,6 +103,16 @@ public:
      * Create a CompiledExpression that represents the same calculation as this expression.
      */
     CompiledExpression createCompiledExpression() const;
+    /**
+     * Create a CompiledVectorExpression that allows the expression to be evaluated efficiently
+     * using the CPU's vector unit.
+     *
+     * @param width    the width of the vectors to evaluate it on.  The allowed values
+     *                 depend on the CPU.  4 is always allowed, and 8 is allowed on
+     *                 x86 processors with AVX.  Call CompiledVectorExpression::getAllowedWidths()
+     *                 to query the allowed widths on the current processor.
+     */
+    CompiledVectorExpression createCompiledVectorExpression(int width) const;
     /**
      * Create a new ParsedExpression which is identical to this one, except that the names of some
      * variables have been changed.
@@ -113,9 +124,9 @@ public:
 private:
     static double evaluate(const ExpressionTreeNode& node, const std::map<std::string, double>& variables);
     static ExpressionTreeNode preevaluateVariables(const ExpressionTreeNode& node, const std::map<std::string, double>& variables);
-    static ExpressionTreeNode precalculateConstantSubexpressions(const ExpressionTreeNode& node);
-    static ExpressionTreeNode substituteSimplerExpression(const ExpressionTreeNode& node);
-    static ExpressionTreeNode differentiate(const ExpressionTreeNode& node, const std::string& variable);
+    static ExpressionTreeNode precalculateConstantSubexpressions(const ExpressionTreeNode& node, std::map<int, ExpressionTreeNode>& nodeCache);
+    static ExpressionTreeNode substituteSimplerExpression(const ExpressionTreeNode& node, std::map<int, ExpressionTreeNode>& nodeCache);
+    static ExpressionTreeNode differentiate(const ExpressionTreeNode& node, const std::string& variable, std::map<int, ExpressionTreeNode>& nodeCache);
     static bool isConstant(const ExpressionTreeNode& node);
     static double getConstantValue(const ExpressionTreeNode& node);
     static ExpressionTreeNode renameNodeVariables(const ExpressionTreeNode& node, const std::map<std::string, std::string>& replacements);
diff --git a/lib/lepton/src/CompiledExpression.cpp b/lib/lepton/src/CompiledExpression.cpp
index c6c1543ce4..b85c3a08f7 100644
--- a/lib/lepton/src/CompiledExpression.cpp
+++ b/lib/lepton/src/CompiledExpression.cpp
@@ -6,7 +6,7 @@
  * Biological Structures at Stanford, funded under the NIH Roadmap for        *
  * Medical Research, grant U54 GM072970. See https://simtk.org.               *
  *                                                                            *
- * Portions copyright (c) 2013-2019 Stanford University and the Authors.      *
+ * Portions copyright (c) 2013-2022 Stanford University and the Authors.      *
  * Authors: Peter Eastman                                                     *
  * Contributors:                                                              *
  *                                                                            *
@@ -151,7 +151,7 @@ void CompiledExpression::setVariableLocations(map<string, double*>& variableLoca
 
     if (workspace.size() > 0)
         generateJitCode();
-#else
+#endif
     // Make a list of all variables we will need to copy before evaluating the expression.
 
     variablesToCopy.clear();
@@ -160,13 +160,11 @@ void CompiledExpression::setVariableLocations(map<string, double*>& variableLoca
         if (pointer != variablePointers.end())
             variablesToCopy.push_back(make_pair(&workspace[iter->second], pointer->second));
     }
-#endif
 }
 
 double CompiledExpression::evaluate() const {
-#ifdef LEPTON_USE_JIT
-    return jitCode();
-#else
+    if (jitCode)
+        return jitCode();
     for (int i = 0; i < (int)variablesToCopy.size(); i++)
         *variablesToCopy[i].first = *variablesToCopy[i].second;
 
@@ -183,7 +181,6 @@ double CompiledExpression::evaluate() const {
         }
     }
     return workspace[workspace.size()-1];
-#endif
 }
 
 #ifdef LEPTON_USE_JIT
@@ -192,24 +189,70 @@ static double evaluateOperation(Operation* op, double* args) {
     return op->evaluate(args, dummyVariables);
 }
 
+void CompiledExpression::findPowerGroups(vector<vector<int> >& groups, vector<vector<int> >& groupPowers, vector<int>& stepGroup) { // Outputs: groups = step indices per group, groupPowers = matching exponents, stepGroup = group index per step (-1 if none).
+    // Identify every step that raises an argument to an integer power.
+
+    vector<int> stepPower(operation.size(), 0); // 0 marks a step that is not an integer power.
+    vector<int> stepArg(operation.size(), -1); // First argument index of the step; -1 when the step is not an integer power.
+    for (int step = 0; step < (int)operation.size(); step++) {
+        Operation& op = *operation[step];
+        int power = 0;
+        if (op.getId() == Operation::SQUARE)
+            power = 2;
+        else if (op.getId() == Operation::CUBE)
+            power = 3;
+        else if (op.getId() == Operation::POWER_CONSTANT) {
+            double realPower = dynamic_cast<const Operation::PowerConstant*>(&op)->getValue();
+            if (realPower == (int) realPower) // Only whole-number exponents qualify. NOTE(review): the cast is undefined when realPower is NaN or outside the range of int.
+                power = (int) realPower;
+        }
+        if (power != 0) { // A constant exponent of exactly 0 is left ungrouped as well.
+            stepPower[step] = power;
+            stepArg[step] = arguments[step][0];
+        }
+    }
+
+    // Find groups that operate on the same argument and whose powers have the same sign.
+
+    stepGroup.resize(operation.size(), -1); // resize() only fills newly added slots, so this relies on stepGroup arriving empty, as it does from generateJitCode().
+    for (int i = 0; i < (int)operation.size(); i++) {
+        if (stepGroup[i] != -1)
+            continue; // Already claimed by the group of an earlier step.
+        vector<int> group, power;
+        for (int j = i; j < (int)operation.size(); j++) {
+            if (stepArg[i] == stepArg[j] && stepPower[i]*stepPower[j] > 0) { // Same argument, and both powers nonzero with the same sign.
+                stepGroup[j] = groups.size();
+                group.push_back(j);
+                power.push_back(stepPower[j]);
+            }
+        }
+        groups.push_back(group); // Steps that are not integer powers still append an empty group here; their stepGroup entry stays -1.
+        groupPowers.push_back(power);
+    }
+}
+
+#if defined(__ARM__) || defined(__ARM64__)
 void CompiledExpression::generateJitCode() {
     CodeHolder code;
-    code.init(runtime.getCodeInfo());
-    X86Compiler c(&code);
-    c.addFunc(FuncSignature0<double>());
-    vector<X86Xmm> workspaceVar(workspace.size());
+    code.init(runtime.environment());
+    a64::Compiler c(&code);
+    c.addFunc(FuncSignatureT<double>());
+    vector<arm::Vec> workspaceVar(workspace.size());
     for (int i = 0; i < (int) workspaceVar.size(); i++)
-        workspaceVar[i] = c.newXmmSd();
-    X86Gp argsPointer = c.newIntPtr();
-    c.mov(argsPointer, imm_ptr(&argValues[0]));
+        workspaceVar[i] = c.newVecD();
+    arm::Gp argsPointer = c.newIntPtr();
+    c.mov(argsPointer, imm(&argValues[0]));
+    vector<vector<int> > groups, groupPowers;
+    vector<int> stepGroup;
+    findPowerGroups(groups, groupPowers, stepGroup);
 
     // Load the arguments into variables.
 
     for (set<string>::const_iterator iter = variableNames.begin(); iter != variableNames.end(); ++iter) {
         map<string, int>::iterator index = variableIndices.find(*iter);
-        X86Gp variablePointer = c.newIntPtr();
-        c.mov(variablePointer, imm_ptr(&getVariableReference(index->first)));
-        c.movsd(workspaceVar[index->second], x86::ptr(variablePointer, 0, 0));
+        arm::Gp variablePointer = c.newIntPtr();
+        c.mov(variablePointer, imm(&getVariableReference(index->first)));
+        c.ldr(workspaceVar[index->second], arm::ptr(variablePointer, 0));
     }
 
     // Make a list of all constants that will be needed for evaluation.
@@ -232,6 +275,12 @@ void CompiledExpression::generateJitCode() {
             value = 1.0;
         else if (op.getId() == Operation::DELTA)
             value = 1.0;
+        else if (op.getId() == Operation::POWER_CONSTANT) {
+            if (stepGroup[step] == -1)
+                value = dynamic_cast<Operation::PowerConstant&>(op).getValue();
+            else
+                value = 1.0;
+        }
         else
             continue;
 
@@ -250,19 +299,63 @@ void CompiledExpression::generateJitCode() {
 
     // Load constants into variables.
 
-    vector<X86Xmm> constantVar(constants.size());
+    vector<arm::Vec> constantVar(constants.size());
     if (constants.size() > 0) {
-        X86Gp constantsPointer = c.newIntPtr();
-        c.mov(constantsPointer, imm_ptr(&constants[0]));
+        arm::Gp constantsPointer = c.newIntPtr();
+        c.mov(constantsPointer, imm(&constants[0]));
         for (int i = 0; i < (int) constants.size(); i++) {
-            constantVar[i] = c.newXmmSd();
-            c.movsd(constantVar[i], x86::ptr(constantsPointer, 8*i, 0));
+            constantVar[i] = c.newVecD();
+            c.ldr(constantVar[i], arm::ptr(constantsPointer, 8*i));
         }
     }
 
     // Evaluate the operations.
 
+    vector<bool> hasComputedPower(operation.size(), false);
     for (int step = 0; step < (int) operation.size(); step++) {
+        if (hasComputedPower[step])
+            continue;
+
+        // When one or more steps involve raising the same argument to multiple integer
+        // powers, we can compute them all together for efficiency.
+
+        if (stepGroup[step] != -1) {
+            vector<int>& group = groups[stepGroup[step]];
+            vector<int>& powers = groupPowers[stepGroup[step]];
+            arm::Vec multiplier = c.newVecD();
+            if (powers[0] > 0)
+                c.fmov(multiplier, workspaceVar[arguments[step][0]]);
+            else {
+                c.fdiv(multiplier, constantVar[operationConstantIndex[step]], workspaceVar[arguments[step][0]]);
+                for (int i = 0; i < powers.size(); i++)
+                    powers[i] = -powers[i];
+            }
+            vector<bool> hasAssigned(group.size(), false);
+            bool done = false;
+            while (!done) {
+                done = true;
+                for (int i = 0; i < group.size(); i++) {
+                    if (powers[i]%2 == 1) {
+                        if (!hasAssigned[i])
+                            c.fmov(workspaceVar[target[group[i]]], multiplier);
+                        else
+                            c.fmul(workspaceVar[target[group[i]]], workspaceVar[target[group[i]]], multiplier);
+                        hasAssigned[i] = true;
+                    }
+                    powers[i] >>= 1;
+                    if (powers[i] != 0)
+                        done = false;
+                }
+                if (!done)
+                    c.fmul(multiplier, multiplier, multiplier);
+            }
+            for (int step : group)
+                hasComputedPower[step] = true;
+            continue;
+        }
+
+        // Evaluate the step.
+
         Operation& op = *operation[step];
         vector<int> args = arguments[step];
         if (args.size() == 1) {
@@ -276,33 +369,28 @@ void CompiledExpression::generateJitCode() {
 
         switch (op.getId()) {
             case Operation::CONSTANT:
-                c.movsd(workspaceVar[target[step]], constantVar[operationConstantIndex[step]]);
+                c.fmov(workspaceVar[target[step]], constantVar[operationConstantIndex[step]]);
                 break;
             case Operation::ADD:
-                c.movsd(workspaceVar[target[step]], workspaceVar[args[0]]);
-                c.addsd(workspaceVar[target[step]], workspaceVar[args[1]]);
+                c.fadd(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]]);
                 break;
             case Operation::SUBTRACT:
-                c.movsd(workspaceVar[target[step]], workspaceVar[args[0]]);
-                c.subsd(workspaceVar[target[step]], workspaceVar[args[1]]);
+                c.fsub(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]]);
                 break;
             case Operation::MULTIPLY:
-                c.movsd(workspaceVar[target[step]], workspaceVar[args[0]]);
-                c.mulsd(workspaceVar[target[step]], workspaceVar[args[1]]);
+                c.fmul(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]]);
                 break;
             case Operation::DIVIDE:
-                c.movsd(workspaceVar[target[step]], workspaceVar[args[0]]);
-                c.divsd(workspaceVar[target[step]], workspaceVar[args[1]]);
+                c.fdiv(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]]);
                 break;
             case Operation::POWER:
                 generateTwoArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]], pow);
                 break;
             case Operation::NEGATE:
-                c.xorps(workspaceVar[target[step]], workspaceVar[target[step]]);
-                c.subsd(workspaceVar[target[step]], workspaceVar[args[0]]);
+                c.fneg(workspaceVar[target[step]], workspaceVar[args[0]]);
                 break;
             case Operation::SQRT:
-                c.sqrtsd(workspaceVar[target[step]], workspaceVar[args[0]]);
+                c.fsqrt(workspaceVar[target[step]], workspaceVar[args[0]]);
                 break;
             case Operation::EXP:
                 generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], exp);
@@ -341,56 +429,63 @@ void CompiledExpression::generateJitCode() {
                 generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], tanh);
                 break;
             case Operation::STEP:
-                c.xorps(workspaceVar[target[step]], workspaceVar[target[step]]);
-                c.cmpsd(workspaceVar[target[step]], workspaceVar[args[0]], imm(18)); // Comparison mode is _CMP_LE_OQ = 18
-                c.andps(workspaceVar[target[step]], constantVar[operationConstantIndex[step]]);
+                c.cmge(workspaceVar[target[step]], workspaceVar[args[0]], imm(0));
+                c.and_(workspaceVar[target[step]], workspaceVar[target[step]], constantVar[operationConstantIndex[step]]);
                 break;
             case Operation::DELTA:
-                c.xorps(workspaceVar[target[step]], workspaceVar[target[step]]);
-                c.cmpsd(workspaceVar[target[step]], workspaceVar[args[0]], imm(16)); // Comparison mode is _CMP_EQ_OS = 16
-                c.andps(workspaceVar[target[step]], constantVar[operationConstantIndex[step]]);
+                c.cmeq(workspaceVar[target[step]], workspaceVar[args[0]], imm(0));
+                c.and_(workspaceVar[target[step]], workspaceVar[target[step]], constantVar[operationConstantIndex[step]]);
                 break;
             case Operation::SQUARE:
-                c.movsd(workspaceVar[target[step]], workspaceVar[args[0]]);
-                c.mulsd(workspaceVar[target[step]], workspaceVar[args[0]]);
+                c.fmul(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[0]]);
                 break;
             case Operation::CUBE:
-                c.movsd(workspaceVar[target[step]], workspaceVar[args[0]]);
-                c.mulsd(workspaceVar[target[step]], workspaceVar[args[0]]);
-                c.mulsd(workspaceVar[target[step]], workspaceVar[args[0]]);
+                c.fmul(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[0]]);
+                c.fmul(workspaceVar[target[step]], workspaceVar[target[step]], workspaceVar[args[0]]);
                 break;
             case Operation::RECIPROCAL:
-                c.movsd(workspaceVar[target[step]], constantVar[operationConstantIndex[step]]);
-                c.divsd(workspaceVar[target[step]], workspaceVar[args[0]]);
+                c.fdiv(workspaceVar[target[step]], constantVar[operationConstantIndex[step]], workspaceVar[args[0]]);
                 break;
             case Operation::ADD_CONSTANT:
-                c.movsd(workspaceVar[target[step]], workspaceVar[args[0]]);
-                c.addsd(workspaceVar[target[step]], constantVar[operationConstantIndex[step]]);
+                c.fadd(workspaceVar[target[step]], workspaceVar[args[0]], constantVar[operationConstantIndex[step]]);
                 break;
             case Operation::MULTIPLY_CONSTANT:
-                c.movsd(workspaceVar[target[step]], workspaceVar[args[0]]);
-                c.mulsd(workspaceVar[target[step]], constantVar[operationConstantIndex[step]]);
+                c.fmul(workspaceVar[target[step]], workspaceVar[args[0]], constantVar[operationConstantIndex[step]]);
+                break;
+            case Operation::POWER_CONSTANT:
+                generateTwoArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], constantVar[operationConstantIndex[step]], pow);
+                break;
+            case Operation::MIN:
+                c.fmin(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]]);
+                break;
+            case Operation::MAX:
+                c.fmax(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]]);
                 break;
             case Operation::ABS:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], fabs);
+                c.fabs(workspaceVar[target[step]], workspaceVar[args[0]]);
                 break;
             case Operation::FLOOR:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], floor);
+                c.frintm(workspaceVar[target[step]], workspaceVar[args[0]]);
                 break;
             case Operation::CEIL:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], ceil);
+                c.frintp(workspaceVar[target[step]], workspaceVar[args[0]]);
+                break;
+            case Operation::SELECT:
+                c.fcmeq(workspaceVar[target[step]], workspaceVar[args[0]], imm(0));
+                c.bsl(workspaceVar[target[step]], workspaceVar[args[2]], workspaceVar[args[1]]);
                 break;
             default:
                 // Just invoke evaluateOperation().
 
                 for (int i = 0; i < (int) args.size(); i++)
-                    c.movsd(x86::ptr(argsPointer, 8*i, 0), workspaceVar[args[i]]);
-                X86Gp fn = c.newIntPtr();
-                c.mov(fn, imm_ptr((void*) evaluateOperation));
-                CCFuncCall* call = c.call(fn, FuncSignature2<double, Operation*, double*>());
-                call->setArg(0, imm_ptr(&op));
-                call->setArg(1, imm_ptr(&argValues[0]));
-                call->setRet(0, workspaceVar[target[step]]);
+                    c.str(workspaceVar[args[i]], arm::ptr(argsPointer, 8*i));
+                arm::Gp fn = c.newIntPtr();
+                c.mov(fn, imm((void*) evaluateOperation));
+                InvokeNode* invoke;
+                c.invoke(&invoke, fn, FuncSignatureT<double, Operation*, double*>());
+                invoke->setArg(0, imm(&op));
+                invoke->setArg(1, imm(&argValues[0]));
+                invoke->setRet(0, workspaceVar[target[step]]);
         }
     }
     c.ret(workspaceVar[workspace.size()-1]);
@@ -399,20 +494,319 @@ void CompiledExpression::generateJitCode() {
     runtime.add(&jitCode, &code);
 }
 
-void CompiledExpression::generateSingleArgCall(X86Compiler& c, X86Xmm& dest, X86Xmm& arg, double (*function)(double)) {
-    X86Gp fn = c.newIntPtr();
-    c.mov(fn, imm_ptr((void*) function));
-    CCFuncCall* call = c.call(fn, FuncSignature1<double, double>());
-    call->setArg(0, arg);
-    call->setRet(0, dest);
+void CompiledExpression::generateSingleArgCall(a64::Compiler& c, arm::Vec& dest, arm::Vec& arg, double (*function)(double)) { // ARM variant: emits a call to function(arg) with the result bound to dest.
+    arm::Gp fn = c.newIntPtr();
+    c.mov(fn, imm((void*) function)); // The callee address is embedded as an immediate.
+    InvokeNode* invoke;
+    c.invoke(&invoke, fn, FuncSignatureT<double, double>()); // Signature: double f(double).
+    invoke->setArg(0, arg);
+    invoke->setRet(0, dest);
 }
 
-void CompiledExpression::generateTwoArgCall(X86Compiler& c, X86Xmm& dest, X86Xmm& arg1, X86Xmm& arg2, double (*function)(double, double)) {
-    X86Gp fn = c.newIntPtr();
-    c.mov(fn, imm_ptr((void*) function));
-    CCFuncCall* call = c.call(fn, FuncSignature2<double, double, double>());
-    call->setArg(0, arg1);
-    call->setArg(1, arg2);
-    call->setRet(0, dest);
+void CompiledExpression::generateTwoArgCall(a64::Compiler& c, arm::Vec& dest, arm::Vec& arg1, arm::Vec& arg2, double (*function)(double, double)) { // ARM variant: emits a call to function(arg1, arg2) with the result bound to dest.
+    arm::Gp fn = c.newIntPtr();
+    c.mov(fn, imm((void*) function)); // The callee address is embedded as an immediate.
+    InvokeNode* invoke;
+    c.invoke(&invoke, fn, FuncSignatureT<double, double, double>()); // Signature: double f(double, double).
+    invoke->setArg(0, arg1);
+    invoke->setArg(1, arg2);
+    invoke->setRet(0, dest);
+}
+#else
+void CompiledExpression::generateJitCode() { // Non-ARM variant: builds the whole expression as one double() function using x86::Compiler and AVX scalar instructions.
+    const CpuInfo& cpu = CpuInfo::host();
+    if (!cpu.hasFeature(CpuFeatures::X86::kAVX))
+        return; // No AVX on this CPU: generate nothing; evaluate() tests jitCode before calling it.
+    CodeHolder code;
+    code.init(runtime.environment());
+    x86::Compiler c(&code);
+    FuncNode* funcNode = c.addFunc(FuncSignatureT<double>());
+    funcNode->frame().setAvxEnabled();
+    vector<x86::Xmm> workspaceVar(workspace.size()); // One virtual register per workspace slot.
+    for (int i = 0; i < (int) workspaceVar.size(); i++)
+        workspaceVar[i] = c.newXmmSd();
+    x86::Gp argsPointer = c.newIntPtr();
+    c.mov(argsPointer, imm(&argValues[0])); // Address of the scratch array used by the evaluateOperation() fallback below.
+    vector<vector<int> > groups, groupPowers;
+    vector<int> stepGroup;
+    findPowerGroups(groups, groupPowers, stepGroup);
+
+    // Load the arguments into variables.
+
+    x86::Gp variablePointer = c.newIntPtr(); // A single pointer register is reused for every variable load.
+    for (set<string>::const_iterator iter = variableNames.begin(); iter != variableNames.end(); ++iter) {
+        map<string, int>::iterator index = variableIndices.find(*iter);
+        c.mov(variablePointer, imm(&getVariableReference(index->first)));
+        c.vmovsd(workspaceVar[index->second], x86::ptr(variablePointer, 0, 0));
+    }
+
+    // Make a list of all constants that will be needed for evaluation.
+
+    vector<int> operationConstantIndex(operation.size(), -1); // -1 means the step uses no constant.
+    for (int step = 0; step < (int) operation.size(); step++) {
+        // Find the constant value (if any) used by this operation.
+
+        Operation& op = *operation[step];
+        double value; // Assigned on every path that does not hit the continue below.
+        if (op.getId() == Operation::CONSTANT)
+            value = dynamic_cast<Operation::Constant&>(op).getValue();
+        else if (op.getId() == Operation::ADD_CONSTANT)
+            value = dynamic_cast<Operation::AddConstant&>(op).getValue();
+        else if (op.getId() == Operation::MULTIPLY_CONSTANT)
+            value = dynamic_cast<Operation::MultiplyConstant&>(op).getValue();
+        else if (op.getId() == Operation::RECIPROCAL)
+            value = 1.0; // Numerator of 1/x.
+        else if (op.getId() == Operation::STEP)
+            value = 1.0; // ANDed with the comparison mask to produce 1.0 or 0.0.
+        else if (op.getId() == Operation::DELTA)
+            value = 1.0; // ANDed with the comparison mask to produce 1.0 or 0.0.
+        else if (op.getId() == Operation::ABS) {
+            long long mask = 0x7FFFFFFFFFFFFFFF; // Every bit set except the sign bit.
+            value = *reinterpret_cast<double*>(&mask); // NOTE(review): type-puns through a pointer cast; the resulting bit pattern is a NaN.
+        }
+        else if (op.getId() == Operation::POWER_CONSTANT) {
+            if (stepGroup[step] == -1)
+                value = dynamic_cast<Operation::PowerConstant&>(op).getValue(); // Ungrouped: the exponent is passed to pow().
+            else
+                value = 1.0; // Grouped: used as the numerator when the group starts from 1/x.
+        }
+        else
+            continue;
+
+        // See if we already have a variable for this constant.
+
+        for (int i = 0; i < (int) constants.size(); i++)
+            if (value == constants[i]) { // A NaN value (the ABS mask) never compares equal, so it is appended again for each ABS step.
+                operationConstantIndex[step] = i;
+                break;
+            }
+        if (operationConstantIndex[step] == -1) {
+            operationConstantIndex[step] = constants.size();
+            constants.push_back(value);
+        }
+    }
+
+    // Load constants into variables.
+
+    vector<x86::Xmm> constantVar(constants.size());
+    if (constants.size() > 0) {
+        x86::Gp constantsPointer = c.newIntPtr();
+        c.mov(constantsPointer, imm(&constants[0])); // The generated code reads the constants from this vector's storage.
+        for (int i = 0; i < (int) constants.size(); i++) {
+            constantVar[i] = c.newXmmSd();
+            c.vmovsd(constantVar[i], x86::ptr(constantsPointer, 8*i, 0)); // 8 bytes per double.
+        }
+    }
+
+    // Evaluate the operations.
+
+    vector<bool> hasComputedPower(operation.size(), false);
+    for (int step = 0; step < (int) operation.size(); step++) {
+        if (hasComputedPower[step])
+            continue; // Already emitted together with an earlier member of its power group.
+
+        // When one or more steps involve raising the same argument to multiple integer
+        // powers, we can compute them all together for efficiency.
+
+        if (stepGroup[step] != -1) {
+            vector<int>& group = groups[stepGroup[step]];
+            vector<int>& powers = groupPowers[stepGroup[step]]; // Modified in place below (negated and shifted down to zero).
+            x86::Xmm multiplier = c.newXmmSd();
+            if (powers[0] > 0) // findPowerGroups() only groups powers of the same sign, so the first one decides.
+                c.vmovsd(multiplier, workspaceVar[arguments[step][0]], workspaceVar[arguments[step][0]]);
+            else {
+                c.vdivsd(multiplier, constantVar[operationConstantIndex[step]], workspaceVar[arguments[step][0]]); // Start from 1/x, then treat the powers as positive.
+                for (int i = 0; i < (int)powers.size(); i++)
+                    powers[i] = -powers[i];
+            }
+            vector<bool> hasAssigned(group.size(), false);
+            bool done = false;
+            while (!done) { // Each pass handles one bit of every exponent; multiplier is squared between passes.
+                done = true;
+                for (int i = 0; i < (int)group.size(); i++) {
+                    if (powers[i]%2 == 1) { // Lowest remaining bit is set: fold the current multiplier into this target.
+                        if (!hasAssigned[i])
+                            c.vmovsd(workspaceVar[target[group[i]]], multiplier, multiplier);
+                        else
+                            c.vmulsd(workspaceVar[target[group[i]]], workspaceVar[target[group[i]]], multiplier);
+                        hasAssigned[i] = true;
+                    }
+                    powers[i] >>= 1;
+                    if (powers[i] != 0)
+                        done = false;
+                }
+                if (!done)
+                    c.vmulsd(multiplier, multiplier, multiplier);
+            }
+            for (int step : group) // Note: this loop variable shadows the outer step.
+                hasComputedPower[step] = true;
+            continue;
+        }
+
+        // Evaluate the step.
+
+        Operation& op = *operation[step];
+        vector<int> args = arguments[step]; // Local copy so the list can be extended below.
+        if (args.size() == 1) {
+            // One or more sequential arguments.  Fill out the list.
+
+            for (int i = 1; i < op.getNumArguments(); i++)
+                args.push_back(args[0]+i);
+        }
+
+        // Generate instructions to execute this operation.
+
+        switch (op.getId()) {
+            case Operation::CONSTANT:
+                c.vmovsd(workspaceVar[target[step]], constantVar[operationConstantIndex[step]], constantVar[operationConstantIndex[step]]);
+                break;
+            case Operation::ADD:
+                c.vaddsd(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]]);
+                break;
+            case Operation::SUBTRACT:
+                c.vsubsd(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]]);
+                break;
+            case Operation::MULTIPLY:
+                c.vmulsd(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]]);
+                break;
+            case Operation::DIVIDE:
+                c.vdivsd(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]]);
+                break;
+            case Operation::POWER:
+                generateTwoArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]], pow);
+                break;
+            case Operation::NEGATE:
+                c.vxorps(workspaceVar[target[step]], workspaceVar[target[step]], workspaceVar[target[step]]); // Zero the target...
+                c.vsubsd(workspaceVar[target[step]], workspaceVar[target[step]], workspaceVar[args[0]]); // ...then compute 0 - x.
+                break;
+            case Operation::SQRT:
+                c.vsqrtsd(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[0]]);
+                break;
+            case Operation::EXP:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], exp);
+                break;
+            case Operation::LOG:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], log);
+                break;
+            case Operation::SIN:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], sin);
+                break;
+            case Operation::COS:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], cos);
+                break;
+            case Operation::TAN:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], tan);
+                break;
+            case Operation::ASIN:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], asin);
+                break;
+            case Operation::ACOS:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], acos);
+                break;
+            case Operation::ATAN:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], atan);
+                break;
+            case Operation::ATAN2:
+                generateTwoArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]], atan2);
+                break;
+            case Operation::SINH:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], sinh);
+                break;
+            case Operation::COSH:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], cosh);
+                break;
+            case Operation::TANH:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], tanh);
+                break;
+            case Operation::STEP:
+                c.vxorps(workspaceVar[target[step]], workspaceVar[target[step]], workspaceVar[target[step]]); // Zero the target so it can serve as the left operand of the compare.
+                c.vcmpsd(workspaceVar[target[step]], workspaceVar[target[step]], workspaceVar[args[0]], imm(18)); // Comparison mode is _CMP_LE_OQ = 18
+                c.vandps(workspaceVar[target[step]], workspaceVar[target[step]], constantVar[operationConstantIndex[step]]); // Mask AND 1.0.
+                break;
+            case Operation::DELTA:
+                c.vxorps(workspaceVar[target[step]], workspaceVar[target[step]], workspaceVar[target[step]]); // Zero the target so it can serve as the left operand of the compare.
+                c.vcmpsd(workspaceVar[target[step]], workspaceVar[target[step]], workspaceVar[args[0]], imm(16)); // Comparison mode is _CMP_EQ_OS = 16
+                c.vandps(workspaceVar[target[step]], workspaceVar[target[step]], constantVar[operationConstantIndex[step]]); // Mask AND 1.0.
+                break;
+            case Operation::SQUARE:
+                c.vmulsd(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[0]]);
+                break;
+            case Operation::CUBE:
+                c.vmulsd(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[0]]);
+                c.vmulsd(workspaceVar[target[step]], workspaceVar[target[step]], workspaceVar[args[0]]);
+                break;
+            case Operation::RECIPROCAL:
+                c.vdivsd(workspaceVar[target[step]], constantVar[operationConstantIndex[step]], workspaceVar[args[0]]);
+                break;
+            case Operation::ADD_CONSTANT:
+                c.vaddsd(workspaceVar[target[step]], workspaceVar[args[0]], constantVar[operationConstantIndex[step]]);
+                break;
+            case Operation::MULTIPLY_CONSTANT:
+                c.vmulsd(workspaceVar[target[step]], workspaceVar[args[0]], constantVar[operationConstantIndex[step]]);
+                break;
+            case Operation::POWER_CONSTANT:
+                generateTwoArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], constantVar[operationConstantIndex[step]], pow); // Only reached for ungrouped (non-integer or zero) exponents.
+                break;
+            case Operation::MIN:
+                c.vminsd(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]]);
+                break;
+            case Operation::MAX:
+                c.vmaxsd(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]]);
+                break;
+            case Operation::ABS:
+                c.vandpd(workspaceVar[target[step]], workspaceVar[args[0]], constantVar[operationConstantIndex[step]]); // Clears the sign bit using the mask constant.
+                break;
+            case Operation::FLOOR:
+                c.vroundsd(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[0]], imm(1)); // Rounding mode 1 = toward negative infinity.
+                break;
+            case Operation::CEIL:
+                c.vroundsd(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[0]], imm(2)); // Rounding mode 2 = toward positive infinity.
+                break;
+            case Operation::SELECT:
+            {
+                x86::Xmm mask = c.newXmmSd();
+                c.vxorps(mask, mask, mask);
+                c.vcmpsd(mask, mask, workspaceVar[args[0]], imm(0)); // Comparison mode is _CMP_EQ_OQ = 0
+                c.vblendvps(workspaceVar[target[step]], workspaceVar[args[1]], workspaceVar[args[2]], mask); // args[2] where args[0] == 0, otherwise args[1].
+                break;
+            }
+            default:
+                // Just invoke evaluateOperation().
+
+                for (int i = 0; i < (int) args.size(); i++)
+                    c.vmovsd(x86::ptr(argsPointer, 8*i, 0), workspaceVar[args[i]]); // Spill the arguments into argValues for the callee to read.
+                x86::Gp fn = c.newIntPtr();
+                c.mov(fn, imm((void*) evaluateOperation));
+                InvokeNode* invoke;
+                c.invoke(&invoke, fn, FuncSignatureT<double, Operation*, double*>());
+                invoke->setArg(0, imm(&op));
+                invoke->setArg(1, imm(&argValues[0]));
+                invoke->setRet(0, workspaceVar[target[step]]);
+        }
+    }
+    c.ret(workspaceVar[workspace.size()-1]); // The expression's value is the last workspace slot.
+    c.endFunc();
+    c.finalize();
+    runtime.add(&jitCode, &code); // Hands the finished code to the runtime, which fills in jitCode.
+}
+
+void CompiledExpression::generateSingleArgCall(x86::Compiler& c, x86::Xmm& dest, x86::Xmm& arg, double (*function)(double)) { // x86 variant: emits a call to function(arg) with the result bound to dest.
+    x86::Gp fn = c.newIntPtr();
+    c.mov(fn, imm((void*) function)); // The callee address is embedded as an immediate.
+    InvokeNode* invoke;
+    c.invoke(&invoke, fn, FuncSignatureT<double, double>()); // Signature: double f(double).
+    invoke->setArg(0, arg);
+    invoke->setRet(0, dest);
+}
+
+void CompiledExpression::generateTwoArgCall(x86::Compiler& c, x86::Xmm& dest, x86::Xmm& arg1, x86::Xmm& arg2, double (*function)(double, double)) { // x86 variant: emits a call to function(arg1, arg2) with the result bound to dest.
+    x86::Gp fn = c.newIntPtr();
+    c.mov(fn, imm((void*) function)); // The callee address is embedded as an immediate.
+    InvokeNode* invoke;
+    c.invoke(&invoke, fn, FuncSignatureT<double, double, double>()); // Signature: double f(double, double).
+    invoke->setArg(0, arg1);
+    invoke->setArg(1, arg2);
+    invoke->setRet(0, dest);
 }
 #endif
+#endif
diff --git a/lib/lepton/src/CompiledVectorExpression.cpp b/lib/lepton/src/CompiledVectorExpression.cpp
new file mode 100644
index 0000000000..7e4dfcad9c
--- /dev/null
+++ b/lib/lepton/src/CompiledVectorExpression.cpp
@@ -0,0 +1,933 @@
+/* -------------------------------------------------------------------------- *
+ *                                   Lepton                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the Lepton expression parser originating from              *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2013-2022 Stanford University and the Authors.      *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "lepton/CompiledVectorExpression.h"
+#include "lepton/Operation.h"
+#include "lepton/ParsedExpression.h"
+#include <algorithm>
+#include <utility>
+
+using namespace LMP_Lepton;
+using namespace std;
+#ifdef LEPTON_USE_JIT
+using namespace asmjit;
+#endif
+
+CompiledVectorExpression::CompiledVectorExpression() : width(0), jitCode(NULL) { // Empty expression; width is zeroed so that copies never read an indeterminate value.
+}
+
+CompiledVectorExpression::CompiledVectorExpression(const ParsedExpression& expression, int width) : width(width), jitCode(NULL) {
+    // Reject widths that getAllowedWidths() does not list.
+    const vector<int>& supported = getAllowedWidths();
+    if (count(supported.begin(), supported.end(), width) == 0)
+        throw Exception("Unsupported width for vector expression: "+to_string(width));
+    // Flatten the optimized tree (optimizing again is harmless) into a list of evaluation steps.
+    vector<pair<ExpressionTreeNode, int> > temps;
+    int workspaceSize = 0;
+    compileExpression(expression.optimize().getRootNode(), temps, workspaceSize);
+    workspace.resize(workspaceSize*width); // Each workspace slot holds `width` floats.
+    int maxArguments = 1;
+    for (Operation* op : operation)
+        maxArguments = max(maxArguments, op->getNumArguments());
+    argValues.resize(maxArguments); // Large enough for the widest operation, and never empty.
+#ifdef LEPTON_USE_JIT
+    generateJitCode();
+#endif
+}
+
+CompiledVectorExpression::~CompiledVectorExpression() {
+    // Release the cloned operations held by this object; deleting a null pointer is a no-op, so none needs skipping.
+    for (Operation* op : operation)
+        delete op;
+}
+
+CompiledVectorExpression::CompiledVectorExpression(const CompiledVectorExpression& expression) : jitCode(NULL) {
+    operator=(expression); // Copying is implemented once, in the assignment operator.
+}
+
+CompiledVectorExpression& CompiledVectorExpression::operator=(const CompiledVectorExpression& expression) {
+    if (this == &expression) return *this; // Self-assignment: freeing our operations below would destroy the source.
+    for (Operation* op : operation) delete op; // Free the operations we own; overwriting the pointers would leak them.
+    operation.clear(); // resize() below then null-fills, so a throwing clone() cannot leave dangling pointers behind.
+    jitCode = NULL; // Previously generated code refers to the operations just freed; never let it be called again.
+    width = expression.width; arguments = expression.arguments; target = expression.target;
+    variableIndices = expression.variableIndices; variableNames = expression.variableNames;
+    workspace.resize(expression.workspace.size()); argValues.resize(expression.argValues.size());
+    operation.resize(expression.operation.size());
+    for (int i = 0; i < (int) operation.size(); i++)
+        operation[i] = expression.operation[i]->clone(); // Deep copy: each object owns its own operations.
+    setVariableLocations(variablePointers); // Re-resolve pointers into the new workspace (and rebuild JIT code if enabled).
+    return *this;
+}
+
+const vector<int>& CompiledVectorExpression::getAllowedWidths() {
+    // The list is computed once, by the initializer of the function-local static, and then reused.
+    static const vector<int> widths = [] {
+        vector<int> supported(1, 4); // Width 4 is always offered.
+#ifdef LEPTON_USE_JIT
+        if (CpuInfo::host().hasFeature(CpuFeatures::X86::kAVX))
+            supported.push_back(8); // Width 8 is offered only by JIT builds running on a CPU with AVX.
+#endif
+        return supported;
+    }();
+    return widths;
+}
+
+void CompiledVectorExpression::compileExpression(const ExpressionTreeNode& node, vector<pair<ExpressionTreeNode, int> >& temps, int& workspaceSize) {
+    // An identical node that was already compiled is reused rather than compiled again.
+    if (findTempIndex(node, temps) != -1)
+        return;
+
+    // Compile the children first, remembering the index at which each child's value is stored.
+
+    vector<int> childSlots;
+    for (const ExpressionTreeNode& child : node.getChildren()) {
+        compileExpression(child, temps, workspaceSize);
+        childSlots.push_back(findTempIndex(child, temps));
+    }
+
+    // Now handle this node itself.  Its value will live in slot number workspaceSize.
+
+    const Operation& op = node.getOperation();
+    if (op.getId() == Operation::VARIABLE) {
+        // A variable is an input rather than a computation, so only its slot is recorded.
+        variableIndices[op.getName()] = workspaceSize;
+        variableNames.insert(op.getName());
+    }
+    else {
+        // Describe the step's arguments.  A single entry stands for a run of consecutive slots
+        // beginning at that slot (so a pointer to the first suffices); otherwise every slot is listed.
+        bool sequential = true;
+        for (size_t i = 1; i < childSlots.size(); i++)
+            sequential = sequential && (childSlots[i] == childSlots[i-1]+1);
+        vector<int> stepArgs;
+        if (childSlots.empty())
+            stepArgs.push_back(0); // Never read; the list just must not be empty.
+        else if (sequential)
+            stepArgs.push_back(childSlots[0]);
+        else
+            stepArgs = childSlots;
+        arguments.push_back(stepArgs);
+        target.push_back(workspaceSize);
+        operation.push_back(op.clone());
+    }
+    temps.push_back(make_pair(node, workspaceSize));
+    workspaceSize++;
+}
+
+int CompiledVectorExpression::findTempIndex(const ExpressionTreeNode& node, vector<pair<ExpressionTreeNode, int> >& temps) {
+    // Return the position of the first entry whose node compares equal to `node`, or -1 when there is none.
+    int index = 0, count = (int) temps.size();
+    for (; index < count && !(temps[index].first == node); index++) {}
+    return (index < count ? index : -1);
+}
+
+int CompiledVectorExpression::getWidth() const {
+    return width; // Number of floats stored per workspace slot, as given to the constructor.
+}
+
+const set<string>& CompiledVectorExpression::getVariables() const {
+    return variableNames; // Names collected by compileExpression() from the VARIABLE nodes of the expression.
+}
+
+float* CompiledVectorExpression::getVariablePointer(const string& name) {
+    map<string, float*>::iterator pointer = variablePointers.find(name);
+    if (pointer != variablePointers.end())
+        return pointer->second; // A location supplied through setVariableLocations() takes precedence.
+    map<string, int>::iterator index = variableIndices.find(name);
+    if (index == variableIndices.end())
+        throw Exception("getVariablePointer: Unknown variable '" + name + "'"); // Not a variable of this expression.
+    return &workspace[index->second*width]; // Otherwise the variable lives in its own workspace slot.
+}
+
+void CompiledVectorExpression::setVariableLocations(map<string, float*>& variableLocations) {
+    variablePointers = variableLocations;
+#ifdef LEPTON_USE_JIT
+    // The generated code embeds the variable addresses, so it has to be regenerated.
+    // This is skipped while the workspace is still empty.
+    if (!workspace.empty())
+        generateJitCode();
+#endif
+    // Record a (workspace slot, external location) pair for every variable that has an external
+    // location; evaluate() copies these values in when it runs without JIT code.
+    variablesToCopy.clear();
+    for (const pair<const string, int>& variable : variableIndices) {
+        map<string, float*>::const_iterator location = variablePointers.find(variable.first);
+        if (location != variablePointers.end())
+            variablesToCopy.push_back(make_pair(&workspace[variable.second*width], location->second));
+    }
+}
+
+const float* CompiledVectorExpression::evaluate() const {
+    // When JIT-compiled code is available it performs the entire evaluation.
+    if (jitCode) {
+        jitCode();
+        return &workspace[workspace.size()-width];
+    }
+
+    // Otherwise interpret the steps.  First bring externally stored variable values into the workspace.
+
+    for (const auto& copy : variablesToCopy)
+        for (int lane = 0; lane < width; lane++)
+            copy.first[lane] = copy.second[lane];
+
+    // Run every step for each lane in turn.  An argument list with a single entry names the first of
+    // getNumArguments() consecutive slots; a longer list names each argument's slot explicitly.
+
+    for (int step = 0; step < (int)operation.size(); step++) {
+        const Operation& op = *operation[step];
+        const vector<int>& args = arguments[step];
+        const bool sequential = (args.size() == 1);
+        const int numArgs = (sequential ? op.getNumArguments() : (int)args.size());
+        for (int lane = 0; lane < width; lane++) {
+            for (int i = 0; i < numArgs; i++)
+                argValues[i] = workspace[(sequential ? args[0]+i : args[i])*width+lane];
+            workspace[target[step]*width+lane] = op.evaluate(&argValues[0], dummyVariables);
+        }
+    }
+    return &workspace[workspace.size()-width]; // The last slot holds the value of the root node.
+}
+
+#ifdef LEPTON_USE_JIT
+
+static double evaluateOperation(Operation* op, double* args) { // Free function whose address is embedded in JIT-generated code.
+    static map<string, double> dummyVariables; // Nothing is ever inserted; evaluate() simply requires a variable map.
+    return op->evaluate(args, dummyVariables);
+}
+
+void CompiledVectorExpression::findPowerGroups(vector<vector<int> >& groups, vector<vector<int> >& groupPowers, vector<int>& stepGroup) {
+    // Identify every step that raises an argument to an integer power.  stepPower stays 0 for all other steps.
+    vector<int> stepPower(operation.size(), 0);
+    vector<int> stepArg(operation.size(), -1);
+    for (int step = 0; step < (int)operation.size(); step++) {
+        Operation& op = *operation[step];
+        int power = 0;
+        if (op.getId() == Operation::SQUARE)
+            power = 2;
+        else if (op.getId() == Operation::CUBE)
+            power = 3;
+        else if (op.getId() == Operation::POWER_CONSTANT) {
+            // Cast by reference, as generateJitCode() does: a mismatch throws instead of dereferencing null.
+            double realPower = dynamic_cast<const Operation::PowerConstant&> (op).getValue();
+            if (realPower == (int) realPower)
+                power = (int) realPower;
+        }
+        if (power != 0) {
+            stepPower[step] = power;
+            stepArg[step] = arguments[step][0];
+        }
+    }
+
+    // Group steps with the same argument and the same power sign; stepGroup[step] is the group index, or -1.
+    stepGroup.resize(operation.size(), -1);
+    for (int i = 0; i < (int)operation.size(); i++) {
+        // Skip grouped steps and non-power steps: the latter match nothing and would only add an empty group.
+        if (stepGroup[i] != -1 || stepPower[i] == 0)
+            continue;
+        vector<int> group, power;
+        for (int j = i; j < (int)operation.size(); j++) {
+            if (stepArg[i] == stepArg[j] && stepPower[i] * stepPower[j] > 0) {
+                stepGroup[j] = (int) groups.size();
+                group.push_back(j);
+                power.push_back(stepPower[j]);
+            }
+        }
+        groups.push_back(group);
+        groupPowers.push_back(power);
+    }
+}
+
+#if defined(__ARM__) || defined(__ARM64__)
+
+void CompiledVectorExpression::generateJitCode() { // AArch64 code generator; arithmetic is emitted on 4-lane (.s4) float registers.
+    CodeHolder code;
+    code.init(runtime.environment());
+    a64::Compiler c(&code);
+    c.addFunc(FuncSignatureT<void>());
+    vector<arm::Vec> workspaceVar(workspace.size()/width); // One virtual vector register per workspace slot.
+    for (int i = 0; i < (int) workspaceVar.size(); i++)
+        workspaceVar[i] = c.newVecQ();
+    arm::Gp argsPointer = c.newIntPtr();
+    c.mov(argsPointer, imm(&argValues[0]));
+    vector<vector<int> > groups, groupPowers;
+    vector<int> stepGroup;
+    findPowerGroups(groups, groupPowers, stepGroup);
+
+    // Load the arguments into variables.  Each address is embedded in the code as an immediate, which is
+    // why setVariableLocations() regenerates the code whenever the locations change.
+    arm::Gp variablePointer = c.newIntPtr();
+    for (set<string>::const_iterator iter = variableNames.begin(); iter != variableNames.end(); ++iter) {
+        map<string, int>::iterator index = variableIndices.find(*iter);
+        c.mov(variablePointer, imm(getVariablePointer(index->first)));
+        c.ldr(workspaceVar[index->second].s4(), arm::ptr(variablePointer, 0));
+    }
+
+    // Make a list of all constants that will be needed for evaluation.  Equal values share one entry;
+    // operationConstantIndex[step] is the entry used by that step, or -1 if it needs no constant.
+    vector<int> operationConstantIndex(operation.size(), -1);
+    for (int step = 0; step < (int) operation.size(); step++) {
+        // Find the constant value (if any) used by this operation.
+
+        Operation& op = *operation[step];
+        float value;
+        if (op.getId() == Operation::CONSTANT)
+            value = dynamic_cast<Operation::Constant&> (op).getValue();
+        else if (op.getId() == Operation::ADD_CONSTANT)
+            value = dynamic_cast<Operation::AddConstant&> (op).getValue();
+        else if (op.getId() == Operation::MULTIPLY_CONSTANT)
+            value = dynamic_cast<Operation::MultiplyConstant&> (op).getValue();
+        else if (op.getId() == Operation::RECIPROCAL)
+            value = 1.0;
+        else if (op.getId() == Operation::STEP)
+            value = 1.0;
+        else if (op.getId() == Operation::DELTA)
+            value = 1.0;
+        else if (op.getId() == Operation::POWER_CONSTANT) {
+            if (stepGroup[step] == -1)
+                value = dynamic_cast<Operation::PowerConstant&> (op).getValue();
+            else
+                value = 1.0; // Grouped powers only ever need 1.0, to form the reciprocal for negative powers.
+        } else
+            continue;
+
+        // See if we already have a variable for this constant.
+
+        for (int i = 0; i < (int) constants.size(); i++)
+            if (value == constants[i]) {
+                operationConstantIndex[step] = i;
+                break;
+            }
+        if (operationConstantIndex[step] == -1) {
+            operationConstantIndex[step] = constants.size();
+            constants.push_back(value);
+        }
+    }
+
+    // Load constants into variables.
+
+    vector<arm::Vec> constantVar(constants.size());
+    if (constants.size() > 0) {
+        arm::Gp constantsPointer = c.newIntPtr();
+        for (int i = 0; i < (int) constants.size(); i++) {
+            c.mov(constantsPointer, imm(&constants[i]));
+            constantVar[i] = c.newVecQ();
+            c.ld1r(constantVar[i].s4(), arm::ptr(constantsPointer));
+        }
+    }
+
+    // Evaluate the operations.
+
+    vector<bool> hasComputedPower(operation.size(), false);
+    arm::Vec argReg = c.newVecS(); // Scratch registers used by the per-lane fallback in the default case below.
+    arm::Vec doubleArgReg = c.newVecD();
+    arm::Vec doubleResultReg = c.newVecD();
+    for (int step = 0; step < (int) operation.size(); step++) {
+        if (hasComputedPower[step])
+            continue;
+
+        // When one or more steps involve raising the same argument to multiple integer
+        // powers, we can compute them all together for efficiency.  The multiplier is squared once per
+        // round, and is folded into each target whose (remaining) power is odd in that round.
+        if (stepGroup[step] != -1) {
+            vector<int>& group = groups[stepGroup[step]];
+            vector<int>& powers = groupPowers[stepGroup[step]];
+            arm::Vec multiplier = c.newVecQ();
+            if (powers[0] > 0)
+                c.mov(multiplier.s4(), workspaceVar[arguments[step][0]].s4());
+            else { // Negative powers: start from the reciprocal and continue with the absolute values.
+                c.fdiv(multiplier.s4(), constantVar[operationConstantIndex[step]].s4(), workspaceVar[arguments[step][0]].s4());
+                for (int i = 0; i < (int)powers.size(); i++)
+                    powers[i] = -powers[i];
+            }
+            vector<bool> hasAssigned(group.size(), false);
+            bool done = false;
+            while (!done) {
+                done = true;
+                for (int i = 0; i < (int)group.size(); i++) {
+                    if (powers[i] % 2 == 1) {
+                        if (!hasAssigned[i])
+                            c.mov(workspaceVar[target[group[i]]].s4(), multiplier.s4());
+                        else
+                            c.fmul(workspaceVar[target[group[i]]].s4(), workspaceVar[target[group[i]]].s4(), multiplier.s4());
+                        hasAssigned[i] = true;
+                    }
+                    powers[i] >>= 1;
+                    if (powers[i] != 0)
+                        done = false;
+                }
+                if (!done)
+                    c.fmul(multiplier.s4(), multiplier.s4(), multiplier.s4());
+            }
+            for (int groupStep : group) // Every member of the group has now been emitted; skip them later.
+                hasComputedPower[groupStep] = true;
+            continue;
+        }
+
+        // Evaluate the step.
+
+        Operation& op = *operation[step];
+        vector<int> args = arguments[step];
+        if (args.size() == 1) {
+            // One or more sequential arguments.  Fill out the list.
+
+            for (int i = 1; i < op.getNumArguments(); i++)
+                args.push_back(args[0] + i);
+        }
+
+        // Generate instructions to execute this operation.
+
+        switch (op.getId()) {
+            case Operation::CONSTANT:
+                c.mov(workspaceVar[target[step]].s4(), constantVar[operationConstantIndex[step]].s4());
+                break;
+            case Operation::ADD:
+                c.fadd(workspaceVar[target[step]].s4(), workspaceVar[args[0]].s4(), workspaceVar[args[1]].s4());
+                break;
+            case Operation::SUBTRACT:
+                c.fsub(workspaceVar[target[step]].s4(), workspaceVar[args[0]].s4(), workspaceVar[args[1]].s4());
+                break;
+            case Operation::MULTIPLY:
+                c.fmul(workspaceVar[target[step]].s4(), workspaceVar[args[0]].s4(), workspaceVar[args[1]].s4());
+                break;
+            case Operation::DIVIDE:
+                c.fdiv(workspaceVar[target[step]].s4(), workspaceVar[args[0]].s4(), workspaceVar[args[1]].s4());
+                break;
+            case Operation::POWER:
+                generateTwoArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]], powf);
+                break;
+            case Operation::NEGATE:
+                c.fneg(workspaceVar[target[step]].s4(), workspaceVar[args[0]].s4());
+                break;
+            case Operation::SQRT:
+                c.fsqrt(workspaceVar[target[step]].s4(), workspaceVar[args[0]].s4());
+                break;
+            case Operation::EXP:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], expf);
+                break;
+            case Operation::LOG:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], logf);
+                break;
+            case Operation::SIN:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], sinf);
+                break;
+            case Operation::COS:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], cosf);
+                break;
+            case Operation::TAN:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], tanf);
+                break;
+            case Operation::ASIN:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], asinf);
+                break;
+            case Operation::ACOS:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], acosf);
+                break;
+            case Operation::ATAN:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], atanf);
+                break;
+            case Operation::ATAN2:
+                generateTwoArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]], atan2f);
+                break;
+            case Operation::SINH:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], sinhf);
+                break;
+            case Operation::COSH:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], coshf);
+                break;
+            case Operation::TANH:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], tanhf);
+                break;
+            case Operation::STEP: // NOTE(review): cmge looks like an integer compare on the raw bits (-0.0f would give 0) - confirm fcmge is not wanted.
+                c.cmge(workspaceVar[target[step]].s4(), workspaceVar[args[0]].s4(), imm(0));
+                c.and_(workspaceVar[target[step]], workspaceVar[target[step]], constantVar[operationConstantIndex[step]]);
+                break;
+            case Operation::DELTA: // NOTE(review): cmeq looks like an integer compare on the raw bits (-0.0f would give 0) - confirm fcmeq is not wanted.
+                c.cmeq(workspaceVar[target[step]].s4(), workspaceVar[args[0]].s4(), imm(0));
+                c.and_(workspaceVar[target[step]], workspaceVar[target[step]], constantVar[operationConstantIndex[step]]);
+                break;
+            case Operation::SQUARE:
+                c.fmul(workspaceVar[target[step]].s4(), workspaceVar[args[0]].s4(), workspaceVar[args[0]].s4());
+                break;
+            case Operation::CUBE:
+                c.fmul(workspaceVar[target[step]].s4(), workspaceVar[args[0]].s4(), workspaceVar[args[0]].s4());
+                c.fmul(workspaceVar[target[step]].s4(), workspaceVar[target[step]].s4(), workspaceVar[args[0]].s4());
+                break;
+            case Operation::RECIPROCAL:
+                c.fdiv(workspaceVar[target[step]].s4(), constantVar[operationConstantIndex[step]].s4(), workspaceVar[args[0]].s4());
+                break;
+            case Operation::ADD_CONSTANT:
+                c.fadd(workspaceVar[target[step]].s4(), workspaceVar[args[0]].s4(), constantVar[operationConstantIndex[step]].s4());
+                break;
+            case Operation::MULTIPLY_CONSTANT:
+                c.fmul(workspaceVar[target[step]].s4(), workspaceVar[args[0]].s4(), constantVar[operationConstantIndex[step]].s4());
+                break;
+            case Operation::POWER_CONSTANT:
+                generateTwoArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], constantVar[operationConstantIndex[step]], powf);
+                break;
+            case Operation::MIN:
+                c.fmin(workspaceVar[target[step]].s4(), workspaceVar[args[0]].s4(), workspaceVar[args[1]].s4());
+                break;
+            case Operation::MAX:
+                c.fmax(workspaceVar[target[step]].s4(), workspaceVar[args[0]].s4(), workspaceVar[args[1]].s4());
+                break;
+            case Operation::ABS:
+                c.fabs(workspaceVar[target[step]].s4(), workspaceVar[args[0]].s4());
+                break;
+            case Operation::FLOOR:
+                c.frintm(workspaceVar[target[step]].s4(), workspaceVar[args[0]].s4());
+                break;
+            case Operation::CEIL:
+                c.frintp(workspaceVar[target[step]].s4(), workspaceVar[args[0]].s4());
+                break;
+            case Operation::SELECT:
+                c.fcmeq(workspaceVar[target[step]].s4(), workspaceVar[args[0]].s4(), imm(0));
+                c.bsl(workspaceVar[target[step]], workspaceVar[args[2]], workspaceVar[args[1]]);
+                break;
+            default:
+                // Just invoke evaluateOperation(), once per lane, converting float <-> double around the call.
+                for (int element = 0; element < width; element++) {
+                    for (int i = 0; i < (int) args.size(); i++) {
+                        c.ins(argReg.s(0), workspaceVar[args[i]].s(element));
+                        c.fcvt(doubleArgReg, argReg);
+                        c.str(doubleArgReg, arm::ptr(argsPointer, 8*i));
+                    }
+                    arm::Gp fn = c.newIntPtr();
+                    c.mov(fn, imm((void*) evaluateOperation));
+                    InvokeNode* invoke;
+                    c.invoke(&invoke, fn, FuncSignatureT<double, Operation*, double*>());
+                    invoke->setArg(0, imm(&op));
+                    invoke->setArg(1, imm(&argValues[0]));
+                    invoke->setRet(0, doubleResultReg);
+                    c.fcvt(argReg, doubleResultReg);
+                    c.ins(workspaceVar[target[step]].s(element), argReg.s(0));
+                }
+        }
+    }
+    arm::Gp resultPointer = c.newIntPtr(); // Write the final slot back to memory, where evaluate() reads the result.
+    c.mov(resultPointer, imm(&workspace[workspace.size()-width]));
+    c.str(workspaceVar.back().s4(), arm::ptr(resultPointer, 0));
+    c.endFunc();
+    c.finalize();
+    runtime.add(&jitCode, &code);
+}
+
+void CompiledVectorExpression::generateSingleArgCall(a64::Compiler& c, arm::Vec& dest, arm::Vec& arg, float (*function)(float)) {
+    arm::Gp callee = c.newIntPtr();
+    c.mov(callee, imm((void*) function));
+    arm::Vec scalarIn = c.newVecS();
+    arm::Vec scalarOut = c.newVecS();
+    for (int lane = 0; lane < width; lane++) { // The function is scalar, so emit one call per vector lane.
+        c.ins(scalarIn.s(0), arg.s(lane)); // Extract this lane of the argument.
+        InvokeNode* call;
+        c.invoke(&call, callee, FuncSignatureT<float, float>());
+        call->setArg(0, scalarIn);
+        call->setRet(0, scalarOut);
+        c.ins(dest.s(lane), scalarOut.s(0)); // Store the result in the matching lane of dest.
+    }
+}
+
+void CompiledVectorExpression::generateTwoArgCall(a64::Compiler& c, arm::Vec& dest, arm::Vec& arg1, arm::Vec& arg2, float (*function)(float, float)) {
+    arm::Gp callee = c.newIntPtr();
+    c.mov(callee, imm((void*) function));
+    arm::Vec scalarIn1 = c.newVecS();
+    arm::Vec scalarIn2 = c.newVecS();
+    arm::Vec scalarOut = c.newVecS();
+    for (int lane = 0; lane < width; lane++) { // The function is scalar, so emit one call per vector lane.
+        c.ins(scalarIn1.s(0), arg1.s(lane));
+        c.ins(scalarIn2.s(0), arg2.s(lane));
+        InvokeNode* call;
+        c.invoke(&call, callee, FuncSignatureT<float, float, float>());
+        call->setArg(0, scalarIn1);
+        call->setArg(1, scalarIn2);
+        call->setRet(0, scalarOut);
+        c.ins(dest.s(lane), scalarOut.s(0)); // Store the result in the matching lane of dest.
+    }
+}
+#else
+
+void CompiledVectorExpression::generateJitCode() {
+    const CpuInfo& cpu = CpuInfo::host();
+    if (!cpu.hasFeature(CpuFeatures::X86::kAVX))
+        return;
+    CodeHolder code;
+    code.init(runtime.environment());
+    x86::Compiler c(&code);
+    FuncNode* funcNode = c.addFunc(FuncSignatureT<void>());
+    funcNode->frame().setAvxEnabled();
+    vector<x86::Ymm> workspaceVar(workspace.size()/width);
+    for (int i = 0; i < (int) workspaceVar.size(); i++)
+        workspaceVar[i] = c.newYmmPs();
+    x86::Gp argsPointer = c.newIntPtr();
+    c.mov(argsPointer, imm(&argValues[0]));
+    vector<vector<int> > groups, groupPowers;
+    vector<int> stepGroup;
+    findPowerGroups(groups, groupPowers, stepGroup);
+
+    // Load the arguments into variables.
+
+    for (set<string>::const_iterator iter = variableNames.begin(); iter != variableNames.end(); ++iter) {
+        map<string, int>::iterator index = variableIndices.find(*iter);
+        x86::Gp variablePointer = c.newIntPtr();
+        c.mov(variablePointer, imm(getVariablePointer(index->first)));
+        if (width == 4)
+            c.vmovdqu(workspaceVar[index->second].xmm(), x86::ptr(variablePointer, 0, 0));
+        else
+            c.vmovdqu(workspaceVar[index->second], x86::ptr(variablePointer, 0, 0));
+    }
+
+    // Make a list of all constants that will be needed for evaluation.
+
+    vector<int> operationConstantIndex(operation.size(), -1);
+    for (int step = 0; step < (int) operation.size(); step++) {
+        // Find the constant value (if any) used by this operation.
+
+        Operation& op = *operation[step];
+        double value;
+        if (op.getId() == Operation::CONSTANT)
+            value = dynamic_cast<Operation::Constant&> (op).getValue();
+        else if (op.getId() == Operation::ADD_CONSTANT)
+            value = dynamic_cast<Operation::AddConstant&> (op).getValue();
+        else if (op.getId() == Operation::MULTIPLY_CONSTANT)
+            value = dynamic_cast<Operation::MultiplyConstant&> (op).getValue();
+        else if (op.getId() == Operation::RECIPROCAL)
+            value = 1.0;
+        else if (op.getId() == Operation::STEP)
+            value = 1.0;
+        else if (op.getId() == Operation::DELTA)
+            value = 1.0;
+        else if (op.getId() == Operation::ABS) {
+            int mask = 0x7FFFFFFF;
+            value = *reinterpret_cast<float*>(&mask);
+        }
+        else if (op.getId() == Operation::POWER_CONSTANT) {
+            if (stepGroup[step] == -1)
+                value = dynamic_cast<Operation::PowerConstant&> (op).getValue();
+            else
+                value = 1.0;
+        } else
+            continue;
+
+        // See if we already have a variable for this constant.
+
+        for (int i = 0; i < (int) constants.size(); i++)
+            if (value == constants[i]) {
+                operationConstantIndex[step] = i;
+                break;
+            }
+        if (operationConstantIndex[step] == -1) {
+            operationConstantIndex[step] = constants.size();
+            constants.push_back(value);
+        }
+    }
+
+    // Load constants into variables.
+
+    vector<x86::Ymm> constantVar(constants.size());
+    if (constants.size() > 0) {
+        x86::Gp constantsPointer = c.newIntPtr();
+        c.mov(constantsPointer, imm(&constants[0]));
+        for (int i = 0; i < (int) constants.size(); i++) {
+            constantVar[i] = c.newYmmPs();
+            c.vbroadcastss(constantVar[i], x86::ptr(constantsPointer, 4*i, 0));
+        }
+    }
+
+    // Evaluate the operations.
+
+    vector<bool> hasComputedPower(operation.size(), false);
+    x86::Ymm argReg = c.newYmm();
+    x86::Ymm doubleArgReg = c.newYmm();
+    x86::Ymm doubleResultReg = c.newYmm();
+    for (int step = 0; step < (int) operation.size(); step++) {
+        if (hasComputedPower[step])
+            continue;
+
+        // When one or more steps involve raising the same argument to multiple integer
+        // powers, we can compute them all together for efficiency.
+
+        if (stepGroup[step] != -1) {
+            vector<int>& group = groups[stepGroup[step]];
+            vector<int>& powers = groupPowers[stepGroup[step]];
+            x86::Ymm multiplier = c.newYmmPs();
+            if (powers[0] > 0)
+                c.vmovdqu(multiplier, workspaceVar[arguments[step][0]]);
+            else {
+                c.vdivps(multiplier, constantVar[operationConstantIndex[step]], workspaceVar[arguments[step][0]]);
+                for (int i = 0; i < (int)powers.size(); i++)
+                    powers[i] = -powers[i];
+            }
+            vector<bool> hasAssigned(group.size(), false);
+            bool done = false;
+            while (!done) {
+                done = true;
+                for (int i = 0; i < (int)group.size(); i++) {
+                    if (powers[i] % 2 == 1) {
+                        if (!hasAssigned[i])
+                            c.vmovdqu(workspaceVar[target[group[i]]], multiplier);
+                        else
+                            c.vmulps(workspaceVar[target[group[i]]], workspaceVar[target[group[i]]], multiplier);
+                        hasAssigned[i] = true;
+                    }
+                    powers[i] >>= 1;
+                    if (powers[i] != 0)
+                        done = false;
+                }
+                if (!done)
+                    c.vmulps(multiplier, multiplier, multiplier);
+            }
+            for (int step : group)
+                hasComputedPower[step] = true;
+            continue;
+        }
+
+        // Evaluate the step.
+
+        Operation& op = *operation[step];
+        vector<int> args = arguments[step];
+        if (args.size() == 1) {
+            // One or more sequential arguments.  Fill out the list.
+
+            for (int i = 1; i < op.getNumArguments(); i++)
+                args.push_back(args[0] + i);
+        }
+
+        // Generate instructions to execute this operation.
+
+        switch (op.getId()) {
+            case Operation::CONSTANT:
+                c.vmovdqu(workspaceVar[target[step]], constantVar[operationConstantIndex[step]]);
+                break;
+            case Operation::ADD:
+                c.vaddps(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]]);
+                break;
+            case Operation::SUBTRACT:
+                c.vsubps(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]]);
+                break;
+            case Operation::MULTIPLY:
+                c.vmulps(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]]);
+                break;
+            case Operation::DIVIDE:
+                c.vdivps(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]]);
+                break;
+            case Operation::POWER:
+                generateTwoArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]], powf);
+                break;
+            case Operation::NEGATE:
+                c.vxorps(workspaceVar[target[step]], workspaceVar[target[step]], workspaceVar[target[step]]);
+                c.vsubps(workspaceVar[target[step]], workspaceVar[target[step]], workspaceVar[args[0]]);
+                break;
+            case Operation::SQRT:
+                c.vsqrtps(workspaceVar[target[step]], workspaceVar[args[0]]);
+                break;
+            case Operation::EXP:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], expf);
+                break;
+            case Operation::LOG:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], logf);
+                break;
+            case Operation::SIN:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], sinf);
+                break;
+            case Operation::COS:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], cosf);
+                break;
+            case Operation::TAN:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], tanf);
+                break;
+            case Operation::ASIN:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], asinf);
+                break;
+            case Operation::ACOS:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], acosf);
+                break;
+            case Operation::ATAN:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], atanf);
+                break;
+            case Operation::ATAN2:
+                generateTwoArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]], atan2f);
+                break;
+            case Operation::SINH:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], sinhf);
+                break;
+            case Operation::COSH:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], coshf);
+                break;
+            case Operation::TANH:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], tanhf);
+                break;
+            case Operation::STEP:
+                c.vxorps(workspaceVar[target[step]], workspaceVar[target[step]], workspaceVar[target[step]]);
+                c.vcmpps(workspaceVar[target[step]], workspaceVar[target[step]], workspaceVar[args[0]], imm(18)); // Comparison mode is _CMP_LE_OQ = 18
+                c.vandps(workspaceVar[target[step]], workspaceVar[target[step]], constantVar[operationConstantIndex[step]]);
+                break;
+            case Operation::DELTA:
+                c.vxorps(workspaceVar[target[step]], workspaceVar[target[step]], workspaceVar[target[step]]);
+                c.vcmpps(workspaceVar[target[step]], workspaceVar[target[step]], workspaceVar[args[0]], imm(16)); // Comparison mode is _CMP_EQ_OS = 16
+                c.vandps(workspaceVar[target[step]], workspaceVar[target[step]], constantVar[operationConstantIndex[step]]);
+                break;
+            case Operation::SQUARE:
+                c.vmulps(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[0]]);
+                break;
+            case Operation::CUBE:
+                c.vmulps(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[0]]);
+                c.vmulps(workspaceVar[target[step]], workspaceVar[target[step]], workspaceVar[args[0]]);
+                break;
+            case Operation::RECIPROCAL:
+                c.vdivps(workspaceVar[target[step]], constantVar[operationConstantIndex[step]], workspaceVar[args[0]]);
+                break;
+            case Operation::ADD_CONSTANT:
+                c.vaddps(workspaceVar[target[step]], workspaceVar[args[0]], constantVar[operationConstantIndex[step]]);
+                break;
+            case Operation::MULTIPLY_CONSTANT:
+                c.vmulps(workspaceVar[target[step]], workspaceVar[args[0]], constantVar[operationConstantIndex[step]]);
+                break;
+            case Operation::POWER_CONSTANT:
+                generateTwoArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], constantVar[operationConstantIndex[step]], powf);
+                break;
+            case Operation::MIN:
+                c.vminps(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]]);
+                break;
+            case Operation::MAX:
+                c.vmaxps(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]]);
+                break;
+            case Operation::ABS:
+                c.vandps(workspaceVar[target[step]], workspaceVar[args[0]], constantVar[operationConstantIndex[step]]);
+                break;
+            case Operation::FLOOR:
+                c.vroundps(workspaceVar[target[step]], workspaceVar[args[0]], imm(1));
+                break;
+            case Operation::CEIL:
+                c.vroundps(workspaceVar[target[step]], workspaceVar[args[0]], imm(2));
+                break;
+            case Operation::SELECT:
+            {
+                x86::Ymm mask = c.newYmmPs();
+                c.vxorps(mask, mask, mask);
+                c.vcmpps(mask, mask, workspaceVar[args[0]], imm(0)); // Comparison mode is _CMP_EQ_OQ = 0
+                c.vblendvps(workspaceVar[target[step]], workspaceVar[args[1]], workspaceVar[args[2]], mask);
+                break;
+            }
+            default:
+                // Just invoke evaluateOperation().
+
+                for (int element = 0; element < width; element++) {
+                    for (int i = 0; i < (int) args.size(); i++) {
+                        if (element < 4)
+                            c.vshufps(argReg, workspaceVar[args[i]], workspaceVar[args[i]], imm(element));
+                        else {
+                            c.vperm2f128(argReg, workspaceVar[args[i]], workspaceVar[args[i]], imm(1));
+                            c.vshufps(argReg, argReg, argReg, imm(element-4));
+                        }
+                        c.vcvtss2sd(doubleArgReg.xmm(), doubleArgReg.xmm(), argReg.xmm());
+                        c.vmovsd(x86::ptr(argsPointer, 8*i, 0), doubleArgReg.xmm());
+                    }
+                    x86::Gp fn = c.newIntPtr();
+                    c.mov(fn, imm((void*) evaluateOperation));
+                    InvokeNode* invoke;
+                    c.invoke(&invoke, fn, FuncSignatureT<double, Operation*, double*>());
+                    invoke->setArg(0, imm(&op));
+                    invoke->setArg(1, imm(&argValues[0]));
+                    invoke->setRet(0, doubleResultReg);
+                    c.vcvtsd2ss(argReg.xmm(), argReg.xmm(), doubleResultReg.xmm());
+                    if (element > 3)
+                        c.vperm2f128(argReg, argReg, argReg, imm(0));
+                    if (element != 0)
+                        c.vshufps(argReg, argReg, argReg, imm(0));
+                    c.vblendps(workspaceVar[target[step]], workspaceVar[target[step]], argReg, 1<<element);
+                }
+        }
+    }
+    x86::Gp resultPointer = c.newIntPtr();
+    c.mov(resultPointer, imm(&workspace[workspace.size()-width]));
+    if (width == 4)
+        c.vmovdqu(x86::ptr(resultPointer, 0, 0), workspaceVar.back().xmm());
+    else
+        c.vmovdqu(x86::ptr(resultPointer, 0, 0), workspaceVar.back());
+    c.endFunc();
+    c.finalize();
+    runtime.add(&jitCode, &code);
+}
+
+void CompiledVectorExpression::generateSingleArgCall(x86::Compiler& c, x86::Ymm& dest, x86::Ymm& arg, float (*function)(float)) {  // Emits code that calls function once per vector element of arg and writes each result into the same element of dest.
+    x86::Gp fn = c.newIntPtr();
+    c.mov(fn, imm((void*) function));  // load the callee address once, ahead of the per-element sequence
+    x86::Ymm a = c.newYmm();  // scratch register carrying the scalar argument in lane 0
+    x86::Ymm d = c.newYmm();  // scratch register receiving the scalar result
+    for (int element = 0; element < width; element++) {
+        if (element < 4)
+            c.vshufps(a, arg, arg, imm(element));  // elements 0-3: select the element from the low 128 bits into lane 0
+        else {
+            c.vperm2f128(a, arg, arg, imm(1));  // elements 4-7: first bring the high 128 bits down to the low half ...
+            c.vshufps(a, a, a, imm(element-4));  // ... then select within that half
+        }
+        InvokeNode* invoke;
+        c.invoke(&invoke, fn, FuncSignatureT<float, float>());
+        invoke->setArg(0, a);
+        invoke->setRet(0, d);
+        if (element > 3)
+            c.vperm2f128(d, d, d, imm(0));  // duplicate the low 128 bits (holding the result) into the high half
+        if (element != 0)
+            c.vshufps(d, d, d, imm(0));  // replicate lane 0 so the result is present in the target lane
+        c.vblendps(dest, dest, d, 1<<element);  // the one-bit mask replaces only this element of dest
+    }
+}
+
+void CompiledVectorExpression::generateTwoArgCall(x86::Compiler& c, x86::Ymm& dest, x86::Ymm& arg1, x86::Ymm& arg2, float (*function)(float, float)) {  // Emits code that calls function once per vector element with the matching elements of arg1 and arg2, writing each result into the same element of dest.
+    x86::Gp fn = c.newIntPtr();
+    c.mov(fn, imm((void*) function));  // load the callee address once, ahead of the per-element sequence
+    x86::Ymm a1 = c.newYmm();  // scratch register carrying the scalar first argument in lane 0
+    x86::Ymm a2 = c.newYmm();  // scratch register carrying the scalar second argument in lane 0
+    x86::Ymm d = c.newYmm();  // scratch register receiving the scalar result
+    for (int element = 0; element < width; element++) {
+        if (element < 4) {
+            c.vshufps(a1, arg1, arg1, imm(element));  // elements 0-3: select the element from the low 128 bits into lane 0
+            c.vshufps(a2, arg2, arg2, imm(element));
+        }
+        else {
+            c.vperm2f128(a1, arg1, arg1, imm(1));  // elements 4-7: first bring the high 128 bits down to the low half ...
+            c.vperm2f128(a2, arg2, arg2, imm(1));
+            c.vshufps(a1, a1, a1, imm(element-4));  // ... then select within that half
+            c.vshufps(a2, a2, a2, imm(element-4));
+        }
+        InvokeNode* invoke;
+        c.invoke(&invoke, fn, FuncSignatureT<float, float, float>());
+        invoke->setArg(0, a1);
+        invoke->setArg(1, a2);
+        invoke->setRet(0, d);
+        if (element > 3)
+            c.vperm2f128(d, d, d, imm(0));  // duplicate the low 128 bits (holding the result) into the high half
+        if (element != 0)
+            c.vshufps(d, d, d, imm(0));  // replicate lane 0 so the result is present in the target lane
+        c.vblendps(dest, dest, d, 1<<element);  // the one-bit mask replaces only this element of dest
+    }
+}
+#endif
+#endif
diff --git a/lib/lepton/src/ExpressionTreeNode.cpp b/lib/lepton/src/ExpressionTreeNode.cpp
index 90020aa373..3b34a0a1c0 100644
--- a/lib/lepton/src/ExpressionTreeNode.cpp
+++ b/lib/lepton/src/ExpressionTreeNode.cpp
@@ -6,7 +6,7 @@
  * Biological Structures at Stanford, funded under the NIH Roadmap for        *
  * Medical Research, grant U54 GM072970. See https://simtk.org.               *
  *                                                                            *
- * Portions copyright (c) 2009-2015 Stanford University and the Authors.      *
+ * Portions copyright (c) 2009-2021 Stanford University and the Authors.      *
  * Authors: Peter Eastman                                                     *
  * Contributors:                                                              *
  *                                                                            *
@@ -32,6 +32,7 @@
 #include "lepton/ExpressionTreeNode.h"
 #include "lepton/Exception.h"
 #include "lepton/Operation.h"
+#include <utility>
 
 using namespace LMP_Lepton;
 using namespace std;
@@ -62,6 +63,11 @@ ExpressionTreeNode::ExpressionTreeNode(Operation* operation) : operation(operati
 ExpressionTreeNode::ExpressionTreeNode(const ExpressionTreeNode& node) : operation(node.operation == NULL ? NULL : node.operation->clone()), children(node.getChildren()) {
 }
 
+ExpressionTreeNode::ExpressionTreeNode(ExpressionTreeNode&& node) : operation(node.operation), children(std::move(node.children)) {
+    node.children.clear();     // the child list was moved out above; make sure the source is left empty
+    node.operation = nullptr;  // the Operation pointer is now held by this node only
+}
+
 ExpressionTreeNode::ExpressionTreeNode() : operation(NULL) {
 }
 
@@ -98,6 +104,16 @@ ExpressionTreeNode& ExpressionTreeNode::operator=(const ExpressionTreeNode& node
     return *this;
 }
 
+ExpressionTreeNode& ExpressionTreeNode::operator=(ExpressionTreeNode&& node) {  // Move assignment: takes over node's Operation and children; also safe when node is *this or lives inside this node's own children.
+    Operation* incomingOperation = node.operation;  // detach everything from node BEFORE releasing our own state
+    vector<ExpressionTreeNode> incomingChildren = move(node.children);  // vector move construction leaves node.children empty
+    node.operation = NULL;  // node no longer refers to the Operation we are about to adopt
+    delete operation;  // deleting NULL is a no-op, so no explicit check is needed
+    operation = incomingOperation;
+    children = move(incomingChildren);  // may destroy node if it was one of our children; node is not touched after this point
+    return *this;
+}
+
 const Operation& ExpressionTreeNode::getOperation() const {
     return *operation;
 }
@@ -105,3 +121,33 @@ const Operation& ExpressionTreeNode::getOperation() const {
 const vector<ExpressionTreeNode>& ExpressionTreeNode::getChildren() const {
     return children;
 }
+
+void ExpressionTreeNode::assignTags(vector<const ExpressionTreeNode*>& examples) const {
+    // Give every node of this subtree a tag such that two nodes carry the same tag
+    // exactly when they, and all of their descendants, are equal.  examples[t] is the
+    // first node that received tag t.  The tags are used to optimize other operations.
+
+    const int tagsBefore = examples.size();
+    for (const ExpressionTreeNode& child : children)
+        child.assignTags(examples);
+    const bool childrenAllKnown = (tagsBefore == (int)examples.size());
+
+    // Only a node whose children all reused existing tags can itself repeat an earlier node.
+    for (int candidate = 0; childrenAllKnown && candidate < (int)examples.size(); candidate++) {
+        const ExpressionTreeNode& other = *examples[candidate];
+        if (children.size() != other.children.size() || !(getOperation() == other.getOperation()))
+            continue;
+        int k = 0;
+        while (k < (int)children.size() && children[k].tag == other.children[k].tag)
+            k++;
+        if (k == (int)children.size()) {
+            tag = candidate;
+            return;
+        }
+    }
+
+    // No earlier node matches, so this node becomes the example for a brand new tag.
+
+    tag = examples.size();
+    examples.push_back(this);
+}
diff --git a/lib/lepton/src/Operation.cpp b/lib/lepton/src/Operation.cpp
index bec5686a74..08deff8584 100644
--- a/lib/lepton/src/Operation.cpp
+++ b/lib/lepton/src/Operation.cpp
@@ -7,7 +7,7 @@
  * Biological Structures at Stanford, funded under the NIH Roadmap for        *
  * Medical Research, grant U54 GM072970. See https://simtk.org.               *
  *                                                                            *
- * Portions copyright (c) 2009-2019 Stanford University and the Authors.      *
+ * Portions copyright (c) 2009-2021 Stanford University and the Authors.      *
  * Authors: Peter Eastman                                                     *
  * Contributors:                                                              *
  *                                                                            *
@@ -37,7 +37,13 @@
 using namespace LMP_Lepton;
 using namespace std;
 
-double Operation::Erf::evaluate(double* args, const map<string, double>& ) const {
+static bool isZero(const ExpressionTreeNode& node) {
+    // True only for a CONSTANT node whose stored value compares equal to 0.0; the cast is reached only after the id check.
+    const Operation& op = node.getOperation();
+    return op.getId() == Operation::CONSTANT && dynamic_cast<const Operation::Constant&>(op).getValue() == 0.0;
+}
+
+double Operation::Erf::evaluate(double* args, const map<string, double>&) const {  // Returns erf() of the first argument; the map parameter is unnamed and unused.
     return erf(args[0]);
 }
 
@@ -58,35 +64,71 @@ ExpressionTreeNode Operation::Variable::differentiate(const std::vector<Expressi
 ExpressionTreeNode Operation::Custom::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
     if (function->getNumArguments() == 0)
         return ExpressionTreeNode(new Operation::Constant(0.0));
-    ExpressionTreeNode result = ExpressionTreeNode(new Operation::Multiply(), ExpressionTreeNode(new Operation::Custom(*this, 0), children), childDerivs[0]);
-    for (int i = 1; i < getNumArguments(); i++) {
-        result = ExpressionTreeNode(new Operation::Add(),
-                                    result,
-                                    ExpressionTreeNode(new Operation::Multiply(), ExpressionTreeNode(new Operation::Custom(*this, i), children), childDerivs[i]));
+    ExpressionTreeNode result;  // running sum of the terms kept so far; only meaningful once foundTerm is true
+    bool foundTerm = false;  // becomes true when the first non-zero term has been stored in result
+    for (int i = 0; i < getNumArguments(); i++) {
+        if (!isZero(childDerivs[i])) {  // an argument whose derivative is the constant 0 contributes nothing, so its term is left out
+            if (foundTerm)
+                result = ExpressionTreeNode(new Operation::Add(),
+                                            result,
+                                            ExpressionTreeNode(new Operation::Multiply(), ExpressionTreeNode(new Operation::Custom(*this, i), children), childDerivs[i]));
+            else {
+                result = ExpressionTreeNode(new Operation::Multiply(), ExpressionTreeNode(new Operation::Custom(*this, i), children), childDerivs[i]);  // first term: no Add node needed yet
+                foundTerm = true;
+            }
+        }
     }
-    return result;
+    if (foundTerm)
+        return result;
+    return ExpressionTreeNode(new Operation::Constant(0.0));  // every argument derivative was the constant 0
 }
 
 ExpressionTreeNode Operation::Add::differentiate(const std::vector<ExpressionTreeNode>& , const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
+    if (isZero(childDerivs[0]))  // first derivative is the constant 0: the sum is just the second derivative
+        return childDerivs[1];
+    if (isZero(childDerivs[1]))  // second derivative is the constant 0: the sum is just the first derivative
+        return childDerivs[0];
     return ExpressionTreeNode(new Operation::Add(), childDerivs[0], childDerivs[1]);
 }
 
 ExpressionTreeNode Operation::Subtract::differentiate(const std::vector<ExpressionTreeNode>& , const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
+    if (isZero(childDerivs[0])) {  // first derivative is the constant 0
+        if (isZero(childDerivs[1]))  // both derivatives are 0, so the difference is 0
+            return ExpressionTreeNode(new Operation::Constant(0.0));
+        return ExpressionTreeNode(new Operation::Negate(), childDerivs[1]);  // 0 - b' is emitted as -b'
+    }
+    if (isZero(childDerivs[1]))  // a' - 0 is just a'
+        return childDerivs[0];
     return ExpressionTreeNode(new Operation::Subtract(), childDerivs[0], childDerivs[1]);
 }
 
 ExpressionTreeNode Operation::Multiply::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
+    if (isZero(childDerivs[0])) {  // first factor has derivative 0, so the children[1]*childDerivs[0] term vanishes
+        if (isZero(childDerivs[1]))  // both product-rule terms vanish
+            return ExpressionTreeNode(new Operation::Constant(0.0));
+        return ExpressionTreeNode(new Operation::Multiply(), children[0], childDerivs[1]);  // only children[0]*childDerivs[1] remains
+    }
+    if (isZero(childDerivs[1]))  // second factor has derivative 0: only children[1]*childDerivs[0] remains
+        return ExpressionTreeNode(new Operation::Multiply(), children[1], childDerivs[0]);
     return ExpressionTreeNode(new Operation::Add(),
                               ExpressionTreeNode(new Operation::Multiply(), children[0], childDerivs[1]),
                               ExpressionTreeNode(new Operation::Multiply(), children[1], childDerivs[0]));
 }
 
 ExpressionTreeNode Operation::Divide::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
-    return ExpressionTreeNode(new Operation::Divide(),
-                              ExpressionTreeNode(new Operation::Subtract(),
+    ExpressionTreeNode subexp;  // numerator of the result; the denominator is always Square(children[1])
+    if (isZero(childDerivs[0])) {  // numerator's derivative is the constant 0
+        if (isZero(childDerivs[1]))  // both derivatives are 0, so the whole quotient derivative is 0
+            return ExpressionTreeNode(new Operation::Constant(0.0));
+        subexp = ExpressionTreeNode(new Operation::Negate(), ExpressionTreeNode(new Operation::Multiply(), children[0], childDerivs[1]));  // only -(children[0]*childDerivs[1]) remains
+    }
+    else if (isZero(childDerivs[1]))  // denominator's derivative is the constant 0: only children[1]*childDerivs[0] remains
+        subexp = ExpressionTreeNode(new Operation::Multiply(), children[1], childDerivs[0]);
+    else
+        subexp = ExpressionTreeNode(new Operation::Subtract(),
                                                   ExpressionTreeNode(new Operation::Multiply(), children[1], childDerivs[0]),
-                                                 ExpressionTreeNode(new Operation::Multiply(), children[0], childDerivs[1])),
-                              ExpressionTreeNode(new Operation::Square(), children[1]));
+                                                 ExpressionTreeNode(new Operation::Multiply(), children[0], childDerivs[1]));
+    return ExpressionTreeNode(new Operation::Divide(), subexp, ExpressionTreeNode(new Operation::Square(), children[1]));  // numerator divided by the squared denominator
 }
 
 ExpressionTreeNode Operation::Power::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
@@ -105,10 +147,14 @@ ExpressionTreeNode Operation::Power::differentiate(const std::vector<ExpressionT
 }
 
 ExpressionTreeNode Operation::Negate::differentiate(const std::vector<ExpressionTreeNode>& , const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
+    if (isZero(childDerivs[0]))  // negating a zero derivative: return a plain constant 0 instead of a Negate node
+        return ExpressionTreeNode(new Operation::Constant(0.0));
     return ExpressionTreeNode(new Operation::Negate(), childDerivs[0]);
 }
 
 ExpressionTreeNode Operation::Sqrt::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
+    if (isZero(childDerivs[0]))
+        return ExpressionTreeNode(new Operation::Constant(0.0));
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::MultiplyConstant(0.5),
                                                  ExpressionTreeNode(new Operation::Reciprocal(),
@@ -117,24 +163,32 @@ ExpressionTreeNode Operation::Sqrt::differentiate(const std::vector<ExpressionTr
 }
 
 ExpressionTreeNode Operation::Exp::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
+    if (isZero(childDerivs[0]))  // the product below has childDerivs[0] as a factor, so a zero derivative makes the result 0
+        return ExpressionTreeNode(new Operation::Constant(0.0));
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::Exp(), children[0]),
                               childDerivs[0]);
 }
 
 ExpressionTreeNode Operation::Log::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
+    if (isZero(childDerivs[0]))  // the product below has childDerivs[0] as a factor, so a zero derivative makes the result 0
+        return ExpressionTreeNode(new Operation::Constant(0.0));
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::Reciprocal(), children[0]),
                               childDerivs[0]);
 }
 
 ExpressionTreeNode Operation::Sin::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
+    if (isZero(childDerivs[0]))  // the product below has childDerivs[0] as a factor, so a zero derivative makes the result 0
+        return ExpressionTreeNode(new Operation::Constant(0.0));
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::Cos(), children[0]),
                               childDerivs[0]);
 }
 
 ExpressionTreeNode Operation::Cos::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
+    if (isZero(childDerivs[0]))
+        return ExpressionTreeNode(new Operation::Constant(0.0));
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::Negate(),
                                                  ExpressionTreeNode(new Operation::Sin(), children[0])),
@@ -142,6 +196,8 @@ ExpressionTreeNode Operation::Cos::differentiate(const std::vector<ExpressionTre
 }
 
 ExpressionTreeNode Operation::Sec::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
+    if (isZero(childDerivs[0]))
+        return ExpressionTreeNode(new Operation::Constant(0.0));
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::Multiply(),
                                                  ExpressionTreeNode(new Operation::Sec(), children[0]),
@@ -150,6 +206,8 @@ ExpressionTreeNode Operation::Sec::differentiate(const std::vector<ExpressionTre
 }
 
 ExpressionTreeNode Operation::Csc::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
+    if (isZero(childDerivs[0]))
+        return ExpressionTreeNode(new Operation::Constant(0.0));
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::Negate(),
                                                  ExpressionTreeNode(new Operation::Multiply(),
@@ -159,6 +217,8 @@ ExpressionTreeNode Operation::Csc::differentiate(const std::vector<ExpressionTre
 }
 
 ExpressionTreeNode Operation::Tan::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
+    if (isZero(childDerivs[0]))
+        return ExpressionTreeNode(new Operation::Constant(0.0));
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::Square(),
                                                  ExpressionTreeNode(new Operation::Sec(), children[0])),
@@ -166,6 +226,8 @@ ExpressionTreeNode Operation::Tan::differentiate(const std::vector<ExpressionTre
 }
 
 ExpressionTreeNode Operation::Cot::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
+    if (isZero(childDerivs[0]))
+        return ExpressionTreeNode(new Operation::Constant(0.0));
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::Negate(),
                                                  ExpressionTreeNode(new Operation::Square(),
@@ -174,6 +236,8 @@ ExpressionTreeNode Operation::Cot::differentiate(const std::vector<ExpressionTre
 }
 
 ExpressionTreeNode Operation::Asin::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
+    if (isZero(childDerivs[0]))
+        return ExpressionTreeNode(new Operation::Constant(0.0));
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::Reciprocal(),
                                                  ExpressionTreeNode(new Operation::Sqrt(),
@@ -184,6 +248,8 @@ ExpressionTreeNode Operation::Asin::differentiate(const std::vector<ExpressionTr
 }
 
 ExpressionTreeNode Operation::Acos::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
+    if (isZero(childDerivs[0]))
+        return ExpressionTreeNode(new Operation::Constant(0.0));
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::Negate(),
                                                  ExpressionTreeNode(new Operation::Reciprocal(),
@@ -195,6 +261,8 @@ ExpressionTreeNode Operation::Acos::differentiate(const std::vector<ExpressionTr
 }
 
 ExpressionTreeNode Operation::Atan::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
+    if (isZero(childDerivs[0]))
+        return ExpressionTreeNode(new Operation::Constant(0.0));
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::Reciprocal(),
                                                  ExpressionTreeNode(new Operation::AddConstant(1.0),
@@ -213,6 +281,8 @@ ExpressionTreeNode Operation::Atan2::differentiate(const std::vector<ExpressionT
 }
 
 ExpressionTreeNode Operation::Sinh::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
+    if (isZero(childDerivs[0]))
+        return ExpressionTreeNode(new Operation::Constant(0.0));
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::Cosh(),
                                                  children[0]),
@@ -220,6 +290,8 @@ ExpressionTreeNode Operation::Sinh::differentiate(const std::vector<ExpressionTr
 }
 
 ExpressionTreeNode Operation::Cosh::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
+    if (isZero(childDerivs[0]))
+        return ExpressionTreeNode(new Operation::Constant(0.0));
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::Sinh(),
                                                  children[0]),
@@ -227,6 +299,8 @@ ExpressionTreeNode Operation::Cosh::differentiate(const std::vector<ExpressionTr
 }
 
 ExpressionTreeNode Operation::Tanh::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
+    if (isZero(childDerivs[0]))
+        return ExpressionTreeNode(new Operation::Constant(0.0));
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::Subtract(),
                                                  ExpressionTreeNode(new Operation::Constant(1.0)),
@@ -236,6 +310,8 @@ ExpressionTreeNode Operation::Tanh::differentiate(const std::vector<ExpressionTr
 }
 
 ExpressionTreeNode Operation::Erf::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
+    if (isZero(childDerivs[0]))
+        return ExpressionTreeNode(new Operation::Constant(0.0));
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::Multiply(),
                                                  ExpressionTreeNode(new Operation::Constant(2.0/sqrt(M_PI))),
@@ -246,6 +322,8 @@ ExpressionTreeNode Operation::Erf::differentiate(const std::vector<ExpressionTre
 }
 
 ExpressionTreeNode Operation::Erfc::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
+    if (isZero(childDerivs[0]))
+        return ExpressionTreeNode(new Operation::Constant(0.0));
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::Multiply(),
                                                  ExpressionTreeNode(new Operation::Constant(-2.0/sqrt(M_PI))),
@@ -264,6 +342,8 @@ ExpressionTreeNode Operation::Delta::differentiate(const std::vector<ExpressionT
 }
 
 ExpressionTreeNode Operation::Square::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
+    if (isZero(childDerivs[0]))
+        return ExpressionTreeNode(new Operation::Constant(0.0));
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::MultiplyConstant(2.0),
                                                  children[0]),
@@ -271,6 +351,8 @@ ExpressionTreeNode Operation::Square::differentiate(const std::vector<Expression
 }
 
 ExpressionTreeNode Operation::Cube::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
+    if (isZero(childDerivs[0]))
+        return ExpressionTreeNode(new Operation::Constant(0.0));
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::MultiplyConstant(3.0),
                                                  ExpressionTreeNode(new Operation::Square(), children[0])),
@@ -278,6 +360,8 @@ ExpressionTreeNode Operation::Cube::differentiate(const std::vector<ExpressionTr
 }
 
 ExpressionTreeNode Operation::Reciprocal::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
+    if (isZero(childDerivs[0]))
+        return ExpressionTreeNode(new Operation::Constant(0.0));
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::Negate(),
                                                  ExpressionTreeNode(new Operation::Reciprocal(),
@@ -290,11 +374,15 @@ ExpressionTreeNode Operation::AddConstant::differentiate(const std::vector<Expre
 }
 
 ExpressionTreeNode Operation::MultiplyConstant::differentiate(const std::vector<ExpressionTreeNode>& , const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
+    if (isZero(childDerivs[0]))
+        return ExpressionTreeNode(new Operation::Constant(0.0));
     return ExpressionTreeNode(new Operation::MultiplyConstant(value),
                               childDerivs[0]);
 }
 
 ExpressionTreeNode Operation::PowerConstant::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
+    if (isZero(childDerivs[0]))
+        return ExpressionTreeNode(new Operation::Constant(0.0));
     return ExpressionTreeNode(new Operation::Multiply(),
                               ExpressionTreeNode(new Operation::MultiplyConstant(value),
                                                  ExpressionTreeNode(new Operation::PowerConstant(value-1),
@@ -305,22 +393,18 @@ ExpressionTreeNode Operation::PowerConstant::differentiate(const std::vector<Exp
 ExpressionTreeNode Operation::Min::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
     ExpressionTreeNode step(new Operation::Step(),
                             ExpressionTreeNode(new Operation::Subtract(), children[0], children[1]));
-    return ExpressionTreeNode(new Operation::Subtract(),
-                              ExpressionTreeNode(new Operation::Multiply(), childDerivs[1], step),
-                              ExpressionTreeNode(new Operation::Multiply(), childDerivs[0],
-                                                 ExpressionTreeNode(new Operation::AddConstant(-1), step)));
+    return ExpressionTreeNode(new Operation::Select(), {step, childDerivs[1], childDerivs[0]});
 }
 
 ExpressionTreeNode Operation::Max::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
     ExpressionTreeNode step(new Operation::Step(),
                             ExpressionTreeNode(new Operation::Subtract(), children[0], children[1]));
-    return ExpressionTreeNode(new Operation::Subtract(),
-                              ExpressionTreeNode(new Operation::Multiply(), childDerivs[0], step),
-                              ExpressionTreeNode(new Operation::Multiply(), childDerivs[1],
-                                                 ExpressionTreeNode(new Operation::AddConstant(-1), step)));
+    return ExpressionTreeNode(new Operation::Select(), {step, childDerivs[0], childDerivs[1]});
 }
 
 ExpressionTreeNode Operation::Abs::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
+    if (isZero(childDerivs[0]))
+        return ExpressionTreeNode(new Operation::Constant(0.0));
     ExpressionTreeNode step(new Operation::Step(), children[0]);
     return ExpressionTreeNode(new Operation::Multiply(),
                               childDerivs[0],
@@ -337,9 +421,5 @@ ExpressionTreeNode Operation::Ceil::differentiate(const std::vector<ExpressionTr
 }
 
 ExpressionTreeNode Operation::Select::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& ) const {
-    vector<ExpressionTreeNode> derivChildren;
-    derivChildren.push_back(children[0]);
-    derivChildren.push_back(childDerivs[1]);
-    derivChildren.push_back(childDerivs[2]);
-    return ExpressionTreeNode(new Operation::Select(), derivChildren);
+    return ExpressionTreeNode(new Operation::Select(), {children[0], childDerivs[1], childDerivs[2]});
 }
diff --git a/lib/lepton/src/ParsedExpression.cpp b/lib/lepton/src/ParsedExpression.cpp
index 1417551011..a6f41ae354 100644
--- a/lib/lepton/src/ParsedExpression.cpp
+++ b/lib/lepton/src/ParsedExpression.cpp
@@ -6,7 +6,7 @@
  * Biological Structures at Stanford, funded under the NIH Roadmap for        *
  * Medical Research, grant U54 GM072970. See https://simtk.org.               *
  *                                                                            *
- * Portions copyright (c) 2009 Stanford University and the Authors.           *
+ * Portions copyright (c) 2009-2022 Stanford University and the Authors.      *
  * Authors: Peter Eastman                                                     *
  * Contributors:                                                              *
  *                                                                            *
@@ -31,6 +31,7 @@
 
 #include "lepton/ParsedExpression.h"
 #include "lepton/CompiledExpression.h"
+#include "lepton/CompiledVectorExpression.h"
 #include "lepton/ExpressionProgram.h"
 #include "lepton/Operation.h"
 #include <limits>
@@ -68,9 +69,16 @@ double ParsedExpression::evaluate(const ExpressionTreeNode& node, const map<stri
 }
 
 ParsedExpression ParsedExpression::optimize() const {
-    ExpressionTreeNode result = precalculateConstantSubexpressions(getRootNode());
+    ExpressionTreeNode result = getRootNode();
+    vector<const ExpressionTreeNode*> examples;
+    result.assignTags(examples);
+    map<int, ExpressionTreeNode> nodeCache;
+    result = precalculateConstantSubexpressions(result, nodeCache);
     while (true) {
-        ExpressionTreeNode simplified = substituteSimplerExpression(result);
+        examples.clear();
+        result.assignTags(examples);
+        nodeCache.clear();
+        ExpressionTreeNode simplified = substituteSimplerExpression(result, nodeCache);
         if (simplified == result)
             break;
         result = simplified;
@@ -80,9 +88,15 @@ ParsedExpression ParsedExpression::optimize() const {
 
 ParsedExpression ParsedExpression::optimize(const map<string, double>& variables) const {
     ExpressionTreeNode result = preevaluateVariables(getRootNode(), variables);
-    result = precalculateConstantSubexpressions(result);
+    vector<const ExpressionTreeNode*> examples;
+    result.assignTags(examples);
+    map<int, ExpressionTreeNode> nodeCache;
+    result = precalculateConstantSubexpressions(result, nodeCache);
     while (true) {
-        ExpressionTreeNode simplified = substituteSimplerExpression(result);
+        examples.clear();
+        result.assignTags(examples);
+        nodeCache.clear();
+        ExpressionTreeNode simplified = substituteSimplerExpression(result, nodeCache);
         if (simplified == result)
             break;
         result = simplified;
@@ -104,27 +118,44 @@ ExpressionTreeNode ParsedExpression::preevaluateVariables(const ExpressionTreeNo
     return ExpressionTreeNode(node.getOperation().clone(), children);
 }
 
-ExpressionTreeNode ParsedExpression::precalculateConstantSubexpressions(const ExpressionTreeNode& node) {
+ExpressionTreeNode ParsedExpression::precalculateConstantSubexpressions(const ExpressionTreeNode& node, map<int, ExpressionTreeNode>& nodeCache) {
+    auto cached = nodeCache.find(node.tag);
+    if (cached != nodeCache.end())
+        return cached->second;
     vector<ExpressionTreeNode> children(node.getChildren().size());
     for (int i = 0; i < (int) children.size(); i++)
-        children[i] = precalculateConstantSubexpressions(node.getChildren()[i]);
+        children[i] = precalculateConstantSubexpressions(node.getChildren()[i], nodeCache);
     ExpressionTreeNode result = ExpressionTreeNode(node.getOperation().clone(), children);
-    if (node.getOperation().getId() == Operation::VARIABLE || node.getOperation().getId() == Operation::CUSTOM)
+    if (node.getOperation().getId() == Operation::VARIABLE || node.getOperation().getId() == Operation::CUSTOM) {
+        nodeCache[node.tag] = result;
         return result;
+    }
     for (int i = 0; i < (int) children.size(); i++)
-        if (children[i].getOperation().getId() != Operation::CONSTANT)
+        if (children[i].getOperation().getId() != Operation::CONSTANT) {
+            nodeCache[node.tag] = result;
             return result;
-    return ExpressionTreeNode(new Operation::Constant(evaluate(result, map<string, double>())));
+        }
+    result = ExpressionTreeNode(new Operation::Constant(evaluate(result, map<string, double>())));
+    nodeCache[node.tag] = result;
+    return result;
 }
 
-ExpressionTreeNode ParsedExpression::substituteSimplerExpression(const ExpressionTreeNode& node) {
+ExpressionTreeNode ParsedExpression::substituteSimplerExpression(const ExpressionTreeNode& node, map<int, ExpressionTreeNode>& nodeCache) {
     vector<ExpressionTreeNode> children(node.getChildren().size());
-    for (int i = 0; i < (int) children.size(); i++)
-        children[i] = substituteSimplerExpression(node.getChildren()[i]);
+    for (int i = 0; i < (int) children.size(); i++) {
+        const ExpressionTreeNode& child = node.getChildren()[i];
+        auto cached = nodeCache.find(child.tag);
+        if (cached == nodeCache.end()) {
+            children[i] = substituteSimplerExpression(child, nodeCache);
+            nodeCache[child.tag] = children[i];
+        }
+        else
+            children[i] = cached->second;
+    }
 
     // Collect some info on constant expressions in children
     bool first_const = children.size() > 0 && isConstant(children[0]); // is first child constant?
-    bool second_const = children.size() > 1 && isConstant(children[1]); ; // is second child constant?
+    bool second_const = children.size() > 1 && isConstant(children[1]); // is second child constant?
     double first, second; // if yes, value of first and second child
     if (first_const)
         first = getConstantValue(children[0]);
@@ -296,6 +327,12 @@ ExpressionTreeNode ParsedExpression::substituteSimplerExpression(const Expressio
                 return children[0].getChildren()[0];
             break;
         }
+        case Operation::SELECT:
+        {
+            if (children[1] == children[2]) // Select between two identical values
+                return children[1];
+            break;
+        }
         default:
         {
             // If operation ID is not one of the above,
@@ -308,14 +345,22 @@ ExpressionTreeNode ParsedExpression::substituteSimplerExpression(const Expressio
 }
 
 ParsedExpression ParsedExpression::differentiate(const string& variable) const {
-    return differentiate(getRootNode(), variable);
+    vector<const ExpressionTreeNode*> examples;
+    getRootNode().assignTags(examples);
+    map<int, ExpressionTreeNode> nodeCache;
+    return differentiate(getRootNode(), variable, nodeCache);
 }
 
-ExpressionTreeNode ParsedExpression::differentiate(const ExpressionTreeNode& node, const string& variable) {
+ExpressionTreeNode ParsedExpression::differentiate(const ExpressionTreeNode& node, const string& variable, map<int, ExpressionTreeNode>& nodeCache) {
+    auto cached = nodeCache.find(node.tag);
+    if (cached != nodeCache.end())
+        return cached->second;
     vector<ExpressionTreeNode> childDerivs(node.getChildren().size());
     for (int i = 0; i < (int) childDerivs.size(); i++)
-        childDerivs[i] = differentiate(node.getChildren()[i], variable);
-    return node.getOperation().differentiate(node.getChildren(),childDerivs, variable);
+        childDerivs[i] = differentiate(node.getChildren()[i], variable, nodeCache);
+    ExpressionTreeNode result = node.getOperation().differentiate(node.getChildren(), childDerivs, variable);
+    nodeCache[node.tag] = result;
+    return result;
 }
 
 bool ParsedExpression::isConstant(const ExpressionTreeNode& node) {
@@ -337,6 +382,10 @@ CompiledExpression ParsedExpression::createCompiledExpression() const {
     return CompiledExpression(*this);
 }
 
+CompiledVectorExpression ParsedExpression::createCompiledVectorExpression(int width) const {
+    return CompiledVectorExpression(*this, width);
+}
+
 ParsedExpression ParsedExpression::renameVariables(const map<string, string>& replacements) const {
     return ParsedExpression(renameNodeVariables(getRootNode(), replacements));
 }

From e59f99b44079e5bf60f458905b43ad7ec756fbf8 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Thu, 22 Dec 2022 22:50:01 -0500
Subject: [PATCH 24/79] add support for JIT compilation

---
 cmake/Modules/Packages/LEPTON.cmake        |   23 +-
 lib/lepton/asmjit/LICENSE.md               |   17 +
 lib/lepton/asmjit/a64.h                    |   62 +
 lib/lepton/asmjit/arm.h                    |   62 +
 lib/lepton/asmjit/arm/a64archtraits_p.h    |   81 +
 lib/lepton/asmjit/arm/a64assembler.cpp     | 5115 ++++++++++++++++++++
 lib/lepton/asmjit/arm/a64assembler.h       |   72 +
 lib/lepton/asmjit/arm/a64builder.cpp       |   51 +
 lib/lepton/asmjit/arm/a64builder.h         |   57 +
 lib/lepton/asmjit/arm/a64compiler.cpp      |   60 +
 lib/lepton/asmjit/arm/a64compiler.h        |  247 +
 lib/lepton/asmjit/arm/a64emithelper.cpp    |  464 ++
 lib/lepton/asmjit/arm/a64emithelper_p.h    |   50 +
 lib/lepton/asmjit/arm/a64emitter.h         | 1228 +++++
 lib/lepton/asmjit/arm/a64formatter.cpp     |  298 ++
 lib/lepton/asmjit/arm/a64formatter_p.h     |   59 +
 lib/lepton/asmjit/arm/a64func.cpp          |  189 +
 lib/lepton/asmjit/arm/a64func_p.h          |   33 +
 lib/lepton/asmjit/arm/a64globals.h         | 1894 ++++++++
 lib/lepton/asmjit/arm/a64instapi.cpp       |  278 ++
 lib/lepton/asmjit/arm/a64instapi_p.h       |   41 +
 lib/lepton/asmjit/arm/a64instdb.cpp        | 1957 ++++++++
 lib/lepton/asmjit/arm/a64instdb.h          |   74 +
 lib/lepton/asmjit/arm/a64instdb_p.h        |  876 ++++
 lib/lepton/asmjit/arm/a64operand.cpp       |   85 +
 lib/lepton/asmjit/arm/a64operand.h         |  312 ++
 lib/lepton/asmjit/arm/a64rapass.cpp        |  852 ++++
 lib/lepton/asmjit/arm/a64rapass_p.h        |  105 +
 lib/lepton/asmjit/arm/a64utils.h           |  179 +
 lib/lepton/asmjit/arm/armformatter.cpp     |  143 +
 lib/lepton/asmjit/arm/armformatter_p.h     |   44 +
 lib/lepton/asmjit/arm/armglobals.h         |   21 +
 lib/lepton/asmjit/arm/armoperand.h         |  621 +++
 lib/lepton/asmjit/asmjit-scope-begin.h     |   17 +
 lib/lepton/asmjit/asmjit-scope-end.h       |    9 +
 lib/lepton/asmjit/asmjit.h                 |   33 +
 lib/lepton/asmjit/core.h                   | 1861 +++++++
 lib/lepton/asmjit/core/api-build_p.h       |   55 +
 lib/lepton/asmjit/core/api-config.h        |  613 +++
 lib/lepton/asmjit/core/archcommons.h       |  229 +
 lib/lepton/asmjit/core/archtraits.cpp      |  160 +
 lib/lepton/asmjit/core/archtraits.h        |  290 ++
 lib/lepton/asmjit/core/assembler.cpp       |  406 ++
 lib/lepton/asmjit/core/assembler.h         |  129 +
 lib/lepton/asmjit/core/builder.cpp         |  889 ++++
 lib/lepton/asmjit/core/builder.h           | 1391 ++++++
 lib/lepton/asmjit/core/codebuffer.h        |  113 +
 lib/lepton/asmjit/core/codeholder.cpp      | 1149 +++++
 lib/lepton/asmjit/core/codeholder.h        | 1035 ++++
 lib/lepton/asmjit/core/codewriter.cpp      |  175 +
 lib/lepton/asmjit/core/codewriter_p.h      |  179 +
 lib/lepton/asmjit/core/compiler.cpp        |  582 +++
 lib/lepton/asmjit/core/compiler.h          |  737 +++
 lib/lepton/asmjit/core/compilerdefs.h      |  173 +
 lib/lepton/asmjit/core/constpool.cpp       |  363 ++
 lib/lepton/asmjit/core/constpool.h         |  250 +
 lib/lepton/asmjit/core/cpuinfo.cpp         | 1162 +++++
 lib/lepton/asmjit/core/cpuinfo.h           |  813 ++++
 lib/lepton/asmjit/core/emithelper.cpp      |  323 ++
 lib/lepton/asmjit/core/emithelper_p.h      |   58 +
 lib/lepton/asmjit/core/emitter.cpp         |  333 ++
 lib/lepton/asmjit/core/emitter.h           |  741 +++
 lib/lepton/asmjit/core/emitterutils.cpp    |  129 +
 lib/lepton/asmjit/core/emitterutils_p.h    |   89 +
 lib/lepton/asmjit/core/environment.cpp     |   46 +
 lib/lepton/asmjit/core/environment.h       |  508 ++
 lib/lepton/asmjit/core/errorhandler.cpp    |   14 +
 lib/lepton/asmjit/core/errorhandler.h      |  228 +
 lib/lepton/asmjit/core/formatter.cpp       |  584 +++
 lib/lepton/asmjit/core/formatter.h         |  247 +
 lib/lepton/asmjit/core/formatter_p.h       |   34 +
 lib/lepton/asmjit/core/func.cpp            |  286 ++
 lib/lepton/asmjit/core/func.h              | 1445 ++++++
 lib/lepton/asmjit/core/funcargscontext.cpp |  293 ++
 lib/lepton/asmjit/core/funcargscontext_p.h |  199 +
 lib/lepton/asmjit/core/globals.cpp         |  133 +
 lib/lepton/asmjit/core/globals.h           |  393 ++
 lib/lepton/asmjit/core/inst.cpp            |  113 +
 lib/lepton/asmjit/core/inst.h              |  772 +++
 lib/lepton/asmjit/core/jitallocator.cpp    | 1242 +++++
 lib/lepton/asmjit/core/jitallocator.h      |  261 +
 lib/lepton/asmjit/core/jitruntime.cpp      |   80 +
 lib/lepton/asmjit/core/jitruntime.h        |   89 +
 lib/lepton/asmjit/core/logger.cpp          |   69 +
 lib/lepton/asmjit/core/logger.h            |  198 +
 lib/lepton/asmjit/core/misc_p.h            |   33 +
 lib/lepton/asmjit/core/operand.cpp         |  132 +
 lib/lepton/asmjit/core/operand.h           | 1611 ++++++
 lib/lepton/asmjit/core/osutils.cpp         |   84 +
 lib/lepton/asmjit/core/osutils.h           |   61 +
 lib/lepton/asmjit/core/osutils_p.h         |   68 +
 lib/lepton/asmjit/core/raassignment_p.h    |  418 ++
 lib/lepton/asmjit/core/rabuilders_p.h      |  612 +++
 lib/lepton/asmjit/core/radefs_p.h          | 1204 +++++
 lib/lepton/asmjit/core/ralocal.cpp         | 1166 +++++
 lib/lepton/asmjit/core/ralocal_p.h         |  254 +
 lib/lepton/asmjit/core/rapass.cpp          | 1969 ++++++++
 lib/lepton/asmjit/core/rapass_p.h          | 1183 +++++
 lib/lepton/asmjit/core/rastack.cpp         |  184 +
 lib/lepton/asmjit/core/rastack_p.h         |  171 +
 lib/lepton/asmjit/core/string.cpp          |  559 +++
 lib/lepton/asmjit/core/string.h            |  372 ++
 lib/lepton/asmjit/core/support.cpp         |  494 ++
 lib/lepton/asmjit/core/support.h           | 1773 +++++++
 lib/lepton/asmjit/core/target.cpp          |   14 +
 lib/lepton/asmjit/core/target.h            |   53 +
 lib/lepton/asmjit/core/type.cpp            |   74 +
 lib/lepton/asmjit/core/type.h              |  419 ++
 lib/lepton/asmjit/core/virtmem.cpp         |  722 +++
 lib/lepton/asmjit/core/virtmem.h           |  242 +
 lib/lepton/asmjit/core/zone.cpp            |  353 ++
 lib/lepton/asmjit/core/zone.h              |  615 +++
 lib/lepton/asmjit/core/zonehash.cpp        |  309 ++
 lib/lepton/asmjit/core/zonehash.h          |  186 +
 lib/lepton/asmjit/core/zonelist.cpp        |  163 +
 lib/lepton/asmjit/core/zonelist.h          |  209 +
 lib/lepton/asmjit/core/zonestack.cpp       |  176 +
 lib/lepton/asmjit/core/zonestack.h         |  239 +
 lib/lepton/asmjit/core/zonestring.h        |  120 +
 lib/lepton/asmjit/core/zonetree.cpp        |   99 +
 lib/lepton/asmjit/core/zonetree.h          |  380 ++
 lib/lepton/asmjit/core/zonevector.cpp      |  356 ++
 lib/lepton/asmjit/core/zonevector.h        |  690 +++
 lib/lepton/asmjit/x86.h                    |   93 +
 lib/lepton/asmjit/x86/x86archtraits_p.h    |  148 +
 lib/lepton/asmjit/x86/x86assembler.cpp     | 5110 +++++++++++++++++++
 lib/lepton/asmjit/x86/x86assembler.h       |  685 +++
 lib/lepton/asmjit/x86/x86builder.cpp       |   52 +
 lib/lepton/asmjit/x86/x86builder.h         |  351 ++
 lib/lepton/asmjit/x86/x86compiler.cpp      |   61 +
 lib/lepton/asmjit/x86/x86compiler.h        |  721 +++
 lib/lepton/asmjit/x86/x86emithelper.cpp    |  619 +++
 lib/lepton/asmjit/x86/x86emithelper_p.h    |   60 +
 lib/lepton/asmjit/x86/x86emitter.h         | 4315 +++++++++++++++++
 lib/lepton/asmjit/x86/x86formatter.cpp     |  944 ++++
 lib/lepton/asmjit/x86/x86formatter_p.h     |   58 +
 lib/lepton/asmjit/x86/x86func.cpp          |  503 ++
 lib/lepton/asmjit/x86/x86func_p.h          |   33 +
 lib/lepton/asmjit/x86/x86globals.h         | 2169 +++++++++
 lib/lepton/asmjit/x86/x86instapi.cpp       | 1732 +++++++
 lib/lepton/asmjit/x86/x86instapi_p.h       |   41 +
 lib/lepton/asmjit/x86/x86instdb.cpp        | 4427 +++++++++++++++++
 lib/lepton/asmjit/x86/x86instdb.h          |  563 +++
 lib/lepton/asmjit/x86/x86instdb_p.h        |  311 ++
 lib/lepton/asmjit/x86/x86opcode_p.h        |  436 ++
 lib/lepton/asmjit/x86/x86operand.cpp       |  231 +
 lib/lepton/asmjit/x86/x86operand.h         | 1085 +++++
 lib/lepton/asmjit/x86/x86rapass.cpp        | 1509 ++++++
 lib/lepton/asmjit/x86/x86rapass_p.h        |   94 +
 149 files changed, 81486 insertions(+), 2 deletions(-)
 create mode 100644 lib/lepton/asmjit/LICENSE.md
 create mode 100644 lib/lepton/asmjit/a64.h
 create mode 100644 lib/lepton/asmjit/arm.h
 create mode 100644 lib/lepton/asmjit/arm/a64archtraits_p.h
 create mode 100644 lib/lepton/asmjit/arm/a64assembler.cpp
 create mode 100644 lib/lepton/asmjit/arm/a64assembler.h
 create mode 100644 lib/lepton/asmjit/arm/a64builder.cpp
 create mode 100644 lib/lepton/asmjit/arm/a64builder.h
 create mode 100644 lib/lepton/asmjit/arm/a64compiler.cpp
 create mode 100644 lib/lepton/asmjit/arm/a64compiler.h
 create mode 100644 lib/lepton/asmjit/arm/a64emithelper.cpp
 create mode 100644 lib/lepton/asmjit/arm/a64emithelper_p.h
 create mode 100644 lib/lepton/asmjit/arm/a64emitter.h
 create mode 100644 lib/lepton/asmjit/arm/a64formatter.cpp
 create mode 100644 lib/lepton/asmjit/arm/a64formatter_p.h
 create mode 100644 lib/lepton/asmjit/arm/a64func.cpp
 create mode 100644 lib/lepton/asmjit/arm/a64func_p.h
 create mode 100644 lib/lepton/asmjit/arm/a64globals.h
 create mode 100644 lib/lepton/asmjit/arm/a64instapi.cpp
 create mode 100644 lib/lepton/asmjit/arm/a64instapi_p.h
 create mode 100644 lib/lepton/asmjit/arm/a64instdb.cpp
 create mode 100644 lib/lepton/asmjit/arm/a64instdb.h
 create mode 100644 lib/lepton/asmjit/arm/a64instdb_p.h
 create mode 100644 lib/lepton/asmjit/arm/a64operand.cpp
 create mode 100644 lib/lepton/asmjit/arm/a64operand.h
 create mode 100644 lib/lepton/asmjit/arm/a64rapass.cpp
 create mode 100644 lib/lepton/asmjit/arm/a64rapass_p.h
 create mode 100644 lib/lepton/asmjit/arm/a64utils.h
 create mode 100644 lib/lepton/asmjit/arm/armformatter.cpp
 create mode 100644 lib/lepton/asmjit/arm/armformatter_p.h
 create mode 100644 lib/lepton/asmjit/arm/armglobals.h
 create mode 100644 lib/lepton/asmjit/arm/armoperand.h
 create mode 100644 lib/lepton/asmjit/asmjit-scope-begin.h
 create mode 100644 lib/lepton/asmjit/asmjit-scope-end.h
 create mode 100644 lib/lepton/asmjit/asmjit.h
 create mode 100644 lib/lepton/asmjit/core.h
 create mode 100644 lib/lepton/asmjit/core/api-build_p.h
 create mode 100644 lib/lepton/asmjit/core/api-config.h
 create mode 100644 lib/lepton/asmjit/core/archcommons.h
 create mode 100644 lib/lepton/asmjit/core/archtraits.cpp
 create mode 100644 lib/lepton/asmjit/core/archtraits.h
 create mode 100644 lib/lepton/asmjit/core/assembler.cpp
 create mode 100644 lib/lepton/asmjit/core/assembler.h
 create mode 100644 lib/lepton/asmjit/core/builder.cpp
 create mode 100644 lib/lepton/asmjit/core/builder.h
 create mode 100644 lib/lepton/asmjit/core/codebuffer.h
 create mode 100644 lib/lepton/asmjit/core/codeholder.cpp
 create mode 100644 lib/lepton/asmjit/core/codeholder.h
 create mode 100644 lib/lepton/asmjit/core/codewriter.cpp
 create mode 100644 lib/lepton/asmjit/core/codewriter_p.h
 create mode 100644 lib/lepton/asmjit/core/compiler.cpp
 create mode 100644 lib/lepton/asmjit/core/compiler.h
 create mode 100644 lib/lepton/asmjit/core/compilerdefs.h
 create mode 100644 lib/lepton/asmjit/core/constpool.cpp
 create mode 100644 lib/lepton/asmjit/core/constpool.h
 create mode 100644 lib/lepton/asmjit/core/cpuinfo.cpp
 create mode 100644 lib/lepton/asmjit/core/cpuinfo.h
 create mode 100644 lib/lepton/asmjit/core/emithelper.cpp
 create mode 100644 lib/lepton/asmjit/core/emithelper_p.h
 create mode 100644 lib/lepton/asmjit/core/emitter.cpp
 create mode 100644 lib/lepton/asmjit/core/emitter.h
 create mode 100644 lib/lepton/asmjit/core/emitterutils.cpp
 create mode 100644 lib/lepton/asmjit/core/emitterutils_p.h
 create mode 100644 lib/lepton/asmjit/core/environment.cpp
 create mode 100644 lib/lepton/asmjit/core/environment.h
 create mode 100644 lib/lepton/asmjit/core/errorhandler.cpp
 create mode 100644 lib/lepton/asmjit/core/errorhandler.h
 create mode 100644 lib/lepton/asmjit/core/formatter.cpp
 create mode 100644 lib/lepton/asmjit/core/formatter.h
 create mode 100644 lib/lepton/asmjit/core/formatter_p.h
 create mode 100644 lib/lepton/asmjit/core/func.cpp
 create mode 100644 lib/lepton/asmjit/core/func.h
 create mode 100644 lib/lepton/asmjit/core/funcargscontext.cpp
 create mode 100644 lib/lepton/asmjit/core/funcargscontext_p.h
 create mode 100644 lib/lepton/asmjit/core/globals.cpp
 create mode 100644 lib/lepton/asmjit/core/globals.h
 create mode 100644 lib/lepton/asmjit/core/inst.cpp
 create mode 100644 lib/lepton/asmjit/core/inst.h
 create mode 100644 lib/lepton/asmjit/core/jitallocator.cpp
 create mode 100644 lib/lepton/asmjit/core/jitallocator.h
 create mode 100644 lib/lepton/asmjit/core/jitruntime.cpp
 create mode 100644 lib/lepton/asmjit/core/jitruntime.h
 create mode 100644 lib/lepton/asmjit/core/logger.cpp
 create mode 100644 lib/lepton/asmjit/core/logger.h
 create mode 100644 lib/lepton/asmjit/core/misc_p.h
 create mode 100644 lib/lepton/asmjit/core/operand.cpp
 create mode 100644 lib/lepton/asmjit/core/operand.h
 create mode 100644 lib/lepton/asmjit/core/osutils.cpp
 create mode 100644 lib/lepton/asmjit/core/osutils.h
 create mode 100644 lib/lepton/asmjit/core/osutils_p.h
 create mode 100644 lib/lepton/asmjit/core/raassignment_p.h
 create mode 100644 lib/lepton/asmjit/core/rabuilders_p.h
 create mode 100644 lib/lepton/asmjit/core/radefs_p.h
 create mode 100644 lib/lepton/asmjit/core/ralocal.cpp
 create mode 100644 lib/lepton/asmjit/core/ralocal_p.h
 create mode 100644 lib/lepton/asmjit/core/rapass.cpp
 create mode 100644 lib/lepton/asmjit/core/rapass_p.h
 create mode 100644 lib/lepton/asmjit/core/rastack.cpp
 create mode 100644 lib/lepton/asmjit/core/rastack_p.h
 create mode 100644 lib/lepton/asmjit/core/string.cpp
 create mode 100644 lib/lepton/asmjit/core/string.h
 create mode 100644 lib/lepton/asmjit/core/support.cpp
 create mode 100644 lib/lepton/asmjit/core/support.h
 create mode 100644 lib/lepton/asmjit/core/target.cpp
 create mode 100644 lib/lepton/asmjit/core/target.h
 create mode 100644 lib/lepton/asmjit/core/type.cpp
 create mode 100644 lib/lepton/asmjit/core/type.h
 create mode 100644 lib/lepton/asmjit/core/virtmem.cpp
 create mode 100644 lib/lepton/asmjit/core/virtmem.h
 create mode 100644 lib/lepton/asmjit/core/zone.cpp
 create mode 100644 lib/lepton/asmjit/core/zone.h
 create mode 100644 lib/lepton/asmjit/core/zonehash.cpp
 create mode 100644 lib/lepton/asmjit/core/zonehash.h
 create mode 100644 lib/lepton/asmjit/core/zonelist.cpp
 create mode 100644 lib/lepton/asmjit/core/zonelist.h
 create mode 100644 lib/lepton/asmjit/core/zonestack.cpp
 create mode 100644 lib/lepton/asmjit/core/zonestack.h
 create mode 100644 lib/lepton/asmjit/core/zonestring.h
 create mode 100644 lib/lepton/asmjit/core/zonetree.cpp
 create mode 100644 lib/lepton/asmjit/core/zonetree.h
 create mode 100644 lib/lepton/asmjit/core/zonevector.cpp
 create mode 100644 lib/lepton/asmjit/core/zonevector.h
 create mode 100644 lib/lepton/asmjit/x86.h
 create mode 100644 lib/lepton/asmjit/x86/x86archtraits_p.h
 create mode 100644 lib/lepton/asmjit/x86/x86assembler.cpp
 create mode 100644 lib/lepton/asmjit/x86/x86assembler.h
 create mode 100644 lib/lepton/asmjit/x86/x86builder.cpp
 create mode 100644 lib/lepton/asmjit/x86/x86builder.h
 create mode 100644 lib/lepton/asmjit/x86/x86compiler.cpp
 create mode 100644 lib/lepton/asmjit/x86/x86compiler.h
 create mode 100644 lib/lepton/asmjit/x86/x86emithelper.cpp
 create mode 100644 lib/lepton/asmjit/x86/x86emithelper_p.h
 create mode 100644 lib/lepton/asmjit/x86/x86emitter.h
 create mode 100644 lib/lepton/asmjit/x86/x86formatter.cpp
 create mode 100644 lib/lepton/asmjit/x86/x86formatter_p.h
 create mode 100644 lib/lepton/asmjit/x86/x86func.cpp
 create mode 100644 lib/lepton/asmjit/x86/x86func_p.h
 create mode 100644 lib/lepton/asmjit/x86/x86globals.h
 create mode 100644 lib/lepton/asmjit/x86/x86instapi.cpp
 create mode 100644 lib/lepton/asmjit/x86/x86instapi_p.h
 create mode 100644 lib/lepton/asmjit/x86/x86instdb.cpp
 create mode 100644 lib/lepton/asmjit/x86/x86instdb.h
 create mode 100644 lib/lepton/asmjit/x86/x86instdb_p.h
 create mode 100644 lib/lepton/asmjit/x86/x86opcode_p.h
 create mode 100644 lib/lepton/asmjit/x86/x86operand.cpp
 create mode 100644 lib/lepton/asmjit/x86/x86operand.h
 create mode 100644 lib/lepton/asmjit/x86/x86rapass.cpp
 create mode 100644 lib/lepton/asmjit/x86/x86rapass_p.h

diff --git a/cmake/Modules/Packages/LEPTON.cmake b/cmake/Modules/Packages/LEPTON.cmake
index a1a74f3aa9..28ade58636 100644
--- a/cmake/Modules/Packages/LEPTON.cmake
+++ b/cmake/Modules/Packages/LEPTON.cmake
@@ -1,8 +1,27 @@
 set(LEPTON_SOURCE_DIR ${LAMMPS_LIB_SOURCE_DIR}/lepton)
 
 file(GLOB LEPTON_SOURCES ${LEPTON_SOURCE_DIR}/src/[^.]*.cpp)
-add_library(lmplepton STATIC ${LEPTON_SOURCES})
-target_compile_definitions(lmplepton PUBLIC -DLEPTON_BUILDING_STATIC_LIBRARY=1)
+
+if((CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86_64") OR
+   (CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "amd64") OR
+   (CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "AMD64"))
+   option(LEPTON_ENABLE_JIT "Enable Just-In-Time compiler for Lepton" ON)
+else()
+   option(LEPTON_ENABLE_JIT "Enable Just-In-Time compiler for Lepton" OFF)
+endif()
+
+if(LEPTON_ENABLE_JIT)
+  file(GLOB ASMJIT_SOURCES ${LEPTON_SOURCE_DIR}/asmjit/*/[^.]*.cpp)
+endif()
+
+add_library(lmplepton STATIC ${LEPTON_SOURCES} ${ASMJIT_SOURCES})
 set_target_properties(lmplepton PROPERTIES OUTPUT_NAME lammps_lmplepton${LAMMPS_MACHINE})
+target_compile_definitions(lmplepton PUBLIC -DLEPTON_BUILDING_STATIC_LIBRARY=1)
 target_include_directories(lmplepton PUBLIC ${LEPTON_SOURCE_DIR}/include)
+
+if(LEPTON_ENABLE_JIT)
+  target_compile_definitions(lmplepton PUBLIC "LEPTON_USE_JIT=1;ASMJIT_BUILD_X86=1;ASMJIT_EMBED=1;ASMJIT_BUILD_RELEASE=1")
+  target_include_directories(lmplepton PUBLIC ${LEPTON_SOURCE_DIR})
+endif()
+
 target_link_libraries(lammps PRIVATE lmplepton)
diff --git a/lib/lepton/asmjit/LICENSE.md b/lib/lepton/asmjit/LICENSE.md
new file mode 100644
index 0000000000..020a569dbd
--- /dev/null
+++ b/lib/lepton/asmjit/LICENSE.md
@@ -0,0 +1,17 @@
+Copyright (c) 2008-2020 The AsmJit Authors
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not
+   claim that you wrote the original software. If you use this software
+   in a product, an acknowledgment in the product documentation would be
+   appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be
+   misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
diff --git a/lib/lepton/asmjit/a64.h b/lib/lepton/asmjit/a64.h
new file mode 100644
index 0000000000..ea4d304f05
--- /dev/null
+++ b/lib/lepton/asmjit/a64.h
@@ -0,0 +1,62 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_A64_H_INCLUDED
+#define ASMJIT_A64_H_INCLUDED
+
+//! \addtogroup asmjit_a64
+//!
+//! ### Emitters
+//!
+//!   - \ref a64::Assembler - AArch64 assembler (must read, provides examples).
+//!   - \ref a64::Builder - AArch64 builder.
+//!   - \ref a64::Compiler - AArch64 compiler.
+//!   - \ref a64::Emitter - AArch64 emitter (abstract).
+//!
+//! ### Supported Instructions
+//!
+//!   - Emitters:
+//!     - \ref a64::EmitterExplicitT - Provides all instructions that use explicit
+//!       operands, provides also utility functions. The member functions provided
+//!       are part of all ARM/AArch64 emitters.
+//!
+//!   - Instruction representation:
+//!     - \ref a64::Inst::Id - instruction identifiers.
+//!
+//! ### Register Operands
+//!
+//!   - \ref arm::Reg - Base class for any AArch32/AArch64 register.
+//!     - \ref arm::Gp - General purpose register:
+//!       - \ref arm::GpW - 32-bit register.
+//!       - \ref arm::GpX - 64-bit register.
+//!     - \ref arm::Vec - Vector (SIMD) register:
+//!       - \ref arm::VecB - 8-bit SIMD register (AArch64 only).
+//!       - \ref arm::VecH - 16-bit SIMD register (AArch64 only).
+//!       - \ref arm::VecS - 32-bit SIMD register.
+//!       - \ref arm::VecD - 64-bit SIMD register.
+//!       - \ref arm::VecV - 128-bit SIMD register.
+//!
+//! ### Memory Operands
+//!
+//!   - \ref arm::Mem - AArch32/AArch64 memory operand that provides support for all ARM addressing features
+//!     including base, index, pre/post increment, and ARM-specific shift addressing and index extending.
+//!
+//! ### Other
+//!
+//!   - \ref arm::Shift - Shift operation and value.
+//!   - \ref a64::Utils - Utilities that can help during code generation for AArch64.
+
+#include "./arm.h"
+#include "./arm/a64assembler.h"
+#include "./arm/a64builder.h"
+#include "./arm/a64compiler.h"
+#include "./arm/a64emitter.h"
+#include "./arm/a64globals.h"
+#include "./arm/a64instdb.h"
+#include "./arm/a64operand.h"
+#include "./arm/a64utils.h"
+
+#endif // ASMJIT_A64_H_INCLUDED
+
diff --git a/lib/lepton/asmjit/arm.h b/lib/lepton/asmjit/arm.h
new file mode 100644
index 0000000000..57ffa815b8
--- /dev/null
+++ b/lib/lepton/asmjit/arm.h
@@ -0,0 +1,62 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_ARM_H_INCLUDED
+#define ASMJIT_ARM_H_INCLUDED
+
+//! \addtogroup asmjit_arm
+//!
+//! ### Namespaces
+//!
+//!   - \ref arm - arm namespace provides common functionality for both AArch32 and AArch64 backends.
+//!   - \ref a64 - a64 namespace provides support for AArch64 architecture. In addition it includes
+//!     \ref arm namespace, so a single namespace is all you need when targeting AArch64 architecture.
+//!
+//! ### Emitters
+//!
+//!   - AArch64
+//!     - \ref a64::Assembler - AArch64 assembler (must read, provides examples).
+//!     - \ref a64::Builder - AArch64 builder.
+//!     - \ref a64::Compiler - AArch64 compiler.
+//!     - \ref a64::Emitter - AArch64 emitter (abstract).
+//!
+//! ### Supported Instructions
+//!
+//!   - AArch64:
+//!     - Emitters:
+//!       - \ref a64::EmitterExplicitT - Provides all instructions that use explicit operands, provides also
+//!         utility functions. The member functions provided are part of all AArch64 emitters.
+//!     - Instruction representation:
+//!       - \ref a64::Inst::Id - instruction identifiers.
+//!
+//! ### Register Operands
+//!
+//!   - \ref arm::Reg - Base class for any AArch32/AArch64 register.
+//!     - \ref arm::Gp - General purpose register:
+//!       - \ref arm::GpW - 32-bit register.
+//!       - \ref arm::GpX - 64-bit register.
+//!     - \ref arm::Vec - Vector (SIMD) register:
+//!       - \ref arm::VecB - 8-bit SIMD register (AArch64 only).
+//!       - \ref arm::VecH - 16-bit SIMD register (AArch64 only).
+//!       - \ref arm::VecS - 32-bit SIMD register.
+//!       - \ref arm::VecD - 64-bit SIMD register.
+//!       - \ref arm::VecV - 128-bit SIMD register.
+//!
+//! ### Memory Operands
+//!
+//!   - \ref arm::Mem - AArch32/AArch64 memory operand that provides support for all ARM addressing features
+//!     including base, index, pre/post increment, and ARM-specific shift addressing and index extending.
+//!
+//! ### Other
+//!
+//!   - \ref arm::Shift - Shift operation and value (both AArch32 and AArch64).
+//!   - \ref arm::DataType - Data type that is part of an instruction in AArch32 mode.
+//!   - \ref a64::Utils - Utilities that can help during code generation for AArch64.
+
+#include "./core.h"
+#include "./arm/armglobals.h"
+#include "./arm/armoperand.h"
+
+#endif // ASMJIT_ARM_H_INCLUDED
diff --git a/lib/lepton/asmjit/arm/a64archtraits_p.h b/lib/lepton/asmjit/arm/a64archtraits_p.h
new file mode 100644
index 0000000000..87559c71d5
--- /dev/null
+++ b/lib/lepton/asmjit/arm/a64archtraits_p.h
@@ -0,0 +1,81 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_ARM_A64ARCHTRAITS_P_H_INCLUDED
+#define ASMJIT_ARM_A64ARCHTRAITS_P_H_INCLUDED
+
+#include "../core/archtraits.h"
+#include "../core/misc_p.h"
+#include "../core/type.h"
+#include "../arm/a64operand.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(a64)
+
+//! \cond INTERNAL
+//! \addtogroup asmjit_a64
+//! \{
+
+static const constexpr ArchTraits a64ArchTraits = {
+  // SP/FP/LR/PC.
+  Gp::kIdSp, Gp::kIdFp, Gp::kIdLr, 0xFF,
+
+  // Reserved.
+  { 0, 0, 0 },
+
+  // HW stack alignment (AArch64 requires stack aligned to 16 bytes).
+  16,
+
+  // Min/max stack offset - byte addressing is the worst, VecQ addressing the best.
+  4095, 65520,
+
+  // Instruction hints [Gp, Vec, ExtraVirt2, ExtraVirt3].
+  {{
+    InstHints::kPushPop,
+    InstHints::kPushPop,
+    InstHints::kNoHints,
+    InstHints::kNoHints
+  }},
+
+  // RegInfo.
+  #define V(index) OperandSignature{arm::RegTraits<RegType(index)>::kSignature}
+  {{ ASMJIT_LOOKUP_TABLE_32(V, 0) }},
+  #undef V
+
+  // RegTypeToTypeId.
+  #define V(index) TypeId(arm::RegTraits<RegType(index)>::kTypeId)
+  {{ ASMJIT_LOOKUP_TABLE_32(V, 0) }},
+  #undef V
+
+  // TypeIdToRegType.
+  #define V(index) (index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kInt8)    ? RegType::kARM_GpW   : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUInt8)   ? RegType::kARM_GpW   : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kInt16)   ? RegType::kARM_GpW   : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUInt16)  ? RegType::kARM_GpW   : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kInt32)   ? RegType::kARM_GpW   : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUInt32)  ? RegType::kARM_GpW   : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kInt64)   ? RegType::kARM_GpX   : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUInt64)  ? RegType::kARM_GpX   : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kIntPtr)  ? RegType::kARM_GpX   : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUIntPtr) ? RegType::kARM_GpX   : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kFloat32) ? RegType::kARM_VecS  : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kFloat64) ? RegType::kARM_VecD  : RegType::kNone)
+  {{ ASMJIT_LOOKUP_TABLE_32(V, 0) }},
+  #undef V
+
+  // Word names of 8-bit, 16-bit, 32-bit, and 64-bit quantities.
+  {
+    ArchTypeNameId::kByte,
+    ArchTypeNameId::kHWord,
+    ArchTypeNameId::kWord,
+    ArchTypeNameId::kXWord
+  }
+};
+
+//! \}
+//! \endcond
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // ASMJIT_ARM_A64ARCHTRAITS_P_H_INCLUDED
diff --git a/lib/lepton/asmjit/arm/a64assembler.cpp b/lib/lepton/asmjit/arm/a64assembler.cpp
new file mode 100644
index 0000000000..485f05f491
--- /dev/null
+++ b/lib/lepton/asmjit/arm/a64assembler.cpp
@@ -0,0 +1,5115 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#if !defined(ASMJIT_NO_AARCH64)
+
+#include "../core/codewriter_p.h"
+#include "../core/cpuinfo.h"
+#include "../core/emitterutils_p.h"
+#include "../core/formatter.h"
+#include "../core/logger.h"
+#include "../core/misc_p.h"
+#include "../core/support.h"
+#include "../arm/armformatter_p.h"
+#include "../arm/a64assembler.h"
+#include "../arm/a64emithelper_p.h"
+#include "../arm/a64instdb_p.h"
+#include "../arm/a64utils.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(a64)
+
+// a64::Assembler - Cond
+// =====================
+
+static inline uint32_t condCodeToOpcodeCond(uint32_t cond) noexcept {
+  return (uint32_t(cond) - 2u) & 0xFu;
+}
+
+// a64::Assembler - Bits
+// =====================
+
+template<typename T>
+static inline constexpr uint32_t B(const T& index) noexcept { return uint32_t(1u) << uint32_t(index); }
+
+static constexpr uint32_t kSP = Gp::kIdSp;
+static constexpr uint32_t kZR = Gp::kIdZr;
+static constexpr uint32_t kWX = InstDB::kWX;
+
+// a64::Assembler - ShiftOpToLdStOptMap
+// ====================================
+
+// Table that maps ShiftOp to OPT part in LD/ST (register) opcode.
+#define VALUE(index) index == uint32_t(ShiftOp::kUXTW) ? 2u : \
+                     index == uint32_t(ShiftOp::kLSL)  ? 3u : \
+                     index == uint32_t(ShiftOp::kSXTW) ? 6u : \
+                     index == uint32_t(ShiftOp::kSXTX) ? 7u : 0xFF
+static const uint8_t armShiftOpToLdStOptMap[] = { ASMJIT_LOOKUP_TABLE_16(VALUE, 0) };
+#undef VALUE
+
+static inline constexpr uint32_t diff(RegType a, RegType b) noexcept {
+  return uint32_t(a) - uint32_t(b);
+}
+
+// asmjit::a64::Assembler - SizeOp
+// ===============================
+
+//! Struct that contains Size (2 bits), Q flag, and S (scalar) flag. These values
+//! are used to encode Q, Size, and Scalar fields in an opcode.
+struct SizeOp {
+  enum : uint8_t {
+    k128BitShift = 0,
+    kScalarShift = 1,
+    kSizeShift = 2,
+
+    kQ = uint8_t(1u << k128BitShift),
+    kS = uint8_t(1u << kScalarShift),
+
+    k00 = uint8_t(0 << kSizeShift),
+    k01 = uint8_t(1 << kSizeShift),
+    k10 = uint8_t(2 << kSizeShift),
+    k11 = uint8_t(3 << kSizeShift),
+
+    k00Q = k00 | kQ,
+    k01Q = k01 | kQ,
+    k10Q = k10 | kQ,
+    k11Q = k11 | kQ,
+
+    k00S = k00 | kS,
+    k01S = k01 | kS,
+    k10S = k10 | kS,
+    k11S = k11 | kS,
+
+    kInvalid = 0xFFu,
+
+    // Masks used by SizeOpMap.
+    kSzQ = (0x3u << kSizeShift) | kQ,
+    kSzS = (0x3u << kSizeShift) | kS,
+    kSzQS = (0x3u << kSizeShift) | kQ | kS
+  };
+
+  uint8_t value;
+
+  inline bool isValid() const noexcept { return value != kInvalid; }
+  inline void makeInvalid() noexcept { value = kInvalid; }
+
+  inline uint32_t q() const noexcept { return (value >> k128BitShift) & 0x1u; }
+  inline uint32_t qs() const noexcept { return ((value >> k128BitShift) | (value >> kScalarShift)) & 0x1u; }
+  inline uint32_t scalar() const noexcept { return (value >> kScalarShift) & 0x1u; }
+  inline uint32_t size() const noexcept { return (value >> kSizeShift) & 0x3u; }
+
+  inline void decrementSize() noexcept {
+    ASMJIT_ASSERT(size() > 0);
+    value = uint8_t(value - (1u << kSizeShift));
+  }
+};
+
+struct SizeOpTable {
+  enum TableId : uint8_t {
+    kTableBin = 0,
+    kTableAny,
+    kCount
+  };
+
+  // 40 elements for each combination.
+  SizeOp array[(uint32_t(RegType::kARM_VecV) - uint32_t(RegType::kARM_VecB) + 1) * 8];
+};
+
+#define VALUE_BIN(x) { \
+  x == (((uint32_t(RegType::kARM_VecD) - uint32_t(RegType::kARM_VecB)) << 3) | (Vec::kElementTypeNone)) ? SizeOp::k00  : \
+  x == (((uint32_t(RegType::kARM_VecV) - uint32_t(RegType::kARM_VecB)) << 3) | (Vec::kElementTypeNone)) ? SizeOp::k00Q : \
+  x == (((uint32_t(RegType::kARM_VecD) - uint32_t(RegType::kARM_VecB)) << 3) | (Vec::kElementTypeB   )) ? SizeOp::k00  : \
+  x == (((uint32_t(RegType::kARM_VecV) - uint32_t(RegType::kARM_VecB)) << 3) | (Vec::kElementTypeB   )) ? SizeOp::k00Q : SizeOp::kInvalid \
+}
+
+#define VALUE_ANY(x) { \
+  x == (((uint32_t(RegType::kARM_VecB) - uint32_t(RegType::kARM_VecB)) << 3) | (Vec::kElementTypeNone)) ? SizeOp::k00S : \
+  x == (((uint32_t(RegType::kARM_VecH) - uint32_t(RegType::kARM_VecB)) << 3) | (Vec::kElementTypeNone)) ? SizeOp::k01S : \
+  x == (((uint32_t(RegType::kARM_VecS) - uint32_t(RegType::kARM_VecB)) << 3) | (Vec::kElementTypeNone)) ? SizeOp::k10S : \
+  x == (((uint32_t(RegType::kARM_VecD) - uint32_t(RegType::kARM_VecB)) << 3) | (Vec::kElementTypeNone)) ? SizeOp::k11S : \
+  x == (((uint32_t(RegType::kARM_VecD) - uint32_t(RegType::kARM_VecB)) << 3) | (Vec::kElementTypeB   )) ? SizeOp::k00  : \
+  x == (((uint32_t(RegType::kARM_VecV) - uint32_t(RegType::kARM_VecB)) << 3) | (Vec::kElementTypeB   )) ? SizeOp::k00Q : \
+  x == (((uint32_t(RegType::kARM_VecD) - uint32_t(RegType::kARM_VecB)) << 3) | (Vec::kElementTypeH   )) ? SizeOp::k01  : \
+  x == (((uint32_t(RegType::kARM_VecV) - uint32_t(RegType::kARM_VecB)) << 3) | (Vec::kElementTypeH   )) ? SizeOp::k01Q : \
+  x == (((uint32_t(RegType::kARM_VecD) - uint32_t(RegType::kARM_VecB)) << 3) | (Vec::kElementTypeS   )) ? SizeOp::k10  : \
+  x == (((uint32_t(RegType::kARM_VecV) - uint32_t(RegType::kARM_VecB)) << 3) | (Vec::kElementTypeS   )) ? SizeOp::k10Q : \
+  x == (((uint32_t(RegType::kARM_VecD) - uint32_t(RegType::kARM_VecB)) << 3) | (Vec::kElementTypeD   )) ? SizeOp::k11S : \
+  x == (((uint32_t(RegType::kARM_VecV) - uint32_t(RegType::kARM_VecB)) << 3) | (Vec::kElementTypeD   )) ? SizeOp::k11Q : SizeOp::kInvalid \
+}
+
+static const SizeOpTable sizeOpTable[SizeOpTable::kCount] = {
+  {{ ASMJIT_LOOKUP_TABLE_40(VALUE_BIN, 0) }},
+  {{ ASMJIT_LOOKUP_TABLE_40(VALUE_ANY, 0) }}
+};
+
+#undef VALUE_ANY
+#undef VALUE_BIN
+
+struct SizeOpMap {
+  uint8_t tableId;
+  uint8_t sizeOpMask;
+  uint16_t acceptMask;
+};
+
+static const constexpr SizeOpMap sizeOpMap[InstDB::kVO_Count] = {
+  { // kVO_V_B:
+    SizeOpTable::kTableBin, SizeOp::kQ   , uint16_t(B(SizeOp::k00) | B(SizeOp::k00Q))
+  },
+
+  { // kVO_V_BH:
+    SizeOpTable::kTableAny, SizeOp::kSzQS, uint16_t(B(SizeOp::k00) | B(SizeOp::k00Q) | B(SizeOp::k01) | B(SizeOp::k01Q))
+  },
+
+  { // kVO_V_BH_4S:
+    SizeOpTable::kTableAny, SizeOp::kSzQS, uint16_t(B(SizeOp::k00) | B(SizeOp::k00Q) | B(SizeOp::k01) | B(SizeOp::k01Q) | B(SizeOp::k10Q))
+  },
+
+  { // kVO_V_BHS:
+    SizeOpTable::kTableAny, SizeOp::kSzQS, uint16_t(B(SizeOp::k00) | B(SizeOp::k00Q) | B(SizeOp::k01) | B(SizeOp::k01Q) | B(SizeOp::k10) | B(SizeOp::k10Q))
+  },
+
+  { // kVO_V_BHS_D2:
+    SizeOpTable::kTableAny, SizeOp::kSzQS, uint16_t(B(SizeOp::k00) | B(SizeOp::k00Q) | B(SizeOp::k01) | B(SizeOp::k01Q) | B(SizeOp::k10) | B(SizeOp::k10Q) | B(SizeOp::k11Q))
+  },
+
+  { // kVO_V_HS:
+    SizeOpTable::kTableAny, SizeOp::kSzQS, uint16_t(B(SizeOp::k01) | B(SizeOp::k01Q) | B(SizeOp::k10) | B(SizeOp::k10Q))
+  },
+
+  { // kVO_V_S:
+    SizeOpTable::kTableAny, SizeOp::kQ   , uint16_t(B(SizeOp::k10) | B(SizeOp::k10Q))
+  },
+
+  { // kVO_V_B8H4:
+    SizeOpTable::kTableAny, SizeOp::kSzQS, uint16_t(B(SizeOp::k00) | B(SizeOp::k01))
+  },
+
+  { // kVO_V_B8H4S2:
+    SizeOpTable::kTableAny, SizeOp::kSzQS, uint16_t(B(SizeOp::k00) | B(SizeOp::k01) | B(SizeOp::k10))
+  },
+
+  { // kVO_V_B8D1:
+    SizeOpTable::kTableAny, SizeOp::kSzQ , uint16_t(B(SizeOp::k00) | B(SizeOp::k11S))
+  },
+
+  { // kVO_V_H4S2:
+    SizeOpTable::kTableAny, SizeOp::kSzQS, uint16_t(B(SizeOp::k01) | B(SizeOp::k10))
+  },
+
+  { // kVO_V_B16:
+    SizeOpTable::kTableBin, SizeOp::kQ   , uint16_t(B(SizeOp::k00Q))
+  },
+
+  { // kVO_V_B16H8:
+    SizeOpTable::kTableAny, SizeOp::kSzQS, uint16_t(B(SizeOp::k00Q) | B(SizeOp::k01Q))
+  },
+
+  { // kVO_V_B16H8S4:
+    SizeOpTable::kTableAny, SizeOp::kSzQS, uint16_t(B(SizeOp::k00Q) | B(SizeOp::k01Q) | B(SizeOp::k10Q))
+  },
+
+  { // kVO_V_B16D2:
+    SizeOpTable::kTableAny, SizeOp::kSzQS, uint16_t(B(SizeOp::k00Q) | B(SizeOp::k11Q))
+  },
+
+  { // kVO_V_H8S4:
+    SizeOpTable::kTableAny, SizeOp::kSzQS, uint16_t(B(SizeOp::k01Q) | B(SizeOp::k10Q))
+  },
+
+  { // kVO_V_S4:
+    SizeOpTable::kTableAny, 0            , uint16_t(B(SizeOp::k10Q))
+  },
+
+  { // kVO_V_D2:
+    SizeOpTable::kTableAny, 0            , uint16_t(B(SizeOp::k11Q))
+  },
+
+  { // kVO_SV_BHS:
+    SizeOpTable::kTableAny, SizeOp::kSzQS, uint16_t(B(SizeOp::k00) | B(SizeOp::k00Q) | B(SizeOp::k00S) | B(SizeOp::k01) | B(SizeOp::k01Q) | B(SizeOp::k01S) | B(SizeOp::k10) | B(SizeOp::k10Q) | B(SizeOp::k10S))
+  },
+
+  { // kVO_SV_B8H4S2:
+    SizeOpTable::kTableAny, SizeOp::kSzQS, uint16_t(B(SizeOp::k00) | B(SizeOp::k00S) | B(SizeOp::k01) | B(SizeOp::k01S) | B(SizeOp::k10) | B(SizeOp::k10S))
+  },
+
+  { // kVO_SV_HS:
+    SizeOpTable::kTableAny, SizeOp::kSzQS, uint16_t(B(SizeOp::k01) | B(SizeOp::k01Q) | B(SizeOp::k01S) | B(SizeOp::k10) | B(SizeOp::k10Q) | B(SizeOp::k10S))
+  },
+
+  { // kVO_V_Any:
+    SizeOpTable::kTableAny, SizeOp::kSzQS, uint16_t(B(SizeOp::k00) | B(SizeOp::k00Q) | B(SizeOp::k01) | B(SizeOp::k01Q) | B(SizeOp::k10) | B(SizeOp::k10Q) | B(SizeOp::k11S) | B(SizeOp::k11Q))
+  },
+
+  { // kVO_SV_Any:
+    SizeOpTable::kTableAny, SizeOp::kSzQS, uint16_t(B(SizeOp::k00) | B(SizeOp::k00Q) | B(SizeOp::k00S) |
+                                                    B(SizeOp::k01) | B(SizeOp::k01Q) | B(SizeOp::k01S) |
+                                                    B(SizeOp::k10) | B(SizeOp::k10Q) | B(SizeOp::k10S) |
+                                                    B(SizeOp::k11) | B(SizeOp::k11Q) | B(SizeOp::k11S))
+  }
+};
+
+static const Operand_& significantSimdOp(const Operand_& o0, const Operand_& o1, uint32_t instFlags) noexcept {
+  return !(instFlags & InstDB::kInstFlagLong) ? o0 : o1;
+}
+
+static inline SizeOp armElementTypeToSizeOp(uint32_t vecOpType, RegType regType, uint32_t elementType) noexcept {
+  // Instruction data or Assembler is wrong if this triggers an assertion failure.
+  ASMJIT_ASSERT(vecOpType < InstDB::kVO_Count);
+  // ElementType uses 3 bits in the operand signature, it should never overflow.
+  ASMJIT_ASSERT(elementType <= 0x7u);
+
+  const SizeOpMap& map = sizeOpMap[vecOpType];
+  const SizeOpTable& table = sizeOpTable[map.tableId];
+  // Register types outside VecB..VecV clamp to row 5 (index 40..47), which is past the 40-element table - reject them.
+  size_t index = (Support::min<uint32_t>(diff(regType, RegType::kARM_VecB), diff(RegType::kARM_VecV, RegType::kARM_VecB) + 1) << 3) | elementType;
+  if (index >= sizeof(table.array) / sizeof(table.array[0])) return SizeOp { uint8_t(SizeOp::kInvalid) };
+  SizeOp op = table.array[index];
+  SizeOp modifiedOp { uint8_t(op.value & map.sizeOpMask) };
+  // Combinations not listed in the accept mask of this vecOpType are reported as invalid.
+  if (!Support::bitTest(map.acceptMask, op.value))
+    modifiedOp.makeInvalid();
+
+  return modifiedOp;
+}
+
+// a64::Assembler - Immediate Encoding Utilities (Integral)
+// ========================================================
+
+using Utils::LogicalImm;
+
+struct HalfWordImm {
+  uint32_t hw;
+  uint32_t inv;
+  uint32_t imm;
+};
+
+struct LMHImm {
+  uint32_t lm;
+  uint32_t h;
+  uint32_t maxRmId;
+};
+
+static inline uint32_t countZeroHalfWords64(uint64_t imm) noexcept {
+  return uint32_t((imm & 0x000000000000FFFFu) == 0) +
+         uint32_t((imm & 0x00000000FFFF0000u) == 0) +
+         uint32_t((imm & 0x0000FFFF00000000u) == 0) +
+         uint32_t((imm & 0xFFFF000000000000u) == 0) ;
+}
+
+static uint32_t encodeMovSequence32(uint32_t out[2], uint32_t imm, uint32_t rd, uint32_t x) noexcept {
+  ASMJIT_ASSERT(rd <= 31);
+
+  uint32_t kMovZ = 0b01010010100000000000000000000000 | (x << 31);
+  uint32_t kMovN = 0b00010010100000000000000000000000;
+  uint32_t kMovK = 0b01110010100000000000000000000000;
+
+  if ((imm & 0xFFFF0000u) == 0x00000000u) {
+    out[0] = kMovZ | (0 << 21) | ((imm & 0xFFFFu) << 5) | rd;
+    return 1;
+  }
+
+  if ((imm & 0xFFFF0000u) == 0xFFFF0000u) {
+    out[0] = kMovN | (0 << 21) | ((~imm & 0xFFFFu) << 5) | rd;
+    return 1;
+  }
+
+  if ((imm & 0x0000FFFFu) == 0x00000000u) {
+    out[0] = kMovZ | (1 << 21) | ((imm >> 16) << 5) | rd;
+    return 1;
+  }
+
+  if ((imm & 0x0000FFFFu) == 0x0000FFFFu) {
+    out[0] = kMovN | (1 << 21) | ((~imm >> 16) << 5) | rd;
+    return 1;
+  }
+
+  out[0] = kMovZ | (0 << 21) | ((imm & 0xFFFFu) << 5) | rd;
+  out[1] = kMovK | (1 << 21) | ((imm     >> 16) << 5) | rd;
+  return 2;
+}
+
+static uint32_t encodeMovSequence64(uint32_t out[4], uint64_t imm, uint32_t rd, uint32_t x) noexcept {
+  ASMJIT_ASSERT(rd <= 31);
+
+  uint32_t kMovZ = 0b11010010100000000000000000000000;
+  uint32_t kMovN = 0b10010010100000000000000000000000;
+  uint32_t kMovK = 0b11110010100000000000000000000000;
+
+  if (imm <= 0xFFFFFFFFu)
+    return encodeMovSequence32(out, uint32_t(imm), rd, x);
+
+  uint32_t zhw = countZeroHalfWords64( imm);
+  uint32_t ohw = countZeroHalfWords64(~imm);
+
+  if (zhw >= ohw) {
+    uint32_t op = kMovZ;
+    uint32_t count = 0;
+
+    for (uint32_t hwIndex = 0; hwIndex < 4; hwIndex++, imm >>= 16) {
+      uint32_t hwImm = uint32_t(imm & 0xFFFFu);
+      if (hwImm == 0)
+        continue;
+
+      out[count++] = op | (hwIndex << 21) | (hwImm << 5) | rd;
+      op = kMovK;
+    }
+
+    // This should not happen - zero should be handled by encodeMovSequence32().
+    ASMJIT_ASSERT(count > 0);
+
+    return count;
+  }
+  else {
+    uint32_t op = kMovN;
+    uint32_t count = 0;
+    uint32_t negMask = 0xFFFFu;
+
+    for (uint32_t hwIndex = 0; hwIndex < 4; hwIndex++, imm >>= 16) {
+      uint32_t hwImm = uint32_t(imm & 0xFFFFu);
+      if (hwImm == 0xFFFFu)
+        continue;
+
+      out[count++] = op | (hwIndex << 21) | ((hwImm ^ negMask) << 5) | rd;
+      op = kMovK;
+      negMask = 0;
+    }
+
+    if (count == 0) {
+      out[count++] = kMovN | ((0xFFFF ^ negMask) << 5) | rd;
+    }
+
+    return count;
+  }
+}
+
+static inline bool encodeLMH(uint32_t sizeField, uint32_t elementIndex, LMHImm* out) noexcept {
+  if (sizeField != 1 && sizeField != 2)
+    return false;
+
+  uint32_t hShift = 3u - sizeField;
+  uint32_t lmShift = sizeField - 1u;
+  uint32_t maxElementIndex = 15u >> sizeField;
+
+  out->h = elementIndex >> hShift;
+  out->lm = (elementIndex << lmShift) & 0x3u;
+  out->maxRmId = (8u << sizeField) - 1;
+
+  return elementIndex <= maxElementIndex;
+}
+
+// [.......A|B.......|.......C|D.......|.......E|F.......|.......G|H.......]
+static inline uint32_t encodeImm64ByteMaskToImm8(uint64_t imm) noexcept {
+  return uint32_t(((imm >> (7  - 0)) & 0b00000011) | // [.......G|H.......]
+                  ((imm >> (23 - 2)) & 0b00001100) | // [.......E|F.......]
+                  ((imm >> (39 - 4)) & 0b00110000) | // [.......C|D.......]
+                  ((imm >> (55 - 6)) & 0b11000000)); // [.......A|B.......]
+}
+
+// a64::Assembler - Opcode
+// =======================
+
+//! Helper class to store and manipulate ARM opcode.
+struct Opcode {
+  uint32_t v;
+
+  enum Bits : uint32_t {
+    kN = (1u << 22),
+    kQ = (1u << 30),
+    kX = (1u << 31)
+  };
+
+  // --------------------------------------------------------------------------
+  // [Opcode Builder]
+  // --------------------------------------------------------------------------
+
+  inline uint32_t get() const noexcept { return v; }
+  inline void reset(uint32_t value) noexcept { v = value; }
+
+  inline bool hasQ() const noexcept { return (v & kQ) != 0; }
+  inline bool hasX() const noexcept { return (v & kX) != 0; }
+
+  template<typename T>
+  inline Opcode& addImm(T value, uint32_t bitIndex) noexcept { return operator|=(uint32_t(value) << bitIndex); }
+
+  template<typename T>
+  inline Opcode& xorImm(T value, uint32_t bitIndex) noexcept { return operator^=(uint32_t(value) << bitIndex); }
+
+  template<typename T, typename Condition>
+  inline Opcode& addIf(T value, const Condition& condition) noexcept { return operator|=(condition ? uint32_t(value) : uint32_t(0)); }
+
+  inline Opcode& addLogicalImm(const LogicalImm& logicalImm) noexcept {
+    addImm(logicalImm.n, 22);
+    addImm(logicalImm.r, 16);
+    addImm(logicalImm.s, 10);
+    return *this;
+  }
+
+  inline Opcode& addReg(uint32_t id, uint32_t bitIndex) noexcept { return operator|=((id & 31u) << bitIndex); }
+  inline Opcode& addReg(const Operand_& op, uint32_t bitIndex) noexcept { return addReg(op.id(), bitIndex); }
+
+  inline Opcode& operator=(uint32_t x) noexcept { v = x; return *this; }
+  inline Opcode& operator&=(uint32_t x) noexcept { v &= x; return *this; }
+  inline Opcode& operator|=(uint32_t x) noexcept { v |= x; return *this; }
+  inline Opcode& operator^=(uint32_t x) noexcept { v ^= x; return *this; }
+
+  inline uint32_t operator&(uint32_t x) const noexcept { return v & x; }
+  inline uint32_t operator|(uint32_t x) const noexcept { return v | x; }
+  inline uint32_t operator^(uint32_t x) const noexcept { return v ^ x; }
+};
+
+// a64::Assembler - Signature Utilities
+// ====================================
+
+// TODO: [ARM] Deprecate matchSignature.
+static inline bool matchSignature(const Operand_& o0, const Operand_& o1, uint32_t instFlags) noexcept {
+  if (!(instFlags & (InstDB::kInstFlagLong | InstDB::kInstFlagNarrow)))
+    return o0.signature() == o1.signature();
+
+  // TODO: [ARM] Something smart to validate this.
+  return true;
+}
+
+static inline bool matchSignature(const Operand_& o0, const Operand_& o1, const Operand_& o2, uint32_t instFlags) noexcept {
+  return matchSignature(o0, o1, instFlags) && o1.signature() == o2.signature();
+}
+
+static inline bool matchSignature(const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3, uint32_t instFlags) noexcept {
+  return matchSignature(o0, o1, instFlags) && o1.signature() == o2.signature() && o2.signature() == o3.signature(); // o0/o1 check is flag-aware; o1..o3 must be identical.
+}
+
+// Memory must be either:
+// 1. Absolute address, which will be converted to relative.
+// 2. Relative displacement (Label).
+// 3. Base register + either offset or index.
+static inline bool armCheckMemBaseIndexRel(const Mem& mem) noexcept {
+  // Allowed base types (Nothing, Label, and GpX).
+  constexpr uint32_t kBaseMask  = B(0) |
+                                  B(RegType::kLabelTag) |
+                                  B(RegType::kARM_GpX);
+
+  // Allowed index types (Nothing, GpW, and GpX).
+  constexpr uint32_t kIndexMask = B(0) |
+                                  B(RegType::kARM_GpW) |
+                                  B(RegType::kARM_GpX) ;
+
+  RegType baseType = mem.baseType();
+  RegType indexType = mem.indexType();
+
+  if (!Support::bitTest(kBaseMask, baseType))
+    return false;
+
+  if (baseType > RegType::kLabelTag) {
+    // Index allows either GpW or GpX.
+    if (!Support::bitTest(kIndexMask, indexType))
+      return false;
+
+    if (indexType == RegType::kNone)
+      return true;
+    else
+      return !mem.hasOffset();
+  }
+  else {
+    // No index register allowed if this is a PC relative address (literal).
+    return indexType == RegType::kNone;
+  }
+}
+
+struct EncodeFpOpcodeBits {
+  uint32_t sizeMask;
+  uint32_t mask[3];
+};
+
+static inline bool pickFpOpcode(const Vec& reg, uint32_t sOp, uint32_t sHf, uint32_t vOp, uint32_t vHf, Opcode* opcode, uint32_t* szOut) noexcept {
+  static constexpr uint32_t kQBitIndex = 30;
+
+  static const EncodeFpOpcodeBits szBits[InstDB::kHF_Count] = {
+    { B(2) | B(1)       , { 0u                           , 0u, B(22) } },
+    { B(2) | B(1) | B(0), { 0u                           , 0u, 0u    } },
+    { B(2) | B(1) | B(0), { B(23) | B(22)                , 0u, B(22) } },
+    { B(2) | B(1) | B(0), { B(22) | B(20) | B(19)        , 0u, B(22) } },
+    { B(2) | B(1) | B(0), { B(22) | B(21) | B(15) | B(14), 0u, B(22) } },
+    { B(2) | B(1) | B(0), { B(23)                        , 0u, B(22) } }
+  };
+
+  if (!reg.hasElementType()) {
+    // Scalar operation [HSD].
+    uint32_t sz = diff(reg.type(), RegType::kARM_VecH);
+    if (sz > 2u || !Support::bitTest(szBits[sHf].sizeMask, sz))
+      return false;
+
+    opcode->reset(szBits[sHf].mask[sz] ^ sOp);
+    *szOut = sz;
+    return sOp != 0;
+  }
+  else {
+    // Vector operation [HSD].
+    uint32_t q = diff(reg.type(), RegType::kARM_VecD);
+    uint32_t sz = reg.elementType() - Vec::kElementTypeH;
+
+    if (q > 1u || sz > 2u || !Support::bitTest(szBits[vHf].sizeMask, sz))
+      return false;
+
+    opcode->reset(szBits[vHf].mask[sz] ^ (vOp | (q << kQBitIndex)));
+    *szOut = sz;
+    return vOp != 0;
+  }
+}
+
+static inline bool pickFpOpcode(const Vec& reg, uint32_t sOp, uint32_t sHf, uint32_t vOp, uint32_t vHf, Opcode* opcode) noexcept {
+  uint32_t sz;
+  return pickFpOpcode(reg, sOp, sHf, vOp, vHf, opcode, &sz);
+}
+
+// a64::Assembler - Operand Checks
+// ===============================
+
+// Checks whether all operands have the same signature.
+static inline bool checkSignature(const Operand_& o0, const Operand_& o1) noexcept {
+  return o0.signature() == o1.signature();
+}
+
+static inline bool checkSignature(const Operand_& o0, const Operand_& o1, const Operand_& o2) noexcept {
+  return o0.signature() == o1.signature() &&
+         o1.signature() == o2.signature();
+}
+
+static inline bool checkSignature(const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3) noexcept {
+  return o0.signature() == o1.signature() &&
+         o1.signature() == o2.signature() &&
+         o2.signature() == o3.signature();
+}
+
+// Checks whether the register is GP register of the allowed types.
+//
+// Allowed is a 2-bit mask, where the first bit allows GpW and the second bit
+// allows GpX. These bits are usually stored within the instruction, but could
+// also be hardcoded in the assembler for instructions where GP types are not
+// selectable.
+static inline bool checkGpType(const Operand_& op, uint32_t allowed) noexcept {
+  RegType type = op.as<Reg>().type();
+  return Support::bitTest(allowed << uint32_t(RegType::kARM_GpW), type);
+}
+
+static inline bool checkGpType(const Operand_& op, uint32_t allowed, uint32_t* x) noexcept {
+  // NOTE: We set 'x' to one only when GpW is allowed, otherwise the X is part
+  // of the opcode and we cannot set it. This is why this works without requiring
+  // additional logic.
+  RegType type = op.as<Reg>().type();
+  *x = diff(type, RegType::kARM_GpW) & allowed;
+  return Support::bitTest(allowed << uint32_t(RegType::kARM_GpW), type);
+}
+
+static inline bool checkGpType(const Operand_& o0, const Operand_& o1, uint32_t allowed, uint32_t* x) noexcept {
+  return checkGpType(o0, allowed, x) && checkSignature(o0, o1);
+}
+
+static inline bool checkGpType(const Operand_& o0, const Operand_& o1, const Operand_& o2, uint32_t allowed, uint32_t* x) noexcept {
+  return checkGpType(o0, allowed, x) && checkSignature(o0, o1, o2);
+}
+
+static inline bool checkGpId(const Operand_& op, uint32_t hiId = kZR) noexcept {
+  uint32_t id = op.as<Reg>().id();
+  return id < 31u || id == hiId;
+}
+
+static inline bool checkGpId(const Operand_& o0, const Operand_& o1, uint32_t hiId = kZR) noexcept {
+  uint32_t id0 = o0.as<Reg>().id();
+  uint32_t id1 = o1.as<Reg>().id();
+
+  return (id0 < 31u || id0 == hiId) && (id1 < 31u || id1 == hiId);
+}
+
+static inline bool checkGpId(const Operand_& o0, const Operand_& o1, const Operand_& o2, uint32_t hiId = kZR) noexcept {
+  uint32_t id0 = o0.as<Reg>().id();
+  uint32_t id1 = o1.as<Reg>().id();
+  uint32_t id2 = o2.as<Reg>().id();
+
+  return (id0 < 31u || id0 == hiId) && (id1 < 31u || id1 == hiId) && (id2 < 31u || id2 == hiId);
+}
+
+static inline bool checkVecId(const Operand_& op) noexcept {
+  uint32_t id = op.as<Reg>().id();
+  return id <= 31u;
+}
+
+static inline bool checkVecId(const Operand_& o0, const Operand_& o1) noexcept {
+  // OR-ing the ids folds two range checks into one: the result stays below 32
+  // only when neither id has a bit set above bit 4, i.e. both are in 0..31.
+  const uint32_t merged = o0.as<Reg>().id() | o1.as<Reg>().id();
+  return merged < 32u;
+}
+
+/* Unused at the moment.
+static inline bool checkVecId(const Operand_& o0, const Operand_& o1, const Operand_& o2) noexcept {
+  uint32_t id0 = o0.as<Reg>().id();
+  uint32_t id1 = o1.as<Reg>().id();
+  uint32_t id2 = o2.as<Reg>().id();
+
+  return (id0 | id1 | id2) <= 31u;
+}
+
+static inline bool checkVecId(const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3) noexcept {
+  uint32_t id0 = o0.as<Reg>().id();
+  uint32_t id1 = o1.as<Reg>().id();
+  uint32_t id2 = o2.as<Reg>().id();
+  uint32_t id3 = o3.as<Reg>().id();
+
+  return (id0 | id1 | id2 | id3) <= 31u;
+}
+*/
+
+static inline bool checkMemBase(const Mem& mem) noexcept {
+  return mem.baseId() < 32u && mem.baseType() == RegType::kARM_GpX; // Base must be typed kARM_GpX with an id of at most 31.
+}
+
+static inline bool checkEven(const Operand_& o0, const Operand_& o1) noexcept {
+  return !((o0.id() | o1.id()) & 1u); // Bit 0 of the OR-ed ids is clear only when both ids are even.
+}
+
+static inline bool checkConsecutive(const Operand_& o0, const Operand_& o1) noexcept {
+  return o1.id() == ((o0.id() + 1u) % 32u); // o1 must follow o0; the successor wraps modulo 32 (31 -> 0).
+}
+
+static inline bool checkConsecutive(const Operand_& o0, const Operand_& o1, const Operand_& o2) noexcept {
+  const uint32_t first = o0.id(); // Successor ids are computed modulo 32, so 31 is followed by 0.
+  return o1.id() == ((first + 1u) % 32u) && o2.id() == ((first + 2u) % 32u);
+}
+
+static inline bool checkConsecutive(const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3) noexcept {
+  const uint32_t first = o0.id(); // Successor ids are computed modulo 32, so 31 is followed by 0.
+  return o1.id() == ((first + 1u) % 32u) && o2.id() == ((first + 2u) % 32u) &&
+         o3.id() == ((first + 3u) % 32u);
+}
+
+// a64::Assembler - CheckReg
+// =========================
+
+#define V(index) (((index) == uint32_t(RegType::kARM_GpW)  ||             \
+                   (index) == uint32_t(RegType::kARM_GpX)) ? Gp::kIdZr :  \
+                  ((index) == uint32_t(RegType::kARM_VecB) ||             \
+                   (index) == uint32_t(RegType::kARM_VecH) ||             \
+                   (index) == uint32_t(RegType::kARM_VecS) ||             \
+                   (index) == uint32_t(RegType::kARM_VecD) ||             \
+                   (index) == uint32_t(RegType::kARM_VecV)) ? 31u : 0)
+static const Support::Array<uint8_t, 32> commonHiRegIdOfType = {{ // Per RegType value: the one id accepted in addition to 0..30.
+  ASMJIT_LOOKUP_TABLE_32(V, 0)
+}};
+#undef V
+
+static inline bool checkValidRegs(const Operand_& o0) noexcept {
+  return ((o0.id() == commonHiRegIdOfType[o0.as<Reg>().type()]) | (o0.id() <= 30)); // Id is 0..30 or the extra id listed for its RegType.
+}
+
+static inline bool checkValidRegs(const Operand_& o0, const Operand_& o1) noexcept {
+  // Applies the single-operand test to each register and combines the results with a non-short-circuit '&'.
+  return checkValidRegs(o0) & checkValidRegs(o1);
+}
+
+static inline bool checkValidRegs(const Operand_& o0, const Operand_& o1, const Operand_& o2) noexcept {
+  // Applies the single-operand test to each register and combines the results with a non-short-circuit '&'.
+  return checkValidRegs(o0) & checkValidRegs(o1) &
+         checkValidRegs(o2);
+}
+
+static inline bool checkValidRegs(const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3) noexcept {
+  // Applies the single-operand test to each of the four registers and combines
+  // the results with a non-short-circuit '&', so every operand is always examined.
+  return checkValidRegs(o0) & checkValidRegs(o1) &
+         checkValidRegs(o2) & checkValidRegs(o3);
+}
+
+// a64::Assembler - Construction & Destruction
+// ===========================================
+
+Assembler::Assembler(CodeHolder* code) noexcept : BaseAssembler() {
+  // The architecture mask gets a single bit at position Arch::kAArch64; emitter funcs are assigned before attaching.
+  _archMask = uint64_t(1) << uint32_t(Arch::kAArch64);
+  assignEmitterFuncs(this);
+  if (code != nullptr) // A null CodeHolder is accepted; no attach() call is made in that case.
+    code->attach(this);
+}
+
+Assembler::~Assembler() noexcept {} // Empty body; only the implicit base/member destruction runs.
+
+// a64::Assembler - Emit
+// =====================
+
+#define ENC_OPS1(OP0) \
+  (uint32_t(OperandType::k##OP0))
+// ENC_OPS2: like ENC_OPS1 (one OperandType, unshifted) plus a second OperandType shifted left by 3.
+#define ENC_OPS2(OP0, OP1) \
+  (uint32_t(OperandType::k##OP0) + \
+  (uint32_t(OperandType::k##OP1) << 3))
+// ENC_OPS3: adds a third OperandType shifted left by 6.
+#define ENC_OPS3(OP0, OP1, OP2) \
+  (uint32_t(OperandType::k##OP0) + \
+  (uint32_t(OperandType::k##OP1) << 3) + \
+  (uint32_t(OperandType::k##OP2) << 6))
+// ENC_OPS4: adds a fourth OperandType shifted left by 9 - the same layout as 'isign4' in Assembler::_emit().
+#define ENC_OPS4(OP0, OP1, OP2, OP3) \
+  (uint32_t(OperandType::k##OP0) + \
+  (uint32_t(OperandType::k##OP1) << 3) + \
+  (uint32_t(OperandType::k##OP2) << 6) + \
+  (uint32_t(OperandType::k##OP3) << 9))
+
+Error Assembler::_emit(InstId instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_* opExt) {
+  // Logging/Validation/Error.
+  constexpr InstOptions kRequiresSpecialHandling = InstOptions::kReserved;
+
+  Error err;
+  CodeWriter writer(this);
+
+  // Combine all instruction options and also check whether the instruction
+  // is valid. All options that require special handling (including invalid
+  // instruction) are handled by the next branch.
+  InstOptions options = InstOptions(instId - 1 >= Inst::_kIdCount - 1) | InstOptions((size_t)(_bufferEnd - writer.cursor()) < 4) | instOptions() | forcedInstOptions();
+
+  CondCode instCC = BaseInst::extractARMCondCode(instId);
+  instId = instId & uint32_t(InstIdParts::kRealId);
+
+  if (instId >= Inst::_kIdCount)
+    instId = 0;
+
+  const InstDB::InstInfo* instInfo = &InstDB::_instInfoTable[instId];
+  uint32_t encodingIndex = instInfo->_encodingDataIndex;
+
+  Opcode opcode;
+  uint32_t isign4;
+  uint32_t instFlags;
+
+  const Operand_& o3 = opExt[EmitterUtils::kOp3];
+  const Operand_* rmRel = nullptr;
+
+  uint32_t multipleOpData[4];
+  uint32_t multipleOpCount;
+
+  // These are only used when instruction uses a relative displacement.
+  OffsetFormat offsetFormat;     // Offset format.
+  uint64_t offsetValue;          // Offset value (if known).
+
+  if (ASMJIT_UNLIKELY(Support::test(options, kRequiresSpecialHandling))) {
+    if (ASMJIT_UNLIKELY(!_code))
+      return reportError(DebugUtils::errored(kErrorNotInitialized));
+
+    // Unknown instruction.
+    if (ASMJIT_UNLIKELY(instId == 0))
+      goto InvalidInstruction;
+
+    // Condition code can only be used with 'B' instruction.
+    if (ASMJIT_UNLIKELY(instCC != CondCode::kAL && instId != Inst::kIdB))
+      goto InvalidInstruction;
+
+    // Grow request, happens rarely.
+    err = writer.ensureSpace(this, 4);
+    if (ASMJIT_UNLIKELY(err))
+      goto Failed;
+
+#ifndef ASMJIT_NO_VALIDATION
+    // Strict validation.
+    if (hasDiagnosticOption(DiagnosticOptions::kValidateAssembler)) {
+      Operand_ opArray[Globals::kMaxOpCount];
+      EmitterUtils::opArrayFromEmitArgs(opArray, o0, o1, o2, opExt);
+
+      err = _funcs.validate(arch(), BaseInst(instId, options, _extraReg), opArray, Globals::kMaxOpCount, ValidationFlags::kNone);
+      if (ASMJIT_UNLIKELY(err))
+        goto Failed;
+    }
+#endif
+  }
+
+  // Signature of the first 4 operands.
+  isign4 = (uint32_t(o0.opType())     ) +
+           (uint32_t(o1.opType()) << 3) +
+           (uint32_t(o2.opType()) << 6) +
+           (uint32_t(o3.opType()) << 9);
+  instFlags = instInfo->flags();
+
+  switch (instInfo->_encoding) {
+    // ------------------------------------------------------------------------
+    // [Base - Universal]
+    // ------------------------------------------------------------------------
+
+    case InstDB::kEncodingBaseOp: {
+      const InstDB::EncodingData::BaseOp& opData = InstDB::EncodingData::baseOp[encodingIndex];
+
+      if (isign4 == 0) {
+        opcode.reset(opData.opcode);
+        goto EmitOp;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingBaseOpImm: {
+      const InstDB::EncodingData::BaseOpImm& opData = InstDB::EncodingData::baseOpImm[encodingIndex];
+
+      if (isign4 == ENC_OPS1(Imm)) {
+        uint64_t imm = o0.as<Imm>().valueAs<uint64_t>();
+        uint32_t immMax = 1u << opData.immBits;
+
+        if (imm >= immMax)
+          goto InvalidImmediate;
+
+        opcode.reset(opData.opcode);
+        opcode.addImm(imm, opData.immOffset);
+        goto EmitOp;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingBaseR: {
+      const InstDB::EncodingData::BaseR& opData = InstDB::EncodingData::baseR[encodingIndex];
+
+      if (isign4 == ENC_OPS1(Reg)) {
+        if (!checkGpType(o0, opData.rType))
+          goto InvalidInstruction;
+
+        if (!checkGpId(o0, opData.rHiId))
+          goto InvalidPhysId;
+
+        opcode.reset(opData.opcode);
+        opcode.addReg(o0, opData.rShift);
+        goto EmitOp;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingBaseRR: {
+      const InstDB::EncodingData::BaseRR& opData = InstDB::EncodingData::baseRR[encodingIndex];
+
+      if (isign4 == ENC_OPS2(Reg, Reg)) {
+        uint32_t x;
+        if (!checkGpType(o0, opData.aType, &x))
+          goto InvalidInstruction;
+
+        if (!checkGpType(o1, opData.bType))
+          goto InvalidInstruction;
+
+        if (opData.uniform && !checkSignature(o0, o1))
+          goto InvalidInstruction;
+
+        if (!checkGpId(o0, opData.aHiId))
+          goto InvalidPhysId;
+
+        if (!checkGpId(o1, opData.bHiId))
+          goto InvalidPhysId;
+
+        opcode.reset(opData.opcode);
+        opcode.addImm(x, 31);
+        opcode.addReg(o1, opData.bShift);
+        opcode.addReg(o0, opData.aShift);
+        goto EmitOp;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingBaseRRR: {
+      const InstDB::EncodingData::BaseRRR& opData = InstDB::EncodingData::baseRRR[encodingIndex];
+
+      if (isign4 == ENC_OPS3(Reg, Reg, Reg)) {
+        uint32_t x;
+        if (!checkGpType(o0, opData.aType, &x))
+          goto InvalidInstruction;
+
+        if (!checkGpType(o1, opData.bType))
+          goto InvalidInstruction;
+
+        if (!checkGpType(o2, opData.cType))
+          goto InvalidInstruction;
+
+        if (opData.uniform && !checkSignature(o0, o1, o2))
+          goto InvalidInstruction;
+
+        if (!checkGpId(o0, opData.aHiId))
+          goto InvalidPhysId;
+
+        if (!checkGpId(o1, opData.bHiId))
+          goto InvalidPhysId;
+
+        if (!checkGpId(o2, opData.cHiId))
+          goto InvalidPhysId;
+
+        opcode.reset(opData.opcode());
+        opcode.addImm(x, 31);
+        opcode.addReg(o2, 16);
+        opcode.addReg(o1, 5);
+        opcode.addReg(o0, 0);
+        goto EmitOp;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingBaseRRRR: {
+      const InstDB::EncodingData::BaseRRRR& opData = InstDB::EncodingData::baseRRRR[encodingIndex];
+
+      if (isign4 == ENC_OPS4(Reg, Reg, Reg, Reg)) {
+        uint32_t x;
+        if (!checkGpType(o0, opData.aType, &x))
+          goto InvalidInstruction;
+
+        if (!checkGpType(o1, opData.bType))
+          goto InvalidInstruction;
+
+        if (!checkGpType(o2, opData.cType))
+          goto InvalidInstruction;
+
+        if (!checkGpType(o3, opData.dType))
+          goto InvalidInstruction;
+
+        if (opData.uniform && !checkSignature(o0, o1, o2, o3))
+          goto InvalidInstruction;
+
+        if (!checkGpId(o0, opData.aHiId))
+          goto InvalidPhysId;
+
+        if (!checkGpId(o1, opData.bHiId))
+          goto InvalidPhysId;
+
+        if (!checkGpId(o2, opData.cHiId))
+          goto InvalidPhysId;
+
+        if (!checkGpId(o3, opData.dHiId))
+          goto InvalidPhysId;
+
+        opcode.reset(opData.opcode());
+        opcode.addImm(x, 31);
+        opcode.addReg(o2, 16);
+        opcode.addReg(o3, 10);
+        opcode.addReg(o1, 5);
+        opcode.addReg(o0, 0);
+        goto EmitOp;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingBaseRRII: {
+      const InstDB::EncodingData::BaseRRII& opData = InstDB::EncodingData::baseRRII[encodingIndex];
+
+      if (isign4 == ENC_OPS4(Reg, Reg, Imm, Imm)) {
+        if (!checkGpType(o0, opData.aType))
+          goto InvalidInstruction;
+
+        if (!checkGpType(o1, opData.bType))
+          goto InvalidInstruction;
+
+        if (!checkGpId(o0, opData.aHiId))
+          goto InvalidPhysId;
+
+        if (!checkGpId(o1, opData.bHiId))
+          goto InvalidPhysId;
+
+        if (o2.as<Imm>().valueAs<uint64_t>() >= Support::bitMask(opData.aImmSize + opData.aImmDiscardLsb) ||
+            o3.as<Imm>().valueAs<uint64_t>() >= Support::bitMask(opData.bImmSize + opData.bImmDiscardLsb))
+          goto InvalidImmediate;
+
+        uint32_t aImm = o2.as<Imm>().valueAs<uint32_t>() >> opData.aImmDiscardLsb;
+        uint32_t bImm = o3.as<Imm>().valueAs<uint32_t>() >> opData.bImmDiscardLsb;
+
+        if ((aImm << opData.aImmDiscardLsb) != o2.as<Imm>().valueAs<uint32_t>() ||
+            (bImm << opData.bImmDiscardLsb) != o3.as<Imm>().valueAs<uint32_t>())
+          goto InvalidImmediate;
+
+        opcode.reset(opData.opcode());
+        opcode.addImm(aImm, opData.aImmOffset);
+        opcode.addImm(bImm, opData.bImmOffset);
+        opcode.addReg(o1, 5);
+        opcode.addReg(o0, 0);
+        goto EmitOp;
+      }
+
+      break;
+    }
+
+    // ------------------------------------------------------------------------
+    // [Base - Mov]
+    // ------------------------------------------------------------------------
+
+    case InstDB::kEncodingBaseMov: {
+      // MOV is a pseudo instruction that uses various instructions depending on its signature.
+      uint32_t x = diff(o0.as<Reg>().type(), RegType::kARM_GpW);
+      if (x > 1)
+        goto InvalidInstruction;
+
+      if (isign4 == ENC_OPS2(Reg, Reg)) {
+        if (!o0.as<Reg>().isGp())
+          goto InvalidInstruction;
+
+        if (!checkSignature(o0, o1))
+          goto InvalidInstruction;
+
+        bool hasSP = o0.as<Gp>().isSP() || o1.as<Gp>().isSP();
+        if (hasSP) {
+          // Cannot be combined with ZR.
+          if (!checkGpId(o0, o1, kSP))
+            goto InvalidPhysId;
+
+          // MOV Rd, Rm -> ADD Rd, Rn, #0.
+          opcode.reset(0b00010001000000000000000000000000);
+          opcode.addImm(x, 31);
+          opcode.addReg(o1, 5);
+          opcode.addReg(o0, 0);
+          goto EmitOp;
+        }
+        else {
+          if (!checkGpId(o0, o1, kZR))
+            goto InvalidPhysId;
+
+          // MOV Rd, Rm -> ORR Rd, <ZR>, Rm.
+          opcode.reset(0b00101010000000000000001111100000);
+          opcode.addImm(x, 31);
+          opcode.addReg(o1, 16);
+          opcode.addReg(o0, 0);
+          goto EmitOp;
+        }
+      }
+
+      if (isign4 == ENC_OPS2(Reg, Imm)) {
+        if (!o0.as<Reg>().isGp())
+          goto InvalidInstruction;
+
+        uint64_t immValue = o1.as<Imm>().valueAs<uint64_t>();
+        if (!x)
+          immValue &= 0xFFFFFFFFu;
+
+        // Prefer a single MOVN/MOVZ instruction over a logical instruction.
+        multipleOpCount = encodeMovSequence64(multipleOpData, immValue, o0.id() & 31, x);
+        if (multipleOpCount == 1 && !o0.as<Gp>().isSP()) {
+          opcode.reset(multipleOpData[0]);
+          goto EmitOp;
+        }
+
+        // Logical instructions use 13-bit immediate pattern encoded as N:ImmR:ImmS.
+        LogicalImm logicalImm;
+        if (!o0.as<Gp>().isZR()) {
+          if (Utils::encodeLogicalImm(immValue, x ? 64 : 32, &logicalImm)) {
+            if (!checkGpId(o0, kSP))
+              goto InvalidPhysId;
+
+            opcode.reset(0b00110010000000000000001111100000);
+            opcode.addImm(x, 31);
+            opcode.addLogicalImm(logicalImm);
+            opcode.addReg(o0, 0);
+            goto EmitOp;
+          }
+        }
+
+        if (!checkGpId(o0, kZR))
+          goto InvalidPhysId;
+
+        goto EmitOp_Multiple;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingBaseMovKNZ: {
+      const InstDB::EncodingData::BaseMovKNZ& opData = InstDB::EncodingData::baseMovKNZ[encodingIndex];
+
+      uint32_t x = diff(o0.as<Reg>().type(), RegType::kARM_GpW);
+      if (x > 1)
+        goto InvalidInstruction;
+
+      if (!checkGpId(o0, kZR))
+        goto InvalidPhysId;
+
+      opcode.reset(opData.opcode);
+      opcode.addImm(x, 31);
+
+      if (isign4 == ENC_OPS2(Reg, Imm)) {
+        uint64_t imm16 = o1.as<Imm>().valueAs<uint64_t>();
+        if (imm16 > 0xFFFFu)
+          goto InvalidImmediate;
+
+        opcode.addImm(imm16, 5);
+        opcode.addReg(o0, 0);
+        goto EmitOp;
+      }
+
+      if (isign4 == ENC_OPS3(Reg, Imm, Imm)) {
+        uint64_t imm16 = o1.as<Imm>().valueAs<uint64_t>();
+        uint32_t shiftType = o2.as<Imm>().predicate();
+        uint64_t shiftValue = o2.as<Imm>().valueAs<uint64_t>();
+
+        if (imm16 > 0xFFFFu || shiftValue > 48 || shiftType != uint32_t(ShiftOp::kLSL))
+          goto InvalidImmediate;
+
+        // Convert shift value to 'hw' field.
+        uint32_t hw = uint32_t(shiftValue) >> 4;
+        if ((hw << 4) != uint32_t(shiftValue))
+          goto InvalidImmediate;
+
+        opcode.addImm(hw, 21);
+        opcode.addImm(imm16, 5);
+        opcode.addReg(o0, 0);
+
+        if (!x && hw > 1u)
+          goto InvalidImmediate;
+
+        goto EmitOp;
+      }
+
+      break;
+    }
+
+    // ------------------------------------------------------------------------
+    // [Base - Adr]
+    // ------------------------------------------------------------------------
+
+    case InstDB::kEncodingBaseAdr: {
+      const InstDB::EncodingData::BaseAdr& opData = InstDB::EncodingData::baseAdr[encodingIndex];
+
+      if (isign4 == ENC_OPS2(Reg, Label) || isign4 == ENC_OPS2(Reg, Imm)) {
+        if (!o0.as<Reg>().isGpX())
+          goto InvalidInstruction;
+
+        if (!checkGpId(o0, kZR))
+          goto InvalidPhysId;
+
+        opcode.reset(opData.opcode());
+        opcode.addReg(o0, 0);
+        offsetFormat.resetToImmValue(opData.offsetType, 4, 5, 21, 0);
+
+        if (instId == Inst::kIdAdrp)
+          offsetFormat._immDiscardLsb = 12;
+
+        rmRel = &o1;
+        goto EmitOp_Rel;
+      }
+
+      break;
+    }
+
+    // ------------------------------------------------------------------------
+    // [Base - Arithmetic and Logical]
+    // ------------------------------------------------------------------------
+
+    case InstDB::kEncodingBaseAddSub: {
+      const InstDB::EncodingData::BaseAddSub& opData = InstDB::EncodingData::baseAddSub[encodingIndex];
+
+      uint32_t x;
+      if (!checkGpType(o0, o1, kWX, &x))
+        goto InvalidInstruction;
+
+      if (isign4 == ENC_OPS3(Reg, Reg, Imm) || isign4 == ENC_OPS4(Reg, Reg, Imm, Imm)) {
+        opcode.reset(uint32_t(opData.immediateOp) << 24);
+
+        // ADD | SUB (immediate) - ZR is not allowed.
+        // ADDS|SUBS (immediate) - ZR allowed in Rd, SP allowed in Rn.
+        uint32_t aHiId = opcode.get() & B(29) ? kZR : kSP;
+        uint32_t bHiId = kSP;
+
+        if (!checkGpId(o0, aHiId) || !checkGpId(o1, bHiId))
+          goto InvalidPhysId;
+
+        // ADD|SUB (immediate) use 12-bit immediate optionally shifted by 'LSL #12'.
+        uint64_t imm = o2.as<Imm>().valueAs<uint64_t>();
+        uint32_t shift = 0;
+
+        if (isign4 == ENC_OPS4(Reg, Reg, Imm, Imm)) {
+          if (o3.as<Imm>().predicate() != uint32_t(ShiftOp::kLSL))
+            goto InvalidImmediate;
+
+          if (o3.as<Imm>().value() != 0 && o3.as<Imm>().value() != 12)
+            goto InvalidImmediate;
+
+          shift = uint32_t(o3.as<Imm>().value() != 0);
+        }
+
+        // Accept immediate value of '0x00XXX000' by setting 'shift' to 12.
+        if (imm > 0xFFFu) {
+          if (shift || (imm & ~uint64_t(0xFFFu << 12)) != 0)
+            goto InvalidImmediate;
+          shift = 1;
+          imm >>= 12;
+        }
+
+        opcode.addImm(x, 31);
+        opcode.addImm(shift, 22);
+        opcode.addImm(imm, 10);
+        opcode.addReg(o1, 5);
+        opcode.addReg(o0, 0);
+        goto EmitOp;
+      }
+
+      if (isign4 == ENC_OPS3(Reg, Reg, Reg) || isign4 == ENC_OPS4(Reg, Reg, Reg, Imm)) {
+        if (!checkSignature(o1, o2))
+          goto InvalidInstruction;
+
+        uint32_t opSize = x ? 64 : 32;
+        uint64_t shift = 0;
+        uint32_t sType = uint32_t(ShiftOp::kLSL);
+
+        if (isign4 == ENC_OPS4(Reg, Reg, Reg, Imm)) {
+          sType = o3.as<Imm>().predicate();
+          shift = o3.as<Imm>().valueAs<uint64_t>();
+        }
+
+        if (!checkGpId(o2, kZR))
+          goto InvalidPhysId;
+
+        // Shift operation - LSL, LSR, ASR.
+        if (sType <= uint32_t(ShiftOp::kASR)) {
+          bool hasSP = o0.as<Gp>().isSP() || o1.as<Gp>().isSP();
+          if (!hasSP) {
+            if (!checkGpId(o0, o1, kZR))
+              goto InvalidPhysId;
+
+            if (shift >= opSize)
+              goto InvalidImmediate;
+
+            opcode.reset(uint32_t(opData.shiftedOp) << 21);
+            opcode.addImm(x, 31);
+            opcode.addImm(sType, 22);
+            opcode.addReg(o2, 16);
+            opcode.addImm(shift, 10);
+            opcode.addReg(o1, 5);
+            opcode.addReg(o0, 0);
+            goto EmitOp;
+          }
+
+          // SP register can only be used with LSL or Extend.
+          if (sType != uint32_t(ShiftOp::kLSL))
+            goto InvalidImmediate;
+          sType = x ? uint32_t(ShiftOp::kUXTX) : uint32_t(ShiftOp::kUXTW);
+        }
+
+        // Extend operation - UXTB, UXTH, UXTW, UXTX, SXTB, SXTH, SXTW, SXTX.
+        opcode.reset(uint32_t(opData.extendedOp) << 21);
+        sType -= uint32_t(ShiftOp::kUXTB);
+
+        if (sType > 7 || shift > 4)
+          goto InvalidImmediate;
+
+        if (!(opcode.get() & B(29))) {
+          // ADD|SUB (extend) - ZR is not allowed.
+          if (!checkGpId(o0, o1, kSP))
+            goto InvalidPhysId;
+        }
+        else {
+          // ADDS|SUBS (extend) - ZR allowed in Rd, SP allowed in Rn.
+          if (!checkGpId(o0, kZR) || !checkGpId(o1, kSP))
+            goto InvalidPhysId;
+        }
+
+        opcode.addImm(x, 31);
+        opcode.addReg(o2, 16);
+        opcode.addImm(sType, 13);
+        opcode.addImm(shift, 10);
+        opcode.addReg(o1, 5);
+        opcode.addReg(o0, 0);
+        goto EmitOp;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingBaseLogical: {
+      const InstDB::EncodingData::BaseLogical& opData = InstDB::EncodingData::baseLogical[encodingIndex];
+
+      uint32_t x;
+      if (!checkGpType(o0, o1, kWX, &x))
+        goto InvalidInstruction;
+
+      if (!checkSignature(o0, o1))
+        goto InvalidInstruction;
+
+      uint32_t opSize = x ? 64 : 32;
+
+      if (isign4 == ENC_OPS3(Reg, Reg, Imm) && opData.immediateOp != 0) {
+        opcode.reset(uint32_t(opData.immediateOp) << 23);
+
+        // AND|ANDS|BIC|BICS|ORR|EOR (immediate) uses a LogicalImm format described by N:R:S values.
+        uint64_t immMask = Support::lsbMask<uint64_t>(opSize);
+        uint64_t immValue = o2.as<Imm>().valueAs<uint64_t>();
+
+        if (opData.negateImm)
+          immValue ^= immMask;
+
+        // Logical instructions use 13-bit immediate pattern encoded as N:ImmS:ImmR.
+        LogicalImm logicalImm;
+        if (!Utils::encodeLogicalImm(immValue & immMask, opSize, &logicalImm))
+          goto InvalidImmediate;
+
+        // AND|BIC|ORR|EOR (immediate) can have SP on destination, but ANDS|BICS (immediate) cannot.
+        uint32_t kOpANDS = 0x3 << 29;
+        bool isANDS = (opcode.get() & kOpANDS) == kOpANDS;
+
+        if (!checkGpId(o0, isANDS ? kZR : kSP) || !checkGpId(o1, kZR))
+          goto InvalidPhysId;
+
+        opcode.addImm(x, 31);
+        opcode.addLogicalImm(logicalImm);
+        opcode.addReg(o1, 5);
+        opcode.addReg(o0, 0);
+        goto EmitOp;
+      }
+
+      if (!checkSignature(o1, o2))
+        goto InvalidInstruction;
+
+      if (isign4 == ENC_OPS3(Reg, Reg, Reg)) {
+        if (!checkGpId(o0, o1, o2, kZR))
+          goto InvalidPhysId;
+
+        opcode.reset(uint32_t(opData.shiftedOp) << 21);
+        opcode.addImm(x, 31);
+        opcode.addReg(o2, 16);
+        opcode.addReg(o1, 5);
+        opcode.addReg(o0, 0);
+        goto EmitOp;
+      }
+
+      if (isign4 == ENC_OPS4(Reg, Reg, Reg, Imm)) {
+        if (!checkGpId(o0, o1, o2, kZR))
+          goto InvalidPhysId;
+
+        uint32_t shiftType = o3.as<Imm>().predicate();
+        uint64_t opShift = o3.as<Imm>().valueAs<uint64_t>();
+
+        if (shiftType > 0x3 || opShift >= opSize)
+          goto InvalidImmediate;
+
+        opcode.reset(uint32_t(opData.shiftedOp) << 21);
+        opcode.addImm(x, 31);
+        opcode.addImm(shiftType, 22);
+        opcode.addReg(o2, 16);
+        opcode.addImm(opShift, 10);
+        opcode.addReg(o1, 5);
+        opcode.addReg(o0, 0);
+        goto EmitOp;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingBaseCmpCmn: {
+      const InstDB::EncodingData::BaseCmpCmn& opData = InstDB::EncodingData::baseCmpCmn[encodingIndex];
+
+      uint32_t x;
+      if (!checkGpType(o0, kWX, &x))
+        goto InvalidInstruction;
+
+      if (isign4 == ENC_OPS2(Reg, Imm)) {
+        // CMN|CMP (immediate) - ZR is not allowed.
+        if (!checkGpId(o0, kSP))
+          goto InvalidPhysId;
+
+        // CMN|CMP (immediate) use 12-bit immediate optionally shifted by 'LSL #12'.
+        const Imm& imm12 = o1.as<Imm>();
+        uint32_t immShift = 0;
+        uint64_t immValue = imm12.valueAs<uint64_t>();
+
+        if (immValue > 0xFFFu) {
+          if ((immValue & ~uint64_t(0xFFFu << 12)) != 0)
+            goto InvalidImmediate;
+          immShift = 1;
+          immValue >>= 12;
+        }
+
+        opcode.reset(uint32_t(opData.immediateOp) << 24);
+        opcode.addImm(x, 31);
+        opcode.addImm(immShift, 22);
+        opcode.addImm(immValue, 10);
+        opcode.addReg(o0, 5);
+        opcode.addReg(Gp::kIdZr, 0);
+        goto EmitOp;
+      }
+
+      if (isign4 == ENC_OPS2(Reg, Reg) || isign4 == ENC_OPS3(Reg, Reg, Imm)) {
+        if (!checkSignature(o0, o1))
+          goto InvalidInstruction;
+
+        uint32_t opSize = x ? 64 : 32;
+        uint32_t sType = 0;
+        uint64_t shift = 0;
+
+        if (isign4 == ENC_OPS3(Reg, Reg, Imm)) {
+          sType = o2.as<Imm>().predicate();
+          shift = o2.as<Imm>().valueAs<uint64_t>();
+        }
+
+        bool hasSP = o0.as<Gp>().isSP() || o1.as<Gp>().isSP();
+
+        // Shift operation - LSL, LSR, ASR.
+        if (sType <= uint32_t(ShiftOp::kASR)) {
+          if (!hasSP) {
+            if (shift >= opSize)
+              goto InvalidImmediate;
+
+            opcode.reset(uint32_t(opData.shiftedOp) << 21);
+            opcode.addImm(x, 31);
+            opcode.addImm(sType, 22);
+            opcode.addReg(o1, 16);
+            opcode.addImm(shift, 10);
+            opcode.addReg(o0, 5);
+            opcode.addReg(Gp::kIdZr, 0);
+            goto EmitOp;
+          }
+
+          // SP register can only be used with LSL or Extend.
+          if (sType != uint32_t(ShiftOp::kLSL))
+            goto InvalidImmediate;
+
+          sType = x ? uint32_t(ShiftOp::kUXTX) : uint32_t(ShiftOp::kUXTW);
+        }
+
+        // Extend operation - UXTB, UXTH, UXTW, UXTX, SXTB, SXTH, SXTW, SXTX.
+        sType -= uint32_t(ShiftOp::kUXTB);
+        if (sType > 7 || shift > 4)
+          goto InvalidImmediate;
+
+        opcode.reset(uint32_t(opData.extendedOp) << 21);
+        opcode.addImm(x, 31);
+        opcode.addReg(o1, 16);
+        opcode.addImm(sType, 13);
+        opcode.addImm(shift, 10);
+        opcode.addReg(o0, 5);
+        opcode.addReg(Gp::kIdZr, 0);
+        goto EmitOp;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingBaseMvnNeg: {
+      const InstDB::EncodingData::BaseMvnNeg& opData = InstDB::EncodingData::baseMvnNeg[encodingIndex];
+
+      uint32_t x;
+      if (!checkGpType(o0, o1, kWX, &x))
+        goto InvalidInstruction;
+
+      opcode.reset(opData.opcode);
+      opcode.addImm(x, 31);
+      opcode.addReg(o1, 16);
+      opcode.addReg(o0, 0);
+
+      if (isign4 == ENC_OPS2(Reg, Reg)) {
+        if (!checkGpId(o0, o1, kZR))
+          goto InvalidPhysId;
+
+        goto EmitOp;
+      }
+
+      if (isign4 == ENC_OPS3(Reg, Reg, Imm)) {
+        if (!checkGpId(o0, o1, kZR))
+          goto InvalidPhysId;
+
+        uint32_t opSize = x ? 64 : 32;
+        uint32_t shiftType = o2.as<Imm>().predicate();
+        uint64_t opShift = o2.as<Imm>().valueAs<uint64_t>();
+
+        if (shiftType > uint32_t(ShiftOp::kROR) || opShift >= opSize)
+          goto InvalidImmediate;
+
+        opcode.addImm(shiftType, 22);
+        opcode.addImm(opShift, 10);
+        goto EmitOp;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingBaseTst: {
+      const InstDB::EncodingData::BaseTst& opData = InstDB::EncodingData::baseTst[encodingIndex];
+
+      uint32_t x;
+      if (!checkGpType(o0, kWX, &x))
+        goto InvalidInstruction;
+
+      uint32_t opSize = x ? 64 : 32;
+
+      if (isign4 == ENC_OPS2(Reg, Imm) && opData.immediateOp != 0) {
+        if (!checkGpId(o0, kZR))
+          goto InvalidPhysId;
+
+        // TST (immediate) uses a LogicalImm format described by N:R:S values.
+        uint64_t immMask = Support::lsbMask<uint64_t>(opSize);
+        uint64_t immValue = o1.as<Imm>().valueAs<uint64_t>();
+
+        // Logical instructions use 13-bit immediate pattern encoded as N:ImmS:ImmR.
+        LogicalImm logicalImm;
+        if (!Utils::encodeLogicalImm(immValue & immMask, opSize, &logicalImm))
+          goto InvalidImmediate;
+
+        opcode.reset(uint32_t(opData.immediateOp) << 22);
+        opcode.addLogicalImm(logicalImm);
+        opcode.addImm(x, 31);
+        opcode.addReg(o0, 5);
+        opcode.addReg(Gp::kIdZr, 0);
+        goto EmitOp;
+      }
+
+      opcode.reset(uint32_t(opData.shiftedOp) << 21);
+      opcode.addImm(x, 31);
+      opcode.addReg(o1, 16);
+      opcode.addReg(o0, 5);
+      opcode.addReg(Gp::kIdZr, 0);
+
+      if (isign4 == ENC_OPS2(Reg, Reg)) {
+        if (!checkGpId(o0, o1, kZR))
+          goto InvalidPhysId;
+
+        goto EmitOp;
+      }
+
+      if (isign4 == ENC_OPS3(Reg, Reg, Imm)) {
+        if (!checkGpId(o0, o1, kZR))
+          goto InvalidPhysId;
+
+        uint32_t shiftType = o2.as<Imm>().predicate();
+        uint64_t opShift = o2.as<Imm>().valueAs<uint64_t>();
+
+        if (shiftType > 0x3 || opShift >= opSize)
+          goto InvalidImmediate;
+
+        opcode.addImm(shiftType, 22);
+        opcode.addImm(opShift, 10);
+        goto EmitOp;
+      }
+
+      break;
+    }
+
+    // ------------------------------------------------------------------------
+    // [Base - Bit Manipulation]
+    // ------------------------------------------------------------------------
+
+    case InstDB::kEncodingBaseBfc: {
+      const InstDB::EncodingData::BaseBfc& opData = InstDB::EncodingData::baseBfc[encodingIndex];
+
+      if (isign4 == ENC_OPS3(Reg, Imm, Imm)) {
+        uint32_t x;
+        if (!checkGpType(o0, InstDB::kWX, &x))
+          goto InvalidInstruction;
+
+        if (!checkGpId(o0))
+          goto InvalidPhysId;
+
+        uint64_t lsb = o1.as<Imm>().valueAs<uint64_t>();
+        uint64_t width = o2.as<Imm>().valueAs<uint64_t>();
+        uint32_t opSize = x ? 64 : 32;
+
+        if (lsb >= opSize || width == 0 || width > opSize)
+          goto InvalidImmediate;
+
+        uint32_t lsb32 = Support::neg(uint32_t(lsb)) & (opSize - 1);
+        uint32_t width32 = uint32_t(width) - 1;
+
+        opcode.reset(opData.opcode);
+        opcode.addImm(x, 31);
+        opcode.addImm(x, 22);
+        opcode.addImm(lsb32, 16);
+        opcode.addImm(width32, 10);
+        opcode.addReg(o0, 0);
+        goto EmitOp;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingBaseBfi: { // Handles the (Reg, Reg, Imm, Imm) form: o2 = lsb, o3 = width.
+      const InstDB::EncodingData::BaseBfi& opData = InstDB::EncodingData::baseBfi[encodingIndex];
+
+      if (isign4 == ENC_OPS4(Reg, Reg, Imm, Imm)) {
+        uint32_t x;
+        if (!checkGpType(o0, InstDB::kWX, &x))
+          goto InvalidInstruction;
+
+        if (!checkSignature(o0, o1))
+          goto InvalidInstruction;
+
+        if (!checkGpId(o0, o1))
+          goto InvalidPhysId;
+
+        uint64_t lsb = o2.as<Imm>().valueAs<uint64_t>();
+        uint64_t width = o3.as<Imm>().valueAs<uint64_t>();
+        uint32_t opSize = x ? 64 : 32; // Non-zero x selects the 64-bit operand size.
+
+        if (lsb >= opSize || width == 0 || width > opSize - lsb) // The field [lsb, lsb + width) must fit in the register.
+          goto InvalidImmediate;
+
+        uint32_t lImm = Support::neg(uint32_t(lsb)) & (opSize - 1); // Negated lsb wrapped to opSize (assumes Support::neg negates).
+        uint32_t wImm = uint32_t(width) - 1; // Encoded as width - 1.
+
+        opcode.reset(opData.opcode);
+        opcode.addImm(x, 31);
+        opcode.addImm(x, 22);
+        opcode.addImm(lImm, 16);
+        opcode.addImm(wImm, 10);
+        opcode.addReg(o1, 5);
+        opcode.addReg(o0, 0);
+        goto EmitOp;
+      }
+
+      break; // Operand signature not handled here; leave the switch.
+    }
+
+    case InstDB::kEncodingBaseBfm: { // (Reg, Reg, Imm, Imm) form; o2 and o3 are emitted unmodified at bits 16 and 10.
+      const InstDB::EncodingData::BaseBfm& opData = InstDB::EncodingData::baseBfm[encodingIndex];
+
+      if (isign4 == ENC_OPS4(Reg, Reg, Imm, Imm)) {
+        uint32_t x;
+        if (!checkGpType(o0, InstDB::kWX, &x))
+          goto InvalidInstruction;
+
+        if (!checkSignature(o0, o1))
+          goto InvalidInstruction;
+
+        if (!checkGpId(o0, o1))
+          goto InvalidPhysId;
+
+        uint64_t immR = o2.as<Imm>().valueAs<uint64_t>();
+        uint64_t immS = o3.as<Imm>().valueAs<uint64_t>();
+        uint32_t opSize = x ? 64 : 32; // Non-zero x selects the 64-bit operand size.
+
+        if ((immR | immS) >= opSize) // opSize is a power of two, so this rejects either immediate being >= opSize.
+          goto InvalidImmediate;
+
+        opcode.reset(opData.opcode);
+        opcode.addImm(x, 31);
+        opcode.addImm(x, 22);
+        opcode.addImm(immR, 16);
+        opcode.addImm(immS, 10);
+        opcode.addReg(o1, 5);
+        opcode.addReg(o0, 0);
+        goto EmitOp;
+      }
+
+      break; // Operand signature not handled here; leave the switch.
+    }
+
+    case InstDB::kEncodingBaseBfx: { // (Reg, Reg, Imm, Imm) form: o2 = lsb, o3 = width.
+      const InstDB::EncodingData::BaseBfx& opData = InstDB::EncodingData::baseBfx[encodingIndex];
+
+      if (isign4 == ENC_OPS4(Reg, Reg, Imm, Imm)) {
+        uint32_t x;
+        if (!checkGpType(o0, InstDB::kWX, &x))
+          goto InvalidInstruction;
+
+        if (!checkSignature(o0, o1))
+          goto InvalidInstruction;
+
+        if (!checkGpId(o0, o1))
+          goto InvalidPhysId;
+
+        uint64_t lsb = o2.as<Imm>().valueAs<uint64_t>();
+        uint64_t width = o3.as<Imm>().valueAs<uint64_t>();
+        uint32_t opSize = x ? 64 : 32; // Non-zero x selects the 64-bit operand size.
+
+        if (lsb >= opSize || width == 0 || width > opSize)
+          goto InvalidImmediate;
+
+        uint32_t lsb32 = uint32_t(lsb);
+        uint32_t width32 = lsb32 + uint32_t(width) - 1u; // Index of the highest bit of the field (lsb + width - 1).
+
+        if (width32 >= opSize) // The field must not extend past the register.
+          goto InvalidImmediate;
+
+        opcode.reset(opData.opcode);
+        opcode.addImm(x, 31);
+        opcode.addImm(x, 22);
+        opcode.addImm(lsb32, 16);
+        opcode.addImm(width32, 10);
+        opcode.addReg(o1, 5);
+        opcode.addReg(o0, 0);
+        goto EmitOp;
+      }
+
+      break; // Operand signature not handled here; leave the switch.
+    }
+
+    case InstDB::kEncodingBaseExtend: { // (Reg, Reg) form; the second register must pass isGpW().
+      const InstDB::EncodingData::BaseExtend& opData = InstDB::EncodingData::baseExtend[encodingIndex];
+
+      if (isign4 == ENC_OPS2(Reg, Reg)) {
+        uint32_t x;
+        if (!checkGpType(o0, opData.rType, &x)) // Allowed destination types come from the encoding table (opData.rType).
+          goto InvalidInstruction;
+
+        if (!o1.as<Reg>().isGpW())
+          goto InvalidInstruction;
+
+        if (!checkGpId(o0, o1))
+          goto InvalidPhysId;
+
+        opcode.reset(opData.opcode());
+        opcode.addImm(x, 31); // x is applied at both bit 31 and bit 22.
+        opcode.addImm(x, 22);
+        opcode.addReg(o1, 5);
+        opcode.addReg(o0, 0);
+        goto EmitOp;
+      }
+
+      break; // Operand signature not handled here; leave the switch.
+    }
+
+    case InstDB::kEncodingBaseExtract: { // (Reg, Reg, Reg, Imm) form: o3 = lsb.
+      const InstDB::EncodingData::BaseExtract& opData = InstDB::EncodingData::baseExtract[encodingIndex];
+
+      if (isign4 == ENC_OPS4(Reg, Reg, Reg, Imm)) {
+        uint32_t x;
+        if (!checkGpType(o0, kWX, &x))
+          goto InvalidInstruction;
+
+        if (!checkSignature(o0, o1, o2))
+          goto InvalidInstruction;
+
+        if (!checkGpId(o0, o1, o2))
+          goto InvalidPhysId;
+
+        uint64_t lsb = o3.as<Imm>().valueAs<uint64_t>();
+        uint32_t opSize = x ? 64 : 32; // Non-zero x selects the 64-bit operand size.
+
+        if (lsb >= opSize) // lsb must address a bit inside the register.
+          goto InvalidImmediate;
+
+        opcode.reset(opData.opcode);
+        opcode.addImm(x, 31);
+        opcode.addImm(x, 22);
+        opcode.addReg(o2, 16);
+        opcode.addImm(lsb, 10);
+        opcode.addReg(o1, 5);
+        opcode.addReg(o0, 0);
+        goto EmitOp;
+      }
+
+      break; // Operand signature not handled here; leave the switch.
+    }
+
+    case InstDB::kEncodingBaseRev: { // (Reg, Reg) form; uses a fixed opcode instead of an encoding-table entry.
+      if (isign4 == ENC_OPS2(Reg, Reg)) {
+        uint32_t x;
+        if (!checkGpType(o0, InstDB::kWX, &x))
+          goto InvalidInstruction;
+
+        if (!checkSignature(o0, o1))
+          goto InvalidInstruction;
+
+        if (!checkGpId(o0, o1))
+          goto InvalidPhysId;
+
+        opcode.reset(0b01011010110000000000100000000000);
+        opcode.addImm(x, 31); // x is applied at both bit 31 and bit 10.
+        opcode.addImm(x, 10);
+        opcode.addReg(o1, 5);
+        opcode.addReg(o0, 0);
+        goto EmitOp;
+      }
+
+      break; // Operand signature not handled here; leave the switch.
+    }
+
+    case InstDB::kEncodingBaseShift: { // Register form (Reg, Reg, Reg) and, when the table provides one, immediate form (Reg, Reg, Imm).
+      const InstDB::EncodingData::BaseShift& opData = InstDB::EncodingData::baseShift[encodingIndex];
+
+      uint32_t x;
+      if (!checkGpType(o0, kWX, &x)) // Checked once up front because both forms below use x.
+        goto InvalidInstruction;
+
+      if (isign4 == ENC_OPS3(Reg, Reg, Reg)) {
+        if (!checkSignature(o0, o1, o2))
+          goto InvalidInstruction;
+
+        if (!checkGpId(o0, o1, o2, kZR))
+          goto InvalidPhysId;
+
+        opcode.reset(opData.registerOp());
+        opcode.addImm(x, 31);
+        opcode.addReg(o2, 16);
+        opcode.addReg(o1, 5);
+        opcode.addReg(o0, 0);
+        goto EmitOp;
+      }
+
+      if (isign4 == ENC_OPS3(Reg, Reg, Imm) && opData.immediateOp()) { // Skipped when immediateOp() is zero.
+        if (!checkSignature(o0, o1))
+          goto InvalidInstruction;
+
+        if (!checkGpId(o0, o1, kZR))
+          goto InvalidPhysId;
+
+        uint64_t immR = o2.as<Imm>().valueAs<uint64_t>();
+        uint32_t opSize = x ? 64 : 32; // Non-zero x selects the 64-bit operand size.
+
+        if (immR >= opSize) // The shift amount must be smaller than the operand size.
+          goto InvalidImmediate;
+
+        opcode.reset(opData.immediateOp());
+        opcode.addImm(x, 31);
+        opcode.addImm(x, 22);
+        opcode.addReg(o1, 5);
+        opcode.addReg(o0, 0);
+
+        if (opcode.get() & B(10)) { // Dispatch on a bit of the table opcode itself.
+          // ASR and LSR (immediate) has the same logic.
+          opcode.addImm(x, 15);
+          opcode.addImm(immR, 16);
+          goto EmitOp;
+        }
+
+        if (opData.ror == 0) {
+          // LSL (immediate) is an alias to UBFM
+          uint32_t ubfmImmR = Support::neg(uint32_t(immR)) & (opSize - 1);
+          uint32_t ubfmImmS = opSize - 1 - uint32_t(immR);
+
+          opcode.addImm(ubfmImmR, 16);
+          opcode.addImm(ubfmImmS, 10);
+          goto EmitOp;
+        }
+        else {
+          // ROR (immediate) is an alias to EXTR.
+          opcode.addImm(immR, 10);
+          opcode.addReg(o1, 16); // o1 is used for the field at bit 16 as well as the one at bit 5.
+          goto EmitOp;
+        }
+      }
+
+      break; // Operand signature not handled here; leave the switch.
+    }
+
+    // ------------------------------------------------------------------------
+    // [Base - Conditionals]
+    // ------------------------------------------------------------------------
+
+    case InstDB::kEncodingBaseCCmp: { // Forms (Reg, Reg, Imm, Imm) and (Reg, Imm, Imm, Imm): o2 = nzcv, o3 = cond.
+      const InstDB::EncodingData::BaseCCmp& opData = InstDB::EncodingData::baseCCmp[encodingIndex];
+
+      if (isign4 == ENC_OPS4(Reg, Reg, Imm, Imm) || isign4 == ENC_OPS4(Reg, Imm, Imm, Imm)) {
+        uint32_t x;
+        if (!checkGpType(o0, InstDB::kWX, &x))
+          goto InvalidInstruction;
+
+        if (!checkGpId(o0, kZR))
+          goto InvalidPhysId;
+
+        uint64_t nzcv = o2.as<Imm>().valueAs<uint64_t>();
+        uint64_t cond = o3.as<Imm>().valueAs<uint64_t>();
+
+        if ((nzcv | cond) > 0xFu) // Both values must fit in 4 bits.
+          goto InvalidImmediate;
+
+        opcode.reset(opData.opcode);
+        opcode.addImm(x, 31);
+        opcode.addImm(condCodeToOpcodeCond(uint32_t(cond)), 12);
+        opcode.addImm(nzcv, 0);
+
+        if (isign4 == ENC_OPS4(Reg, Reg, Imm, Imm)) {
+          // CCMN|CCMP (register) form.
+          if (!checkSignature(o0, o1))
+            goto InvalidInstruction;
+
+          if (!checkGpId(o1, kZR))
+            goto InvalidPhysId;
+
+          opcode.addReg(o1, 16);
+          opcode.addReg(o0, 5);
+          goto EmitOp;
+        }
+        else {
+          // CCMN|CCMP (immediate) form.
+          uint64_t imm5 = o1.as<Imm>().valueAs<uint64_t>();
+          if (imm5 > 0x1F) // The compared immediate is limited to 5 bits.
+            goto InvalidImmediate;
+
+          opcode.addImm(1, 11); // Bit 11 is set only for the immediate form.
+          opcode.addImm(imm5, 16);
+          opcode.addReg(o0, 5);
+          goto EmitOp;
+        }
+      }
+
+      break; // Operand signature not handled here; leave the switch.
+    }
+
+    case InstDB::kEncodingBaseCInc: { // (Reg, Reg, Imm) form: o2 = condition code.
+      const InstDB::EncodingData::BaseCInc& opData = InstDB::EncodingData::baseCInc[encodingIndex];
+
+      if (isign4 == ENC_OPS3(Reg, Reg, Imm)) {
+        uint32_t x;
+        if (!checkGpType(o0, o1, InstDB::kWX, &x))
+          goto InvalidInstruction;
+
+        if (!checkGpId(o0, o1, kZR))
+          goto InvalidPhysId;
+
+        uint64_t cond = o2.as<Imm>().valueAs<uint64_t>();
+        if (cond - 2u >= 0xEu) // Accepts cond 2..15 only; 0 and 1 wrap around and are rejected, as is 16 and above.
+          goto InvalidImmediate;
+
+        opcode.reset(opData.opcode);
+        opcode.addImm(x, 31);
+        opcode.addReg(o1, 16); // o1 fills both source register fields (bits 16 and 5).
+        opcode.addImm(condCodeToOpcodeCond(uint32_t(cond)) ^ 1u, 12); // Low bit of the encoded condition is flipped.
+        opcode.addReg(o1, 5);
+        opcode.addReg(o0, 0);
+        goto EmitOp;
+      }
+
+      break; // Operand signature not handled here; leave the switch.
+    }
+
+    case InstDB::kEncodingBaseCSel: { // (Reg, Reg, Reg, Imm) form: o3 = condition code.
+      const InstDB::EncodingData::BaseCSel& opData = InstDB::EncodingData::baseCSel[encodingIndex];
+
+      if (isign4 == ENC_OPS4(Reg, Reg, Reg, Imm)) {
+        uint32_t x;
+        if (!checkGpType(o0, o1, o2, InstDB::kWX, &x))
+          goto InvalidInstruction;
+
+        if (!checkGpId(o0, o1, o2, kZR))
+          goto InvalidPhysId;
+
+        uint64_t cond = o3.as<Imm>().valueAs<uint64_t>();
+        if (cond > 0xFu) // Any 4-bit condition value is accepted here.
+          goto InvalidImmediate;
+
+        opcode.reset(opData.opcode);
+        opcode.addImm(x, 31);
+        opcode.addReg(o2, 16);
+        opcode.addImm(condCodeToOpcodeCond(uint32_t(cond)), 12); // Condition is encoded as-is (no bit flip).
+        opcode.addReg(o1, 5);
+        opcode.addReg(o0, 0);
+        goto EmitOp;
+      }
+
+      break; // Operand signature not handled here; leave the switch.
+    }
+
+    case InstDB::kEncodingBaseCSet: { // (Reg, Imm) form: o1 = condition code.
+      const InstDB::EncodingData::BaseCSet& opData = InstDB::EncodingData::baseCSet[encodingIndex];
+
+      if (isign4 == ENC_OPS2(Reg, Imm)) {
+        uint32_t x;
+        if (!checkGpType(o0, InstDB::kWX, &x))
+          goto InvalidInstruction;
+
+        if (!checkGpId(o0, kZR))
+          goto InvalidPhysId;
+
+        uint64_t cond = o1.as<Imm>().valueAs<uint64_t>();
+        if (cond - 2u >= 0xEu) // Accepts cond 2..15 only; 0 and 1 wrap around and are rejected.
+          goto InvalidImmediate;
+
+        opcode.reset(opData.opcode);
+        opcode.addImm(x, 31);
+        opcode.addImm(condCodeToOpcodeCond(uint32_t(cond)) ^ 1u, 12); // Low bit of the encoded condition is flipped.
+        opcode.addReg(o0, 0);
+        goto EmitOp;
+      }
+
+      break; // Operand signature not handled here; leave the switch.
+    }
+
+    // ------------------------------------------------------------------------
+    // [Base - Special]
+    // ------------------------------------------------------------------------
+
+    case InstDB::kEncodingBaseAtDcIcTlbi: { // Forms (Imm) and (Imm, Reg); the register is required when opData.mandatoryReg is set.
+      const InstDB::EncodingData::BaseAtDcIcTlbi& opData = InstDB::EncodingData::baseAtDcIcTlbi[encodingIndex];
+
+      if (isign4 == ENC_OPS1(Imm) || isign4 == ENC_OPS2(Imm, Reg)) {
+        if (opData.mandatoryReg && isign4 != ENC_OPS2(Imm, Reg))
+          goto InvalidInstruction;
+
+        if (o0.as<Imm>().valueAs<uint64_t>() > 0x7FFFu) // The operation immediate is limited to 15 bits.
+          goto InvalidImmediate;
+
+        uint32_t imm = o0.as<Imm>().valueAs<uint32_t>();
+        if ((imm & opData.immVerifyMask) != opData.immVerifyData) // Masked bits must match the pattern stored in the table.
+          goto InvalidImmediate;
+
+        uint32_t rt = 31; // Register field value used when no register operand is given.
+        if (o1.isReg()) {
+          if (!o1.as<Reg>().isGpX())
+            goto InvalidInstruction;
+
+          if (!checkGpId(o1, kZR))
+            goto InvalidPhysId;
+
+          rt = o1.id() & 31;
+        }
+
+        opcode.reset(0b11010101000010000000000000000000);
+        opcode.addImm(imm, 5);
+        opcode.addReg(rt, 0);
+        goto EmitOp;
+      }
+      break; // Operand signature not handled here; leave the switch.
+    }
+
+    case InstDB::kEncodingBaseMrs: { // (Reg, Imm) form: o0 must pass isGpX(), o1 is a 16-bit immediate placed at bit 5.
+      if (isign4 == ENC_OPS2(Reg, Imm)) {
+        if (!o0.as<Reg>().isGpX())
+          goto InvalidInstruction;
+
+        if (!checkGpId(o0, kZR))
+          goto InvalidPhysId;
+
+        if (o1.as<Imm>().valueAs<uint64_t>() > 0xFFFFu) // Range-check as 64-bit before narrowing to uint32_t below.
+          goto InvalidImmediate;
+
+        uint32_t imm = o1.as<Imm>().valueAs<uint32_t>();
+        if (!(imm & B(15))) // The immediate is rejected unless B(15) is set in it.
+          goto InvalidImmediate;
+
+        opcode.reset(0b11010101001100000000000000000000);
+        opcode.addImm(imm, 5);
+        opcode.addReg(o0, 0);
+        goto EmitOp;
+      }
+
+      break; // Operand signature not handled here; leave the switch.
+    }
+
+    case InstDB::kEncodingBaseMsr: { // Forms (Imm, Reg) and (Imm, Imm).
+      if (isign4 == ENC_OPS2(Imm, Reg)) {
+        if (!o1.as<Reg>().isGpX())
+          goto InvalidInstruction;
+
+        if (o0.as<Imm>().valueAs<uint64_t>() > 0xFFFFu) // Range-check as 64-bit before narrowing to uint32_t below.
+          goto InvalidImmediate;
+
+        uint32_t imm = o0.as<Imm>().valueAs<uint32_t>();
+        if (!(imm & B(15))) // The immediate is rejected unless B(15) is set in it.
+          goto InvalidImmediate;
+
+        if (!checkGpId(o1, kZR))
+          goto InvalidPhysId;
+
+        opcode.reset(0b11010101000100000000000000000000);
+        opcode.addImm(imm, 5);
+        opcode.addReg(o1, 0);
+        goto EmitOp;
+      }
+
+      if (isign4 == ENC_OPS2(Imm, Imm)) {
+        if (o0.as<Imm>().valueAs<uint64_t>() > 0x1Fu) // First immediate: 5 bits, split into op1/op2 below.
+          goto InvalidImmediate;
+
+        if (o1.as<Imm>().valueAs<uint64_t>() > 0xFu) // Second immediate: 4 bits.
+          goto InvalidImmediate;
+
+        uint32_t op = o0.as<Imm>().valueAs<uint32_t>();
+        uint32_t cRm = o1.as<Imm>().valueAs<uint32_t>();
+
+        uint32_t op1 = uint32_t(op) >> 3; // Upper 2 bits of op.
+        uint32_t op2 = uint32_t(op) & 0x7u; // Lower 3 bits of op.
+
+        opcode.reset(0b11010101000000000100000000011111);
+        opcode.addImm(op1, 16);
+        opcode.addImm(cRm, 8);
+        opcode.addImm(op2, 5);
+        goto EmitOp;
+      }
+
+      break; // Operand signature not handled here; leave the switch.
+    }
+
+    case InstDB::kEncodingBaseSys: { // (Imm, Imm, Imm, Imm) form plus an optional fifth register operand.
+      if (isign4 == ENC_OPS4(Imm, Imm, Imm, Imm)) {
+        if (o0.as<Imm>().valueAs<uint64_t>() > 0x7u ||
+            o1.as<Imm>().valueAs<uint64_t>() > 0xFu ||
+            o2.as<Imm>().valueAs<uint64_t>() > 0xFu ||
+            o3.as<Imm>().valueAs<uint64_t>() > 0x7u)
+          goto InvalidImmediate;
+
+        uint32_t op1 = o0.as<Imm>().valueAs<uint32_t>();
+        uint32_t cRn = o1.as<Imm>().valueAs<uint32_t>();
+        uint32_t cRm = o2.as<Imm>().valueAs<uint32_t>();
+        uint32_t op2 = o3.as<Imm>().valueAs<uint32_t>();
+        uint32_t rt = 31; // Register field value used when no fifth operand is given.
+
+        const Operand_& o4 = opExt[EmitterUtils::kOp4]; // The fifth operand comes from the extended operand array.
+        if (o4.isReg()) {
+          if (!o4.as<Reg>().isGpX())
+            goto InvalidInstruction;
+
+          if (!checkGpId(o4, kZR))
+            goto InvalidPhysId;
+
+          rt = o4.id() & 31;
+        }
+        else if (!o4.isNone()) { // Anything other than a register or "none" is rejected.
+          goto InvalidInstruction;
+        }
+
+        opcode.reset(0b11010101000010000000000000000000);
+        opcode.addImm(op1, 16);
+        opcode.addImm(cRn, 12);
+        opcode.addImm(cRm, 8);
+        opcode.addImm(op2, 5);
+        opcode.addImm(rt, 0);
+        goto EmitOp;
+      }
+
+      break; // Operand signature not handled here; leave the switch.
+    }
+
+    // ------------------------------------------------------------------------
+    // [Base - Branch]
+    // ------------------------------------------------------------------------
+
+    case InstDB::kEncodingBaseBranchReg: { // (Reg) form: the register must pass isGpX() and is placed at bit 5.
+      const InstDB::EncodingData::BaseBranchReg& opData = InstDB::EncodingData::baseBranchReg[encodingIndex];
+
+      if (isign4 == ENC_OPS1(Reg)) {
+        if (!o0.as<Reg>().isGpX())
+          goto InvalidInstruction;
+
+        if (!checkGpId(o0, kZR))
+          goto InvalidPhysId;
+
+        opcode.reset(opData.opcode);
+        opcode.addReg(o0, 5);
+        goto EmitOp;
+      }
+
+      break; // Operand signature not handled here; leave the switch.
+    }
+
+    case InstDB::kEncodingBaseBranchRel: { // (Label) or (Imm) form; the target operand is handed over through rmRel.
+      const InstDB::EncodingData::BaseBranchRel& opData = InstDB::EncodingData::baseBranchRel[encodingIndex];
+
+      if (isign4 == ENC_OPS1(Label) || isign4 == ENC_OPS1(Imm)) {
+        opcode.reset(opData.opcode);
+        rmRel = &o0;
+
+        if (instCC != CondCode::kAL) { // A condition other than kAL switches to the conditional layout.
+          opcode |= B(30);
+          opcode.addImm(condCodeToOpcodeCond(uint32_t(instCC)), 0);
+          offsetFormat.resetToImmValue(OffsetType::kSignedOffset, 4, 5, 19, 2); // NOTE(review): args look like (size, bit offset 5, 19 bits, shift 2) - confirm.
+          goto EmitOp_Rel;
+        }
+
+        offsetFormat.resetToImmValue(OffsetType::kSignedOffset, 4, 0, 26, 2); // NOTE(review): args look like (size, bit offset 0, 26 bits, shift 2) - confirm.
+        goto EmitOp_Rel;
+      }
+
+      break; // Operand signature not handled here; leave the switch.
+    }
+
+    case InstDB::kEncodingBaseBranchCmp: { // (Reg, Label) or (Reg, Imm) form; o1 is the branch target.
+      const InstDB::EncodingData::BaseBranchCmp& opData = InstDB::EncodingData::baseBranchCmp[encodingIndex];
+
+      if (isign4 == ENC_OPS2(Reg, Label) || isign4 == ENC_OPS2(Reg, Imm)) {
+        uint32_t x;
+        if (!checkGpType(o0, kWX, &x))
+          goto InvalidInstruction;
+
+        if (!checkGpId(o0, kZR))
+          goto InvalidPhysId;
+
+        opcode.reset(opData.opcode);
+        opcode.addImm(x, 31);
+        opcode.addReg(o0, 0);
+        offsetFormat.resetToImmValue(OffsetType::kSignedOffset, 4, 5, 19, 2); // Same offset format arguments as the conditional branch case.
+
+        rmRel = &o1; // The target operand is handed over through rmRel.
+        goto EmitOp_Rel;
+      }
+
+      break; // Operand signature not handled here; leave the switch.
+    }
+
+    case InstDB::kEncodingBaseBranchTst: { // (Reg, Imm, Label) or (Reg, Imm, Imm) form: o1 = bit index, o2 = branch target.
+      const InstDB::EncodingData::BaseBranchTst& opData = InstDB::EncodingData::baseBranchTst[encodingIndex];
+
+      if (isign4 == ENC_OPS3(Reg, Imm, Label) || isign4 == ENC_OPS3(Reg, Imm, Imm)) {
+        uint32_t x;
+        if (!checkGpType(o0, kWX, &x))
+          goto InvalidInstruction;
+
+        if (!checkGpId(o0, kZR))
+          goto InvalidPhysId;
+
+        uint64_t imm = o1.as<Imm>().valueAs<uint64_t>();
+
+        opcode.reset(opData.opcode);
+        if (imm >= 32) { // Bit indexes 32..63 need the x form and set bit 31 of the opcode.
+          if (!x || imm >= 64) // Reject out-of-range indexes instead of silently masking them to 5 bits.
+            goto InvalidImmediate;
+          opcode.addImm(x, 31);
+          imm &= 0x1F; // Only the low 5 bits go into the field at bit 19.
+        }
+
+        opcode.addReg(o0, 0);
+        opcode.addImm(imm, 19);
+        offsetFormat.resetToImmValue(OffsetType::kSignedOffset, 4, 5, 14, 2);
+
+        rmRel = &o2; // The target operand is handed over through rmRel.
+        goto EmitOp_Rel;
+      }
+
+      break; // Operand signature not handled here; leave the switch.
+    }
+
+    // ------------------------------------------------------------------------
+    // [Base - Load / Store]
+    // ------------------------------------------------------------------------
+
+    case InstDB::kEncodingBaseLdSt: { // (Reg, Mem) form; picks register-index, pre/post-index, scaled-offset or literal encoding from the address.
+      const InstDB::EncodingData::BaseLdSt& opData = InstDB::EncodingData::baseLdSt[encodingIndex];
+
+      if (isign4 == ENC_OPS2(Reg, Mem)) {
+        const Mem& m = o1.as<Mem>();
+        rmRel = &m;
+
+        uint32_t x;
+        if (!checkGpType(o0, opData.rType, &x))
+          goto InvalidInstruction;
+
+        if (!checkGpId(o0, kZR))
+          goto InvalidPhysId;
+
+        // Instructions that work with either word or dword have the unsigned
+        // offset shift set to 2 (word), so we set it to 3 (dword) if this is
+        // X version of the instruction.
+        uint32_t xShiftMask = uint32_t(opData.uOffsetShift == 2);
+        uint32_t immShift = uint32_t(opData.uOffsetShift) + (x & xShiftMask);
+
+        if (!armCheckMemBaseIndexRel(m))
+          goto InvalidAddress;
+
+        int64_t offset = m.offset();
+        if (m.hasBaseReg()) {
+          // [Base {Offset | Index}]
+          if (m.hasIndex()) {
+            uint32_t opt = armShiftOpToLdStOptMap[m.predicate()];
+            if (opt == 0xFF) // 0xFF in the map marks a predicate that this path rejects.
+              goto InvalidAddress;
+
+            uint32_t shift = m.shift();
+            uint32_t s = shift != 0;
+
+            if (s && shift != immShift) // A non-zero index shift must equal the access-size shift.
+              goto InvalidAddressScale;
+
+            opcode.reset(uint32_t(opData.registerOp) << 21);
+            opcode.xorImm(x, opData.xOffset);
+            opcode.addImm(opt, 13);
+            opcode.addImm(s, 12);
+            opcode |= B(11);
+            opcode.addReg(o0, 0);
+            goto EmitOp_MemBaseIndex_Rn5_Rm16;
+          }
+
+          // Makes it easier to work with the offset especially on 32-bit arch.
+          if (!Support::isInt32(offset))
+            goto InvalidDisplacement;
+          int32_t offset32 = int32_t(offset);
+
+          if (m.isPreOrPost()) {
+            if (!Support::isInt9(offset32))
+              goto InvalidDisplacement;
+
+            opcode.reset(uint32_t(opData.prePostOp) << 21);
+            opcode.xorImm(x, opData.xOffset);
+            opcode.addImm(offset32 & 0x1FF, 12); // Low 9 bits of the offset.
+            opcode.addImm(m.isPreIndex(), 11); // Bit 11 is set for pre-index addressing.
+            opcode |= B(10);
+            opcode.addReg(o0, 0);
+            goto EmitOp_MemBase_Rn5;
+          }
+          else {
+            uint32_t imm12 = uint32_t(offset32) >> immShift;
+
+            // Alternative form of LDUR/STUR and related instructions as described by AArch64 reference manual:
+            //
+            // If this instruction is not encodable with scaled unsigned offset, try unscaled signed offset.
+            if (!Support::isUInt12(imm12) || (imm12 << immShift) != uint32_t(offset32)) {
+              instId = opData.uAltInstId; // Re-dispatch as the alternative instruction named by the table.
+              instInfo = &InstDB::_instInfoTable[instId];
+              encodingIndex = instInfo->_encodingDataIndex;
+              goto Case_BaseLdurStur;
+            }
+
+            opcode.reset(uint32_t(opData.uOffsetOp) << 22);
+            opcode.xorImm(x, opData.xOffset);
+            opcode.addImm(imm12, 10);
+            opcode.addReg(o0, 0);
+            goto EmitOp_MemBase_Rn5;
+          }
+        }
+        else {
+          if (!opData.literalOp) // No base register: only valid when the table provides a literal opcode.
+            goto InvalidAddress;
+
+          opcode.reset(uint32_t(opData.literalOp) << 24);
+          opcode.xorImm(x, opData.xOffset);
+          opcode.addReg(o0, 0);
+          offsetFormat.resetToImmValue(OffsetType::kSignedOffset, 4, 5, 19, 2);
+          goto EmitOp_Rel;
+        }
+      }
+
+      break; // Operand signature not handled here; leave the switch.
+    }
+
+    case InstDB::kEncodingBaseLdpStp: { // (Reg, Reg, Mem) form; the address must be base + scaled 7-bit signed offset.
+      const InstDB::EncodingData::BaseLdpStp& opData = InstDB::EncodingData::baseLdpStp[encodingIndex];
+
+      if (isign4 == ENC_OPS3(Reg, Reg, Mem)) {
+        const Mem& m = o2.as<Mem>();
+        rmRel = &m;
+
+        uint32_t x;
+        if (!checkGpType(o0, o1, opData.rType, &x))
+          goto InvalidInstruction;
+
+        if (!checkGpId(o0, o1, kZR))
+          goto InvalidPhysId;
+
+        if (m.baseType() != RegType::kARM_GpX || m.hasIndex()) // Requires a kARM_GpX base and no index register.
+          goto InvalidAddress;
+
+        if (m.isOffset64Bit())
+          goto InvalidDisplacement;
+
+        uint32_t offsetShift = opData.offsetShift + x; // The table's offset shift is increased by x.
+        int32_t offset32 = m.offsetLo32() >> offsetShift;
+
+        // Make sure we didn't lose bits by applying the mandatory offset shift.
+        if (uint32_t(offset32) << offsetShift != uint32_t(m.offsetLo32()))
+          goto InvalidDisplacement;
+
+        // Offset is encoded as 7-bit immediate.
+        if (!Support::isInt7(offset32))
+          goto InvalidDisplacement;
+
+        if (m.isPreOrPost() && offset32 != 0) { // Pre/post-index with a zero offset uses the plain offset opcode below.
+          if (!opData.prePostOp)
+            goto InvalidAddress;
+
+          opcode.reset(uint32_t(opData.prePostOp) << 22);
+          opcode.addImm(m.isPreIndex(), 24); // Bit 24 is set for pre-index addressing.
+        }
+        else {
+          opcode.reset(uint32_t(opData.offsetOp) << 22);
+        }
+
+        opcode.addImm(x, opData.xOffset);
+        opcode.addImm(offset32 & 0x7F, 15);
+        opcode.addReg(o1, 10);
+        opcode.addReg(o0, 0);
+        goto EmitOp_MemBase_Rn5;
+      }
+
+      break; // Operand signature not handled here; leave the switch.
+    }
+
+    case InstDB::kEncodingBaseStx: { // (Reg, Reg, Mem) form: o0 must pass isGpW(); o1's type is checked against opData.rType.
+      const InstDB::EncodingData::BaseStx& opData = InstDB::EncodingData::baseStx[encodingIndex];
+
+      if (isign4 == ENC_OPS3(Reg, Reg, Mem)) {
+        const Mem& m = o2.as<Mem>();
+        uint32_t x;
+
+        if (!o0.as<Reg>().isGpW() || !checkGpType(o1, opData.rType, &x))
+          goto InvalidInstruction;
+
+        if (!checkGpId(o0, o1, kZR))
+          goto InvalidPhysId;
+
+        opcode.reset(opData.opcode());
+        opcode.addImm(x, opData.xOffset);
+        opcode.addReg(o0, 16); // o0 goes to the field at bit 16, o1 to the field at bit 0.
+        opcode.addReg(o1, 0);
+
+        rmRel = &m; // The memory operand is handed over through rmRel.
+        goto EmitOp_MemBaseNoImm_Rn5;
+      }
+
+      break; // Operand signature not handled here; leave the switch.
+    }
+
+    case InstDB::kEncodingBaseLdxp: { // (Reg, Reg, Mem) form: both registers must share one signature.
+      const InstDB::EncodingData::BaseLdxp& opData = InstDB::EncodingData::baseLdxp[encodingIndex];
+
+      if (isign4 == ENC_OPS3(Reg, Reg, Mem)) {
+        const Mem& m = o2.as<Mem>();
+        uint32_t x;
+
+        if (!checkGpType(o0, opData.rType, &x) || !checkSignature(o0, o1))
+          goto InvalidInstruction;
+
+        if (!checkGpId(o0, o1, kZR))
+          goto InvalidPhysId;
+
+        opcode.reset(opData.opcode());
+        opcode.addImm(x, opData.xOffset);
+        opcode.addReg(o1, 10); // o1 goes to the field at bit 10, o0 to the field at bit 0.
+        opcode.addReg(o0, 0);
+
+        rmRel = &m; // The memory operand is handed over through rmRel.
+        goto EmitOp_MemBaseNoImm_Rn5;
+      }
+
+      break; // Operand signature not handled here; leave the switch.
+    }
+
+    case InstDB::kEncodingBaseStxp: { // (Reg, Reg, Reg, Mem) form: o0 must pass isGpW(); o1 and o2 must share one signature.
+      const InstDB::EncodingData::BaseStxp& opData = InstDB::EncodingData::baseStxp[encodingIndex];
+
+      if (isign4 == ENC_OPS4(Reg, Reg, Reg, Mem)) {
+        const Mem& m = o3.as<Mem>();
+        uint32_t x;
+
+        if (!o0.as<Reg>().isGpW() || !checkGpType(o1, opData.rType, &x) || !checkSignature(o1, o2))
+          goto InvalidInstruction;
+
+        if (!checkGpId(o0, o1, o2, kZR))
+          goto InvalidPhysId;
+
+        opcode.reset(opData.opcode());
+        opcode.addImm(x, opData.xOffset);
+        opcode.addReg(o0, 16); // Fields: o0 at bit 16, o2 at bit 10, o1 at bit 0.
+        opcode.addReg(o2, 10);
+        opcode.addReg(o1, 0);
+
+        rmRel = &m; // The memory operand is handed over through rmRel.
+        goto EmitOp_MemBaseNoImm_Rn5;
+      }
+
+      break; // Operand signature not handled here; leave the switch.
+    }
+
+    case InstDB::kEncodingBaseRM_NoImm: { // (Reg, Mem) form; emitted through the no-immediate memory path.
+      const InstDB::EncodingData::BaseRM_NoImm& opData = InstDB::EncodingData::baseRM_NoImm[encodingIndex];
+
+      if (isign4 == ENC_OPS2(Reg, Mem)) {
+        const Mem& m = o1.as<Mem>();
+        rmRel = &m;
+
+        uint32_t x;
+        if (!checkGpType(o0, opData.rType, &x))
+          goto InvalidInstruction;
+
+        if (!checkGpId(o0, opData.rHiId)) // The register-id limit comes from the encoding table (opData.rHiId).
+          goto InvalidPhysId;
+
+        opcode.reset(opData.opcode());
+        opcode.addImm(x, opData.xOffset);
+        opcode.addReg(o0, 0);
+        goto EmitOp_MemBaseNoImm_Rn5;
+      }
+
+      break; // Operand signature not handled here; leave the switch.
+    }
+
+    case InstDB::kEncodingBaseRM_SImm9: { // (Reg, Mem) form with a 9-bit signed offset after applying opData.immShift.
+Case_BaseLdurStur: // Also a goto target: kEncodingBaseLdSt jumps here after switching to its alternative instruction.
+      const InstDB::EncodingData::BaseRM_SImm9& opData = InstDB::EncodingData::baseRM_SImm9[encodingIndex];
+
+      if (isign4 == ENC_OPS2(Reg, Mem)) {
+        const Mem& m = o1.as<Mem>();
+        rmRel = &m;
+
+        uint32_t x;
+        if (!checkGpType(o0, opData.rType, &x))
+          goto InvalidInstruction;
+
+        if (!checkGpId(o0, opData.rHiId))
+          goto InvalidPhysId;
+
+        if (m.hasBaseReg() && !m.hasIndex()) {
+          if (m.isOffset64Bit())
+            goto InvalidDisplacement;
+
+          int32_t offset32 = m.offsetLo32() >> opData.immShift;
+          if (Support::shl(offset32, opData.immShift) != m.offsetLo32()) // Offset must be a multiple of 1 << immShift.
+            goto InvalidDisplacement;
+
+          if (!Support::isInt9(offset32))
+            goto InvalidDisplacement;
+
+          if (m.isFixedOffset()) {
+            opcode.reset(opData.offsetOp());
+          }
+          else {
+            if (!opData.prePostOp()) // Non-fixed offsets need a pre/post opcode in the table.
+              goto InvalidInstruction;
+
+            opcode.reset(opData.prePostOp());
+            opcode.xorImm(m.isPreIndex(), 11); // Bit 11 is toggled for pre-index addressing.
+          }
+
+          opcode.xorImm(x, opData.xOffset);
+          opcode.addImm(offset32 & 0x1FF, 12);
+          opcode.addReg(o0, 0);
+          goto EmitOp_MemBase_Rn5;
+        }
+
+        goto InvalidAddress; // Addresses without a base register, or with an index, are not supported here.
+      }
+
+      break; // Operand signature not handled here; leave the switch.
+    }
+
+    case InstDB::kEncodingBaseRM_SImm10: { // (Reg, Mem) form with a 10-bit signed offset after applying opData.immShift.
+      const InstDB::EncodingData::BaseRM_SImm10& opData = InstDB::EncodingData::baseRM_SImm10[encodingIndex];
+
+      if (isign4 == ENC_OPS2(Reg, Mem)) {
+        const Mem& m = o1.as<Mem>();
+        rmRel = &m;
+
+        uint32_t x;
+        if (!checkGpType(o0, opData.rType, &x))
+          goto InvalidInstruction;
+
+        if (!checkGpId(o0, opData.rHiId))
+          goto InvalidPhysId;
+
+        if (m.hasBaseReg() && !m.hasIndex()) {
+          if (m.isOffset64Bit())
+            goto InvalidDisplacement;
+
+          int32_t offset32 = m.offsetLo32() >> opData.immShift;
+          if (Support::shl(offset32, opData.immShift) != m.offsetLo32()) // Offset must be a multiple of 1 << immShift.
+            goto InvalidDisplacement;
+
+          if (!Support::isInt10(offset32))
+            goto InvalidDisplacement;
+
+          if (m.isPostIndex()) // Post-index addressing is rejected for this encoding.
+            goto InvalidAddress;
+
+          // Offset has 10 bits, sign is stored in the 10th bit.
+          offset32 &= 0x3FF;
+
+          opcode.reset(opData.opcode());
+          opcode.xorImm(m.isPreIndex(), 11);
+          opcode.xorImm(x, opData.xOffset);
+          opcode.addImm(offset32 >> 9, 22); // Top (10th) bit of the masked offset goes to bit 22.
+          opcode.addImm(offset32, 12);
+          opcode.addReg(o0, 0);
+          goto EmitOp_MemBase_Rn5;
+        }
+
+        goto InvalidAddress; // Addresses without a base register, or with an index, are not supported here.
+      }
+
+      break; // Operand signature not handled here; leave the switch.
+    }
+
+    case InstDB::kEncodingBaseAtomicOp: { // (Reg, Reg, Mem) form: both registers must share one signature.
+      const InstDB::EncodingData::BaseAtomicOp& opData = InstDB::EncodingData::baseAtomicOp[encodingIndex];
+
+      if (isign4 == ENC_OPS3(Reg, Reg, Mem)) {
+        const Mem& m = o2.as<Mem>();
+        uint32_t x;
+
+        if (!checkGpType(o0, opData.rType, &x) || !checkSignature(o0, o1))
+          goto InvalidInstruction;
+
+        if (!checkGpId(o0, o1, kZR))
+          goto InvalidPhysId; // A bad register id is reported as InvalidPhysId, like every other checkGpId failure.
+
+        opcode.reset(opData.opcode());
+        opcode.addImm(x, opData.xOffset);
+        opcode.addReg(o0, 16); // o0 goes to the field at bit 16, o1 to the field at bit 0.
+        opcode.addReg(o1, 0);
+
+        rmRel = &m; // The memory operand is handed over through rmRel.
+        goto EmitOp_MemBaseNoImm_Rn5;
+      }
+
+      break; // Operand signature not handled here; leave the switch.
+    }
+
+    case InstDB::kEncodingBaseAtomicSt: { // (Reg, Mem) form; the register field at bit 0 is always Gp::kIdZr.
+      const InstDB::EncodingData::BaseAtomicSt& opData = InstDB::EncodingData::baseAtomicSt[encodingIndex];
+
+      if (isign4 == ENC_OPS2(Reg, Mem)) {
+        const Mem& m = o1.as<Mem>();
+        uint32_t x;
+
+        if (!checkGpType(o0, opData.rType, &x))
+          goto InvalidInstruction;
+
+        if (!checkGpId(o0, kZR))
+          goto InvalidPhysId;
+
+        opcode.reset(opData.opcode());
+        opcode.addImm(x, opData.xOffset);
+        opcode.addReg(o0, 16);
+        opcode.addReg(Gp::kIdZr, 0); // No second register operand: the field is filled with Gp::kIdZr.
+
+        rmRel = &m; // The memory operand is handed over through rmRel.
+        goto EmitOp_MemBaseNoImm_Rn5;
+      }
+
+      break; // Operand signature not handled here; leave the switch.
+    }
+
+    case InstDB::kEncodingBaseAtomicCasp: { // Four registers (o0..o3) plus a memory operand taken from opExt.
+      const InstDB::EncodingData::BaseAtomicCasp& opData = InstDB::EncodingData::baseAtomicCasp[encodingIndex];
+      const Operand_& o4 = opExt[EmitterUtils::kOp4]; // The fifth operand comes from the extended operand array.
+
+      if (isign4 == ENC_OPS4(Reg, Reg, Reg, Reg) && o4.isMem()) {
+        const Mem& m = o4.as<Mem>();
+        uint32_t x;
+
+        if (!checkGpType(o0, opData.rType, &x))
+          goto InvalidInstruction;
+
+        if (!checkSignature(o0, o1, o2, o3))
+          goto InvalidInstruction;
+
+        if (!checkEven(o0, o2) || !checkGpId(o0, o2, kZR)) // NOTE(review): presumably requires even ids for o0 and o2 - confirm in checkEven.
+          goto InvalidPhysId;
+
+        if (!checkConsecutive(o0, o1) || !checkConsecutive(o2, o3)) // NOTE(review): presumably o1 == o0 + 1 and o3 == o2 + 1 - confirm.
+          goto InvalidPhysId;
+
+        opcode.reset(opData.opcode());
+        opcode.addImm(x, opData.xOffset);
+        opcode.addReg(o0, 16); // Only the first register of each pair (o0, o2) is encoded.
+        opcode.addReg(o2, 0);
+
+        rmRel = &m; // The memory operand is handed over through rmRel.
+        goto EmitOp_MemBaseNoImm_Rn5;
+      }
+
+      break; // Operand signature not handled here; leave the switch.
+    }
+
+    // ------------------------------------------------------------------------
+    // [FSimd - Instructions]
+    // ------------------------------------------------------------------------
+
+    case InstDB::kEncodingFSimdSV: { // (Reg, Reg) form: scalar destination without element type, vector source.
+      const InstDB::EncodingData::FSimdSV& opData = InstDB::EncodingData::fSimdSV[encodingIndex];
+
+      if (isign4 == ENC_OPS2(Reg, Reg)) {
+        uint32_t q = diff(o1.as<Reg>().type(), RegType::kARM_VecD); // Source register type relative to kARM_VecD; only 0 or 1 is accepted.
+        if (q > 1)
+          goto InvalidInstruction;
+
+        if (o0.as<Vec>().hasElementType())
+          goto InvalidInstruction;
+
+        // This operation is only defined for:
+        //   hD, vS.{4|8}h (16-bit)
+        //   sD, vS.4s     (32-bit)
+        uint32_t sz = diff(o0.as<Reg>().type(), RegType::kARM_VecH);
+        uint32_t elementSz = o1.as<Vec>().elementType() - Vec::kElementTypeH;
+
+        // Size greater than 1 means 64-bit elements, not supported.
+        if ((sz | elementSz) > 1 || sz != elementSz)
+          goto InvalidInstruction;
+
+        // Size 1 (32-bit float) requires at least 4 elements.
+        if (sz && !q)
+          goto InvalidInstruction;
+
+        // Bit flipping according to sz.
+        static const uint32_t szBits[] = { B(29), 0 };
+
+        opcode.reset(opData.opcode << 10);
+        opcode ^= szBits[sz]; // sz == 0 toggles B(29); sz == 1 leaves the opcode unchanged.
+        opcode.addImm(q, 30);
+        goto EmitOp_Rd0_Rn5;
+      }
+
+      break; // Operand signature not handled here; leave the switch.
+    }
+
+    case InstDB::kEncodingFSimdVV: {
+      const InstDB::EncodingData::FSimdVV& opData = InstDB::EncodingData::fSimdVV[encodingIndex];
+
+      if (isign4 == ENC_OPS2(Reg, Reg)) {
+        if (!matchSignature(o0, o1, instFlags))
+          goto InvalidInstruction;
+
+        if (!pickFpOpcode(o0.as<Vec>(), opData.scalarOp(), opData.scalarHf(), opData.vectorOp(), opData.vectorHf(), &opcode))
+          goto InvalidInstruction;
+
+        goto EmitOp_Rd0_Rn5;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingFSimdVVV: {
+      const InstDB::EncodingData::FSimdVVV& opData = InstDB::EncodingData::fSimdVVV[encodingIndex];
+
+      if (isign4 == ENC_OPS3(Reg, Reg, Reg)) {
+        if (!matchSignature(o0, o1, o2, instFlags))
+          goto InvalidInstruction;
+
+        if (!pickFpOpcode(o0.as<Vec>(), opData.scalarOp(), opData.scalarHf(), opData.vectorOp(), opData.vectorHf(), &opcode))
+          goto InvalidInstruction;
+
+        goto EmitOp_Rd0_Rn5_Rm16;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingFSimdVVVe: {
+      const InstDB::EncodingData::FSimdVVVe& opData = InstDB::EncodingData::fSimdVVVe[encodingIndex];
+
+      if (isign4 == ENC_OPS3(Reg, Reg, Reg)) {
+        if (!o2.as<Vec>().hasElementIndex()) {
+          if (!matchSignature(o0, o1, o2, instFlags))
+            goto InvalidInstruction;
+
+          if (!pickFpOpcode(o0.as<Vec>(), opData.scalarOp(), opData.scalarHf(), opData.vectorOp(), opData.vectorHf(), &opcode))
+            goto InvalidInstruction;
+
+          goto EmitOp_Rd0_Rn5_Rm16;
+        }
+        else {
+          if (!matchSignature(o0, o1, instFlags))
+            goto InvalidInstruction;
+
+          uint32_t q = o1.as<Reg>().isVecQ();
+          uint32_t sz;
+
+          if (!pickFpOpcode(o0.as<Vec>(), opData.elementScalarOp(), InstDB::kHF_D, opData.elementVectorOp(), InstDB::kHF_D, &opcode, &sz))
+            goto InvalidInstruction;
+
+          if (sz == 0 && o2.as<Reg>().id() > 15)
+            goto InvalidPhysId;
+
+          uint32_t elementIndex = o2.as<Vec>().elementIndex();
+          if (elementIndex > (7u >> sz))
+            goto InvalidElementIndex;
+
+          uint32_t hlm = elementIndex << sz;
+          opcode.addImm(q, 30);
+          opcode.addImm(hlm & 3u, 20);
+          opcode.addImm(hlm >> 2, 11);
+          goto EmitOp_Rd0_Rn5_Rm16;
+        }
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingFSimdVVVV: {
+      const InstDB::EncodingData::FSimdVVVV& opData = InstDB::EncodingData::fSimdVVVV[encodingIndex];
+
+      if (isign4 == ENC_OPS4(Reg, Reg, Reg, Reg)) {
+        if (!matchSignature(o0, o1, o2, o3, instFlags))
+          goto InvalidInstruction;
+
+        if (!pickFpOpcode(o0.as<Vec>(), opData.scalarOp(), opData.scalarHf(), opData.vectorOp(), opData.vectorHf(), &opcode))
+          goto InvalidInstruction;
+
+        goto EmitOp_Rd0_Rn5_Rm16_Ra10;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingSimdFcadd: {
+      const InstDB::EncodingData::SimdFcadd& opData = InstDB::EncodingData::simdFcadd[encodingIndex];
+
+      if (isign4 == ENC_OPS4(Reg, Reg, Reg, Imm)) {
+        if (!checkSignature(o0, o1, o2) || o0.as<Vec>().hasElementIndex())
+          goto InvalidInstruction;
+
+        uint32_t q = diff(o0.as<Reg>().type(), RegType::kARM_VecD);
+        if (q > 1)
+          goto InvalidInstruction;
+
+        uint32_t sz = o0.as<Vec>().elementType() - Vec::kElementTypeB;
+        if (sz == 0 || sz > 3)
+          goto InvalidInstruction;
+
+        // 0 <- 90deg.
+        // 1 <- 270deg.
+        uint32_t rot = 0;
+        if (o3.as<Imm>().value() == 270)
+          rot = 1;
+        else if (o3.as<Imm>().value() != 90)
+          goto InvalidImmediate;
+
+        opcode.reset(opData.opcode());
+        opcode.addImm(q, 30);
+        opcode.addImm(sz, 22);
+        opcode.addImm(rot, 12);
+        goto EmitOp_Rd0_Rn5_Rm16;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingSimdFccmpFccmpe: {
+      const InstDB::EncodingData::SimdFccmpFccmpe& opData = InstDB::EncodingData::simdFccmpFccmpe[encodingIndex];
+
+      if (isign4 == ENC_OPS4(Reg, Reg, Imm, Imm)) {
+        uint32_t sz = diff(o0.as<Reg>().type(), RegType::kARM_VecH);
+        if (sz > 2)
+          goto InvalidInstruction;
+
+        if (!checkSignature(o0, o1) || o0.as<Vec>().hasElementType())
+          goto InvalidInstruction;
+
+        uint64_t nzcv = o2.as<Imm>().valueAs<uint64_t>();
+        uint64_t cond = o3.as<Imm>().valueAs<uint64_t>();
+
+        if ((nzcv | cond) > 0xFu)
+          goto InvalidImmediate;
+
+        uint32_t type = (sz - 1) & 0x3u;
+
+        opcode.reset(opData.opcode());
+        opcode.addImm(type, 22);
+        opcode.addImm(condCodeToOpcodeCond(uint32_t(cond)), 12);
+        opcode.addImm(nzcv, 0);
+
+        goto EmitOp_Rn5_Rm16;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingSimdFcm: {
+      const InstDB::EncodingData::SimdFcm& opData = InstDB::EncodingData::simdFcm[encodingIndex];
+
+      if (isign4 == ENC_OPS3(Reg, Reg, Reg) && opData.hasRegisterOp()) {
+        if (!matchSignature(o0, o1, o2, instFlags))
+          goto InvalidInstruction;
+
+        if (!pickFpOpcode(o0.as<Vec>(), opData.registerScalarOp(), opData.registerScalarHf(), opData.registerVectorOp(), opData.registerVectorHf(), &opcode))
+          goto InvalidInstruction;
+
+        goto EmitOp_Rd0_Rn5_Rm16;
+      }
+
+      if (isign4 == ENC_OPS3(Reg, Reg, Imm) && opData.hasZeroOp()) {
+        if (!checkSignature(o0, o1))
+          goto InvalidInstruction;
+
+        if (o2.as<Imm>().value() != 0 || o2.as<Imm>().predicate() != 0)
+          goto InvalidImmediate;
+
+        if (!pickFpOpcode(o0.as<Vec>(), opData.zeroScalarOp(), InstDB::kHF_B, opData.zeroVectorOp(), InstDB::kHF_B, &opcode))
+          goto InvalidInstruction;
+
+        goto EmitOp_Rd0_Rn5;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingSimdFcmla: {
+      const InstDB::EncodingData::SimdFcmla& opData = InstDB::EncodingData::simdFcmla[encodingIndex];
+
+      if (isign4 == ENC_OPS4(Reg, Reg, Reg, Imm)) {
+        if (!checkSignature(o0, o1))
+          goto InvalidInstruction;
+
+        uint32_t q = diff(o0.as<Reg>().type(), RegType::kARM_VecD);
+        if (q > 1)
+          goto InvalidInstruction;
+
+        uint32_t sz = o0.as<Vec>().elementType() - Vec::kElementTypeB;
+        if (sz == 0 || sz > 3)
+          goto InvalidInstruction;
+
+        uint32_t rot = 0;
+        switch (o3.as<Imm>().value()) {
+          case 0  : rot = 0; break;
+          case 90 : rot = 1; break;
+          case 180: rot = 2; break;
+          case 270: rot = 3; break;
+          default:
+            goto InvalidImmediate;
+        }
+
+        if (!o2.as<Vec>().hasElementIndex()) {
+          if (!checkSignature(o1, o2))
+            goto InvalidInstruction;
+
+          opcode.reset(opData.regularOp());
+          opcode.addImm(q, 30);
+          opcode.addImm(sz, 22);
+          opcode.addImm(rot, 11);
+          goto EmitOp_Rd0_Rn5_Rm16;
+        }
+        else {
+          if (o0.as<Vec>().elementType() != o2.as<Vec>().elementType())
+            goto InvalidInstruction;
+
+          // Only allowed vectors are: 4H, 8H, and 4S.
+          if (!(sz == 1 || (q == 1 && sz == 2)))
+            goto InvalidInstruction;
+
+          // Element index ranges:
+          //   4H - ElementIndex[0..1] (index 2..3 is UNDEFINED).
+          //   8H - ElementIndex[0..3].
+          //   4S - ElementIndex[0..1].
+          uint32_t elementIndex = o2.as<Vec>().elementIndex();
+          uint32_t hlFieldShift = sz == 1 ? 0u : 1u;
+          uint32_t maxElementIndex = q == 1 && sz == 1 ? 3u : 1u;
+
+          if (elementIndex > maxElementIndex)
+            goto InvalidElementIndex;
+
+          uint32_t hl = elementIndex << hlFieldShift;
+
+          opcode.reset(opData.elementOp());
+          opcode.addImm(q, 30);
+          opcode.addImm(sz, 22);
+          opcode.addImm(hl & 1u, 21); // L field.
+          opcode.addImm(hl >> 1, 11); // H field.
+          opcode.addImm(rot, 13);
+          goto EmitOp_Rd0_Rn5_Rm16;
+        }
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingSimdFcmpFcmpe: {
+      const InstDB::EncodingData::SimdFcmpFcmpe& opData = InstDB::EncodingData::simdFcmpFcmpe[encodingIndex];
+
+      uint32_t sz = diff(o0.as<Reg>().type(), RegType::kARM_VecH);
+      uint32_t type = (sz - 1) & 0x3u;
+
+      if (sz > 2)
+        goto InvalidInstruction;
+
+      if (o0.as<Vec>().hasElementType())
+        goto InvalidInstruction;
+
+      opcode.reset(opData.opcode());
+      opcode.addImm(type, 22);
+
+      if (isign4 == ENC_OPS2(Reg, Reg)) {
+        if (!checkSignature(o0, o1))
+          goto InvalidInstruction;
+
+        goto EmitOp_Rn5_Rm16;
+      }
+
+      if (isign4 == ENC_OPS2(Reg, Imm)) {
+        if (o1.as<Imm>().value() != 0 || o1.as<Imm>().predicate() != 0)
+          goto InvalidInstruction;
+
+        opcode |= B(3);
+        goto EmitOp_Rn5;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingSimdFcsel: {
+      if (isign4 == ENC_OPS4(Reg, Reg, Reg, Imm)) {
+        if (!checkSignature(o0, o1, o2))
+          goto InvalidInstruction;
+
+        uint32_t sz = diff(o0.as<Reg>().type(), RegType::kARM_VecH);
+        uint32_t type = (sz - 1) & 0x3u;
+
+        if (sz > 2 || o0.as<Vec>().hasElementType())
+          goto InvalidInstruction;
+
+        uint64_t cond = o3.as<Imm>().valueAs<uint64_t>();
+        if (cond > 0xFu)
+          goto InvalidImmediate;
+
+        opcode.reset(0b00011110001000000000110000000000);
+        opcode.addImm(type, 22);
+        opcode.addImm(condCodeToOpcodeCond(uint32_t(cond)), 12);
+        goto EmitOp_Rd0_Rn5_Rm16;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingSimdFcvt: {
+      if (isign4 == ENC_OPS2(Reg, Reg)) {
+        uint32_t dstSz = diff(o0.as<Reg>().type(), RegType::kARM_VecH);
+        uint32_t srcSz = diff(o1.as<Reg>().type(), RegType::kARM_VecH);
+
+        if ((dstSz | srcSz) > 3)
+          goto InvalidInstruction;
+
+        if (o0.as<Vec>().hasElementType() || o1.as<Vec>().hasElementType())
+          goto InvalidInstruction;
+
+        // Table that provides 'type' and 'opc' according to the dst/src combination.
+        static const uint8_t table[] = {
+          0xFFu, // H <- H (Invalid).
+          0x03u, // H <- S (type=00 opc=11).
+          0x13u, // H <- D (type=01 opc=11).
+          0xFFu, // H <- Q (Invalid).
+          0x30u, // S <- H (type=11 opc=00).
+          0xFFu, // S <- S (Invalid).
+          0x10u, // S <- D (type=01 opc=00).
+          0xFFu, // S <- Q (Invalid).
+          0x31u, // D <- H (type=11 opc=01).
+          0x01u, // D <- S (type=00 opc=01).
+          0xFFu, // D <- D (Invalid).
+          0xFFu, // D <- Q (Invalid).
+          0xFFu, // Q <- H (Invalid).
+          0xFFu, // Q <- S (Invalid).
+          0xFFu, // Q <- D (Invalid).
+          0xFFu  // Q <- Q (Invalid).
+        };
+
+        uint32_t typeOpc = table[(dstSz << 2) | srcSz];
+        opcode.reset(0b0001111000100010010000 << 10);
+        opcode.addImm(typeOpc >> 4, 22);
+        opcode.addImm(typeOpc & 15, 15);
+        goto EmitOp_Rd0_Rn5;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingSimdFcvtLN: {
+      const InstDB::EncodingData::SimdFcvtLN& opData = InstDB::EncodingData::simdFcvtLN[encodingIndex];
+
+      if (isign4 == ENC_OPS2(Reg, Reg)) {
+        // Scalar form - only FCVTXN.
+        if (o0.as<Vec>().isVecS() && o1.as<Vec>().isVecD()) {
+          if (!opData.hasScalar())
+            goto InvalidInstruction;
+
+          if (o0.as<Vec>().hasElementType() || o1.as<Vec>().hasElementType())
+            goto InvalidInstruction;
+
+          opcode.reset(opData.scalarOp());
+          opcode |= B(22); // sz bit must be 1, the only supported combination of FCVTXN.
+          goto EmitOp_Rd0_Rn5;
+        }
+
+        opcode.reset(opData.vectorOp());
+
+        const Vec& rL = (instFlags & InstDB::kInstFlagLong) ? o0.as<Vec>() : o1.as<Vec>();
+        const Vec& rN = (instFlags & InstDB::kInstFlagLong) ? o1.as<Vec>() : o0.as<Vec>();
+
+        uint32_t q = diff(rN.type(), RegType::kARM_VecD);
+        if (uint32_t(opcode.hasQ()) != q)
+          goto InvalidInstruction;
+
+        if (rL.isVecS4() && rN.elementType() == Vec::kElementTypeH && !opData.isCvtxn()) {
+          goto EmitOp_Rd0_Rn5;
+        }
+
+        if (rL.isVecD2() && rN.elementType() == Vec::kElementTypeS) {
+          opcode |= B(22);
+          goto EmitOp_Rd0_Rn5;
+        }
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingSimdFcvtSV: {
+      const InstDB::EncodingData::SimdFcvtSV& opData = InstDB::EncodingData::simdFcvtSV[encodingIndex];
+
+      // So we can support both IntToFloat and FloatToInt conversions.
+      const Operand_& oGp = opData.isFloatToInt() ? o0 : o1;
+      const Operand_& oVec = opData.isFloatToInt() ? o1 : o0;
+
+      if (isign4 == ENC_OPS2(Reg, Reg)) {
+        if (oGp.as<Reg>().isGp() && oVec.as<Reg>().isVec()) {
+          uint32_t x = oGp.as<Reg>().isGpX();
+          uint32_t type = diff(oVec.as<Reg>().type(), RegType::kARM_VecH);
+
+          if (type > 2u)
+            goto InvalidInstruction;
+
+          type = (type - 1u) & 0x3;
+          opcode.reset(opData.generalOp());
+          opcode.addImm(type, 22);
+          opcode.addImm(x, 31);
+          goto EmitOp_Rd0_Rn5;
+        }
+
+        if (o0.as<Reg>().isVec() && o1.as<Reg>().isVec()) {
+          if (!checkSignature(o0, o1))
+            goto InvalidInstruction;
+
+          if (!pickFpOpcode(o0.as<Vec>(), opData.scalarIntOp(), InstDB::kHF_B, opData.vectorIntOp(), InstDB::kHF_B, &opcode))
+            goto InvalidInstruction;
+
+          goto EmitOp_Rd0_Rn5;
+        }
+      }
+
+      if (isign4 == ENC_OPS3(Reg, Reg, Imm) && opData.isFixedPoint()) {
+        if (o2.as<Imm>().valueAs<uint64_t>() >= 64)
+          goto InvalidInstruction;
+
+        uint32_t scale = o2.as<Imm>().valueAs<uint32_t>();
+        if (scale == 0)
+          goto InvalidInstruction;
+
+        if (oGp.as<Reg>().isGp() && oVec.as<Reg>().isVec()) {
+          uint32_t x = oGp.as<Reg>().isGpX();
+          uint32_t type = diff(oVec.as<Reg>().type(), RegType::kARM_VecH);
+
+          uint32_t scaleLimit = 32u << x;
+          if (scale > scaleLimit)
+            goto InvalidInstruction;
+
+          type = (type - 1u) & 0x3;
+          opcode.reset(opData.generalOp() ^ B(21));
+          opcode.addImm(type, 22);
+          opcode.addImm(x, 31);
+          opcode.addImm(64u - scale, 10);
+          goto EmitOp_Rd0_Rn5;
+        }
+
+        if (o0.as<Reg>().isVec() && o1.as<Reg>().isVec()) {
+          if (!checkSignature(o0, o1))
+            goto InvalidInstruction;
+
+          uint32_t sz;
+          if (!pickFpOpcode(o0.as<Vec>(), opData.scalarFpOp(), InstDB::kHF_0, opData.vectorFpOp(), InstDB::kHF_0, &opcode, &sz))
+            goto InvalidInstruction;
+
+          uint32_t scaleLimit = 16u << sz;
+          if (scale > scaleLimit)
+            goto InvalidInstruction;
+
+          uint32_t imm = Support::neg(scale) & Support::lsbMask<uint32_t>(sz + 4 + 1);
+          opcode.addImm(imm, 16);
+          goto EmitOp_Rd0_Rn5;
+        }
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingSimdFmlal: {
+      const InstDB::EncodingData::SimdFmlal& opData = InstDB::EncodingData::simdFmlal[encodingIndex];
+
+      if (isign4 == ENC_OPS3(Reg, Reg, Reg)) {
+        uint32_t q = diff(o0.as<Reg>().type(), RegType::kARM_VecD);
+        uint32_t qIsOptional = opData.optionalQ();
+
+        if (qIsOptional) {
+          // This instruction works with either 64-bit or 128-bit registers,
+          // encoded by Q bit.
+          if (q > 1)
+            goto InvalidInstruction;
+        }
+        else {
+          // This instruction requires 128-bit vector registers.
+          if (q != 1)
+            goto InvalidInstruction;
+
+          // The instruction is either B (bottom) or T (top), which is part of
+          // the opcode, which uses Q bit, so we have to clear it explicitly.
+          q = 0;
+        }
+
+        if (uint32_t(o0.as<Reg>().type()) != uint32_t(o1.as<Reg>().type()) + qIsOptional ||
+            o0.as<Vec>().elementType() != opData.tA ||
+            o1.as<Vec>().elementType() != opData.tB)
+          goto InvalidInstruction;
+
+        if (!o2.as<Vec>().hasElementIndex()) {
+          if (!checkSignature(o1, o2))
+            goto InvalidInstruction;
+
+          opcode.reset(opData.vectorOp());
+          opcode.addImm(q, 30);
+          goto EmitOp_Rd0_Rn5_Rm16;
+        }
+        else {
+          if (o2.as<Vec>().elementType() != opData.tElement)
+            goto InvalidInstruction;
+
+          if (o2.as<Reg>().id() > 15)
+            goto InvalidPhysId;
+
+          uint32_t elementIndex = o2.as<Vec>().elementIndex();
+          if (elementIndex > 7u)
+            goto InvalidElementIndex;
+
+          opcode.reset(opData.elementOp());
+          opcode.addImm(q, 30);
+          opcode.addImm(elementIndex & 3u, 20);
+          opcode.addImm(elementIndex >> 2, 11);
+          goto EmitOp_Rd0_Rn5_Rm16;
+        }
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingSimdFmov: {
+      if (isign4 == ENC_OPS2(Reg, Reg)) {
+        // FMOV Gp <-> Vec opcode:
+        opcode.reset(0b00011110001001100000000000000000);
+
+        if (o0.as<Reg>().isGp() && o1.as<Reg>().isVec()) {
+          // FMOV Wd, Hn      (sf=0 type=11 rmode=00 op=110)
+          // FMOV Xd, Hn      (sf=1 type=11 rmode=00 op=110)
+          // FMOV Wd, Sn      (sf=0 type=00 rmode=00 op=110)
+          // FMOV Xd, Dn      (sf=1 type=01 rmode=00 op=110)
+          // FMOV Xd, Vn.d[1] (sf=1 type=10 rmode=01 op=110)
+          uint32_t x = o0.as<Reg>().isGpX();
+          uint32_t sz = diff(o1.as<Reg>().type(), RegType::kARM_VecH);
+
+          uint32_t type = (sz - 1) & 0x3u;
+          uint32_t rModeOp = 0b00110;
+
+          if (o1.as<Vec>().hasElementIndex()) {
+            // Special case.
+            if (!x || !o1.as<Vec>().isVecD2() || o1.as<Vec>().elementIndex() != 1)
+              goto InvalidInstruction;
+            type = 0b10;
+            rModeOp = 0b01110;
+          }
+          else {
+            // Must be scalar.
+            if (sz > 2)
+              goto InvalidInstruction;
+
+            if (o1.as<Vec>().hasElementType())
+              goto InvalidInstruction;
+
+            if (o1.as<Vec>().isVecS() && x)
+              goto InvalidInstruction;
+
+            if (o1.as<Vec>().isVecD() && !x)
+              goto InvalidInstruction;
+          }
+
+          opcode.addImm(x, 31);
+          opcode.addImm(type, 22);
+          opcode.addImm(rModeOp, 16);
+          goto EmitOp_Rd0_Rn5;
+        }
+
+        if (o0.as<Reg>().isVec() && o1.as<Reg>().isGp()) {
+          // FMOV Hd, Wn      (sf=0 type=11 rmode=00 op=111)
+          // FMOV Hd, Xn      (sf=1 type=11 rmode=00 op=111)
+          // FMOV Sd, Wn      (sf=0 type=00 rmode=00 op=111)
+          // FMOV Dd, Xn      (sf=1 type=01 rmode=00 op=111)
+          // FMOV Vd.d[1], Xn (sf=1 type=10 rmode=01 op=111)
+          uint32_t x = o1.as<Reg>().isGpX();
+          uint32_t sz = diff(o0.as<Reg>().type(), RegType::kARM_VecH);
+
+          uint32_t type = (sz - 1) & 0x3u;
+          uint32_t rModeOp = 0b00111;
+
+          if (o0.as<Vec>().hasElementIndex()) {
+            // Special case.
+            if (!x || !o0.as<Vec>().isVecD2() || o0.as<Vec>().elementIndex() != 1)
+              goto InvalidInstruction;
+            type = 0b10;
+            rModeOp = 0b01111;
+          }
+          else {
+            // Must be scalar.
+            if (sz > 2)
+              goto InvalidInstruction;
+
+            if (o0.as<Vec>().hasElementType())
+              goto InvalidInstruction;
+
+            if (o0.as<Vec>().isVecS() && x)
+              goto InvalidInstruction;
+
+            if (o0.as<Vec>().isVecD() && !x)
+              goto InvalidInstruction;
+          }
+
+          opcode.addImm(x, 31);
+          opcode.addImm(type, 22);
+          opcode.addImm(rModeOp, 16);
+          goto EmitOp_Rd0_Rn5;
+        }
+
+        if (checkSignature(o0, o1)) {
+          uint32_t sz = diff(o0.as<Reg>().type(), RegType::kARM_VecH);
+          if (sz > 2)
+            goto InvalidInstruction;
+
+          if (o0.as<Vec>().hasElementType())
+            goto InvalidInstruction;
+
+          uint32_t type = (sz - 1) & 0x3;
+          opcode.reset(0b00011110001000000100000000000000);
+          opcode.addImm(type, 22);
+          goto EmitOp_Rd0_Rn5;
+        }
+      }
+
+      if (isign4 == ENC_OPS2(Reg, Imm)) {
+        if (o0.as<Reg>().isVec()) {
+          double fpValue;
+          if (o1.as<Imm>().isDouble())
+            fpValue = o1.as<Imm>().valueAs<double>();
+          else if (o1.as<Imm>().isInt32())
+            fpValue = o1.as<Imm>().valueAs<int32_t>();
+          else
+            goto InvalidImmediate;
+
+          if (!Utils::isFP64Imm8(fpValue))
+            goto InvalidImmediate;
+
+          uint32_t imm8 = Utils::encodeFP64ToImm8(fpValue);
+          if (!o0.as<Vec>().hasElementType()) {
+            // FMOV (scalar, immediate).
+            uint32_t sz = diff(o0.as<Reg>().type(), RegType::kARM_VecH);
+            uint32_t type = (sz - 1u) & 0x3u;
+
+            if (sz > 2)
+              goto InvalidInstruction;
+
+            opcode.reset(0b00011110001000000001000000000000);
+            opcode.addImm(type, 22);
+            opcode.addImm(imm8, 13);
+            goto EmitOp_Rd0;
+          }
+          else {
+            uint32_t q = diff(o0.as<Vec>().type(), RegType::kARM_VecD);
+            uint32_t sz = o0.as<Vec>().elementType() - Vec::kElementTypeH;
+
+            if (q > 1 || sz > 2)
+              goto InvalidInstruction;
+
+            static const uint32_t szBits[3] = { B(11), B(0), B(29) };
+            opcode.reset(0b00001111000000001111010000000000);
+            opcode ^= szBits[sz];
+            opcode.addImm(q, 30);
+            opcode.addImm(imm8 >> 5, 16);
+            opcode.addImm(imm8 & 31, 5);
+            goto EmitOp_Rd0;
+          }
+        }
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingFSimdPair: {
+      const InstDB::EncodingData::FSimdPair& opData = InstDB::EncodingData::fSimdPair[encodingIndex];
+
+      if (isign4 == ENC_OPS2(Reg, Reg)) {
+        // This operation is only defined for:
+        //   hD, vS.2h (16-bit)
+        //   sD, vS.2s (32-bit)
+        //   dD, vS.2d (64-bit)
+        uint32_t sz = diff(o0.as<Reg>().type(), RegType::kARM_VecH);
+        if (sz > 2)
+          goto InvalidInstruction;
+
+        static const uint32_t szSignatures[3] = {
+          VecS::kSignature | (Vec::kSignatureElementH),
+          VecD::kSignature | (Vec::kSignatureElementS),
+          VecV::kSignature | (Vec::kSignatureElementD)
+        };
+
+        if (o1.signature() != szSignatures[sz])
+          goto InvalidInstruction;
+
+        static const uint32_t szBits[] = { B(29), 0, B(22) };
+        opcode.reset(opData.scalarOp());
+        opcode ^= szBits[sz];
+        goto EmitOp_Rd0_Rn5;
+      }
+
+      if (isign4 == ENC_OPS3(Reg, Reg, Reg)) {
+        if (!checkSignature(o0, o1, o2))
+          goto InvalidInstruction;
+
+        uint32_t q = diff(o0.as<Reg>().type(), RegType::kARM_VecD);
+        if (q > 1)
+          goto InvalidInstruction;
+
+        uint32_t sz = o0.as<Vec>().elementType() - Vec::kElementTypeH;
+        if (sz > 2)
+          goto InvalidInstruction;
+
+        static const uint32_t szBits[3] = { B(22) | B(21) | B(15) | B(14), 0, B(22) };
+        opcode.reset(opData.vectorOp());
+        opcode ^= szBits[sz];
+        opcode.addImm(q, 30);
+        goto EmitOp_Rd0_Rn5_Rm16;
+      }
+
+      break;
+    }
+
+    // ------------------------------------------------------------------------
+    // [ISimd - Instructions]
+    // ------------------------------------------------------------------------
+
+    case InstDB::kEncodingISimdSV: {
+      const InstDB::EncodingData::ISimdSV& opData = InstDB::EncodingData::iSimdSV[encodingIndex];
+
+      if (isign4 == ENC_OPS2(Reg, Reg)) {
+        // The first destination operand is scalar, which matches element-type of source vectors.
+        uint32_t L = (instFlags & InstDB::kInstFlagLong) != 0;
+        if (diff(o0.as<Vec>().type(), RegType::kARM_VecB) != o1.as<Vec>().elementType() - Vec::kElementTypeB + L)
+          goto InvalidInstruction;
+
+        SizeOp sizeOp = armElementTypeToSizeOp(opData.vecOpType, o1.as<Reg>().type(), o1.as<Vec>().elementType());
+        if (!sizeOp.isValid())
+          goto InvalidInstruction;
+
+        opcode.reset(opData.opcode());
+        opcode.addImm(sizeOp.q(), 30);
+        opcode.addImm(sizeOp.size(), 22);
+        goto EmitOp_Rd0_Rn5;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingISimdVV: {
+      const InstDB::EncodingData::ISimdVV& opData = InstDB::EncodingData::iSimdVV[encodingIndex];
+
+      if (isign4 == ENC_OPS2(Reg, Reg)) {
+        const Operand_& sop = significantSimdOp(o0, o1, instFlags);
+        if (!matchSignature(o0, o1, instFlags))
+          goto InvalidInstruction;
+
+        SizeOp sizeOp = armElementTypeToSizeOp(opData.vecOpType, sop.as<Reg>().type(), sop.as<Vec>().elementType());
+        if (!sizeOp.isValid())
+          goto InvalidInstruction;
+
+        opcode.reset(opData.opcode());
+        opcode.addImm(sizeOp.qs(), 30);
+        opcode.addImm(sizeOp.scalar(), 28);
+        opcode.addImm(sizeOp.size(), 22);
+        goto EmitOp_Rd0_Rn5;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingISimdVVx: {
+      const InstDB::EncodingData::ISimdVVx& opData = InstDB::EncodingData::iSimdVVx[encodingIndex];
+
+      if (isign4 == ENC_OPS2(Reg, Reg)) {
+        if (o0.signature() != opData.op0Signature ||
+            o1.signature() != opData.op1Signature)
+          goto InvalidInstruction;
+
+        opcode.reset(opData.opcode());
+        goto EmitOp_Rd0_Rn5;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingISimdVVV: {
+      const InstDB::EncodingData::ISimdVVV& opData = InstDB::EncodingData::iSimdVVV[encodingIndex];
+
+      if (isign4 == ENC_OPS3(Reg, Reg, Reg)) {
+        const Operand_& sop = significantSimdOp(o0, o1, instFlags);
+        if (!matchSignature(o0, o1, o2, instFlags))
+          goto InvalidInstruction;
+
+        SizeOp sizeOp = armElementTypeToSizeOp(opData.vecOpType, sop.as<Reg>().type(), sop.as<Vec>().elementType());
+        if (!sizeOp.isValid())
+          goto InvalidInstruction;
+
+        opcode.reset(opData.opcode());
+        opcode.addImm(sizeOp.qs(), 30);
+        opcode.addImm(sizeOp.scalar(), 28);
+        opcode.addImm(sizeOp.size(), 22);
+        goto EmitOp_Rd0_Rn5_Rm16;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingISimdVVVx: {
+      const InstDB::EncodingData::ISimdVVVx& opData = InstDB::EncodingData::iSimdVVVx[encodingIndex];
+
+      if (isign4 == ENC_OPS3(Reg, Reg, Reg)) {
+        if (o0.signature() != opData.op0Signature ||
+            o1.signature() != opData.op1Signature ||
+            o2.signature() != opData.op2Signature)
+          goto InvalidInstruction;
+
+        opcode.reset(opData.opcode());
+        goto EmitOp_Rd0_Rn5_Rm16;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingISimdWWV: {
+      // Special case for wide add/sub [s|u][add|sub][w]{2}.
+      const InstDB::EncodingData::ISimdWWV& opData = InstDB::EncodingData::iSimdWWV[encodingIndex];
+
+      if (isign4 == ENC_OPS3(Reg, Reg, Reg)) {
+        SizeOp sizeOp = armElementTypeToSizeOp(opData.vecOpType, o2.as<Reg>().type(), o2.as<Vec>().elementType());
+        if (!sizeOp.isValid())
+          goto InvalidInstruction;
+
+        if (!checkSignature(o0, o1) || !o0.as<Reg>().isVecV() || o0.as<Vec>().elementType() != o2.as<Vec>().elementType() + 1)
+          goto InvalidInstruction;
+
+        opcode.reset(opData.opcode());
+        opcode.addImm(sizeOp.qs(), 30);
+        opcode.addImm(sizeOp.scalar(), 28);
+        opcode.addImm(sizeOp.size(), 22);
+        goto EmitOp_Rd0_Rn5_Rm16;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingISimdVVVe: {
+      const InstDB::EncodingData::ISimdVVVe& opData = InstDB::EncodingData::iSimdVVVe[encodingIndex];
+
+      if (isign4 == ENC_OPS3(Reg, Reg, Reg)) {
+        const Operand_& sop = significantSimdOp(o0, o1, instFlags);
+        if (!matchSignature(o0, o1, instFlags))
+          goto InvalidInstruction;
+
+        if (!o2.as<Vec>().hasElementIndex()) {
+          SizeOp sizeOp = armElementTypeToSizeOp(opData.regularVecType, sop.as<Reg>().type(), sop.as<Vec>().elementType());
+          if (!sizeOp.isValid())
+            goto InvalidInstruction;
+
+          if (!checkSignature(o1, o2))
+            goto InvalidInstruction;
+
+          opcode.reset(uint32_t(opData.regularOp) << 10);
+          opcode.addImm(sizeOp.qs(), 30);
+          opcode.addImm(sizeOp.scalar(), 28);
+          opcode.addImm(sizeOp.size(), 22);
+          goto EmitOp_Rd0_Rn5_Rm16;
+        }
+        else {
+          SizeOp sizeOp = armElementTypeToSizeOp(opData.elementVecType, sop.as<Reg>().type(), sop.as<Vec>().elementType());
+          if (!sizeOp.isValid())
+            goto InvalidInstruction;
+
+          uint32_t elementIndex = o2.as<Vec>().elementIndex();
+          LMHImm lmh;
+
+          if (!encodeLMH(sizeOp.size(), elementIndex, &lmh))
+            goto InvalidElementIndex;
+
+          if (o2.as<Reg>().id() > lmh.maxRmId)
+            goto InvalidPhysId;
+
+          opcode.reset(uint32_t(opData.elementOp) << 10);
+          opcode.addImm(sizeOp.q(), 30);
+          opcode.addImm(sizeOp.size(), 22);
+          opcode.addImm(lmh.lm, 20);
+          opcode.addImm(lmh.h, 11);
+          goto EmitOp_Rd0_Rn5_Rm16;
+        }
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingISimdVVVI: {
+      const InstDB::EncodingData::ISimdVVVI& opData = InstDB::EncodingData::iSimdVVVI[encodingIndex];
+
+      if (isign4 == ENC_OPS4(Reg, Reg, Reg, Imm)) {
+        const Operand_& sop = significantSimdOp(o0, o1, instFlags);
+        if (!matchSignature(o0, o1, o2, instFlags))
+          goto InvalidInstruction;
+
+        SizeOp sizeOp = armElementTypeToSizeOp(opData.vecOpType, sop.as<Reg>().type(), sop.as<Vec>().elementType());
+        if (!sizeOp.isValid())
+          goto InvalidInstruction;
+
+        uint64_t immValue = o3.as<Imm>().valueAs<uint64_t>();
+        uint32_t immSize = opData.immSize;
+
+        if (opData.imm64HasOneBitLess && !sizeOp.q())
+          immSize--;
+
+        uint32_t immMax = 1u << immSize;
+        if (immValue >= immMax)
+          goto InvalidImmediate;
+
+        opcode.reset(opData.opcode());
+        opcode.addImm(sizeOp.qs(), 30);
+        opcode.addImm(sizeOp.scalar(), 28);
+        opcode.addImm(sizeOp.size(), 22);
+        opcode.addImm(immValue, opData.immShift);
+        goto EmitOp_Rd0_Rn5_Rm16;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingISimdVVVV: {
+      const InstDB::EncodingData::ISimdVVVV& opData = InstDB::EncodingData::iSimdVVVV[encodingIndex];
+
+      if (isign4 == ENC_OPS4(Reg, Reg, Reg, Reg)) {
+        const Operand_& sop = significantSimdOp(o0, o1, instFlags);
+        if (!matchSignature(o0, o1, o2, o3, instFlags))
+          goto InvalidInstruction;
+
+        SizeOp sizeOp = armElementTypeToSizeOp(opData.vecOpType, sop.as<Reg>().type(), sop.as<Vec>().elementType());
+        if (!sizeOp.isValid())
+          goto InvalidInstruction;
+
+        opcode.reset(uint32_t(opData.opcode) << 10);
+        opcode.addImm(sizeOp.qs(), 30);
+        opcode.addImm(sizeOp.scalar(), 28);
+        opcode.addImm(sizeOp.size(), 22);
+        goto EmitOp_Rd0_Rn5_Rm16_Ra10;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingISimdVVVVx: {
+      const InstDB::EncodingData::ISimdVVVVx& opData = InstDB::EncodingData::iSimdVVVVx[encodingIndex];
+
+      if (isign4 == ENC_OPS4(Reg, Reg, Reg, Reg)) {
+        if (o0.signature() != opData.op0Signature ||
+            o1.signature() != opData.op1Signature ||
+            o2.signature() != opData.op2Signature ||
+            o3.signature() != opData.op3Signature)
+          goto InvalidInstruction;
+
+        opcode.reset(uint32_t(opData.opcode) << 10);
+        goto EmitOp_Rd0_Rn5_Rm16_Ra10;
+      }
+
+      break;
+    }
+
+
+    case InstDB::kEncodingISimdPair: {
+      const InstDB::EncodingData::ISimdPair& opData = InstDB::EncodingData::iSimdPair[encodingIndex];
+
+      if (isign4 == ENC_OPS2(Reg, Reg) && opData.opcode2) {
+        if (o0.as<Vec>().isVecD1() && o1.as<Vec>().isVecD2()) {
+          opcode.reset(uint32_t(opData.opcode2) << 10);
+          opcode.addImm(0x3, 22); // size.
+          goto EmitOp_Rd0_Rn5;
+        }
+      }
+
+      if (isign4 == ENC_OPS3(Reg, Reg, Reg)) {
+        if (!matchSignature(o0, o1, o2, instFlags))
+          goto InvalidInstruction;
+
+        SizeOp sizeOp = armElementTypeToSizeOp(opData.opType3, o0.as<Reg>().type(), o0.as<Vec>().elementType());
+        if (!sizeOp.isValid())
+          goto InvalidInstruction;
+
+        opcode.reset(uint32_t(opData.opcode3) << 10);
+        opcode.addImm(sizeOp.qs(), 30);
+        opcode.addImm(sizeOp.scalar(), 28);
+        opcode.addImm(sizeOp.size(), 22);
+        goto EmitOp_Rd0_Rn5_Rm16;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingSimdBicOrr: { // Two forms: (Reg, Reg, Reg) register form and (Reg, Imm[, Imm]) immediate form.
+      const InstDB::EncodingData::SimdBicOrr& opData = InstDB::EncodingData::simdBicOrr[encodingIndex];
+
+      if (isign4 == ENC_OPS3(Reg, Reg, Reg)) {
+        if (!matchSignature(o0, o1, o2, instFlags))
+          goto InvalidInstruction;
+
+        SizeOp sizeOp = armElementTypeToSizeOp(InstDB::kVO_V_B, o0.as<Reg>().type(), o0.as<Vec>().elementType());
+        if (!sizeOp.isValid())
+          goto InvalidInstruction;
+
+        opcode.reset(uint32_t(opData.registerOp) << 10);
+        opcode.addImm(sizeOp.q(), 30);
+        goto EmitOp_Rd0_Rn5_Rm16;
+      }
+
+      if (isign4 == ENC_OPS2(Reg, Imm) || isign4 == ENC_OPS3(Reg, Imm, Imm)) {
+        SizeOp sizeOp = armElementTypeToSizeOp(InstDB::kVO_V_HS, o0.as<Reg>().type(), o0.as<Vec>().elementType());
+        if (!sizeOp.isValid())
+          goto InvalidInstruction;
+
+        if (o1.as<Imm>().valueAs<uint64_t>() > 0xFFFFFFFFu) // Only the low 32 bits are used below.
+          goto InvalidImmediate;
+
+        uint32_t imm = o1.as<Imm>().valueAs<uint32_t>();
+        uint32_t shift = 0;
+        uint32_t maxShift = (8u << sizeOp.size()) - 8u; // Largest byte-aligned shift: 0, 8, 24 for size 0, 1, 2.
+
+        if (o2.isImm()) { // Explicit shift: must be LSL, byte-aligned, and the value must already fit into 8 bits.
+          if (o2.as<Imm>().predicate() != uint32_t(ShiftOp::kLSL))
+            goto InvalidImmediate;
+
+          if (imm > 0xFFu || o2.as<Imm>().valueAs<uint64_t>() > maxShift)
+            goto InvalidImmediate;
+
+          shift = o2.as<Imm>().valueAs<uint32_t>();
+          if ((shift & 0x7u) != 0u)
+            goto InvalidImmediate;
+        }
+        else if (imm) { // Implicit shift: derive it from the position of the lowest set bit.
+          shift = Support::ctz(imm) & ~0x7u; // Round DOWN to a byte multiple, cmode below only encodes `shift / 8`.
+          imm >>= shift;
+
+          if (imm > 0xFFu || shift > maxShift)
+            goto InvalidImmediate;
+        }
+
+        uint32_t cmode = 0x1u | ((shift / 8u) << 1); // Bit 0 is always set, byte shift count goes above it.
+        if (sizeOp.size() == 1)
+          cmode |= B(3);
+
+        // The 8-bit immediate value is split into ABC (bits 7:5) and DEFGH (bits 4:0) parts.
+        uint32_t abc = (imm >> 5) & 0x7u;
+        uint32_t defgh = imm & 0x1Fu;
+
+        opcode.reset(uint32_t(opData.immediateOp) << 10);
+        opcode.addImm(sizeOp.q(), 30);
+        opcode.addImm(abc, 16);
+        opcode.addImm(cmode, 12);
+        opcode.addImm(defgh, 5);
+        goto EmitOp_Rd0;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingSimdCmp: {
+      const InstDB::EncodingData::SimdCmp& opData = InstDB::EncodingData::simdCmp[encodingIndex];
+
+      if (isign4 == ENC_OPS3(Reg, Reg, Reg) && opData.regOp) {
+        if (!matchSignature(o0, o1, o2, instFlags))
+          goto InvalidInstruction;
+
+        SizeOp sizeOp = armElementTypeToSizeOp(opData.vecOpType, o0.as<Reg>().type(), o0.as<Vec>().elementType());
+        if (!sizeOp.isValid())
+          goto InvalidInstruction;
+
+        opcode.reset(uint32_t(opData.regOp) << 10);
+        opcode.addImm(sizeOp.qs(), 30);
+        opcode.addImm(sizeOp.scalar(), 28);
+        opcode.addImm(sizeOp.size(), 22);
+        goto EmitOp_Rd0_Rn5_Rm16;
+      }
+
+      if (isign4 == ENC_OPS3(Reg, Reg, Imm) && opData.zeroOp) {
+        if (!matchSignature(o0, o1, instFlags))
+          goto InvalidInstruction;
+
+        if (o2.as<Imm>().value() != 0)
+          goto InvalidImmediate;
+
+        SizeOp sizeOp = armElementTypeToSizeOp(opData.vecOpType, o0.as<Reg>().type(), o0.as<Vec>().elementType());
+        if (!sizeOp.isValid())
+          goto InvalidInstruction;
+
+        opcode.reset(uint32_t(opData.zeroOp) << 10);
+        opcode.addImm(sizeOp.qs(), 30);
+        opcode.addImm(sizeOp.scalar(), 28);
+        opcode.addImm(sizeOp.size(), 22);
+        goto EmitOp_Rd0_Rn5;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingSimdDot: {
+      const InstDB::EncodingData::SimdDot& opData = InstDB::EncodingData::simdDot[encodingIndex];
+
+      if (isign4 == ENC_OPS3(Reg, Reg, Reg)) {
+        uint32_t q = diff(o0.as<Reg>().type(), RegType::kARM_VecD);
+        uint32_t size = 2;
+
+        if (q > 1u)
+          goto InvalidInstruction;
+
+        if (!o2.as<Vec>().hasElementIndex()) {
+          if (!opData.vectorOp)
+            goto InvalidInstruction;
+
+          if (o0.as<Reg>().type() != o1.as<Reg>().type() || o1.as<Reg>().type() != o2.as<Reg>().type())
+            goto InvalidInstruction;
+
+          if (o0.as<Vec>().elementType() != opData.tA ||
+              o1.as<Vec>().elementType() != opData.tB ||
+              o2.as<Vec>().elementType() != opData.tB)
+            goto InvalidInstruction;
+
+          opcode.reset(uint32_t(opData.vectorOp) << 10);
+          opcode.addImm(q, 30);
+          goto EmitOp_Rd0_Rn5_Rm16;
+        }
+        else {
+          if (!opData.elementOp)
+            goto InvalidInstruction;
+
+          if (o0.as<Reg>().type() != o1.as<Reg>().type() || !o2.as<Reg>().isVecV())
+            goto InvalidInstruction;
+
+          if (o0.as<Vec>().elementType() != opData.tA ||
+              o1.as<Vec>().elementType() != opData.tB ||
+              o2.as<Vec>().elementType() != opData.tElement)
+            goto InvalidInstruction;
+
+          uint32_t elementIndex = o2.as<Vec>().elementIndex();
+          LMHImm lmh;
+
+          if (!encodeLMH(size, elementIndex, &lmh))
+            goto InvalidElementIndex;
+
+          if (o2.as<Reg>().id() > lmh.maxRmId)
+            goto InvalidPhysId;
+
+          opcode.reset(uint32_t(opData.elementOp) << 10);
+          opcode.addImm(q, 30);
+          opcode.addImm(lmh.lm, 20);
+          opcode.addImm(lmh.h, 11);
+          goto EmitOp_Rd0_Rn5_Rm16;
+        }
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingSimdDup: SimdDup: { // Also entered via `goto SimdDup` (label follows the case).
+      if (isign4 == ENC_OPS2(Reg, Reg)) {
+        // Truth table of valid encodings of `Q:1|ElementType:3` - bit `(q << 3) | elementType` must be set (D only with q == 1).
+        uint32_t kValidEncodings = B(Vec::kElementTypeB + 0) |
+                                   B(Vec::kElementTypeH + 0) |
+                                   B(Vec::kElementTypeS + 0) |
+                                   B(Vec::kElementTypeB + 8) |
+                                   B(Vec::kElementTypeH + 8) |
+                                   B(Vec::kElementTypeS + 8) |
+                                   B(Vec::kElementTypeD + 8) ;
+
+        uint32_t q = diff(o0.as<Reg>().type(), RegType::kARM_VecD); // Validated (q <= 1) in each branch that encodes it.
+
+        if (o1.as<Reg>().isGp()) {
+          // DUP - Vec (scalar|vector) <- GP register.
+          //
+          // NOTE: This is only scalar for `dup d, x` case, otherwise the value
+          // would be duplicated across all vector elements (1, 2, 4, 8, or 16).
+          uint32_t elementType = o0.as<Vec>().elementType();
+          if (q > 1 || !Support::bitTest(kValidEncodings, (q << 3) | elementType))
+            goto InvalidInstruction;
+
+          uint32_t lsbIndex = elementType - 1u; // Safe: an element type of zero was rejected by the truth table above.
+          uint32_t imm5 = 1u << lsbIndex;
+
+          opcode.reset(0b0000111000000000000011 << 10);
+          opcode.addImm(q, 30);
+          opcode.addImm(imm5, 16);
+          goto EmitOp_Rd0_Rn5;
+        }
+
+        if (!o1.as<Reg>().isVec() || !o1.as<Vec>().hasElementIndex())
+          goto InvalidInstruction;
+
+        uint32_t dstIndex = o1.as<Vec>().elementIndex(); // NOTE: despite the name, this is the index of the SOURCE element (o1).
+        if (!o0.as<Vec>().hasElementType()) {
+          // DUP - Vec (scalar) <- Vec[N].
+          uint32_t lsbIndex = diff(o0.as<Reg>().type(), RegType::kARM_VecB);
+
+          if (lsbIndex != o1.as<Vec>().elementType() - Vec::kElementTypeB || lsbIndex > 3)
+            goto InvalidInstruction;
+
+          uint32_t imm5 = ((dstIndex << 1) | 1u) << lsbIndex; // Marker bit at `lsbIndex`, element index above it.
+          if (imm5 > 31) // Must fit into 5 bits, otherwise the index is too large for this element size.
+            goto InvalidElementIndex;
+
+          opcode.reset(0b0101111000000000000001 << 10);
+          opcode.addImm(imm5, 16);
+          goto EmitOp_Rd0_Rn5;
+        }
+        else {
+          // DUP - Vec (all) <- Vec[N].
+          uint32_t elementType = o0.as<Vec>().elementType();
+          if (q > 1 || !Support::bitTest(kValidEncodings, (q << 3) | elementType))
+            goto InvalidInstruction;
+
+          uint32_t lsbIndex = elementType - 1u;
+          uint32_t imm5 = ((dstIndex << 1) | 1u) << lsbIndex; // Same layout as the scalar form above.
+
+          if (imm5 > 31)
+            goto InvalidElementIndex;
+
+          opcode.reset(0b0000111000000000000001 << 10);
+          opcode.addImm(q, 30);
+          opcode.addImm(imm5, 16);
+          goto EmitOp_Rd0_Rn5;
+        }
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingSimdIns: SimdIns: {
+      if (isign4 == ENC_OPS2(Reg, Reg) && o0.as<Reg>().isVecV()) {
+        if (!o0.as<Vec>().hasElementIndex())
+          goto InvalidInstruction;
+
+        uint32_t elementType = o0.as<Vec>().elementType();
+        uint32_t dstIndex = o0.as<Vec>().elementIndex();
+        uint32_t lsbIndex = elementType - 1u;
+
+        uint32_t imm5 = ((dstIndex << 1) | 1u) << lsbIndex;
+        if (imm5 > 31)
+          goto InvalidElementIndex;
+
+        if (o1.as<Reg>().isGp()) {
+          // INS - Vec[N] <- GP register.
+          opcode.reset(0b0100111000000000000111 << 10);
+          opcode.addImm(imm5, 16);
+          goto EmitOp_Rd0_Rn5;
+        }
+        else if (o1.as<Reg>().isVecV() && o1.as<Vec>().hasElementIndex()) {
+          // INS - Vec[N] <- Vec[M].
+          if (o0.as<Vec>().elementType() != o1.as<Vec>().elementType())
+            goto InvalidInstruction;
+
+          uint32_t srcIndex = o1.as<Vec>().elementIndex();
+          if (o0.as<Reg>().type() != o1.as<Reg>().type())
+            goto InvalidInstruction;
+
+          uint32_t imm4 = srcIndex << lsbIndex;
+          if (imm4 > 15)
+            goto InvalidElementIndex;
+
+          opcode.reset(0b0110111000000000000001 << 10);
+          opcode.addImm(imm5, 16);
+          opcode.addImm(imm4, 11);
+          goto EmitOp_Rd0_Rn5;
+        }
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingSimdMov: {
+      if (isign4 == ENC_OPS2(Reg, Reg)) {
+        if (o0.as<Reg>().isVec() && o1.as<Reg>().isVec()) {
+          // INS v.x[index], v.x[index].
+          if (o0.as<Vec>().hasElementIndex() && o1.as<Vec>().hasElementIndex())
+            goto SimdIns;
+
+          // DUP {b|h|s|d}, v.{b|h|s|d}[index].
+          if (o1.as<Vec>().hasElementIndex())
+            goto SimdDup;
+
+          if (!checkSignature(o0, o1))
+            goto InvalidInstruction;
+
+          // ORR Vd, Vn, Vm
+          uint32_t q = diff(o0.as<Reg>().type(), RegType::kARM_VecD);
+          if (q > 1)
+            goto InvalidInstruction;
+
+          opcode.reset(0b0000111010100000000111 << 10);
+          opcode.addImm(q, 30);
+          opcode.addReg(o1, 16); // Vn == Vm.
+          goto EmitOp_Rd0_Rn5;
+        }
+
+        if (o0.as<Reg>().isVec() && o1.as<Reg>().isGp()) {
+          // INS v.x[index], Rn.
+          if (o0.as<Vec>().hasElementIndex())
+            goto SimdIns;
+
+          goto InvalidInstruction;
+        }
+
+        if (o0.as<Reg>().isGp() && o1.as<Reg>().isVec()) {
+          // UMOV Rd, V.{s|d}[index].
+          encodingIndex = 1;
+          goto SimdUmov;
+        }
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingSimdMoviMvni: { // (Reg, Imm[, Imm]): picks the narrowest element size / cmode able to express the immediate.
+      const InstDB::EncodingData::SimdMoviMvni& opData = InstDB::EncodingData::simdMoviMvni[encodingIndex];
+
+      if (isign4 == ENC_OPS2(Reg, Imm) || isign4 == ENC_OPS3(Reg, Imm, Imm)) {
+        SizeOp sizeOp = armElementTypeToSizeOp(InstDB::kVO_V_Any, o0.as<Reg>().type(), o0.as<Vec>().elementType());
+        if (!sizeOp.isValid())
+          goto InvalidInstruction;
+
+        uint64_t imm64 = o1.as<Imm>().valueAs<uint64_t>();
+        uint32_t imm8 = 0; // Holds up to 32 bits while being narrowed; must end up <= 0xFF before encoding.
+        uint32_t cmode = 0;
+        uint32_t inverted = opData.inverted;
+        uint32_t op = 0;
+        uint32_t shift = 0;
+        uint32_t shiftOp = uint32_t(ShiftOp::kLSL);
+
+        if (sizeOp.size() == 3u) {
+          // The second immediate should not be present, however, we accept
+          // an immediate value of zero as some user code may still pass it.
+          if (o2.isImm() && o2.as<Imm>().value() != 0) // Checks o2 (the optional shift), o0 is the destination register.
+            goto InvalidImmediate;
+
+          if (Utils::isByteMaskImm8(imm64)) {
+            imm8 = encodeImm64ByteMaskToImm8(imm64);
+          }
+          else {
+            // Change from D to S and from 64-bit imm to 32-bit imm if this
+            // is not a byte-mask pattern.
+            if ((imm64 >> 32) == (imm64 & 0xFFFFFFFFu)) {
+              imm64 &= 0xFFFFFFFFu;
+              sizeOp.decrementSize();
+            }
+            else {
+              goto InvalidImmediate;
+            }
+          }
+        }
+
+        if (sizeOp.size() < 3u) { // Also taken after the 64-bit path above has narrowed the size to 2.
+          if (imm64 > 0xFFFFFFFFu)
+            goto InvalidImmediate;
+          imm8 = uint32_t(imm64);
+
+          if (sizeOp.size() == 2) { // Both 16-bit halves equal -> narrow to 16-bit elements.
+            if ((imm8 >> 16) == (imm8 & 0xFFFFu)) {
+              imm8 >>= 16;
+              sizeOp.decrementSize();
+            }
+          }
+
+          if (sizeOp.size() == 1) {
+            if (imm8 > 0xFFFFu)
+              goto InvalidImmediate;
+
+            if ((imm8 >> 8) == (imm8 & 0xFFu)) { // Both bytes equal -> narrow to 8-bit elements.
+              imm8 >>= 8;
+              sizeOp.decrementSize();
+            }
+          }
+
+          uint32_t maxShift = (8u << sizeOp.size()) - 8u; // Largest byte-aligned shift for the final element size.
+          if (o2.isImm()) {
+            if (imm8 > 0xFFu || o2.as<Imm>().valueAs<uint64_t>() > maxShift)
+              goto InvalidImmediate;
+
+            shift = o2.as<Imm>().valueAs<uint32_t>();
+            shiftOp = o2.as<Imm>().predicate();
+          }
+          else if (imm8) {
+            shift = Support::ctz(imm8) & ~0x7u; // Round down to a byte multiple.
+            imm8 >>= shift;
+
+            if (imm8 > 0xFFu || shift > maxShift)
+              goto InvalidImmediate;
+          }
+
+          if ((shift & 0x7u) != 0u)
+            goto InvalidImmediate;
+        }
+
+        shift /= 8u; // From here on `shift` is expressed in bytes.
+
+        switch (sizeOp.size()) {
+          case 0:
+            if (shiftOp != uint32_t(ShiftOp::kLSL))
+              goto InvalidImmediate;
+
+            if (inverted) { // Inversion is folded into the immediate, so `op` stays zero.
+              imm8 = ~imm8 & 0xFFu;
+              inverted = 0;
+            }
+
+            cmode = B(3) | B(2) | B(1);
+            break;
+
+          case 1:
+            if (shiftOp != uint32_t(ShiftOp::kLSL))
+              goto InvalidImmediate;
+
+            cmode = B(3) | (shift << 1);
+            op = inverted;
+            break;
+
+          case 2:
+            if (shiftOp == uint32_t(ShiftOp::kLSL)) {
+              cmode = shift << 1;
+            }
+            else if (shiftOp == uint32_t(ShiftOp::kMSL)) {
+              if (shift == 0 || shift > 2) // MSL accepts only a shift of 1 or 2 bytes.
+                goto InvalidImmediate;
+              cmode = B(3) | B(2) | (shift - 1u);
+            }
+            else {
+              goto InvalidImmediate;
+            }
+
+            op = inverted;
+            break;
+
+          case 3:
+            if (inverted) { // Inversion is folded into the byte-mask immediate.
+              imm8 = ~imm8 & 0xFFu;
+              inverted = 0;
+            }
+
+            op = 1;
+            cmode = B(3) | B(2) | B(1);
+            break;
+        }
+
+        // The 8-bit immediate value is split into ABC (bits 7:5) and DEFGH (bits 4:0) parts.
+        uint32_t abc = (imm8 >> 5) & 0x7u;
+        uint32_t defgh = imm8 & 0x1Fu;
+
+        opcode.reset(uint32_t(opData.opcode) << 10);
+        opcode.addImm(sizeOp.q(), 30);
+        opcode.addImm(op, 29);
+        opcode.addImm(abc, 16);
+        opcode.addImm(cmode, 12);
+        opcode.addImm(defgh, 5);
+        goto EmitOp_Rd0;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingSimdShift: { // Two forms: (Reg, Reg, Imm) when immediateOp is set, (Reg, Reg, Reg) when registerOp is set.
+      const InstDB::EncodingData::SimdShift& opData = InstDB::EncodingData::simdShift[encodingIndex];
+
+      const Operand_& sop = significantSimdOp(o0, o1, instFlags); // Operand that determines the size (o0 or o1, chosen by the helper).
+      SizeOp sizeOp = armElementTypeToSizeOp(opData.vecOpType, sop.as<Reg>().type(), sop.as<Vec>().elementType());
+
+      if (!sizeOp.isValid())
+        goto InvalidInstruction;
+
+      if (isign4 == ENC_OPS3(Reg, Reg, Imm) && opData.immediateOp) {
+        if (!matchSignature(o0, o1, instFlags))
+          goto InvalidInstruction;
+
+        if (o2.as<Imm>().valueAs<uint64_t>() > 63) // Coarse pre-check so the 32-bit read below cannot truncate.
+          goto InvalidImmediate;
+
+        uint32_t lsbShift = sizeOp.size() + 3u; // (1u << lsbShift) == 8u << size.
+        uint32_t lsbMask = (1u << lsbShift) - 1u;
+        uint32_t imm = o2.as<Imm>().valueAs<uint32_t>();
+
+        // Some instructions use IMM and some X - IMM, so negate if required.
+        if (opData.invertedImm) {
+          if (imm == 0 || imm > (1u << lsbShift)) // Accepts 1..(1 << lsbShift) inclusive.
+            goto InvalidImmediate;
+          imm = Support::neg(imm) & lsbMask; // Presumably two's complement negation, so (1 << lsbShift) maps to 0.
+        }
+
+        if (imm > lsbMask) // Non-inverted form accepts 0..lsbMask.
+          goto InvalidImmediate;
+        imm |= (1u << lsbShift); // Marker bit placed directly above the shift amount bits.
+
+        opcode.reset(uint32_t(opData.immediateOp) << 10);
+        opcode.addImm(sizeOp.qs(), 30);
+        opcode.addImm(sizeOp.scalar(), 28);
+        opcode.addImm(imm, 16);
+        goto EmitOp_Rd0_Rn5;
+      }
+
+      if (isign4 == ENC_OPS3(Reg, Reg, Reg) && opData.registerOp) {
+        if (!matchSignature(o0, o1, o2, instFlags))
+          goto InvalidInstruction;
+
+        opcode.reset(uint32_t(opData.registerOp) << 10);
+        opcode.addImm(sizeOp.qs(), 30);
+        opcode.addImm(sizeOp.scalar(), 28);
+        opcode.addImm(sizeOp.size(), 22);
+        goto EmitOp_Rd0_Rn5_Rm16;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingSimdShiftES: {
+      const InstDB::EncodingData::SimdShiftES& opData = InstDB::EncodingData::simdShiftES[encodingIndex];
+
+      if (isign4 == ENC_OPS3(Reg, Reg, Imm)) {
+        SizeOp sizeOp = armElementTypeToSizeOp(opData.vecOpType, o1.as<Reg>().type(), o1.as<Vec>().elementType());
+        if (!sizeOp.isValid())
+          goto InvalidInstruction;
+
+        if (!matchSignature(o0, o1, instFlags))
+          goto InvalidInstruction;
+
+        // The immediate value must match the element size.
+        uint64_t shift = o2.as<Imm>().valueAs<uint64_t>();
+        uint32_t shiftOp = o2.as<Imm>().predicate();
+
+        if (shift != (8u << sizeOp.size()) || shiftOp != uint32_t(ShiftOp::kLSL))
+          goto InvalidImmediate;
+
+        opcode.reset(uint32_t(opData.opcode) << 10);
+        opcode.addImm(sizeOp.q(), 30);
+        opcode.addImm(sizeOp.size(), 22);
+        goto EmitOp_Rd0_Rn5;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingSimdSm3tt: {
+      const InstDB::EncodingData::SimdSm3tt& opData = InstDB::EncodingData::simdSm3tt[encodingIndex];
+
+      if (isign4 == ENC_OPS3(Reg, Reg, Reg)) {
+        if (o0.as<Vec>().isVecS4() && o1.as<Vec>().isVecS4() && o2.as<Vec>().isVecS4() && o2.as<Vec>().hasElementIndex()) {
+          uint32_t imm2 = o2.as<Vec>().elementIndex();
+          if (imm2 > 3)
+            goto InvalidElementIndex;
+
+          opcode.reset(uint32_t(opData.opcode) << 10);
+          opcode.addImm(imm2, 12);
+          goto EmitOp_Rd0_Rn5_Rm16;
+        }
+      }
+
+      break;
+    }
+
+
+    case InstDB::kEncodingSimdSmovUmov: SimdUmov: { // Also entered via `goto SimdUmov` with encodingIndex preset by the jumping case.
+      const InstDB::EncodingData::SimdSmovUmov& opData = InstDB::EncodingData::simdSmovUmov[encodingIndex];
+
+      if (isign4 == ENC_OPS2(Reg, Reg) && o0.as<Reg>().isGp() && o1.as<Reg>().isVec()) {
+        SizeOp sizeOp = armElementTypeToSizeOp(opData.vecOpType, o1.as<Reg>().type(), o1.as<Vec>().elementType());
+        if (!sizeOp.isValid())
+          goto InvalidInstruction;
+
+        if (!o1.as<Vec>().hasElementIndex()) // The source must be a single vector element.
+          goto InvalidInstruction;
+
+        uint32_t x = o0.as<Gp>().isGpX();
+        uint32_t gpMustBeX = uint32_t(sizeOp.size() >= 3u - opData.isSigned); // Threshold is 3 when isSigned == 0 and 2 when isSigned == 1.
+
+        if (opData.isSigned) { // Signed: X is mandatory only when gpMustBeX, otherwise either GP width is accepted.
+          if (gpMustBeX && !x)
+            goto InvalidInstruction;
+        }
+        else { // Unsigned: the GP width must match exactly.
+          if (x != gpMustBeX)
+            goto InvalidInstruction;
+        }
+
+        uint32_t elementIndex = o1.as<Vec>().elementIndex();
+        uint32_t maxElementIndex = 15u >> sizeOp.size(); // 15, 7, 3, 1 for size 0, 1, 2, 3.
+
+        if (elementIndex > maxElementIndex)
+          goto InvalidElementIndex;
+
+        uint32_t imm5 = (1u | (elementIndex << 1)) << sizeOp.size(); // Marker bit at position `size`, element index above it.
+
+        opcode.reset(uint32_t(opData.opcode) << 10);
+        opcode.addImm(x, 30);
+        opcode.addImm(imm5, 16);
+        goto EmitOp_Rd0_Rn5;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingSimdSxtlUxtl: { // Single (Reg, Reg) form.
+      const InstDB::EncodingData::SimdSxtlUxtl& opData = InstDB::EncodingData::simdSxtlUxtl[encodingIndex];
+
+      if (isign4 == ENC_OPS2(Reg, Reg)) {
+        SizeOp sizeOp = armElementTypeToSizeOp(opData.vecOpType, o1.as<Reg>().type(), o1.as<Vec>().elementType()); // Size comes from the source (o1).
+        if (!sizeOp.isValid())
+          goto InvalidInstruction;
+
+        if (!matchSignature(o0, o1, instFlags))
+          goto InvalidInstruction;
+
+        opcode.reset(uint32_t(opData.opcode) << 10);
+        opcode.addImm(sizeOp.q(), 30);
+        opcode.addImm(1u, sizeOp.size() + 19); // Sets exactly one bit, at position 19 + size.
+        goto EmitOp_Rd0_Rn5;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingSimdTblTbx: {
+      const InstDB::EncodingData::SimdTblTbx& opData = InstDB::EncodingData::simdTblTbx[encodingIndex];
+
+      if (isign4 == ENC_OPS3(Reg, Reg, Reg) || isign4 == ENC_OPS4(Reg, Reg, Reg, Reg)) {
+        // TBL/TBX <Vd>.<Ta>, { <Vn>.16B }, <Vm>.<Ta>
+        // TBL/TBX <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B }, <Vm>.<Ta>
+        // TBL/TBX <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B, <Vn+2>.16B }, <Vm>.<Ta>
+        // TBL/TBX <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B, <Vn+2>.16B, <Vn+3>.16B }, <Vm>.<Ta>
+        opcode.reset(uint32_t(opData.opcode) << 10);
+
+        const Operand_& o4 = opExt[EmitterUtils::kOp4];
+        const Operand_& o5 = opExt[EmitterUtils::kOp5];
+
+        uint32_t q = diff(o0.as<Reg>().type(), RegType::kARM_VecD);
+        if (q > 1 || o0.as<Vec>().hasElementIndex())
+          goto InvalidInstruction;
+
+        if (!o1.as<Vec>().isVecB16() || o1.as<Vec>().hasElementIndex())
+          goto InvalidInstruction;
+
+        uint32_t len = uint32_t(!o3.isNone()) + uint32_t(!o4.isNone()) + uint32_t(!o5.isNone());
+        opcode.addImm(q, 30);
+        opcode.addImm(len, 13);
+
+        switch (len) {
+          case 0:
+            if (!checkSignature(o0, o2))
+              goto InvalidInstruction;
+
+            if (o2.id() > 31)
+              goto InvalidPhysId;
+
+            opcode.addReg(o2, 16);
+            goto EmitOp_Rd0_Rn5;
+
+          case 1:
+            if (!checkSignature(o0, o3))
+              goto InvalidInstruction;
+
+            if (o3.id() > 31)
+              goto InvalidPhysId;
+
+            opcode.addReg(o3, 16);
+            goto EmitOp_Rd0_Rn5;
+
+          case 2:
+            if (!checkSignature(o0, o4))
+              goto InvalidInstruction;
+
+            if (o4.id() > 31)
+              goto InvalidPhysId;
+
+            opcode.addReg(o4, 16);
+            goto EmitOp_Rd0_Rn5;
+
+          case 3:
+            if (!checkSignature(o0, o5))
+              goto InvalidInstruction;
+
+            if (o5.id() > 31)
+              goto InvalidPhysId;
+
+            opcode.addReg(o5, 16);
+            goto EmitOp_Rd0_Rn5;
+
+          default:
+            // Should never happen.
+            goto InvalidInstruction;
+        }
+      }
+
+      break;
+    }
+
+    // ------------------------------------------------------------------------
+    // [Simd - Load / Store]
+    // ------------------------------------------------------------------------
+
+    case InstDB::kEncodingSimdLdSt: {
+      const InstDB::EncodingData::SimdLdSt& opData = InstDB::EncodingData::simdLdSt[encodingIndex];
+
+      if (isign4 == ENC_OPS2(Reg, Mem)) {
+        const Mem& m = o1.as<Mem>();
+        rmRel = &m;
+
+        // Width  |       SZ |        XY | XSZ
+        // -------+----------+-----------+-----
+        // 8-bit  | size==00 | opc == 01 | 000
+        // 16-bit | size==01 | opc == 01 | 001
+        // 32-bit | size==10 | opc == 01 | 010
+        // 64-bit | size==11 | opc == 01 | 011
+        // 128-bit| size==00 | opc == 11 | 100
+        uint32_t xsz = diff(o0.as<Reg>().type(), RegType::kARM_VecB);
+        if (xsz > 4u || o0.as<Vec>().hasElementIndex())
+          goto InvalidRegType;
+
+        if (!checkVecId(o0))
+          goto InvalidPhysId;
+
+        if (!armCheckMemBaseIndexRel(m))
+          goto InvalidAddress;
+
+        int64_t offset = m.offset();
+        if (m.hasBaseReg()) {
+          // [Base {Offset | Index}]
+          if (m.hasIndex()) {
+            uint32_t opt = armShiftOpToLdStOptMap[m.predicate()];
+            if (opt == 0xFFu)
+              goto InvalidAddress;
+
+            uint32_t shift = m.shift();
+            uint32_t s = (shift != 0);
+
+            if (s && shift != xsz)
+              goto InvalidAddressScale;
+
+            opcode.reset(uint32_t(opData.registerOp) << 21);
+            opcode.addImm(xsz & 3u, 30);
+            opcode.addImm(xsz >> 2, 23);
+            opcode.addImm(opt, 13);
+            opcode.addImm(s, 12);
+            opcode |= B(11);
+            opcode.addReg(o0, 0);
+            goto EmitOp_MemBaseIndex_Rn5_Rm16;
+          }
+
+          // Makes it easier to work with the offset especially on 32-bit arch.
+          if (!Support::isInt32(offset))
+            goto InvalidDisplacement;
+          int32_t offset32 = int32_t(offset);
+
+          if (m.isPreOrPost()) {
+            if (!Support::isInt9(offset32))
+              goto InvalidDisplacement;
+
+            opcode.reset(uint32_t(opData.prePostOp) << 21);
+            opcode.addImm(xsz & 3u, 30);
+            opcode.addImm(xsz >> 2, 23);
+            opcode.addImm(offset32 & 0x1FF, 12);
+            opcode.addImm(m.isPreIndex(), 11);
+            opcode |= B(10);
+            opcode.addReg(o0, 0);
+            goto EmitOp_MemBase_Rn5;
+          }
+          else {
+            uint32_t imm12 = uint32_t(offset32) >> xsz;
+
+            // If this instruction is not encodable with scaled unsigned offset, try unscaled signed offset.
+            if (!Support::isUInt12(imm12) || (imm12 << xsz) != uint32_t(offset32)) {
+              instId = opData.uAltInstId;
+              instInfo = &InstDB::_instInfoTable[instId];
+              encodingIndex = instInfo->_encodingDataIndex;
+              goto Case_SimdLdurStur;
+            }
+
+            opcode.reset(uint32_t(opData.uOffsetOp) << 22);
+            opcode.addImm(xsz & 3u, 30);
+            opcode.addImm(xsz >> 2, 23);
+            opcode.addImm(imm12, 10);
+            opcode.addReg(o0, 0);
+            goto EmitOp_MemBase_Rn5;
+          }
+        }
+        else {
+          if (!opData.literalOp)
+            goto InvalidAddress;
+
+          if (xsz < 2u)
+            goto InvalidRegType;
+
+          uint32_t opc = xsz - 2u;
+          opcode.reset(uint32_t(opData.literalOp) << 24);
+          opcode.addImm(opc, 30);
+          opcode.addReg(o0, 0);
+          offsetFormat.resetToImmValue(OffsetType::kSignedOffset, 4, 5, 19, 2);
+          goto EmitOp_Rel;
+        }
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingSimdLdpStp: { // Single (Reg, Reg, Mem) form with a scaled signed 7-bit offset.
+      const InstDB::EncodingData::SimdLdpStp& opData = InstDB::EncodingData::simdLdpStp[encodingIndex];
+
+      if (isign4 == ENC_OPS3(Reg, Reg, Mem)) {
+        const Mem& m = o2.as<Mem>();
+        rmRel = &m;
+
+        uint32_t opc = diff(o0.as<Reg>().type(), RegType::kARM_VecS); // Presumably the distance from VecS; only 0..2 is accepted.
+        if (opc > 2u || o0.as<Vec>().hasElementTypeOrIndex())
+          goto InvalidInstruction;
+
+        if (!checkSignature(o0, o1)) // Both registers must have the same signature.
+          goto InvalidInstruction;
+
+        if (!checkVecId(o0, o1))
+          goto InvalidPhysId;
+
+        if (m.baseType() != RegType::kARM_GpX || m.hasIndex()) // Only [X-base + offset] addressing, no index register.
+          goto InvalidAddress;
+
+        if (m.isOffset64Bit())
+          goto InvalidDisplacement;
+
+        uint32_t offsetShift = 2u + opc; // The offset must be a multiple of (1 << offsetShift), verified below.
+        int32_t offset32 = m.offsetLo32() >> offsetShift;
+
+        // Make sure we didn't lose bits by applying the mandatory offset shift.
+        if (Support::shl(offset32, offsetShift) != m.offsetLo32())
+          goto InvalidDisplacement;
+
+        // Offset is encoded as a 7-bit immediate.
+        if (!Support::isInt7(offset32))
+          goto InvalidDisplacement;
+
+        if (m.isPreOrPost() && offset32 != 0) { // Pre/post-index with a zero offset falls through to the plain offset form.
+          if (!opData.prePostOp)
+            goto InvalidAddress;
+
+          opcode.reset(uint32_t(opData.prePostOp) << 22);
+          opcode.addImm(m.isPreIndex(), 24);
+        }
+        else {
+          opcode.reset(uint32_t(opData.offsetOp) << 22);
+        }
+
+        opcode.addImm(opc, 30);
+        opcode.addImm(offset32 & 0x7F, 15); // Keep the low 7 bits of the already scaled offset.
+        opcode.addReg(o1, 10);
+        opcode.addReg(o0, 0);
+        goto EmitOp_MemBase_Rn5;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingSimdLdurStur: {
+Case_SimdLdurStur:
+      const InstDB::EncodingData::SimdLdurStur& opData = InstDB::EncodingData::simdLdurStur[encodingIndex];
+
+      if (isign4 == ENC_OPS2(Reg, Mem)) {
+        const Mem& m = o1.as<Mem>();
+        rmRel = &m;
+
+        uint32_t sz = diff(o0.as<Reg>().type(), RegType::kARM_VecB);
+        if (sz > 4 || o0.as<Vec>().hasElementTypeOrIndex())
+          goto InvalidInstruction;
+
+        if (!checkVecId(o0))
+          goto InvalidPhysId;
+
+        if (!armCheckMemBaseIndexRel(m))
+          goto InvalidAddress;
+
+        if (m.hasBaseReg() && !m.hasIndex() && !m.isPreOrPost()) {
+          if (m.isOffset64Bit())
+            goto InvalidDisplacement;
+
+          int32_t offset32 = m.offsetLo32();
+          if (!Support::isInt9(offset32))
+            goto InvalidDisplacement;
+
+          opcode.reset(uint32_t(opData.opcode) << 10);
+          opcode.addImm(sz & 3u, 30);
+          opcode.addImm(sz >> 2, 23);
+          opcode.addImm(offset32 & 0x1FF, 12);
+          opcode.addReg(o0, 0);
+          goto EmitOp_MemBase_Rn5;
+        }
+
+        goto InvalidAddress;
+      }
+
+      break;
+    }
+
+    case InstDB::kEncodingSimdLdNStN: {
+      const InstDB::EncodingData::SimdLdNStN& opData = InstDB::EncodingData::simdLdNStN[encodingIndex];
+      const Operand_& o4 = opExt[EmitterUtils::kOp4];
+
+      uint32_t n = 1;
+
+      if (isign4 == ENC_OPS2(Reg, Mem)) {
+        if (opData.n != 1)
+          goto InvalidInstruction;
+
+        rmRel = &o1;
+      }
+      else if (isign4 == ENC_OPS3(Reg, Reg, Mem)) {
+        if (opData.n != 1 && opData.n != 2)
+          goto InvalidInstruction;
+
+        if (!checkSignature(o0, o1) || !checkConsecutive(o0, o1))
+          goto InvalidInstruction;
+
+        n = 2;
+        rmRel = &o2;
+      }
+      else if (isign4 == ENC_OPS4(Reg, Reg, Reg, Mem) && o4.isNone()) {
+        if (opData.n != 1 && opData.n != 3)
+          goto InvalidInstruction;
+
+        if (!checkSignature(o0, o1, o2) || !checkConsecutive(o0, o1, o2))
+          goto InvalidInstruction;
+
+        n = 3;
+        rmRel = &o3;
+      }
+      else if (isign4 == ENC_OPS4(Reg, Reg, Reg, Reg) && o4.isMem()) {
+        if (opData.n != 1 && opData.n != 4)
+          goto InvalidInstruction;
+
+        if (!checkSignature(o0, o1, o2, o3) || !checkConsecutive(o0, o1, o2, o3))
+          goto InvalidInstruction;
+
+        n = 4;
+        rmRel = &o4;
+      }
+      else {
+        goto InvalidInstruction;
+      }
+
+      // We will use `v` and `m` from now as those are relevant for encoding.
+      const Vec& v = o0.as<Vec>();
+      const Mem& m = rmRel->as<Mem>();
+
+      uint32_t q = 0;
+      uint32_t rm = 0;
+      uint32_t rn = m.baseId();
+      uint32_t sz = v.elementType() - Vec::kElementTypeB;
+      uint32_t opcSsize = sz;
+      uint32_t offsetPossibility = 0;
+
+      if (sz > 3)
+        goto InvalidInstruction;
+
+      if (m.baseType() != RegType::kARM_GpX)
+        goto InvalidAddress;
+
+      // Rn cannot be ZR, but can be SP.
+      if (rn > 30 && rn != Gp::kIdSp)
+        goto InvalidAddress;
+
+      rn &= 31;
+
+      if (opData.replicate) {
+        if (n != opData.n)
+          goto InvalidInstruction;
+
+        // Replicates to the whole register, element index cannot be used.
+        if (v.hasElementIndex())
+          goto InvalidInstruction;
+
+        q = diff(v.type(), RegType::kARM_VecD);
+        if (q > 1)
+          goto InvalidInstruction;
+
+        opcode.reset(uint32_t(opData.singleOp) << 10);
+        offsetPossibility = (1u << sz) * n;
+      }
+      else if (v.hasElementIndex()) {
+        if (n != opData.n)
+          goto InvalidInstruction;
+
+        // LDx/STx (single structure).
+        static const uint8_t opcSsizeBySzS[] = { 0x0u << 3, 0x2u << 3, 0x4u << 3, (0x4u << 3) | 1u };
+
+        opcode.reset(uint32_t(opData.singleOp) << 10);
+        opcSsize = opcSsizeBySzS[sz];
+        offsetPossibility =  (1u << sz) * opData.n;
+
+        uint32_t elementIndex = v.elementIndex();
+        uint32_t maxElementIndex = 15 >> sz;
+
+        if (elementIndex > maxElementIndex)
+          goto InvalidElementIndex;
+
+        elementIndex <<= sz;
+        q = elementIndex >> 3;
+        opcSsize |= elementIndex & 0x7u;
+      }
+      else {
+        // LDx/STx (multiple structures).
+        static const uint8_t opcSsizeByN[] = { 0u, 0x7u << 2, 0xAu << 2, 0x6u << 2, 0x2u << 2 };
+
+        q = diff(v.type(), RegType::kARM_VecD);
+        if (q > 1)
+          goto InvalidInstruction;
+
+        if (opData.n == 1)
+          opcSsize |= opcSsizeByN[n];
+
+        opcode.reset(uint32_t(opData.multipleOp) << 10);
+        offsetPossibility = (8u << q) * n;
+      }
+
+      if (m.hasIndex()) {
+        if (m.hasOffset() || !m.isPostIndex())
+          goto InvalidAddress;
+
+        rm = m.indexId();
+        if (rm > 30)
+          goto InvalidAddress;
+
+        // Bit 23 - PostIndex.
+        opcode |= B(23);
+      }
+      else {
+        if (m.hasOffset()) {
+          if (m.offset() != int32_t(offsetPossibility) || !m.isPostIndex())
+            goto InvalidAddress;
+          rm = 31;
+
+          // Bit 23 - PostIndex.
+          opcode |= B(23);
+        }
+      }
+
+      opcode.addImm(q, 30);
+      opcode.addImm(rm, 16);
+      opcode.addImm(opcSsize, 10);
+      opcode.addImm(rn, 5);
+      goto EmitOp_Rd0;
+    }
+
+    default:
+      break;
+  }
+
+  goto InvalidInstruction;
+
+  // --------------------------------------------------------------------------
+  // [EmitGp - Single]
+  // --------------------------------------------------------------------------
+
+EmitOp_Rd0:
+  if (!checkValidRegs(o0))
+    goto InvalidPhysId;
+
+  opcode.addReg(o0, 0);
+  goto EmitOp;
+
+EmitOp_Rn5:
+  if (!checkValidRegs(o0))
+    goto InvalidPhysId;
+
+  opcode.addReg(o0, 5);
+  goto EmitOp;
+
+EmitOp_Rn5_Rm16:
+  if (!checkValidRegs(o0, o1))
+    goto InvalidPhysId;
+
+  opcode.addReg(o0, 5);
+  opcode.addReg(o1, 16);
+  goto EmitOp;
+
+EmitOp_Rd0_Rn5:
+  if (!checkValidRegs(o0, o1))
+    goto InvalidPhysId;
+
+  opcode.addReg(o0, 0);
+  opcode.addReg(o1, 5);
+  goto EmitOp;
+
+EmitOp_Rd0_Rn5_Rm16_Ra10:
+  if (!checkValidRegs(o0, o1, o2, o3))
+    goto InvalidPhysId;
+
+  opcode.addReg(o0, 0);
+  opcode.addReg(o1, 5);
+  opcode.addReg(o2, 16);
+  opcode.addReg(o3, 10);
+  goto EmitOp;
+
+EmitOp_Rd0_Rn5_Rm16:
+  if (!checkValidRegs(o0, o1, o3))
+    goto InvalidPhysId;
+
+  opcode.addReg(o0, 0);
+  opcode.addReg(o1, 5);
+  opcode.addReg(o2, 16);
+  goto EmitOp;
+
+  // --------------------------------------------------------------------------
+  // [EmitGp - Multiple]
+  // --------------------------------------------------------------------------
+
+EmitOp_Multiple:
+  {
+    ASMJIT_ASSERT(multipleOpCount > 0);
+    err = writer.ensureSpace(this, multipleOpCount * 4u);
+    if (ASMJIT_UNLIKELY(err))
+      goto Failed;
+
+    for (uint32_t i = 0; i < multipleOpCount; i++)
+      writer.emit32uLE(multipleOpData[i]);
+
+    goto EmitDone;
+  }
+
+  // --------------------------------------------------------------------------
+  // [EmitGp - Memory]
+  // --------------------------------------------------------------------------
+
+EmitOp_MemBase_Rn5:
+  if (!checkMemBase(rmRel->as<Mem>()))
+    goto InvalidAddress;
+
+  opcode.addReg(rmRel->as<Mem>().baseId(), 5);
+  goto EmitOp;
+
+EmitOp_MemBaseNoImm_Rn5:
+  if (!checkMemBase(rmRel->as<Mem>()) || rmRel->as<Mem>().hasIndex())
+    goto InvalidAddress;
+
+  if (rmRel->as<Mem>().hasOffset())
+    goto InvalidDisplacement;
+
+  opcode.addReg(rmRel->as<Mem>().baseId(), 5);
+  goto EmitOp;
+
+EmitOp_MemBaseIndex_Rn5_Rm16:
+  if (!rmRel->as<Mem>().hasBaseReg())
+    goto InvalidAddress;
+
+  if (rmRel->as<Mem>().indexId() > 30 && rmRel->as<Mem>().indexId() != Gp::kIdZr)
+    goto InvalidPhysId;
+
+  opcode.addReg(rmRel->as<Mem>().indexId(), 16);
+  opcode.addReg(rmRel->as<Mem>().baseId(), 5);
+  goto EmitOp;
+
+  // --------------------------------------------------------------------------
+  // [EmitOp - PC Relative]
+  // --------------------------------------------------------------------------
+
+EmitOp_Rel:
+  {
+    if (rmRel->isLabel() || rmRel->isMem()) {
+      uint32_t labelId;
+      int64_t labelOffset = 0;
+
+      if (rmRel->isLabel()) {
+        labelId = rmRel->as<Label>().id();
+      }
+      else {
+        labelId = rmRel->as<Mem>().baseId();
+        labelOffset = rmRel->as<Mem>().offset();
+      }
+
+      LabelEntry* label = _code->labelEntry(labelId);
+      if (ASMJIT_UNLIKELY(!label))
+        goto InvalidLabel;
+
+      if (offsetFormat.type() == OffsetType::kAArch64_ADRP) {
+        // TODO: [ARM] Always create relocation entry.
+      }
+
+      if (label->isBoundTo(_section)) {
+        // Label bound to the current section.
+        offsetValue = label->offset() - uint64_t(offset()) + uint64_t(labelOffset);
+        goto EmitOp_DispImm;
+      }
+      else {
+        // Record non-bound label.
+        size_t codeOffset = writer.offsetFrom(_bufferData);
+        LabelLink* link = _code->newLabelLink(label, _section->id(), codeOffset, intptr_t(labelOffset), offsetFormat);
+
+        if (ASMJIT_UNLIKELY(!link))
+          goto OutOfMemory;
+
+        goto EmitOp;
+      }
+    }
+  }
+
+  if (rmRel->isImm()) {
+    uint64_t baseAddress = _code->baseAddress();
+    uint64_t targetOffset = rmRel->as<Imm>().valueAs<uint64_t>();
+
+    size_t codeOffset = writer.offsetFrom(_bufferData);
+
+    if (baseAddress == Globals::kNoBaseAddress || _section->id() != 0) {
+      // Create a new RelocEntry as we cannot calculate the offset right now.
+      RelocEntry* re;
+      err = _code->newRelocEntry(&re, RelocType::kAbsToRel);
+      if (err)
+        goto Failed;
+
+      re->_sourceSectionId = _section->id();
+      re->_sourceOffset = codeOffset;
+      re->_format = offsetFormat;
+      re->_payload = rmRel->as<Imm>().valueAs<uint64_t>() + 4u;
+      goto EmitOp;
+    }
+    else {
+      uint64_t pc = baseAddress + codeOffset;
+
+      if (offsetFormat.type() == OffsetType::kAArch64_ADRP)
+        pc &= ~uint64_t(4096 - 1);
+
+      offsetValue = targetOffset - pc;
+      goto EmitOp_DispImm;
+    }
+  }
+
+  goto InvalidInstruction;
+
+EmitOp_DispImm:
+  {
+    if ((offsetValue & Support::lsbMask<uint32_t>(offsetFormat.immDiscardLsb())) != 0)
+      goto InvalidDisplacement;
+
+    int64_t dispImm64 = int64_t(offsetValue) >> offsetFormat.immDiscardLsb();
+    if (!Support::isEncodableOffset64(dispImm64, offsetFormat.immBitCount()))
+      goto InvalidDisplacement;
+
+    uint32_t dispImm32 = uint32_t(dispImm64 & Support::lsbMask<uint32_t>(offsetFormat.immBitCount()));
+    switch (offsetFormat.type()) {
+      case OffsetType::kSignedOffset: {
+        opcode.addImm(dispImm32, offsetFormat.immBitShift());
+        goto EmitOp;
+      }
+
+      case OffsetType::kAArch64_ADR:
+      case OffsetType::kAArch64_ADRP: {
+        uint32_t immLo = dispImm32 & 0x3u;
+        uint32_t immHi = dispImm32 >> 2;
+        opcode.addImm(immLo, 29);
+        opcode.addImm(immHi, 5);
+        goto EmitOp;
+      }
+
+      default:
+        goto InvalidDisplacement;
+    }
+  }
+
+  // --------------------------------------------------------------------------
+  // [EmitOp - Opcode]
+  // --------------------------------------------------------------------------
+
+EmitOp:
+  writer.emit32uLE(opcode.get());
+  goto EmitDone;
+
+  // --------------------------------------------------------------------------
+  // [Done]
+  // --------------------------------------------------------------------------
+
+EmitDone:
+  if (Support::test(options, InstOptions::kReserved)) {
+#ifndef ASMJIT_NO_LOGGING
+    if (_logger)
+      EmitterUtils::logInstructionEmitted(this, BaseInst::composeARMInstId(instId, instCC), options, o0, o1, o2, opExt, 0, 0, writer.cursor());
+#endif
+  }
+
+  resetExtraReg();
+  resetInstOptions();
+  resetInlineComment();
+
+  writer.done(this);
+  return kErrorOk;
+
+  // --------------------------------------------------------------------------
+  // [Error Handler]
+  // --------------------------------------------------------------------------
+
+#define ERROR_HANDLER(ERR) ERR: err = DebugUtils::errored(kError##ERR); goto Failed;
+  ERROR_HANDLER(OutOfMemory)
+  ERROR_HANDLER(InvalidAddress)
+  ERROR_HANDLER(InvalidAddressScale)
+  ERROR_HANDLER(InvalidDisplacement)
+  ERROR_HANDLER(InvalidElementIndex)
+  ERROR_HANDLER(InvalidLabel)
+  ERROR_HANDLER(InvalidImmediate)
+  ERROR_HANDLER(InvalidInstruction)
+  ERROR_HANDLER(InvalidPhysId)
+  ERROR_HANDLER(InvalidRegType)
+#undef ERROR_HANDLER
+
+Failed:
+#ifndef ASMJIT_NO_LOGGING
+  return EmitterUtils::logInstructionFailed(this, err, instId, options, o0, o1, o2, opExt);
+#else
+  resetExtraReg();
+  resetInstOptions();
+  resetInlineComment();
+  return reportError(err);
+#endif
+}
+
+#undef ENC_OPS1
+#undef ENC_OPS2
+#undef ENC_OPS3
+#undef ENC_OPS4
+
+// a64::Assembler - Align
+// ======================
+
+Error Assembler::align(AlignMode alignMode, uint32_t alignment) {
+  // Pads the current offset up to a multiple of `alignment`: NOPs in code mode, zeros in data/zero mode.
+  constexpr uint32_t kNopA64 = 0xD503201Fu; // [11010101|00000011|00100000|00011111].
+
+  if (ASMJIT_UNLIKELY(!_code))
+    return reportError(DebugUtils::errored(kErrorNotInitialized));
+
+  if (ASMJIT_UNLIKELY(uint32_t(alignMode) > uint32_t(AlignMode::kMaxValue)))
+    return reportError(DebugUtils::errored(kErrorInvalidArgument));
+
+  if (alignment <= 1) // An alignment of 0 or 1 never requires padding.
+    return kErrorOk;
+
+  if (ASMJIT_UNLIKELY(alignment > Globals::kMaxAlignment || !Support::isPowerOf2(alignment)))
+    return reportError(DebugUtils::errored(kErrorInvalidArgument));
+
+  uint32_t i = uint32_t(Support::alignUpDiff<size_t>(offset(), alignment)); // Padding bytes still required.
+  if (i == 0)
+    return kErrorOk;
+
+  CodeWriter writer(this);
+  ASMJIT_PROPAGATE(writer.ensureSpace(this, i));
+
+  switch (alignMode) {
+    case AlignMode::kCode: {
+      // A NOP is 4 bytes, so padding must start 4-byte aligned; report the failure like every other error path.
+      if (ASMJIT_UNLIKELY(offset() & 0x3u))
+        return reportError(DebugUtils::errored(kErrorInvalidState));
+
+      while (i >= 4) {
+        writer.emit32uLE(kNopA64);
+        i -= 4;
+      }
+
+      ASMJIT_ASSERT(i == 0);
+      break;
+    }
+
+    case AlignMode::kData:
+    case AlignMode::kZero:
+      writer.emitZeros(i);
+      break;
+  }
+
+  writer.done(this);
+
+#ifndef ASMJIT_NO_LOGGING
+  if (_logger) {
+    StringTmp<128> sb;
+    sb.appendChars(' ', _logger->indentation(FormatIndentationGroup::kCode));
+    sb.appendFormat("align %u\n", alignment);
+    _logger->log(sb);
+  }
+#endif
+
+  return kErrorOk;
+}
+
+// a64::Assembler - Events
+// =======================
+
+Error Assembler::onAttach(CodeHolder* code) noexcept {
+  // No assembler-specific setup is done here; the base class result is returned unchanged.
+  return Base::onAttach(code);
+}
+
+Error Assembler::onDetach(CodeHolder* code) noexcept {
+  return Base::onDetach(code); // Pure delegation; no assembler-specific teardown happens here.
+}
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // !ASMJIT_NO_AARCH64
diff --git a/lib/lepton/asmjit/arm/a64assembler.h b/lib/lepton/asmjit/arm/a64assembler.h
new file mode 100644
index 0000000000..f1ac72b8d5
--- /dev/null
+++ b/lib/lepton/asmjit/arm/a64assembler.h
@@ -0,0 +1,72 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_ARM_A64ASSEMBLER_H_INCLUDED
+#define ASMJIT_ARM_A64ASSEMBLER_H_INCLUDED
+
+#include "../core/assembler.h"
+#include "../arm/a64emitter.h"
+#include "../arm/a64operand.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(a64)
+
+//! \addtogroup asmjit_a64
+//! \{
+
+//! AArch64 assembler; overrides `_emit()`, `align()` and the attach/detach events of \ref BaseAssembler.
+class ASMJIT_VIRTAPI Assembler
+  : public BaseAssembler,
+    public EmitterExplicitT<Assembler> {
+
+public:
+  typedef BaseAssembler Base;
+
+  //! \name Construction / Destruction
+  //! \{
+
+  ASMJIT_API Assembler(CodeHolder* code = nullptr) noexcept;
+  ASMJIT_API virtual ~Assembler() noexcept;
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns whether the environment's architecture is THUMB (alternative to 32-bit ARM encoding).
+  inline bool isInThumbMode() const noexcept { return _environment.isArchThumb(); }
+
+  //! Returns the code alignment of the current mode in bytes: 2 in THUMB mode, 4 otherwise.
+  inline uint32_t codeAlignment() const noexcept { return isInThumbMode() ? 2 : 4; }
+
+  //! \}
+
+  //! \name Emit
+  //! \{
+
+  ASMJIT_API Error _emit(InstId instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_* opExt) override;
+
+  //! \}
+
+  //! \name Align
+  //! \{
+
+  ASMJIT_API Error align(AlignMode alignMode, uint32_t alignment) override; //!< Pads the current offset up to `alignment`.
+
+  //! \}
+
+  //! \name Events
+  //! \{
+
+  ASMJIT_API Error onAttach(CodeHolder* code) noexcept override;
+  ASMJIT_API Error onDetach(CodeHolder* code) noexcept override;
+
+  //! \}
+};
+
+//! \}
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // ASMJIT_ARM_A64ASSEMBLER_H_INCLUDED
diff --git a/lib/lepton/asmjit/arm/a64builder.cpp b/lib/lepton/asmjit/arm/a64builder.cpp
new file mode 100644
index 0000000000..3a52b2a578
--- /dev/null
+++ b/lib/lepton/asmjit/arm/a64builder.cpp
@@ -0,0 +1,51 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#if !defined(ASMJIT_NO_AARCH64) && !defined(ASMJIT_NO_BUILDER)
+
+#include "../arm/a64assembler.h"
+#include "../arm/a64builder.h"
+#include "../arm/a64emithelper_p.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(a64)
+
+// a64::Builder - Construction & Destruction
+// =========================================
+
+Builder::Builder(CodeHolder* code) noexcept : BaseBuilder() {
+  _archMask = uint64_t(1) << uint32_t(Arch::kAArch64); // Only the AArch64 bit is set in the mask.
+  assignEmitterFuncs(this);
+
+  if (code) // Attaching is optional; the result of attach() is not checked.
+    code->attach(this);
+}
+Builder::~Builder() noexcept {}
+
+// a64::Builder - Events
+// =====================
+
+Error Builder::onAttach(CodeHolder* code) noexcept {
+  return Base::onAttach(code); // Pure delegation; no builder-specific setup happens here.
+}
+
+Error Builder::onDetach(CodeHolder* code) noexcept {
+  return Base::onDetach(code); // Pure delegation; no builder-specific teardown happens here.
+}
+
+// a64::Builder - Finalize
+// =======================
+
+Error Builder::finalize() {
+  ASMJIT_PROPAGATE(runPasses()); // Nothing is serialized when a pass fails.
+  Assembler assembler(_code);    // Temporary assembler constructed from the same `_code`.
+  assembler.addEncodingOptions(encodingOptions());
+  assembler.addDiagnosticOptions(diagnosticOptions());
+  return serializeTo(&assembler);
+}
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // !ASMJIT_NO_AARCH64 && !ASMJIT_NO_BUILDER
diff --git a/lib/lepton/asmjit/arm/a64builder.h b/lib/lepton/asmjit/arm/a64builder.h
new file mode 100644
index 0000000000..adc99aafc8
--- /dev/null
+++ b/lib/lepton/asmjit/arm/a64builder.h
@@ -0,0 +1,57 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_ARM_A64BUILDER_H_INCLUDED
+#define ASMJIT_ARM_A64BUILDER_H_INCLUDED
+
+#include "../core/api-config.h"
+#ifndef ASMJIT_NO_BUILDER
+
+#include "../core/builder.h"
+#include "../arm/a64emitter.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(a64)
+
+//! \addtogroup asmjit_a64
+//! \{
+
+//! AArch64 builder implementation; `finalize()` runs the passes and serializes into a temporary \ref Assembler.
+class ASMJIT_VIRTAPI Builder
+  : public BaseBuilder,
+    public EmitterExplicitT<Builder> {
+public:
+  ASMJIT_NONCOPYABLE(Builder)
+  typedef BaseBuilder Base;
+
+  //! \name Construction & Destruction
+  //! \{
+
+  ASMJIT_API explicit Builder(CodeHolder* code = nullptr) noexcept; //!< Attaches to `code` when it is non-null.
+  ASMJIT_API virtual ~Builder() noexcept;
+
+  //! \}
+
+  //! \name Events
+  //! \{
+
+  ASMJIT_API Error onAttach(CodeHolder* code) noexcept override;
+  ASMJIT_API Error onDetach(CodeHolder* code) noexcept override;
+
+  //! \}
+
+  //! \name Finalize
+  //! \{
+
+  ASMJIT_API Error finalize() override; //!< Returns the first pass error, otherwise the serialization result.
+
+  //! \}
+};
+
+//! \}
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // !ASMJIT_NO_BUILDER
+#endif // ASMJIT_ARM_A64BUILDER_H_INCLUDED
diff --git a/lib/lepton/asmjit/arm/a64compiler.cpp b/lib/lepton/asmjit/arm/a64compiler.cpp
new file mode 100644
index 0000000000..d6c4ed28ff
--- /dev/null
+++ b/lib/lepton/asmjit/arm/a64compiler.cpp
@@ -0,0 +1,60 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#if !defined(ASMJIT_NO_AARCH64) && !defined(ASMJIT_NO_COMPILER)
+
+#include "../arm/a64assembler.h"
+#include "../arm/a64compiler.h"
+#include "../arm/a64emithelper_p.h"
+#include "../arm/a64rapass_p.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(a64)
+
+// a64::Compiler - Construction & Destruction
+// ==========================================
+
+Compiler::Compiler(CodeHolder* code) noexcept : BaseCompiler() {
+  _archMask = uint64_t(1) << uint32_t(Arch::kAArch64); // Only the AArch64 bit is set in the mask.
+  assignEmitterFuncs(this);
+
+  if (code) // Attaching is optional; the result of attach() is not checked.
+    code->attach(this);
+}
+Compiler::~Compiler() noexcept {}
+
+// a64::Compiler - Events
+// ======================
+
+Error Compiler::onAttach(CodeHolder* code) noexcept {
+  Error baseErr = Base::onAttach(code);
+  if (ASMJIT_UNLIKELY(baseErr))
+    return baseErr;
+
+  // Add the ARMRAPass; when that fails the attach is rolled back via onDetach() and the error is returned.
+  Error passErr = addPassT<ARMRAPass>();
+  if (ASMJIT_UNLIKELY(passErr))
+    onDetach(code);
+  return passErr;
+}
+
+Error Compiler::onDetach(CodeHolder* code) noexcept {
+  return Base::onDetach(code); // Pure delegation; no compiler-specific teardown happens here.
+}
+
+// a64::Compiler - Finalize
+// ========================
+
+Error Compiler::finalize() {
+  ASMJIT_PROPAGATE(runPasses()); // Nothing is serialized when a pass fails.
+  Assembler assembler(_code);    // Temporary assembler constructed from the same `_code`.
+  assembler.addEncodingOptions(encodingOptions());
+  assembler.addDiagnosticOptions(diagnosticOptions());
+  return serializeTo(&assembler);
+}
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // !ASMJIT_NO_AARCH64 && !ASMJIT_NO_COMPILER
diff --git a/lib/lepton/asmjit/arm/a64compiler.h b/lib/lepton/asmjit/arm/a64compiler.h
new file mode 100644
index 0000000000..bed408a98f
--- /dev/null
+++ b/lib/lepton/asmjit/arm/a64compiler.h
@@ -0,0 +1,247 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_ARM_ARMCOMPILER_H_INCLUDED
+#define ASMJIT_ARM_ARMCOMPILER_H_INCLUDED
+
+#include "../core/api-config.h"
+#ifndef ASMJIT_NO_COMPILER
+
+#include "../core/compiler.h"
+#include "../core/type.h"
+#include "../arm/a64emitter.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(a64)
+
+//! \addtogroup asmjit_a64
+//! \{
+
+//! AArch64 compiler implementation.
+class ASMJIT_VIRTAPI Compiler
+  : public BaseCompiler,
+    public EmitterExplicitT<Compiler> {
+public:
+  ASMJIT_NONCOPYABLE(Compiler)
+  typedef BaseCompiler Base;
+
+  //! \name Construction & Destruction
+  //! \{
+
+  ASMJIT_API explicit Compiler(CodeHolder* code = nullptr) noexcept;
+  ASMJIT_API virtual ~Compiler() noexcept;
+
+  //! \}
+
+  //! \name Virtual Registers
+  //! \{
+
+  //! \cond INTERNAL
+  template<typename RegT, typename Type>
+  inline RegT _newRegInternal(const Type& type) {
+    RegT reg(Globals::NoInit);
+    _newReg(&reg, type, nullptr); // No name is passed; the result of _newReg() is not checked.
+    return reg;
+  }
+
+  template<typename RegT, typename Type, typename... Args>
+  inline RegT _newRegInternal(const Type& type, const char* s, Args&&... args) {
+#ifndef ASMJIT_NO_LOGGING
+    RegT reg(Globals::NoInit);
+    if (sizeof...(Args) == 0) // No format arguments: `s` is passed through unformatted.
+      _newReg(&reg, type, s);
+    else
+      _newRegFmt(&reg, type, s, std::forward<Args>(args)...);
+    return reg;
+#else
+    DebugUtils::unused(std::forward<Args>(args)...); // Logging compiled out: `s` and `args` are ignored.
+    RegT reg(Globals::NoInit);
+    _newReg(&reg, type, nullptr);
+    return reg;
+#endif
+  }
+  //! \endcond
+
+  template<typename RegT, typename... Args>
+  inline RegT newSimilarReg(const RegT& ref, Args&&... args) {
+    return _newRegInternal<RegT>(ref, std::forward<Args>(args)...); // `ref` is forwarded as the type argument.
+  }
+
+  template<typename... Args>
+  inline Reg newReg(TypeId typeId, Args&&... args) { return _newRegInternal<Reg>(typeId, std::forward<Args>(args)...); }
+
+  template<typename... Args>
+  inline Gp newGp(TypeId typeId, Args&&... args) { return _newRegInternal<Gp>(typeId, std::forward<Args>(args)...); }
+
+  template<typename... Args>
+  inline Vec newVec(TypeId typeId, Args&&... args) { return _newRegInternal<Vec>(typeId, std::forward<Args>(args)...); }
+
+  template<typename... Args>
+  inline Gp newInt32(Args&&... args) { return _newRegInternal<Gp>(TypeId::kInt32, std::forward<Args>(args)...); }
+  template<typename... Args>
+  inline Gp newUInt32(Args&&... args) { return _newRegInternal<Gp>(TypeId::kUInt32, std::forward<Args>(args)...); }
+
+  template<typename... Args>
+  inline Gp newInt64(Args&&... args) { return _newRegInternal<Gp>(TypeId::kInt64, std::forward<Args>(args)...); }
+  template<typename... Args>
+  inline Gp newUInt64(Args&&... args) { return _newRegInternal<Gp>(TypeId::kUInt64, std::forward<Args>(args)...); }
+
+  template<typename... Args>
+  inline Gp newIntPtr(Args&&... args) { return _newRegInternal<Gp>(TypeId::kIntPtr, std::forward<Args>(args)...); }
+  template<typename... Args>
+  inline Gp newUIntPtr(Args&&... args) { return _newRegInternal<Gp>(TypeId::kUIntPtr, std::forward<Args>(args)...); }
+
+  template<typename... Args>
+  inline Gp newGpw(Args&&... args) { return _newRegInternal<Gp>(TypeId::kUInt32, std::forward<Args>(args)...); }
+  template<typename... Args>
+  inline Gp newGpx(Args&&... args) { return _newRegInternal<Gp>(TypeId::kUInt64, std::forward<Args>(args)...); }
+  template<typename... Args>
+  inline Gp newGpz(Args&&... args) { return _newRegInternal<Gp>(TypeId::kUIntPtr, std::forward<Args>(args)...); }
+
+  template<typename... Args>
+  inline Vec newVecS(Args&&... args) { return _newRegInternal<Vec>(TypeId::kFloat32, std::forward<Args>(args)...); }
+
+  template<typename... Args>
+  inline Vec newVecD(Args&&... args) { return _newRegInternal<Vec>(TypeId::kFloat64, std::forward<Args>(args)...); }
+
+  template<typename... Args>
+  inline Vec newVecQ(Args&&... args) { return _newRegInternal<Vec>(TypeId::kUInt8x16, std::forward<Args>(args)...); }
+
+  //! \}
+
+  //! \name Stack
+  //! \{
+
+  //! Creates a new memory chunk allocated on the current function's stack.
+  inline Mem newStack(uint32_t size, uint32_t alignment, const char* name = nullptr) {
+    Mem m(Globals::NoInit);
+    _newStack(&m, size, alignment, name); // The result of _newStack() is not checked.
+    return m;
+  }
+
+  //! \}
+
+  //! \name Constants
+  //! \{
+
+  //! Put data to a constant-pool and get a memory reference to it.
+  inline Mem newConst(ConstPoolScope scope, const void* data, size_t size) {
+    Mem m(Globals::NoInit);
+    _newConst(&m, scope, data, size); // The result of _newConst() is not checked.
+    return m;
+  }
+
+  //! Put a BYTE `val` to a constant-pool (8 bits).
+  inline Mem newByteConst(ConstPoolScope scope, uint8_t val) noexcept { return newConst(scope, &val, 1); }
+  //! Put a HWORD `val` to a constant-pool (16 bits).
+  inline Mem newHWordConst(ConstPoolScope scope, uint16_t val) noexcept { return newConst(scope, &val, 2); }
+  //! Put a WORD `val` to a constant-pool (32 bits).
+  inline Mem newWordConst(ConstPoolScope scope, uint32_t val) noexcept { return newConst(scope, &val, 4); }
+  //! Put a DWORD `val` to a constant-pool (64 bits).
+  inline Mem newDWordConst(ConstPoolScope scope, uint64_t val) noexcept { return newConst(scope, &val, 8); }
+
+  //! Put a signed 16-bit `val` to a constant-pool (2 bytes).
+  inline Mem newInt16Const(ConstPoolScope scope, int16_t val) noexcept { return newConst(scope, &val, 2); }
+  //! Put an unsigned 16-bit `val` to a constant-pool (2 bytes).
+  inline Mem newUInt16Const(ConstPoolScope scope, uint16_t val) noexcept { return newConst(scope, &val, 2); }
+  //! Put a signed 32-bit `val` to a constant-pool (4 bytes).
+  inline Mem newInt32Const(ConstPoolScope scope, int32_t val) noexcept { return newConst(scope, &val, 4); }
+  //! Put an unsigned 32-bit `val` to a constant-pool (4 bytes).
+  inline Mem newUInt32Const(ConstPoolScope scope, uint32_t val) noexcept { return newConst(scope, &val, 4); }
+  //! Put a signed 64-bit `val` to a constant-pool (8 bytes).
+  inline Mem newInt64Const(ConstPoolScope scope, int64_t val) noexcept { return newConst(scope, &val, 8); }
+  //! Put an unsigned 64-bit `val` to a constant-pool (8 bytes).
+  inline Mem newUInt64Const(ConstPoolScope scope, uint64_t val) noexcept { return newConst(scope, &val, 8); }
+
+  //! Put a single-precision `float` `val` to a constant-pool (4 bytes).
+  inline Mem newFloatConst(ConstPoolScope scope, float val) noexcept { return newConst(scope, &val, 4); }
+  //! Put a double-precision `double` `val` to a constant-pool (8 bytes).
+  inline Mem newDoubleConst(ConstPoolScope scope, double val) noexcept { return newConst(scope, &val, 8); }
+
+  //! \}
+
+  //! \name Instruction Options
+  //! \{
+
+  //! Force the compiler to not follow the conditional or unconditional jump (sets `InstOptions::kUnfollow`).
+  inline Compiler& unfollow() noexcept { _instOptions |= InstOptions::kUnfollow; return *this; }
+
+  //! \}
+
+  //! \name Compiler specific
+  //! \{
+
+  //! Special pseudo-instruction that can be used to load a memory address into `o0` GP register.
+  //!
+  //! \note At the moment this instruction is only useful to load a stack allocated address into a GP register
+  //! for further use. It makes very little sense to use it for anything else. The semantics of this instruction
+  //! are the same as those of the X86 `LEA` (load effective address) instruction.
+  inline Error loadAddressOf(const Gp& o0, const Mem& o1) { return _emitter()->_emitI(Inst::kIdAdr, o0, o1); }
+
+  //! \}
+
+  //! \name Function Call & Ret Intrinsics
+  //! \{
+
+  //! Invoke a function call without `target` type enforcement (adds an \ref InvokeNode using `Inst::kIdBlr`).
+  inline Error invoke_(InvokeNode** out, const Operand_& target, const FuncSignature& signature) {
+    return addInvokeNode(out, Inst::kIdBlr, target, signature);
+  }
+
+  //! Invoke a function call of the given `target` and `signature` and store the added node to `out`.
+  //!
+  //! Creates a new \ref InvokeNode, initializes all the necessary members to match the given function `signature`,
+  //! adds the node to the compiler, and stores its pointer to `out`. The operation is atomic, if anything fails
+  //! nullptr is stored in `out` and error code is returned.
+  inline Error invoke(InvokeNode** out, const Gp& target, const FuncSignature& signature) { return invoke_(out, target, signature); }
+  //! \overload
+  inline Error invoke(InvokeNode** out, const Mem& target, const FuncSignature& signature) { return invoke_(out, target, signature); }
+  //! \overload
+  inline Error invoke(InvokeNode** out, const Label& target, const FuncSignature& signature) { return invoke_(out, target, signature); }
+  //! \overload
+  inline Error invoke(InvokeNode** out, const Imm& target, const FuncSignature& signature) { return invoke_(out, target, signature); }
+  //! \overload
+  inline Error invoke(InvokeNode** out, uint64_t target, const FuncSignature& signature) { return invoke_(out, Imm(int64_t(target)), signature); }
+
+  //! Adds a return with no operands.
+  inline Error ret() { return addRet(Operand(), Operand()); }
+  //! \overload
+  inline Error ret(const BaseReg& o0) { return addRet(o0, Operand()); }
+  //! \overload
+  inline Error ret(const BaseReg& o0, const BaseReg& o1) { return addRet(o0, o1); }
+
+  //! \}
+
+  //! \name Jump Tables Support
+  //! \{
+
+  using EmitterExplicitT<Compiler>::br;
+
+  //! Adds a jump to the given `target` with the provided jump `annotation`.
+  inline Error br(const BaseReg& target, JumpAnnotation* annotation) { return emitAnnotatedJump(Inst::kIdBr, target, annotation); }
+
+  //! \}
+
+  //! \name Events
+  //! \{
+
+  ASMJIT_API Error onAttach(CodeHolder* code) noexcept override;
+  ASMJIT_API Error onDetach(CodeHolder* code) noexcept override;
+
+  //! \}
+
+  //! \name Finalize
+  //! \{
+
+  ASMJIT_API Error finalize() override;
+
+  //! \}
+};
+
+//! \}
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // !ASMJIT_NO_COMPILER
+#endif // ASMJIT_ARM_ARMCOMPILER_H_INCLUDED
diff --git a/lib/lepton/asmjit/arm/a64emithelper.cpp b/lib/lepton/asmjit/arm/a64emithelper.cpp
new file mode 100644
index 0000000000..1e8da619a6
--- /dev/null
+++ b/lib/lepton/asmjit/arm/a64emithelper.cpp
@@ -0,0 +1,464 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#if !defined(ASMJIT_NO_AARCH64)
+
+#include "../core/formatter.h"
+#include "../core/funcargscontext_p.h"
+#include "../core/string.h"
+#include "../core/support.h"
+#include "../core/type.h"
+#include "../arm/a64emithelper_p.h"
+#include "../arm/a64formatter_p.h"
+#include "../arm/a64instapi_p.h"
+#include "../arm/a64operand.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(a64)
+
+// a64::EmitHelper - Emit Operations
+// =================================
+
+ASMJIT_FAVOR_SIZE Error EmitHelper::emitRegMove(
+  const Operand_& dst_,
+  const Operand_& src_, TypeId typeId, const char* comment) {
+  // Emits a single instruction selected by `typeId`: a load (reg <- mem), a store (mem <- reg) or a register move.
+  Emitter* em = _emitter->as<Emitter>();
+
+  // Only valid, non-abstract TypeIds may be passed in.
+  ASMJIT_ASSERT(TypeUtils::isValid(typeId) && !TypeUtils::isAbstract(typeId));
+
+  em->setInlineComment(comment);
+
+  if (dst_.isReg() && src_.isMem()) {
+    Reg reg(dst_.as<Reg>());
+    Mem mem(src_.as<Mem>());
+
+    // Integer loads: LDRB/LDRH for 8/16 bits, LDR into the W or X view for 32/64 bits.
+    switch (typeId) {
+      case TypeId::kInt8:
+      case TypeId::kUInt8:
+        return em->ldrb(reg.as<Gp>(), mem);
+      case TypeId::kInt16:
+      case TypeId::kUInt16:
+        return em->ldrh(reg.as<Gp>(), mem);
+      case TypeId::kInt32:
+      case TypeId::kUInt32:
+        return em->ldr(reg.as<Gp>().w(), mem);
+      case TypeId::kInt64:
+      case TypeId::kUInt64:
+        return em->ldr(reg.as<Gp>().x(), mem);
+      default:
+        break;
+    }
+
+    // Float/vector loads use the S, D or Q view matching the type width.
+    if (TypeUtils::isFloat32(typeId) || TypeUtils::isVec32(typeId)) {
+      return em->ldr(reg.as<Vec>().s(), mem);
+    }
+    if (TypeUtils::isFloat64(typeId) || TypeUtils::isVec64(typeId)) {
+      return em->ldr(reg.as<Vec>().d(), mem);
+    }
+    if (TypeUtils::isVec128(typeId)) {
+      return em->ldr(reg.as<Vec>().q(), mem);
+    }
+
+    // Any other TypeId ends up at the common error exit below.
+  }
+
+  if (dst_.isMem() && src_.isReg()) {
+    Mem mem(dst_.as<Mem>());
+    Reg reg(src_.as<Reg>());
+
+    // Integer stores: STRB/STRH for 8/16 bits, STR from the W or X view for 32/64 bits.
+    switch (typeId) {
+      case TypeId::kInt8:
+      case TypeId::kUInt8:
+        return em->strb(reg.as<Gp>(), mem);
+      case TypeId::kInt16:
+      case TypeId::kUInt16:
+        return em->strh(reg.as<Gp>(), mem);
+      case TypeId::kInt32:
+      case TypeId::kUInt32:
+        return em->str(reg.as<Gp>().w(), mem);
+      case TypeId::kInt64:
+      case TypeId::kUInt64:
+        return em->str(reg.as<Gp>().x(), mem);
+      default:
+        break;
+    }
+
+    // Float/vector stores use the S, D or Q view matching the type width.
+    if (TypeUtils::isFloat32(typeId) || TypeUtils::isVec32(typeId)) {
+      return em->str(reg.as<Vec>().s(), mem);
+    }
+    if (TypeUtils::isFloat64(typeId) || TypeUtils::isVec64(typeId)) {
+      return em->str(reg.as<Vec>().d(), mem);
+    }
+    if (TypeUtils::isVec128(typeId)) {
+      return em->str(reg.as<Vec>().q(), mem);
+    }
+
+    // Any other TypeId ends up at the common error exit below.
+  }
+
+  if (dst_.isReg() && src_.isReg()) {
+    Reg to(dst_.as<Reg>());
+    Reg from(src_.as<Reg>());
+
+    // Every integer width is copied with a move between the X views of both registers.
+    switch (typeId) {
+      case TypeId::kInt8:
+      case TypeId::kUInt8:
+      case TypeId::kInt16:
+      case TypeId::kUInt16:
+      case TypeId::kInt32:
+      case TypeId::kUInt32:
+      case TypeId::kInt64:
+      case TypeId::kUInt64:
+        return em->mov(to.as<Gp>().x(), from.as<Gp>().x());
+      default:
+        break;
+    }
+
+    if (TypeUtils::isFloat32(typeId) || TypeUtils::isVec32(typeId)) {
+      return em->fmov(to.as<Vec>().s(), from.as<Vec>().s());
+    }
+    if (TypeUtils::isFloat64(typeId) || TypeUtils::isVec64(typeId)) {
+      return em->mov(to.as<Vec>().b8(), from.as<Vec>().b8());
+    }
+    if (TypeUtils::isVec128(typeId)) {
+      return em->mov(to.as<Vec>().b16(), from.as<Vec>().b16());
+    }
+  }
+
+  em->setInlineComment(nullptr);
+  return DebugUtils::errored(kErrorInvalidState);
+}
+
+Error EmitHelper::emitRegSwap(
+  const BaseReg& a,
+  const BaseReg& b, const char* comment) {
+  // No swap sequence is emitted here: the arguments are ignored and kErrorInvalidState is always returned.
+  DebugUtils::unused(a, b, comment);
+  return DebugUtils::errored(kErrorInvalidState);
+}
+
+// Moves a function argument from `src_` (register or memory) into `dst_`. TODO: [ARM] EmitArgMove is unfinished.
+Error EmitHelper::emitArgMove(
+  const BaseReg& dst_, TypeId dstTypeId,
+  const Operand_& src_, TypeId srcTypeId, const char* comment) {
+  // Deduce optional `dstTypeId`, which may be `TypeId::kVoid` in some cases.
+  if (dstTypeId == TypeId::kVoid) {
+    const ArchTraits& archTraits = ArchTraits::byArch(_emitter->arch());
+    dstTypeId = archTraits.regTypeToTypeId(dst_.type());
+  }
+
+  // Invalid or abstract TypeIds are not allowed.
+  ASMJIT_ASSERT(TypeUtils::isValid(dstTypeId) && !TypeUtils::isAbstract(dstTypeId));
+  ASMJIT_ASSERT(TypeUtils::isValid(srcTypeId) && !TypeUtils::isAbstract(srcTypeId));
+
+  Reg dst(dst_.as<Reg>());
+  Operand src(src_);
+
+  uint32_t dstSize = TypeUtils::sizeOf(dstTypeId);
+  uint32_t srcSize = TypeUtils::sizeOf(srcTypeId);
+
+  if (TypeUtils::isInt(dstTypeId)) {
+    if (TypeUtils::isInt(srcTypeId)) {
+      uint32_t x = dstSize == 8; // non-zero when the destination is an 8-byte integer (X view), zero selects the W view
+      dst.setSignature(OperandSignature{x ? uint32_t(GpX::kSignature) : uint32_t(GpW::kSignature)});
+      _emitter->setInlineComment(comment);
+
+      if (src.isReg()) {
+        src.setSignature(dst.signature());
+        return _emitter->emit(Inst::kIdMov, dst, src);
+      }
+      else if (src.isMem()) {
+        InstId instId = Inst::kIdNone;
+        switch (srcTypeId) {
+          case TypeId::kInt8: instId = Inst::kIdLdrsb; break;
+          case TypeId::kUInt8: instId = Inst::kIdLdrb; break;
+          case TypeId::kInt16: instId = Inst::kIdLdrsh; break;
+          case TypeId::kUInt16: instId = Inst::kIdLdrh; break;
+          case TypeId::kInt32: instId = x ? Inst::kIdLdrsw : Inst::kIdLdr; break;
+          case TypeId::kUInt32: // always load through the W view: it reads 4 bytes and zero-extends, an X-view LDR would read 8 bytes
+            dst.setSignature(OperandSignature{uint32_t(GpW::kSignature)});
+            instId = Inst::kIdLdr;
+            break;
+          case TypeId::kInt64: instId = Inst::kIdLdr; break;
+          case TypeId::kUInt64: instId = Inst::kIdLdr; break;
+          default: return DebugUtils::errored(kErrorInvalidState);
+        }
+        return _emitter->emit(instId, dst, src);
+      }
+    }
+  }
+
+  if (TypeUtils::isFloat(dstTypeId) || TypeUtils::isVec(dstTypeId)) {
+    if (TypeUtils::isFloat(srcTypeId) || TypeUtils::isVec(srcTypeId)) {
+      switch (srcSize) {
+        case 2: dst.as<Vec>().setSignature(OperandSignature{VecH::kSignature}); break;
+        case 4: dst.as<Vec>().setSignature(OperandSignature{VecS::kSignature}); break;
+        case 8: dst.as<Vec>().setSignature(OperandSignature{VecD::kSignature}); break;
+        case 16: dst.as<Vec>().setSignature(OperandSignature{VecV::kSignature}); break;
+        default:
+          return DebugUtils::errored(kErrorInvalidState);
+      }
+
+      _emitter->setInlineComment(comment);
+
+      if (src.isReg()) {
+        InstId instId = srcSize <= 4 ? Inst::kIdFmov_v : Inst::kIdMov_v; // fmov for 2/4-byte sources, mov for 8/16-byte ones
+        src.setSignature(dst.signature());
+        return _emitter->emit(instId, dst, src);
+      }
+      else if (src.isMem()) {
+        return _emitter->emit(Inst::kIdLdr_v, dst, src);
+      }
+    }
+  }
+
+  return DebugUtils::errored(kErrorInvalidState);
+}
+
+// a64::EmitHelper - Emit Prolog & Epilog
+// ======================================
+
+struct LoadStoreInstructions {
+  InstId singleInstId; // emitted when a slot holds one register only (second id is BaseReg::kIdBad)
+  InstId pairInstId;   // emitted when a slot holds two registers
+};
+
+struct PrologEpilogInfo {
+  struct RegPair {
+    uint8_t ids[2];  // ids of the two registers; ids[1] is BaseReg::kIdBad when the register has no partner
+    uint16_t offset; // offset assigned to the pair's slot (applied relative to SP by the prolog/epilog)
+  };
+
+  struct GroupData {
+    RegPair pairs[16];
+    uint32_t pairCount;
+  };
+
+  Support::Array<GroupData, 2> groups;
+  uint32_t sizeTotal;
+
+  // Splits the saved registers of the GP and vector groups into pairs with ascending slot offsets.
+  Error init(const FuncFrame& frame) noexcept {
+    uint32_t nextOffset = 0;
+
+    for (RegGroup group : Support::EnumValues<RegGroup, RegGroup::kGp, RegGroup::kVec>{}) {
+      GroupData& groupData = groups[group];
+      RegPair* slots = groupData.pairs;
+      uint32_t pairBytes = uint32_t(frame.saveRestoreRegSize(group)) * 2u;
+      uint32_t regMask = frame.savedRegs(group);
+      uint32_t count = 0;
+      uint32_t filled = 0;
+
+      if (group == RegGroup::kGp && frame.hasPreservedFP()) {
+        // FP and LR must form the first pair of the push/pop sequence.
+        ASMJIT_ASSERT(count == 0);
+
+        RegPair& fpLr = slots[0];
+        fpLr.ids[0] = Gp::kIdFp;
+        fpLr.ids[1] = Gp::kIdLr;
+        fpLr.offset = uint16_t(nextOffset);
+        nextOffset += pairBytes;
+        count++;
+        regMask &= ~Support::bitMask(Gp::kIdFp, Gp::kIdLr);
+      }
+
+      Support::BitWordIterator<uint32_t> regIt(regMask);
+      while (regIt.hasNext()) {
+        RegPair& current = slots[count];
+        current.ids[filled] = uint8_t(regIt.next());
+        filled++;
+        if (filled == 2) {
+          current.offset = uint16_t(nextOffset);
+          nextOffset += pairBytes;
+          count++;
+          filled = 0;
+        }
+      }
+
+      // A leftover register gets a slot of its own, with the partner id marked as bad.
+      if (filled == 1) {
+        RegPair& last = slots[count];
+        last.ids[1] = uint8_t(BaseReg::kIdBad);
+        last.offset = uint16_t(nextOffset);
+        nextOffset += pairBytes;
+        count++;
+      }
+      groupData.pairCount = count;
+    }
+
+    sizeTotal = nextOffset;
+    return kErrorOk;
+  }
+};
+
+ASMJIT_FAVOR_SIZE Error EmitHelper::emitProlog(const FuncFrame& frame) {
+  // Emits the prolog: stores the saved register pairs, copies SP to x29 when FP is preserved, then subtracts the stack adjustment.
+  Emitter* emitter = _emitter->as<Emitter>();
+  PrologEpilogInfo pei;
+  ASMJIT_PROPAGATE(pei.init(frame));
+
+  static const Support::Array<Reg, 2> groupRegs = {{ x0, d0 }};
+  static const Support::Array<LoadStoreInstructions, 2> groupInsts = {{
+    { Inst::kIdStr  , Inst::kIdStp   },
+    { Inst::kIdStr_v, Inst::kIdStp_v }
+  }};
+
+  uint32_t adjustInitialOffset = pei.sizeTotal;
+
+  for (RegGroup group : Support::EnumValues<RegGroup, RegGroup::kGp, RegGroup::kVec>{}) {
+    const PrologEpilogInfo::GroupData& data = pei.groups[group];
+    uint32_t pairCount = data.pairCount;
+
+    Reg regs[2] = { groupRegs[group], groupRegs[group] };
+    Mem mem = ptr(sp);
+
+    const LoadStoreInstructions& insts = groupInsts[group];
+    for (uint32_t i = 0; i < pairCount; i++) {
+      const PrologEpilogInfo::RegPair& pair = data.pairs[i];
+
+      regs[0].setId(pair.ids[0]);
+      regs[1].setId(pair.ids[1]);
+      mem.setOffsetLo32(pair.offset);
+      // The store to offset 0 is pre-indexed by the negated size of the whole save area.
+      if (pair.offset == 0 && adjustInitialOffset) {
+        mem.setOffset(-int(adjustInitialOffset));
+        mem.makePreIndex();
+      }
+
+      if (pair.ids[1] == BaseReg::kIdBad)
+        ASMJIT_PROPAGATE(emitter->emit(insts.singleInstId, regs[0], mem));
+      else
+        ASMJIT_PROPAGATE(emitter->emit(insts.pairInstId, regs[0], regs[1], mem));
+
+      mem.resetToFixedOffset();
+      // FP/LR is pair 0 of the GP group only; without the group check the vector group's pair 0 emitted a second, redundant `mov x29, sp`.
+      if (i == 0 && group == RegGroup::kGp && frame.hasPreservedFP()) {
+        ASMJIT_PROPAGATE(emitter->mov(x29, sp));
+      }
+    }
+  }
+
+  if (frame.hasStackAdjustment()) {
+    uint32_t adj = frame.stackAdjustment();
+    if (adj <= 0xFFFu) {
+      ASMJIT_PROPAGATE(emitter->sub(sp, sp, adj));
+    }
+    else if (adj <= 0xFFFFFFu)  {
+      // TODO: [ARM] Prolog - we must touch the pages otherwise it's undefined.
+      ASMJIT_PROPAGATE(emitter->sub(sp, sp, adj & 0x000FFFu));
+      ASMJIT_PROPAGATE(emitter->sub(sp, sp, adj & 0xFFF000u));
+    }
+    else {
+      return DebugUtils::errored(kErrorInvalidState);
+    }
+  }
+
+  return kErrorOk;
+}
+
+// TODO: [ARM] Emit epilog.
+ASMJIT_FAVOR_SIZE Error EmitHelper::emitEpilog(const FuncFrame& frame) {
+  // Emits the epilog: re-adds the stack adjustment, reloads the saved register pairs in reverse, then returns through x30.
+  Emitter* em = _emitter->as<Emitter>();
+  PrologEpilogInfo info;
+  ASMJIT_PROPAGATE(info.init(frame));
+
+  static const Support::Array<Reg, 2> groupRegs = {{ x0, d0 }};
+  static const Support::Array<LoadStoreInstructions, 2> groupInsts = {{
+    { Inst::kIdLdr  , Inst::kIdLdp   },
+    { Inst::kIdLdr_v, Inst::kIdLdp_v }
+  }};
+
+  uint32_t saveAreaSize = info.sizeTotal;
+
+  if (frame.hasStackAdjustment()) {
+    uint32_t adj = frame.stackAdjustment();
+    if (adj > 0xFFFFFFu)
+      return DebugUtils::errored(kErrorInvalidState);
+
+    if (adj > 0xFFFu) {
+      // Above 0xFFF the adjustment is added in two parts: bits 0-11 first, then bits 12-23.
+      ASMJIT_PROPAGATE(em->add(sp, sp, adj & 0x000FFFu));
+      ASMJIT_PROPAGATE(em->add(sp, sp, adj & 0xFFF000u));
+    }
+    else {
+      ASMJIT_PROPAGATE(em->add(sp, sp, adj));
+    }
+  }
+
+  // Walk the groups (index 1, then 0) and their pairs backwards, i.e. opposite to the store order of the prolog.
+  for (int g = 1; g >= 0; g--) {
+    RegGroup group = RegGroup(g);
+    const PrologEpilogInfo::GroupData& groupData = info.groups[group];
+    const LoadStoreInstructions& loads = groupInsts[group];
+
+    Reg pairRegs[2] = { groupRegs[group], groupRegs[group] };
+    Mem slot = ptr(sp);
+
+    for (uint32_t left = groupData.pairCount; left != 0; left--) {
+      const PrologEpilogInfo::RegPair& pair = groupData.pairs[left - 1];
+
+      pairRegs[0].setId(pair.ids[0]);
+      pairRegs[1].setId(pair.ids[1]);
+      slot.setOffsetLo32(pair.offset);
+
+      // The load from offset 0 is post-indexed by the size of the whole save area.
+      if (pair.offset == 0 && saveAreaSize) {
+        slot.setOffset(int(saveAreaSize));
+        slot.makePostIndex();
+      }
+
+      if (pair.ids[1] != BaseReg::kIdBad) {
+        ASMJIT_PROPAGATE(em->emit(loads.pairInstId, pairRegs[0], pairRegs[1], slot));
+      }
+      else {
+        ASMJIT_PROPAGATE(em->emit(loads.singleInstId, pairRegs[0], slot));
+      }
+      slot.resetToFixedOffset();
+    }
+  }
+
+  return em->ret(x30);
+}
+
+static Error ASMJIT_CDECL Emitter_emitProlog(BaseEmitter* emitter, const FuncFrame& frame) {
+  // Free-function adapter: emits the prolog through a temporary EmitHelper bound to `emitter`.
+  return EmitHelper(emitter).emitProlog(frame);
+}
+
+static Error ASMJIT_CDECL Emitter_emitEpilog(BaseEmitter* emitter, const FuncFrame& frame) {
+  // Free-function adapter: emits the epilog through a temporary EmitHelper bound to `emitter`.
+  return EmitHelper(emitter).emitEpilog(frame);
+}
+
+static Error ASMJIT_CDECL Emitter_emitArgsAssignment(BaseEmitter* emitter, const FuncFrame& frame, const FuncArgsAssignment& args) {
+  // Free-function adapter: runs the arguments assignment through a temporary EmitHelper bound to `emitter`.
+  return EmitHelper(emitter).emitArgsAssignment(frame, args);
+}
+
+void assignEmitterFuncs(BaseEmitter* emitter) {
+  // Installs the a64 callbacks into `emitter->_funcs`; formatting and validation only when compiled in.
+  auto& funcs = emitter->_funcs;
+  funcs.emitProlog = Emitter_emitProlog;
+  funcs.emitEpilog = Emitter_emitEpilog;
+  funcs.emitArgsAssignment = Emitter_emitArgsAssignment;
+#if !defined(ASMJIT_NO_LOGGING)
+  funcs.formatInstruction = FormatterInternal::formatInstruction;
+#endif
+#if !defined(ASMJIT_NO_VALIDATION)
+  funcs.validate = InstInternal::validate;
+#endif
+}
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // !ASMJIT_NO_AARCH64
diff --git a/lib/lepton/asmjit/arm/a64emithelper_p.h b/lib/lepton/asmjit/arm/a64emithelper_p.h
new file mode 100644
index 0000000000..b1ba1a9296
--- /dev/null
+++ b/lib/lepton/asmjit/arm/a64emithelper_p.h
@@ -0,0 +1,50 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_ARM_ARMEMITHELPER_P_H_INCLUDED
+#define ASMJIT_ARM_ARMEMITHELPER_P_H_INCLUDED
+
+#include "../core/api-config.h"
+
+#include "../core/emithelper_p.h"
+#include "../core/func.h"
+#include "../arm/a64emitter.h"
+#include "../arm/a64operand.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(a64)
+
+//! \cond INTERNAL
+//! \addtogroup asmjit_a64
+//! \{
+
+class EmitHelper : public BaseEmitHelper { // a64 emit helper: register/argument moves plus prolog/epilog emission
+public:
+  inline explicit EmitHelper(BaseEmitter* emitter = nullptr) noexcept
+    : BaseEmitHelper(emitter) {}
+  //! Emits a load, a store, or a register-to-register move between `dst_` and `src_`, selected by `typeId`.
+  Error emitRegMove(
+    const Operand_& dst_,
+    const Operand_& src_, TypeId typeId, const char* comment = nullptr) override;
+  //! Register swap; the a64 implementation emits nothing and always returns `kErrorInvalidState`.
+  Error emitRegSwap(
+    const BaseReg& a,
+    const BaseReg& b, const char* comment = nullptr) override;
+  //! Moves an argument from `src_` (register or memory) into `dst_`; `dstTypeId` may be `TypeId::kVoid` and is then deduced.
+  Error emitArgMove(
+    const BaseReg& dst_, TypeId dstTypeId,
+    const Operand_& src_, TypeId srcTypeId, const char* comment = nullptr) override;
+  //! Emit the function prolog / epilog for `frame` (declared without `override`, unlike the methods above).
+  Error emitProlog(const FuncFrame& frame);
+  Error emitEpilog(const FuncFrame& frame);
+};
+//! Assigns the a64 prolog, epilog and args-assignment callbacks (plus formatting/validation when compiled in) to `emitter->_funcs`.
+void assignEmitterFuncs(BaseEmitter* emitter);
+
+//! \}
+//! \endcond
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // ASMJIT_ARM_ARMEMITHELPER_P_H_INCLUDED
diff --git a/lib/lepton/asmjit/arm/a64emitter.h b/lib/lepton/asmjit/arm/a64emitter.h
new file mode 100644
index 0000000000..54354eaca8
--- /dev/null
+++ b/lib/lepton/asmjit/arm/a64emitter.h
@@ -0,0 +1,1228 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_ARM_A64EMITTER_H_INCLUDED
+#define ASMJIT_ARM_A64EMITTER_H_INCLUDED
+
+#include "../core/emitter.h"
+#include "../core/support.h"
+#include "../arm/a64instdb.h"
+#include "../arm/a64operand.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(a64)
+// ASMJIT_INST_Nx(NAME, ID, T0...) defines an inline method NAME() taking N typed operands and forwarding them to _emitter()->_emitI(Inst::kId##ID, ...).
+#define ASMJIT_INST_0x(NAME, ID) \
+  inline Error NAME() { return _emitter()->_emitI(Inst::kId##ID); }
+
+#define ASMJIT_INST_1x(NAME, ID, T0) \
+  inline Error NAME(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID, o0); }
+
+#define ASMJIT_INST_2x(NAME, ID, T0, T1) \
+  inline Error NAME(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID, o0, o1); }
+
+#define ASMJIT_INST_3x(NAME, ID, T0, T1, T2) \
+  inline Error NAME(const T0& o0, const T1& o1, const T2& o2) { return _emitter()->_emitI(Inst::kId##ID, o0, o1, o2); }
+
+#define ASMJIT_INST_4x(NAME, ID, T0, T1, T2, T3) \
+  inline Error NAME(const T0& o0, const T1& o1, const T2& o2, const T3& o3) { return _emitter()->_emitI(Inst::kId##ID, o0, o1, o2, o3); }
+
+#define ASMJIT_INST_5x(NAME, ID, T0, T1, T2, T3, T4) \
+  inline Error NAME(const T0& o0, const T1& o1, const T2& o2, const T3& o3, const T4& o4) { return _emitter()->_emitI(Inst::kId##ID, o0, o1, o2, o3, o4); }
+
+#define ASMJIT_INST_6x(NAME, ID, T0, T1, T2, T3, T4, T5) \
+  inline Error NAME(const T0& o0, const T1& o1, const T2& o2, const T3& o3, const T4& o4, const T5& o5) { return _emitter()->_emitI(Inst::kId##ID, o0, o1, o2, o3, o4, o5); }
+// ASMJIT_INST_1cc defines NAME(o0), NAME(cc, o0) and one NAME_<cond>(o0) per condition code; the conditional forms build the id with BaseInst::composeARMInstId().
+#define ASMJIT_INST_1cc(NAME, ID, T0) \
+  inline Error NAME(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID, o0); } \
+  \
+  inline Error NAME(CondCode cc, const T0& o0) { return _emitter()->_emitI(BaseInst::composeARMInstId(Inst::kId##ID, cc), o0); } \
+  \
+  inline Error NAME##_eq(const T0& o0) { return _emitter()->_emitI(BaseInst::composeARMInstId(Inst::kId##ID, CondCode::kEQ), o0); } \
+  inline Error NAME##_ne(const T0& o0) { return _emitter()->_emitI(BaseInst::composeARMInstId(Inst::kId##ID, CondCode::kNE), o0); } \
+  inline Error NAME##_cs(const T0& o0) { return _emitter()->_emitI(BaseInst::composeARMInstId(Inst::kId##ID, CondCode::kCS), o0); } \
+  inline Error NAME##_hs(const T0& o0) { return _emitter()->_emitI(BaseInst::composeARMInstId(Inst::kId##ID, CondCode::kHS), o0); } \
+  inline Error NAME##_cc(const T0& o0) { return _emitter()->_emitI(BaseInst::composeARMInstId(Inst::kId##ID, CondCode::kCC), o0); } \
+  inline Error NAME##_lo(const T0& o0) { return _emitter()->_emitI(BaseInst::composeARMInstId(Inst::kId##ID, CondCode::kLO), o0); } \
+  inline Error NAME##_mi(const T0& o0) { return _emitter()->_emitI(BaseInst::composeARMInstId(Inst::kId##ID, CondCode::kMI), o0); } \
+  inline Error NAME##_pl(const T0& o0) { return _emitter()->_emitI(BaseInst::composeARMInstId(Inst::kId##ID, CondCode::kPL), o0); } \
+  inline Error NAME##_vs(const T0& o0) { return _emitter()->_emitI(BaseInst::composeARMInstId(Inst::kId##ID, CondCode::kVS), o0); } \
+  inline Error NAME##_vc(const T0& o0) { return _emitter()->_emitI(BaseInst::composeARMInstId(Inst::kId##ID, CondCode::kVC), o0); } \
+  inline Error NAME##_hi(const T0& o0) { return _emitter()->_emitI(BaseInst::composeARMInstId(Inst::kId##ID, CondCode::kHI), o0); } \
+  inline Error NAME##_ls(const T0& o0) { return _emitter()->_emitI(BaseInst::composeARMInstId(Inst::kId##ID, CondCode::kLS), o0); } \
+  inline Error NAME##_ge(const T0& o0) { return _emitter()->_emitI(BaseInst::composeARMInstId(Inst::kId##ID, CondCode::kGE), o0); } \
+  inline Error NAME##_lt(const T0& o0) { return _emitter()->_emitI(BaseInst::composeARMInstId(Inst::kId##ID, CondCode::kLT), o0); } \
+  inline Error NAME##_gt(const T0& o0) { return _emitter()->_emitI(BaseInst::composeARMInstId(Inst::kId##ID, CondCode::kGT), o0); } \
+  inline Error NAME##_le(const T0& o0) { return _emitter()->_emitI(BaseInst::composeARMInstId(Inst::kId##ID, CondCode::kLE), o0); } \
+  inline Error NAME##_al(const T0& o0) { return _emitter()->_emitI(BaseInst::composeARMInstId(Inst::kId##ID, CondCode::kAL), o0); }
+
+//! \addtogroup asmjit_a64
+//! \{
+
+//! ARM emitter.
+//!
+//! NOTE: This class cannot be instantiated, you can only cast to it and use it as emitter that emits to either
+//! \ref Assembler, \ref Builder, or \ref Compiler (use with caution with \ref Compiler as it expects virtual
+//! registers to be used).
+template<typename This>
+struct EmitterExplicitT {
+  //! \cond
+
+  // These two are unfortunately reported by the sanitizer. We know what we do, however, the sanitizer doesn't.
+  // I have tried to use reinterpret_cast instead, but that would generate bad code when compiled by MSC.
+  ASMJIT_ATTRIBUTE_NO_SANITIZE_UNDEF inline This* _emitter() noexcept { return static_cast<This*>(this); }
+  ASMJIT_ATTRIBUTE_NO_SANITIZE_UNDEF inline const This* _emitter() const noexcept { return static_cast<const This*>(this); }
+
+  //! \endcond
+
+  // --------------------------------------------------------------------------
+  // [Options]
+  // --------------------------------------------------------------------------
+
+protected:
+  inline This& _addInstOptions(InstOptions options) noexcept {
+    _emitter()->addInstOptions(options); // forwarded to the concrete emitter type `This`
+    return *_emitter();
+  }
+
+public:
+  //! \name General Purpose Instructions
+  //! \{
+
+  ASMJIT_INST_3x(adc, Adc, Gp, Gp, Gp)
+  ASMJIT_INST_3x(adcs, Adcs, Gp, Gp, Gp)
+
+  ASMJIT_INST_3x(add, Add, Gp, Gp, Gp)
+  ASMJIT_INST_4x(add, Add, Gp, Gp, Gp, Imm)
+  ASMJIT_INST_3x(add, Add, Gp, Gp, Imm)
+  ASMJIT_INST_4x(add, Add, Gp, Gp, Imm, Imm)
+  ASMJIT_INST_3x(adds, Adds, Gp, Gp, Gp)
+  ASMJIT_INST_3x(adds, Adds, Gp, Gp, Imm)
+  ASMJIT_INST_4x(adds, Adds, Gp, Gp, Gp, Imm)
+  ASMJIT_INST_4x(adds, Adds, Gp, Gp, Imm, Imm)
+
+  ASMJIT_INST_2x(adr, Adr, Gp, Imm)
+  ASMJIT_INST_2x(adr, Adr, Gp, Label)
+  ASMJIT_INST_2x(adrp, Adrp, Gp, Imm)
+  ASMJIT_INST_2x(adrp, Adrp, Gp, Label)
+
+  ASMJIT_INST_3x(and_, And, Gp, Gp, Imm)
+  ASMJIT_INST_3x(and_, And, Gp, Gp, Gp)
+  ASMJIT_INST_4x(and_, And, Gp, Gp, Gp, Imm)
+  ASMJIT_INST_3x(ands, Ands, Gp, Gp, Imm)
+  ASMJIT_INST_3x(ands, Ands, Gp, Gp, Gp)
+  ASMJIT_INST_4x(ands, Ands, Gp, Gp, Gp, Imm)
+
+  ASMJIT_INST_3x(asr, Asr, Gp, Gp, Imm)
+  ASMJIT_INST_3x(asr, Asr, Gp, Gp, Gp)
+  ASMJIT_INST_3x(asrv, Asrv, Gp, Gp, Gp)
+
+  ASMJIT_INST_2x(at, At, Imm, Gp)
+
+  ASMJIT_INST_3x(bfc, Bfc, Gp, Imm, Imm)
+  ASMJIT_INST_4x(bfi, Bfi, Gp, Gp, Imm, Imm)
+  ASMJIT_INST_4x(bfm, Bfm, Gp, Gp, Imm, Imm)
+  ASMJIT_INST_4x(bfxil, Bfxil, Gp, Gp, Imm, Imm)
+
+  ASMJIT_INST_3x(bic, Bic, Gp, Gp, Imm)       // no trailing ';': the macro already expands to a complete method definition
+  ASMJIT_INST_3x(bic, Bic, Gp, Gp, Gp)
+  ASMJIT_INST_4x(bic, Bic, Gp, Gp, Gp, Imm)
+  ASMJIT_INST_3x(bics, Bics, Gp, Gp, Imm)
+  ASMJIT_INST_3x(bics, Bics, Gp, Gp, Gp)
+  ASMJIT_INST_4x(bics, Bics, Gp, Gp, Gp, Imm)
+
+  ASMJIT_INST_1x(brk, Brk, Imm)
+
+  ASMJIT_INST_4x(ccmn, Ccmn, Gp, Gp, Imm, Imm)  // no trailing ';': the macro already expands to a complete method definition
+  ASMJIT_INST_4x(ccmn, Ccmn, Gp, Imm, Imm, Imm)
+  ASMJIT_INST_4x(ccmp, Ccmp, Gp, Gp, Imm, Imm)
+  ASMJIT_INST_4x(ccmp, Ccmp, Gp, Imm, Imm, Imm)
+
+  ASMJIT_INST_3x(cinc, Cinc, Gp, Gp, Imm)
+  ASMJIT_INST_3x(cinv, Cinv, Gp, Gp, Imm)
+
+  ASMJIT_INST_1x(clrex, Clrex, Imm)
+
+  ASMJIT_INST_2x(cls, Cls, Gp, Gp)
+  ASMJIT_INST_2x(clz, Clz, Gp, Gp)
+
+  ASMJIT_INST_2x(cmn, Cmn, Gp, Gp)
+  ASMJIT_INST_3x(cmn, Cmn, Gp, Gp, Imm)
+  ASMJIT_INST_2x(cmn, Cmn, Gp, Imm)
+  ASMJIT_INST_3x(cmn, Cmn, Gp, Imm, Imm)
+  ASMJIT_INST_2x(cmp, Cmp, Gp, Gp)
+  ASMJIT_INST_3x(cmp, Cmp, Gp, Gp, Imm)
+  ASMJIT_INST_2x(cmp, Cmp, Gp, Imm)
+  ASMJIT_INST_3x(cmp, Cmp, Gp, Imm, Imm)
+
+  ASMJIT_INST_3x(cneg, Cneg, Gp, Gp, Imm)       // no trailing ';': the macro already expands to a complete method definition
+
+  ASMJIT_INST_4x(csel, Csel, Gp, Gp, Gp, Imm)
+  ASMJIT_INST_2x(cset, Cset, Gp, Imm)
+  ASMJIT_INST_2x(csetm, Csetm, Gp, Imm)
+
+  ASMJIT_INST_4x(csinc, Csinc, Gp, Gp, Gp, Imm)
+  ASMJIT_INST_4x(csinv, Csinv, Gp, Gp, Gp, Imm)
+  ASMJIT_INST_4x(csneg, Csneg, Gp, Gp, Gp, Imm)
+
+  ASMJIT_INST_2x(dc, Dc, Imm, Gp)
+  ASMJIT_INST_1x(dmb, Dmb, Imm)
+  ASMJIT_INST_1x(dsb, Dsb, Imm)
+  ASMJIT_INST_0x(drps, Drps)
+
+  ASMJIT_INST_3x(eon, Eon, Gp, Gp, Gp)
+  ASMJIT_INST_4x(eon, Eon, Gp, Gp, Gp, Imm)
+
+  ASMJIT_INST_3x(eor, Eor, Gp, Gp, Imm)
+  ASMJIT_INST_3x(eor, Eor, Gp, Gp, Gp)
+  ASMJIT_INST_4x(eor, Eor, Gp, Gp, Gp, Imm)
+
+  ASMJIT_INST_0x(eret, Eret)
+  ASMJIT_INST_0x(esb, Esb)
+
+  ASMJIT_INST_4x(extr, Extr, Gp, Gp, Gp, Imm)
+
+  ASMJIT_INST_1x(hlt, Hlt, Imm)
+  ASMJIT_INST_1x(hvc, Hvc, Imm)
+  ASMJIT_INST_2x(ic, Ic, Imm, Gp)
+  ASMJIT_INST_1x(isb, Isb, Imm)
+
+  ASMJIT_INST_3x(lsl, Lsl, Gp, Gp, Imm)
+  ASMJIT_INST_3x(lsl, Lsl, Gp, Gp, Gp)
+  ASMJIT_INST_3x(lslv, Lslv, Gp, Gp, Gp)
+
+  ASMJIT_INST_3x(lsr, Lsr, Gp, Gp, Imm)
+  ASMJIT_INST_3x(lsr, Lsr, Gp, Gp, Gp)
+  ASMJIT_INST_3x(lsrv, Lsrv, Gp, Gp, Gp)
+
+  ASMJIT_INST_4x(madd, Madd, Gp, Gp, Gp, Gp)
+  ASMJIT_INST_3x(mneg, Mneg, Gp, Gp, Gp)
+
+  ASMJIT_INST_2x(mov, Mov, Gp, Gp)
+  ASMJIT_INST_2x(mov, Mov, Gp, Imm)
+  ASMJIT_INST_2x(movk, Movk, Gp, Imm)
+  ASMJIT_INST_3x(movk, Movk, Gp, Imm, Imm)
+  ASMJIT_INST_2x(movn, Movn, Gp, Imm)
+  ASMJIT_INST_3x(movn, Movn, Gp, Imm, Imm)
+  ASMJIT_INST_2x(movz, Movz, Gp, Imm)
+  ASMJIT_INST_3x(movz, Movz, Gp, Imm, Imm)
+
+  ASMJIT_INST_2x(mrs, Mrs, Gp, Imm)
+  ASMJIT_INST_2x(msr, Msr, Imm, Gp)
+  ASMJIT_INST_2x(msr, Msr, Imm, Imm)
+
+  ASMJIT_INST_4x(msub, Msub, Gp, Gp, Gp, Gp)
+  ASMJIT_INST_3x(mul, Mul, Gp, Gp, Gp)
+
+  ASMJIT_INST_2x(mvn, Mvn, Gp, Gp)
+  ASMJIT_INST_3x(mvn, Mvn, Gp, Gp, Imm)
+
+  ASMJIT_INST_2x(neg, Neg, Gp, Gp)
+  ASMJIT_INST_3x(neg, Neg, Gp, Gp, Imm)
+  ASMJIT_INST_2x(negs, Negs, Gp, Gp)
+  ASMJIT_INST_3x(negs, Negs, Gp, Gp, Imm)
+
+  ASMJIT_INST_2x(ngc, Ngc, Gp, Gp)
+  ASMJIT_INST_2x(ngcs, Ngcs, Gp, Gp)
+
+  ASMJIT_INST_3x(orn, Orn, Gp, Gp, Gp)
+  ASMJIT_INST_4x(orn, Orn, Gp, Gp, Gp, Imm)
+
+  ASMJIT_INST_3x(orr, Orr, Gp, Gp, Imm)
+  ASMJIT_INST_3x(orr, Orr, Gp, Gp, Gp)
+  ASMJIT_INST_4x(orr, Orr, Gp, Gp, Gp, Imm)
+
+  ASMJIT_INST_2x(rbit, Rbit, Gp, Gp)
+  ASMJIT_INST_1x(ret, Ret, Gp)
+
+  ASMJIT_INST_2x(rev, Rev, Gp, Gp)
+  ASMJIT_INST_2x(rev16, Rev16, Gp, Gp)
+  ASMJIT_INST_2x(rev32, Rev32, Gp, Gp)
+  ASMJIT_INST_2x(rev64, Rev64, Gp, Gp)
+
+  ASMJIT_INST_3x(ror, Ror, Gp, Gp, Imm)
+  ASMJIT_INST_3x(ror, Ror, Gp, Gp, Gp)
+  ASMJIT_INST_3x(rorv, Rorv, Gp, Gp, Gp)
+
+  ASMJIT_INST_3x(sbc, Sbc, Gp, Gp, Gp)
+  ASMJIT_INST_3x(sbcs, Sbcs, Gp, Gp, Gp)
+
+  ASMJIT_INST_4x(sbfiz, Sbfiz, Gp, Gp, Imm, Imm)
+  ASMJIT_INST_4x(sbfm, Sbfm, Gp, Gp, Imm, Imm)
+  ASMJIT_INST_4x(sbfx, Sbfx, Gp, Gp, Imm, Imm)
+
+  ASMJIT_INST_3x(sdiv, Sdiv, Gp, Gp, Gp)
+
+  ASMJIT_INST_4x(smaddl, Smaddl, Gp, Gp, Gp, Gp)
+  ASMJIT_INST_1x(smc, Smc, Imm)
+  ASMJIT_INST_3x(smnegl, Smnegl, Gp, Gp, Gp)
+  ASMJIT_INST_4x(smsubl, Smsubl, Gp, Gp, Gp, Gp)
+  ASMJIT_INST_3x(smulh, Smulh, Gp, Gp, Gp)
+  ASMJIT_INST_3x(smull, Smull, Gp, Gp, Gp)
+
+  ASMJIT_INST_3x(sub, Sub, Gp, Gp, Gp)
+  ASMJIT_INST_4x(sub, Sub, Gp, Gp, Gp, Imm)
+  ASMJIT_INST_3x(sub, Sub, Gp, Gp, Imm)
+  ASMJIT_INST_4x(sub, Sub, Gp, Gp, Imm, Imm)
+  ASMJIT_INST_3x(subs, Subs, Gp, Gp, Gp)
+  ASMJIT_INST_4x(subs, Subs, Gp, Gp, Gp, Imm)
+  ASMJIT_INST_3x(subs, Subs, Gp, Gp, Imm)
+  ASMJIT_INST_4x(subs, Subs, Gp, Gp, Imm, Imm)
+
+  ASMJIT_INST_1x(svc, Svc, Imm)
+
+  ASMJIT_INST_2x(sxtb, Sxtb, Gp, Gp)
+  ASMJIT_INST_2x(sxth, Sxth, Gp, Gp)
+  ASMJIT_INST_2x(sxtw, Sxtw, Gp, Gp)
+
+  ASMJIT_INST_4x(sys, Sys, Imm, Imm, Imm, Imm)
+  ASMJIT_INST_5x(sys, Sys, Imm, Imm, Imm, Imm, Gp)
+
+  ASMJIT_INST_2x(tlbi, Tlbi, Imm, Gp)
+  ASMJIT_INST_2x(tst, Tst, Gp, Imm)
+  ASMJIT_INST_2x(tst, Tst, Gp, Gp)
+  ASMJIT_INST_3x(tst, Tst, Gp, Gp, Imm)
+
+  ASMJIT_INST_3x(udiv, Udiv, Gp, Gp, Gp)
+
+  ASMJIT_INST_4x(ubfiz, Ubfiz, Gp, Gp, Imm, Imm)
+  ASMJIT_INST_4x(ubfm, Ubfm, Gp, Gp, Imm, Imm)
+  ASMJIT_INST_4x(ubfx, Ubfx, Gp, Gp, Imm, Imm)
+
+  ASMJIT_INST_4x(umaddl, Umaddl, Gp, Gp, Gp, Gp)
+  ASMJIT_INST_3x(umnegl, Umnegl, Gp, Gp, Gp)
+  ASMJIT_INST_4x(umsubl, Umsubl, Gp, Gp, Gp, Gp)
+  ASMJIT_INST_3x(umull, Umull, Gp, Gp, Gp)
+  ASMJIT_INST_3x(umulh, Umulh, Gp, Gp, Gp)
+
+  ASMJIT_INST_2x(uxtb, Uxtb, Gp, Gp)
+  ASMJIT_INST_2x(uxth, Uxth, Gp, Gp)
+
+  ASMJIT_INST_0x(csdb, Csdb)
+  ASMJIT_INST_1x(dcps1, Dcps1, Imm)
+  ASMJIT_INST_1x(dcps2, Dcps2, Imm)
+  ASMJIT_INST_1x(dcps3, Dcps3, Imm)
+  ASMJIT_INST_0x(dgh, Dgh)
+  ASMJIT_INST_0x(pssbb, Pssbb)
+  ASMJIT_INST_0x(ssbb, Ssbb)
+  ASMJIT_INST_1x(udf, Udf, Imm)
+  ASMJIT_INST_1x(setf8, Setf8, Gp)
+  ASMJIT_INST_1x(setf16, Setf16, Gp)
+
+  //! \}
+
+  //! \name ARMv8.4 Instructions
+  //! \{
+
+  ASMJIT_INST_0x(cfinv, Cfinv)
+
+  //! \}
+
+  //! \name ARMv8.5 Instructions
+  //! \{
+
+  ASMJIT_INST_0x(axflag, Axflag)
+  ASMJIT_INST_0x(xaflag, Xaflag)
+
+  //! \}
+
+  //! \name Branch Instructions
+  //! \{
+
+  ASMJIT_INST_1cc(b, B, Imm)
+  ASMJIT_INST_1cc(b, B, Label)
+  ASMJIT_INST_1x(bl, Bl, Imm)
+  ASMJIT_INST_1x(bl, Bl, Label)
+  ASMJIT_INST_1x(blr, Blr, Gp)
+  ASMJIT_INST_1x(br, Br, Gp)
+  ASMJIT_INST_2x(cbz, Cbz, Gp, Imm)
+  ASMJIT_INST_2x(cbz, Cbz, Gp, Label)
+  ASMJIT_INST_2x(cbnz, Cbnz, Gp, Imm)
+  ASMJIT_INST_2x(cbnz, Cbnz, Gp, Label)
+  ASMJIT_INST_3x(tbnz, Tbnz, Gp, Imm, Imm)
+  ASMJIT_INST_3x(tbnz, Tbnz, Gp, Imm, Label)
+  ASMJIT_INST_3x(tbz, Tbz, Gp, Imm, Imm)
+  ASMJIT_INST_3x(tbz, Tbz, Gp, Imm, Label)
+
+  //! \}
+
+  //! \name Load & Store Instructions
+  //! \{
+
+  ASMJIT_INST_3x(cas, Cas, Gp, Gp, Mem)
+  ASMJIT_INST_3x(casa, Casa, Gp, Gp, Mem)
+  ASMJIT_INST_3x(casab, Casab, Gp, Gp, Mem)
+  ASMJIT_INST_3x(casah, Casah, Gp, Gp, Mem)
+  ASMJIT_INST_3x(casal, Casal, Gp, Gp, Mem)
+  ASMJIT_INST_3x(casalb, Casalb, Gp, Gp, Mem)
+  ASMJIT_INST_3x(casalh, Casalh, Gp, Gp, Mem)
+  ASMJIT_INST_3x(casb, Casb, Gp, Gp, Mem)
+  ASMJIT_INST_3x(cash, Cash, Gp, Gp, Mem)
+  ASMJIT_INST_3x(casl, Casl, Gp, Gp, Mem)
+  ASMJIT_INST_3x(caslb, Caslb, Gp, Gp, Mem)
+  ASMJIT_INST_3x(caslh, Caslh, Gp, Gp, Mem)
+
+  ASMJIT_INST_5x(casp, Casp, Gp, Gp, Gp, Gp, Mem)
+  ASMJIT_INST_5x(caspa, Caspa, Gp, Gp, Gp, Gp, Mem)
+  ASMJIT_INST_5x(caspal, Caspal, Gp, Gp, Gp, Gp, Mem)
+  ASMJIT_INST_5x(caspl, Caspl, Gp, Gp, Gp, Gp, Mem)
+
+  ASMJIT_INST_3x(ldadd, Ldadd, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldadda, Ldadda, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldaddab, Ldaddab, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldaddah, Ldaddah, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldaddal, Ldaddal, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldaddalb, Ldaddalb, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldaddalh, Ldaddalh, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldaddb, Ldaddb, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldaddh, Ldaddh, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldaddl, Ldaddl, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldaddlb, Ldaddlb, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldaddlh, Ldaddlh, Gp, Gp, Mem)
+
+  ASMJIT_INST_2x(ldar, Ldar, Gp, Mem)
+  ASMJIT_INST_2x(ldarb, Ldarb, Gp, Mem)
+  ASMJIT_INST_2x(ldarh, Ldarh, Gp, Mem)
+
+  ASMJIT_INST_2x(ldaxr, Ldaxr, Gp, Mem)
+  ASMJIT_INST_2x(ldaxrb, Ldaxrb, Gp, Mem)
+  ASMJIT_INST_2x(ldaxrh, Ldaxrh, Gp, Mem)
+
+  ASMJIT_INST_3x(ldclr, Ldclr, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldclra, Ldclra, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldclrab, Ldclrab, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldclrah, Ldclrah, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldclral, Ldclral, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldclralb, Ldclralb, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldclralh, Ldclralh, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldclrb, Ldclrb, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldclrh, Ldclrh, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldclrl, Ldclrl, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldclrlb, Ldclrlb, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldclrlh, Ldclrlh, Gp, Gp, Mem)
+
+  ASMJIT_INST_3x(ldeor, Ldeor, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldeora, Ldeora, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldeorab, Ldeorab, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldeorah, Ldeorah, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldeoral, Ldeoral, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldeoralb, Ldeoralb, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldeoralh, Ldeoralh, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldeorb, Ldeorb, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldeorh, Ldeorh, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldeorl, Ldeorl, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldeorlb, Ldeorlb, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldeorlh, Ldeorlh, Gp, Gp, Mem)
+
+  ASMJIT_INST_2x(ldlar, Ldlar, Gp, Mem)
+  ASMJIT_INST_2x(ldlarb, Ldlarb, Gp, Mem)
+  ASMJIT_INST_2x(ldlarh, Ldlarh, Gp, Mem)
+
+  ASMJIT_INST_3x(ldnp, Ldnp, Gp, Gp, Mem)
+
+  ASMJIT_INST_3x(ldp, Ldp, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldpsw, Ldpsw, Gp, Gp, Mem)
+
+  ASMJIT_INST_2x(ldr, Ldr, Gp, Mem)
+  ASMJIT_INST_2x(ldrb, Ldrb, Gp, Mem)
+  ASMJIT_INST_2x(ldrh, Ldrh, Gp, Mem)
+  ASMJIT_INST_2x(ldrsb, Ldrsb, Gp, Mem)
+  ASMJIT_INST_2x(ldrsh, Ldrsh, Gp, Mem)
+  ASMJIT_INST_2x(ldrsw, Ldrsw, Gp, Mem)
+
+  ASMJIT_INST_3x(ldset, Ldset, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldseta, Ldseta, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldsetab, Ldsetab, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldsetah, Ldsetah, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldsetal, Ldsetal, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldsetalb, Ldsetalb, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldsetalh, Ldsetalh, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldsetb, Ldsetb, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldseth, Ldseth, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldsetl, Ldsetl, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldsetlb, Ldsetlb, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldsetlh, Ldsetlh, Gp, Gp, Mem)
+
+  ASMJIT_INST_3x(ldsmax, Ldsmax, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldsmaxa, Ldsmaxa, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldsmaxab, Ldsmaxab, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldsmaxah, Ldsmaxah, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldsmaxal, Ldsmaxal, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldsmaxalb, Ldsmaxalb, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldsmaxalh, Ldsmaxalh, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldsmaxb, Ldsmaxb, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldsmaxh, Ldsmaxh, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldsmaxl, Ldsmaxl, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldsmaxlb, Ldsmaxlb, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldsmaxlh, Ldsmaxlh, Gp, Gp, Mem)
+
+  ASMJIT_INST_3x(ldsmin, Ldsmin, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldsmina, Ldsmina, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldsminab, Ldsminab, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldsminah, Ldsminah, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldsminal, Ldsminal, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldsminalb, Ldsminalb, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldsminalh, Ldsminalh, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldsminb, Ldsminb, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldsminh, Ldsminh, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldsminl, Ldsminl, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldsminlb, Ldsminlb, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldsminlh, Ldsminlh, Gp, Gp, Mem)
+
+  ASMJIT_INST_2x(ldtr, Ldtr, Gp, Mem)
+  ASMJIT_INST_2x(ldtrb, Ldtrb, Gp, Mem)
+  ASMJIT_INST_2x(ldtrh, Ldtrh, Gp, Mem)
+  ASMJIT_INST_2x(ldtrsb, Ldtrsb, Gp, Mem)
+  ASMJIT_INST_2x(ldtrsh, Ldtrsh, Gp, Mem)
+  ASMJIT_INST_2x(ldtrsw, Ldtrsw, Gp, Mem)
+
+  ASMJIT_INST_3x(ldumax, Ldumax, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldumaxa, Ldumaxa, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldumaxab, Ldumaxab, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldumaxah, Ldumaxah, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldumaxal, Ldumaxal, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldumaxalb, Ldumaxalb, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldumaxalh, Ldumaxalh, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldumaxb, Ldumaxb, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldumaxh, Ldumaxh, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldumaxl, Ldumaxl, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldumaxlb, Ldumaxlb, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldumaxlh, Ldumaxlh, Gp, Gp, Mem)
+
+  ASMJIT_INST_3x(ldumin, Ldumin, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldumina, Ldumina, Gp, Gp, Mem)
+  ASMJIT_INST_3x(lduminab, Lduminab, Gp, Gp, Mem)
+  ASMJIT_INST_3x(lduminah, Lduminah, Gp, Gp, Mem)
+  ASMJIT_INST_3x(lduminal, Lduminal, Gp, Gp, Mem)
+  ASMJIT_INST_3x(lduminalb, Lduminalb, Gp, Gp, Mem)
+  ASMJIT_INST_3x(lduminalh, Lduminalh, Gp, Gp, Mem)
+  ASMJIT_INST_3x(lduminb, Lduminb, Gp, Gp, Mem)
+  ASMJIT_INST_3x(lduminh, Lduminh, Gp, Gp, Mem)
+  ASMJIT_INST_3x(lduminl, Lduminl, Gp, Gp, Mem)
+  ASMJIT_INST_3x(lduminlb, Lduminlb, Gp, Gp, Mem)
+  ASMJIT_INST_3x(lduminlh, Lduminlh, Gp, Gp, Mem)
+
+  ASMJIT_INST_2x(ldur, Ldur, Gp, Mem)
+  ASMJIT_INST_2x(ldurb, Ldurb, Gp, Mem)
+  ASMJIT_INST_2x(ldurh, Ldurh, Gp, Mem)
+  ASMJIT_INST_2x(ldursb, Ldursb, Gp, Mem)
+  ASMJIT_INST_2x(ldursh, Ldursh, Gp, Mem)
+  ASMJIT_INST_2x(ldursw, Ldursw, Gp, Mem)
+
+  ASMJIT_INST_3x(ldxp, Ldxp, Gp, Gp, Mem)
+  ASMJIT_INST_3x(ldaxp, Ldaxp, Gp, Gp, Mem)
+
+  ASMJIT_INST_2x(ldxr, Ldxr, Gp, Mem)
+  ASMJIT_INST_2x(ldxrb, Ldxrb, Gp, Mem)
+  ASMJIT_INST_2x(ldxrh, Ldxrh, Gp, Mem)
+
+  ASMJIT_INST_2x(stadd, Stadd, Gp, Mem)
+  ASMJIT_INST_2x(staddb, Staddb, Gp, Mem)
+  ASMJIT_INST_2x(staddh, Staddh, Gp, Mem)
+  ASMJIT_INST_2x(staddl, Staddl, Gp, Mem)
+  ASMJIT_INST_2x(staddlb, Staddlb, Gp, Mem)
+  ASMJIT_INST_2x(staddlh, Staddlh, Gp, Mem)
+
+  ASMJIT_INST_2x(stclr, Stclr, Gp, Mem)
+  ASMJIT_INST_2x(stclrb, Stclrb, Gp, Mem)
+  ASMJIT_INST_2x(stclrh, Stclrh, Gp, Mem)
+  ASMJIT_INST_2x(stclrl, Stclrl, Gp, Mem)
+  ASMJIT_INST_2x(stclrlb, Stclrlb, Gp, Mem)
+  ASMJIT_INST_2x(stclrlh, Stclrlh, Gp, Mem)
+
+  ASMJIT_INST_2x(steor, Steor, Gp, Mem)
+  ASMJIT_INST_2x(steorb, Steorb, Gp, Mem)
+  ASMJIT_INST_2x(steorh, Steorh, Gp, Mem)
+  ASMJIT_INST_2x(steorl, Steorl, Gp, Mem)
+  ASMJIT_INST_2x(steorlb, Steorlb, Gp, Mem)
+  ASMJIT_INST_2x(steorlh, Steorlh, Gp, Mem)
+
+  ASMJIT_INST_2x(stllr, Stllr, Gp, Mem)
+  ASMJIT_INST_2x(stllrb, Stllrb, Gp, Mem)
+  ASMJIT_INST_2x(stllrh, Stllrh, Gp, Mem)
+
+  ASMJIT_INST_2x(stlr, Stlr, Gp, Mem)     // Store-release; must use its own id, not the `stllr` id above.
+  ASMJIT_INST_2x(stlrb, Stlrb, Gp, Mem)   // Byte variant of `stlr`.
+  ASMJIT_INST_2x(stlrh, Stlrh, Gp, Mem)   // Halfword variant of `stlr`.
+
+  ASMJIT_INST_3x(stlxr, Stlxr, Gp, Gp, Mem)
+  ASMJIT_INST_3x(stlxrb, Stlxrb, Gp, Gp, Mem)
+  ASMJIT_INST_3x(stlxrh, Stlxrh, Gp, Gp, Mem)
+
+  ASMJIT_INST_3x(stnp, Stnp, Gp, Gp, Mem)
+  ASMJIT_INST_3x(stp, Stp, Gp, Gp, Mem)
+
+  ASMJIT_INST_2x(str, Str, Gp, Mem)
+  ASMJIT_INST_2x(strb, Strb, Gp, Mem)
+  ASMJIT_INST_2x(strh, Strh, Gp, Mem)
+
+  ASMJIT_INST_2x(stset, Stset, Gp, Mem)
+  ASMJIT_INST_2x(stsetb, Stsetb, Gp, Mem)
+  ASMJIT_INST_2x(stseth, Stseth, Gp, Mem)
+  ASMJIT_INST_2x(stsetl, Stsetl, Gp, Mem)
+  ASMJIT_INST_2x(stsetlb, Stsetlb, Gp, Mem)
+  ASMJIT_INST_2x(stsetlh, Stsetlh, Gp, Mem)
+
+  ASMJIT_INST_2x(stsmax, Stsmax, Gp, Mem)
+  ASMJIT_INST_2x(stsmaxb, Stsmaxb, Gp, Mem)
+  ASMJIT_INST_2x(stsmaxh, Stsmaxh, Gp, Mem)
+  ASMJIT_INST_2x(stsmaxl, Stsmaxl, Gp, Mem)
+  ASMJIT_INST_2x(stsmaxlb, Stsmaxlb, Gp, Mem)
+  ASMJIT_INST_2x(stsmaxlh, Stsmaxlh, Gp, Mem)
+
+  ASMJIT_INST_2x(stsmin, Stsmin, Gp, Mem)
+  ASMJIT_INST_2x(stsminb, Stsminb, Gp, Mem)
+  ASMJIT_INST_2x(stsminh, Stsminh, Gp, Mem)
+  ASMJIT_INST_2x(stsminl, Stsminl, Gp, Mem)
+  ASMJIT_INST_2x(stsminlb, Stsminlb, Gp, Mem)
+  ASMJIT_INST_2x(stsminlh, Stsminlh, Gp, Mem)
+
+  ASMJIT_INST_2x(sttr, Sttr, Gp, Mem)
+  ASMJIT_INST_2x(sttrb, Sttrb, Gp, Mem)
+  ASMJIT_INST_2x(sttrh, Sttrh, Gp, Mem)
+
+  ASMJIT_INST_2x(stumax, Stumax, Gp, Mem)
+  ASMJIT_INST_2x(stumaxb, Stumaxb, Gp, Mem)
+  ASMJIT_INST_2x(stumaxh, Stumaxh, Gp, Mem)
+  ASMJIT_INST_2x(stumaxl, Stumaxl, Gp, Mem)
+  ASMJIT_INST_2x(stumaxlb, Stumaxlb, Gp, Mem)
+  ASMJIT_INST_2x(stumaxlh, Stumaxlh, Gp, Mem)
+
+  ASMJIT_INST_2x(stumin, Stumin, Gp, Mem)
+  ASMJIT_INST_2x(stuminb, Stuminb, Gp, Mem)
+  ASMJIT_INST_2x(stuminh, Stuminh, Gp, Mem)
+  ASMJIT_INST_2x(stuminl, Stuminl, Gp, Mem)
+  ASMJIT_INST_2x(stuminlb, Stuminlb, Gp, Mem)
+  ASMJIT_INST_2x(stuminlh, Stuminlh, Gp, Mem)
+
+  ASMJIT_INST_2x(stur, Stur, Gp, Mem)
+  ASMJIT_INST_2x(sturb, Sturb, Gp, Mem)
+  ASMJIT_INST_2x(sturh, Sturh, Gp, Mem)
+
+  ASMJIT_INST_4x(stxp, Stxp, Gp, Gp, Gp, Mem)
+  ASMJIT_INST_4x(stlxp, Stlxp, Gp, Gp, Gp, Mem)
+
+  ASMJIT_INST_3x(stxr, Stxr, Gp, Gp, Mem)
+  ASMJIT_INST_3x(stxrb, Stxrb, Gp, Gp, Mem)
+  ASMJIT_INST_3x(stxrh, Stxrh, Gp, Gp, Mem)
+
+  ASMJIT_INST_3x(swp, Swp, Gp, Gp, Mem)
+  ASMJIT_INST_3x(swpa, Swpa, Gp, Gp, Mem)
+  ASMJIT_INST_3x(swpab, Swpab, Gp, Gp, Mem)
+  ASMJIT_INST_3x(swpah, Swpah, Gp, Gp, Mem)
+  ASMJIT_INST_3x(swpal, Swpal, Gp, Gp, Mem)
+  ASMJIT_INST_3x(swpalb, Swpalb, Gp, Gp, Mem)
+  ASMJIT_INST_3x(swpalh, Swpalh, Gp, Gp, Mem)
+  ASMJIT_INST_3x(swpb, Swpb, Gp, Gp, Mem)
+  ASMJIT_INST_3x(swph, Swph, Gp, Gp, Mem)
+  ASMJIT_INST_3x(swpl, Swpl, Gp, Gp, Mem)
+  ASMJIT_INST_3x(swplb, Swplb, Gp, Gp, Mem)
+  ASMJIT_INST_3x(swplh, Swplh, Gp, Gp, Mem)
+  //! \}
+
+  //! \name CRC Instructions (ARMv8.1-A, optional in ARMv8.0-A)
+  //! \{
+
+  ASMJIT_INST_3x(crc32b, Crc32b, Gp, Gp, Gp);
+  ASMJIT_INST_3x(crc32h, Crc32h, Gp, Gp, Gp);
+  ASMJIT_INST_3x(crc32w, Crc32w, Gp, Gp, Gp);
+  ASMJIT_INST_3x(crc32x, Crc32x, Gp, Gp, Gp);
+
+  ASMJIT_INST_3x(crc32cb, Crc32cb, Gp, Gp, Gp);
+  ASMJIT_INST_3x(crc32ch, Crc32ch, Gp, Gp, Gp);
+  ASMJIT_INST_3x(crc32cw, Crc32cw, Gp, Gp, Gp);
+  ASMJIT_INST_3x(crc32cx, Crc32cx, Gp, Gp, Gp);
+
+  //! \}
+
+  //! \name PAuth & MTE Instructions
+  //! \{
+
+  ASMJIT_INST_2x(autda, Autda, Gp, Gp);
+  ASMJIT_INST_2x(autdb, Autdb, Gp, Gp);
+  ASMJIT_INST_1x(autdza, Autdza, Gp);
+  ASMJIT_INST_1x(autdzb, Autdzb, Gp);
+  ASMJIT_INST_2x(autia, Autia, Gp, Gp);
+  ASMJIT_INST_0x(autia1716, Autia1716);
+  ASMJIT_INST_0x(autiasp, Autiasp);
+  ASMJIT_INST_0x(autiaz, Autiaz);
+  ASMJIT_INST_2x(autib, Autib, Gp, Gp);
+  ASMJIT_INST_0x(autib1716, Autib1716);
+  ASMJIT_INST_0x(autibsp, Autibsp);
+  ASMJIT_INST_0x(autibz, Autibz);
+  ASMJIT_INST_1x(autiza, Autiza, Gp);
+  ASMJIT_INST_1x(autizb, Autizb, Gp);
+
+  ASMJIT_INST_3x(gmi, Gmi, Gp, Gp, Gp);
+
+  ASMJIT_INST_2x(cmpp, Cmpp, Gp, Gp);
+  ASMJIT_INST_4x(addg, Addg, Gp, Gp, Imm, Imm);
+
+  ASMJIT_INST_2x(ldg, Ldg, Gp, Mem)
+  ASMJIT_INST_2x(ldgm, Ldgm, Gp, Mem)
+  ASMJIT_INST_2x(ldraa, Ldraa, Gp, Mem)
+  ASMJIT_INST_2x(ldrab, Ldrab, Gp, Mem)
+
+  ASMJIT_INST_2x(pacda, Pacda, Gp, Gp);
+  ASMJIT_INST_2x(pacdb, Pacdb, Gp, Gp);
+  ASMJIT_INST_1x(pacdza, Pacdza, Gp);
+  ASMJIT_INST_1x(pacdzb, Pacdzb, Gp);
+  ASMJIT_INST_3x(pacga, Pacga, Gp, Gp, Gp);
+
+  ASMJIT_INST_3x(subp, Subp, Gp, Gp, Gp);
+  ASMJIT_INST_3x(subps, Subps, Gp, Gp, Gp);
+  ASMJIT_INST_4x(subg, Subg, Gp, Gp, Imm, Imm);
+
+  ASMJIT_INST_2x(st2g, St2g, Gp, Mem)
+  ASMJIT_INST_2x(stg, Stg, Gp, Mem)
+  ASMJIT_INST_3x(stgp, Stgp, Gp, Gp, Mem)
+  ASMJIT_INST_2x(stgm, Stgm, Gp, Mem)
+  ASMJIT_INST_2x(stzg, Stzg, Gp, Mem)
+  ASMJIT_INST_2x(stz2g, Stz2g, Gp, Mem)
+  ASMJIT_INST_2x(stzgm, Stzgm, Gp, Mem)
+
+  ASMJIT_INST_1x(xpacd, Xpacd, Gp);
+  ASMJIT_INST_1x(xpaci, Xpaci, Gp);
+  ASMJIT_INST_0x(xpaclri, Xpaclri);
+
+  //! \}
+
+  //! \name Hint Instructions
+  //! \{
+
+  ASMJIT_INST_1x(hint, Hint, Imm)
+  ASMJIT_INST_0x(nop, Nop)
+  ASMJIT_INST_0x(sev, Sev)
+  ASMJIT_INST_0x(sevl, Sevl)
+  ASMJIT_INST_0x(wfe, Wfe)
+  ASMJIT_INST_0x(wfi, Wfi)
+  ASMJIT_INST_0x(yield, Yield)
+
+  //! \}
+
+  //! \name SIMD & FP Instructions
+  //! \{
+
+  ASMJIT_INST_2x(abs, Abs_v, Vec, Vec);
+  ASMJIT_INST_3x(add, Add_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(addhn, Addhn_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(addhn2, Addhn2_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(addp, Addp_v, Vec, Vec);
+  ASMJIT_INST_3x(addp, Addp_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(addv, Addv_v, Vec, Vec);
+  ASMJIT_INST_3x(and_, And_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(bic, Bic_v, Vec, Imm);
+  ASMJIT_INST_3x(bic, Bic_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(bic, Bic_v, Vec, Imm, Imm);
+  ASMJIT_INST_3x(bif, Bif_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(bit, Bit_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(bsl, Bsl_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(cls, Cls_v, Vec, Vec);
+  ASMJIT_INST_2x(clz, Clz_v, Vec, Vec);
+  ASMJIT_INST_3x(cmeq, Cmeq_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(cmeq, Cmeq_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(cmge, Cmge_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(cmge, Cmge_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(cmgt, Cmgt_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(cmgt, Cmgt_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(cmhi, Cmhi_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(cmhs, Cmhs_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(cmle, Cmle_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(cmlt, Cmlt_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(cmtst, Cmtst_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(cnt, Cnt_v, Vec, Vec);
+  ASMJIT_INST_2x(dup, Dup_v, Vec, Gp);
+  ASMJIT_INST_2x(dup, Dup_v, Vec, Vec);
+  ASMJIT_INST_3x(eor, Eor_v, Vec, Vec, Vec);
+  ASMJIT_INST_4x(ext, Ext_v, Vec, Vec, Vec, Imm);
+  ASMJIT_INST_3x(fabd, Fabd_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(fabs, Fabs_v, Vec, Vec);
+  ASMJIT_INST_3x(facge, Facge_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(facgt, Facgt_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(fadd, Fadd_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(faddp, Faddp_v, Vec, Vec);
+  ASMJIT_INST_3x(faddp, Faddp_v, Vec, Vec, Vec);
+  ASMJIT_INST_4x(fccmp, Fccmp_v, Vec, Vec, Imm, Imm);
+  ASMJIT_INST_4x(fccmpe, Fccmpe_v, Vec, Vec, Imm, Imm);
+  ASMJIT_INST_3x(fcmeq, Fcmeq_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(fcmeq, Fcmeq_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(fcmge, Fcmge_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(fcmge, Fcmge_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(fcmgt, Fcmgt_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(fcmgt, Fcmgt_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(fcmle, Fcmle_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(fcmlt, Fcmlt_v, Vec, Vec, Imm);
+  ASMJIT_INST_2x(fcmp, Fcmp_v, Vec, Vec);
+  ASMJIT_INST_2x(fcmp, Fcmp_v, Vec, Imm);
+  ASMJIT_INST_2x(fcmpe, Fcmpe_v, Vec, Vec);
+  ASMJIT_INST_2x(fcmpe, Fcmpe_v, Vec, Imm);
+  ASMJIT_INST_4x(fcsel, Fcsel_v, Vec, Vec, Vec, Imm);
+  ASMJIT_INST_2x(fcvt, Fcvt_v, Vec, Vec);
+  ASMJIT_INST_2x(fcvtas, Fcvtas_v, Gp, Vec);
+  ASMJIT_INST_2x(fcvtas, Fcvtas_v, Vec, Vec);
+  ASMJIT_INST_2x(fcvtau, Fcvtau_v, Gp, Vec);
+  ASMJIT_INST_2x(fcvtau, Fcvtau_v, Vec, Vec);
+  ASMJIT_INST_2x(fcvtl, Fcvtl_v, Vec, Vec);
+  ASMJIT_INST_2x(fcvtl2, Fcvtl2_v, Vec, Vec);
+  ASMJIT_INST_2x(fcvtms, Fcvtms_v, Gp, Vec);
+  ASMJIT_INST_2x(fcvtms, Fcvtms_v, Vec, Vec);
+  ASMJIT_INST_2x(fcvtmu, Fcvtmu_v, Gp, Vec);
+  ASMJIT_INST_2x(fcvtmu, Fcvtmu_v, Vec, Vec);
+  ASMJIT_INST_2x(fcvtn, Fcvtn_v, Vec, Vec);
+  ASMJIT_INST_2x(fcvtn2, Fcvtn2_v, Vec, Vec);
+  ASMJIT_INST_2x(fcvtns, Fcvtns_v, Gp, Vec);
+  ASMJIT_INST_2x(fcvtns, Fcvtns_v, Vec, Vec);
+  ASMJIT_INST_2x(fcvtnu, Fcvtnu_v, Gp, Vec);
+  ASMJIT_INST_2x(fcvtnu, Fcvtnu_v, Vec, Vec);
+  ASMJIT_INST_2x(fcvtps, Fcvtps_v, Gp, Vec);
+  ASMJIT_INST_2x(fcvtps, Fcvtps_v, Vec, Vec);
+  ASMJIT_INST_2x(fcvtpu, Fcvtpu_v, Gp, Vec);
+  ASMJIT_INST_2x(fcvtpu, Fcvtpu_v, Vec, Vec);
+  ASMJIT_INST_2x(fcvtxn, Fcvtxn_v, Vec, Vec);
+  ASMJIT_INST_2x(fcvtxn2, Fcvtxn2_v, Vec, Vec);
+  ASMJIT_INST_2x(fcvtzs, Fcvtzs_v, Gp, Vec);
+  ASMJIT_INST_3x(fcvtzs, Fcvtzs_v, Gp, Vec, Imm);
+  ASMJIT_INST_2x(fcvtzs, Fcvtzs_v, Vec, Vec);
+  ASMJIT_INST_3x(fcvtzs, Fcvtzs_v, Vec, Vec, Imm);
+  ASMJIT_INST_2x(fcvtzu, Fcvtzu_v, Gp, Vec);
+  ASMJIT_INST_3x(fcvtzu, Fcvtzu_v, Gp, Vec, Imm);
+  ASMJIT_INST_2x(fcvtzu, Fcvtzu_v, Vec, Vec);
+  ASMJIT_INST_3x(fcvtzu, Fcvtzu_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(fdiv, Fdiv_v, Vec, Vec, Vec);
+  ASMJIT_INST_4x(fmadd, Fmadd_v, Vec, Vec, Vec, Vec);
+  ASMJIT_INST_3x(fmax, Fmax_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(fmaxnm, Fmaxnm_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(fmaxnmp, Fmaxnmp_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(fmaxnmp, Fmaxnmp_v, Vec, Vec);
+  ASMJIT_INST_2x(fmaxnmv, Fmaxnmv_v, Vec, Vec);
+  ASMJIT_INST_3x(fmaxp, Fmaxp_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(fmaxp, Fmaxp_v, Vec, Vec);
+  ASMJIT_INST_2x(fmaxv, Fmaxv_v, Vec, Vec);
+  ASMJIT_INST_3x(fmin, Fmin_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(fminnm, Fminnm_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(fminnmv, Fminnmv_v, Vec, Vec);
+  ASMJIT_INST_3x(fminnmp, Fminnmp_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(fminnmp, Fminnmp_v, Vec, Vec);
+  ASMJIT_INST_2x(fminp, Fminp_v, Vec, Vec);
+  ASMJIT_INST_3x(fminp, Fminp_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(fminv, Fminv_v, Vec, Vec);
+  ASMJIT_INST_3x(fmla, Fmla_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(fmls, Fmls_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(fmov, Fmov_v, Gp, Vec);
+  ASMJIT_INST_2x(fmov, Fmov_v, Vec, Gp);
+  ASMJIT_INST_2x(fmov, Fmov_v, Vec, Vec);
+  ASMJIT_INST_2x(fmov, Fmov_v, Vec, Imm);
+  ASMJIT_INST_4x(fmsub, Fmsub_v, Vec, Vec, Vec, Vec);
+  ASMJIT_INST_3x(fmul, Fmul_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(fmulx, Fmulx_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(fneg, Fneg_v, Vec, Vec);
+  ASMJIT_INST_4x(fnmadd, Fnmadd_v, Vec, Vec, Vec, Vec);
+  ASMJIT_INST_4x(fnmsub, Fnmsub_v, Vec, Vec, Vec, Vec);
+  ASMJIT_INST_3x(fnmul, Fnmul_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(frecpe, Frecpe_v, Vec, Vec);
+  ASMJIT_INST_3x(frecps, Frecps_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(frecpx, Frecpx_v, Vec, Vec);
+  ASMJIT_INST_2x(frint32x, Frint32x_v, Vec, Vec);
+  ASMJIT_INST_2x(frint32z, Frint32z_v, Vec, Vec);
+  ASMJIT_INST_2x(frint64x, Frint64x_v, Vec, Vec);
+  ASMJIT_INST_2x(frint64z, Frint64z_v, Vec, Vec);
+  ASMJIT_INST_2x(frinta, Frinta_v, Vec, Vec);
+  ASMJIT_INST_2x(frinti, Frinti_v, Vec, Vec);
+  ASMJIT_INST_2x(frintm, Frintm_v, Vec, Vec);
+  ASMJIT_INST_2x(frintn, Frintn_v, Vec, Vec);
+  ASMJIT_INST_2x(frintp, Frintp_v, Vec, Vec);
+  ASMJIT_INST_2x(frintx, Frintx_v, Vec, Vec);
+  ASMJIT_INST_2x(frintz, Frintz_v, Vec, Vec);
+  ASMJIT_INST_2x(frsqrte, Frsqrte_v, Vec, Vec);
+  ASMJIT_INST_3x(frsqrts, Frsqrts_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(fsqrt, Fsqrt_v, Vec, Vec);
+  ASMJIT_INST_3x(fsub, Fsub_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(ins, Ins_v, Vec, Gp);
+  ASMJIT_INST_2x(ins, Ins_v, Vec, Vec);
+  ASMJIT_INST_2x(ld1, Ld1_v, Vec, Mem);
+  ASMJIT_INST_3x(ld1, Ld1_v, Vec, Vec, Mem);
+  ASMJIT_INST_4x(ld1, Ld1_v, Vec, Vec, Vec, Mem);
+  ASMJIT_INST_5x(ld1, Ld1_v, Vec, Vec, Vec, Vec, Mem);
+  ASMJIT_INST_2x(ld1r, Ld1r_v, Vec, Mem);
+  ASMJIT_INST_3x(ld2, Ld2_v, Vec, Vec, Mem);
+  ASMJIT_INST_3x(ld2r, Ld2r_v, Vec, Vec, Mem);
+  ASMJIT_INST_4x(ld3, Ld3_v, Vec, Vec, Vec, Mem);
+  ASMJIT_INST_4x(ld3r, Ld3r_v, Vec, Vec, Vec, Mem);
+  ASMJIT_INST_5x(ld4, Ld4_v, Vec, Vec, Vec, Vec, Mem);
+  ASMJIT_INST_5x(ld4r, Ld4r_v, Vec, Vec, Vec, Vec, Mem);
+  ASMJIT_INST_3x(ldnp, Ldnp_v, Vec, Vec, Mem);
+  ASMJIT_INST_3x(ldp, Ldp_v, Vec, Vec, Mem);
+  ASMJIT_INST_2x(ldr, Ldr_v, Vec, Mem);
+  ASMJIT_INST_2x(ldur, Ldur_v, Vec, Mem);
+  ASMJIT_INST_3x(mla, Mla_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(mls, Mls_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(mov, Mov_v, Vec, Vec);
+  ASMJIT_INST_2x(mov, Mov_v, Gp, Vec);
+  ASMJIT_INST_2x(mov, Mov_v, Vec, Gp);
+  ASMJIT_INST_2x(movi, Movi_v, Vec, Imm);
+  ASMJIT_INST_3x(movi, Movi_v, Vec, Imm, Imm);
+  ASMJIT_INST_3x(mul, Mul_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(mvn, Mvn_v, Vec, Vec);
+  ASMJIT_INST_2x(mvni, Mvni_v, Vec, Imm);
+  ASMJIT_INST_3x(mvni, Mvni_v, Vec, Imm, Imm);
+  ASMJIT_INST_2x(neg, Neg_v, Vec, Vec);
+  ASMJIT_INST_2x(not_, Not_v, Vec, Vec);
+  ASMJIT_INST_3x(orn, Orn_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(orr, Orr_v, Vec, Imm);
+  ASMJIT_INST_3x(orr, Orr_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(orr, Orr_v, Vec, Imm, Imm);
+  ASMJIT_INST_3x(pmul, Pmul_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(pmull, Pmull_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(pmull2, Pmull2_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(raddhn, Raddhn_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(raddhn2, Raddhn2_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(rbit, Rbit_v, Vec, Vec);
+  ASMJIT_INST_2x(rev16, Rev16_v, Vec, Vec);
+  ASMJIT_INST_2x(rev32, Rev32_v, Vec, Vec);
+  ASMJIT_INST_2x(rev64, Rev64_v, Vec, Vec);
+  ASMJIT_INST_3x(rshrn, Rshrn_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(rshrn2, Rshrn2_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(rsubhn, Rsubhn_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(rsubhn2, Rsubhn2_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(saba, Saba_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(sabal, Sabal_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(sabal2, Sabal2_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(sabd, Sabd_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(sabdl, Sabdl_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(sabdl2, Sabdl2_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(sadalp, Sadalp_v, Vec, Vec);
+  ASMJIT_INST_3x(saddl, Saddl_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(saddl2, Saddl2_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(saddlp, Saddlp_v, Vec, Vec);
+  ASMJIT_INST_2x(saddlv, Saddlv_v, Vec, Vec);
+  ASMJIT_INST_3x(saddw, Saddw_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(saddw2, Saddw2_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(scvtf, Scvtf_v, Vec, Gp);
+  ASMJIT_INST_3x(scvtf, Scvtf_v, Vec, Gp, Imm);
+  ASMJIT_INST_2x(scvtf, Scvtf_v, Vec, Vec);
+  ASMJIT_INST_3x(scvtf, Scvtf_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(shadd, Shadd_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(shl, Shl_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(shll, Shll_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(shll2, Shll2_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(shrn, Shrn_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(shrn2, Shrn2_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(shsub, Shsub_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(sli, Sli_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(smax, Smax_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(smaxp, Smaxp_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(smaxv, Smaxv_v, Vec, Vec);
+  ASMJIT_INST_3x(smin, Smin_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(sminp, Sminp_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(sminv, Sminv_v, Vec, Vec);
+  ASMJIT_INST_3x(smlal, Smlal_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(smlal2, Smlal2_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(smlsl, Smlsl_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(smlsl2, Smlsl2_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(smov, Smov_v, Gp, Vec);
+  ASMJIT_INST_3x(smull, Smull_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(smull2, Smull2_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(sqabs, Sqabs_v, Vec, Vec);
+  ASMJIT_INST_3x(sqadd, Sqadd_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(sqdmlal, Sqdmlal_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(sqdmlal2, Sqdmlal2_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(sqdmlsl, Sqdmlsl_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(sqdmlsl2, Sqdmlsl2_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(sqdmulh, Sqdmulh_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(sqdmull, Sqdmull_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(sqdmull2, Sqdmull2_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(sqneg, Sqneg_v, Vec, Vec);
+  ASMJIT_INST_3x(sqrdmulh, Sqrdmulh_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(sqrshl, Sqrshl_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(sqrshrn, Sqrshrn_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(sqrshrn2, Sqrshrn2_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(sqrshrun, Sqrshrun_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(sqrshrun2, Sqrshrun2_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(sqshl, Sqshl_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(sqshl, Sqshl_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(sqshlu, Sqshlu_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(sqshrn, Sqshrn_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(sqshrn2, Sqshrn2_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(sqshrun, Sqshrun_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(sqshrun2, Sqshrun2_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(sqsub, Sqsub_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(sqxtn, Sqxtn_v, Vec, Vec);
+  ASMJIT_INST_2x(sqxtn2, Sqxtn2_v, Vec, Vec);
+  ASMJIT_INST_2x(sqxtun, Sqxtun_v, Vec, Vec);
+  ASMJIT_INST_2x(sqxtun2, Sqxtun2_v, Vec, Vec);
+  ASMJIT_INST_3x(srhadd, Srhadd_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(sri, Sri_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(srshl, Srshl_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(srshr, Srshr_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(srsra, Srsra_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(sshl, Sshl_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(sshll, Sshll_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(sshll2, Sshll2_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(sshr, Sshr_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(ssra, Ssra_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(ssubl, Ssubl_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(ssubl2, Ssubl2_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(ssubw, Ssubw_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(ssubw2, Ssubw2_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(st1, St1_v, Vec, Mem);
+  ASMJIT_INST_3x(st1, St1_v, Vec, Vec, Mem);
+  ASMJIT_INST_4x(st1, St1_v, Vec, Vec, Vec, Mem);
+  ASMJIT_INST_5x(st1, St1_v, Vec, Vec, Vec, Vec, Mem);
+  ASMJIT_INST_3x(st2, St2_v, Vec, Vec, Mem);
+  ASMJIT_INST_4x(st3, St3_v, Vec, Vec, Vec, Mem);
+  ASMJIT_INST_5x(st4, St4_v, Vec, Vec, Vec, Vec, Mem);
+  ASMJIT_INST_3x(stnp, Stnp_v, Vec, Vec, Mem);
+  ASMJIT_INST_3x(stp, Stp_v, Vec, Vec, Mem);
+  ASMJIT_INST_2x(str, Str_v, Vec, Mem);
+  ASMJIT_INST_2x(stur, Stur_v, Vec, Mem);
+  ASMJIT_INST_3x(sub, Sub_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(subhn, Subhn_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(subhn2, Subhn2_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(suqadd, Suqadd_v, Vec, Vec);
+  ASMJIT_INST_2x(sxtl, Sxtl_v, Vec, Vec);
+  ASMJIT_INST_2x(sxtl2, Sxtl2_v, Vec, Vec);
+  ASMJIT_INST_3x(tbl, Tbl_v, Vec, Vec, Vec);
+  ASMJIT_INST_4x(tbl, Tbl_v, Vec, Vec, Vec, Vec);
+  ASMJIT_INST_5x(tbl, Tbl_v, Vec, Vec, Vec, Vec, Vec);
+  ASMJIT_INST_6x(tbl, Tbl_v, Vec, Vec, Vec, Vec, Vec, Vec);
+  ASMJIT_INST_3x(tbx, Tbx_v, Vec, Vec, Vec);
+  ASMJIT_INST_4x(tbx, Tbx_v, Vec, Vec, Vec, Vec);
+  ASMJIT_INST_5x(tbx, Tbx_v, Vec, Vec, Vec, Vec, Vec);
+  ASMJIT_INST_6x(tbx, Tbx_v, Vec, Vec, Vec, Vec, Vec, Vec);
+  ASMJIT_INST_3x(trn1, Trn1_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(trn2, Trn2_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(uaba, Uaba_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(uabal, Uabal_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(uabal2, Uabal2_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(uabd, Uabd_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(uabdl, Uabdl_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(uabdl2, Uabdl2_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(uadalp, Uadalp_v, Vec, Vec);
+  ASMJIT_INST_3x(uaddl, Uaddl_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(uaddl2, Uaddl2_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(uaddlp, Uaddlp_v, Vec, Vec);
+  ASMJIT_INST_2x(uaddlv, Uaddlv_v, Vec, Vec);
+  ASMJIT_INST_3x(uaddw, Uaddw_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(uaddw2, Uaddw2_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(ucvtf, Ucvtf_v, Vec, Gp);
+  ASMJIT_INST_3x(ucvtf, Ucvtf_v, Vec, Gp, Imm);
+  ASMJIT_INST_2x(ucvtf, Ucvtf_v, Vec, Vec);
+  ASMJIT_INST_3x(ucvtf, Ucvtf_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(uhadd, Uhadd_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(uhsub, Uhsub_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(umax, Umax_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(umaxp, Umaxp_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(umaxv, Umaxv_v, Vec, Vec);
+  ASMJIT_INST_3x(umin, Umin_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(uminp, Uminp_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(uminv, Uminv_v, Vec, Vec);
+  ASMJIT_INST_3x(umlal, Umlal_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(umlal2, Umlal2_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(umlsl, Umlsl_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(umlsl2, Umlsl2_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(umov, Umov_v, Gp, Vec);
+  ASMJIT_INST_3x(umull, Umull_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(umull2, Umull2_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(uqadd, Uqadd_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(uqrshl, Uqrshl_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(uqrshl, Uqrshl_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(uqrshrn, Uqrshrn_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(uqrshrn2, Uqrshrn2_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(uqshl, Uqshl_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(uqshl, Uqshl_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(uqshrn, Uqshrn_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(uqshrn2, Uqshrn2_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(uqsub, Uqsub_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(uqxtn, Uqxtn_v, Vec, Vec);
+  ASMJIT_INST_2x(uqxtn2, Uqxtn2_v, Vec, Vec);
+  ASMJIT_INST_2x(urecpe, Urecpe_v, Vec, Vec);
+  ASMJIT_INST_3x(urhadd, Urhadd_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(urshl, Urshl_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(urshr, Urshr_v, Vec, Vec, Imm);
+  ASMJIT_INST_2x(ursqrte, Ursqrte_v, Vec, Vec);
+  ASMJIT_INST_3x(ursra, Ursra_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(ushl, Ushl_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(ushll, Ushll_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(ushll2, Ushll2_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(ushr, Ushr_v, Vec, Vec, Imm);
+  ASMJIT_INST_2x(usqadd, Usqadd_v, Vec, Vec);
+  ASMJIT_INST_3x(usra, Usra_v, Vec, Vec, Imm);
+  ASMJIT_INST_3x(usubl, Usubl_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(usubl2, Usubl2_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(usubw, Usubw_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(usubw2, Usubw2_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(uxtl, Uxtl_v, Vec, Vec);
+  ASMJIT_INST_2x(uxtl2, Uxtl2_v, Vec, Vec);
+  ASMJIT_INST_3x(uzp1, Uzp1_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(uzp2, Uzp2_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(xtn, Xtn_v, Vec, Vec);
+  ASMJIT_INST_2x(xtn2, Xtn2_v, Vec, Vec);
+  ASMJIT_INST_3x(zip1, Zip1_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(zip2, Zip2_v, Vec, Vec, Vec);
+
+  //! \}
+
+  //! \name AES Instructions
+  //! \{
+
+  ASMJIT_INST_2x(aesd, Aesd_v, Vec, Vec);
+  ASMJIT_INST_2x(aese, Aese_v, Vec, Vec);
+  ASMJIT_INST_2x(aesimc, Aesimc_v, Vec, Vec);
+  ASMJIT_INST_2x(aesmc, Aesmc_v, Vec, Vec);
+
+  //! \}
+
+  //! \name SHA1 Instructions
+  //! \{
+
+  ASMJIT_INST_3x(sha1c, Sha1c_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(sha1h, Sha1h_v, Vec, Vec);
+  ASMJIT_INST_3x(sha1m, Sha1m_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(sha1p, Sha1p_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(sha1su0, Sha1su0_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(sha1su1, Sha1su1_v, Vec, Vec);
+
+  //! \}
+
+  //! \name SHA2 Instructions
+  //! \{
+
+  ASMJIT_INST_3x(sha256h, Sha256h_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(sha256h2, Sha256h2_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(sha256su0, Sha256su0_v, Vec, Vec);
+  ASMJIT_INST_3x(sha256su1, Sha256su1_v, Vec, Vec, Vec);
+
+  //! \}
+
+  //! \name RDMA Instructions (ARMv8.1-A)
+  //! \{
+
+  ASMJIT_INST_3x(sqrdmlah, Sqrdmlah_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(sqrdmlsh, Sqrdmlsh_v, Vec, Vec, Vec);
+
+  //! \}
+
+  //! \name FCMA Instruction (ARMv8.3-A)
+  //! \{
+
+  ASMJIT_INST_4x(fcadd, Fcadd_v, Vec, Vec, Vec, Imm);
+  ASMJIT_INST_4x(fcmla, Fcmla_v, Vec, Vec, Vec, Imm);
+
+  //! \}
+
+  //! \name FJCVTZS Instruction (ARMv8.3-A)
+  //! \{
+
+  ASMJIT_INST_2x(fjcvtzs, Fjcvtzs_v, Gp, Vec);
+
+  //! \}
+
+  //! \name FP16FML Instructions (ARMv8.4-A, optional in ARMv8.2-A)
+  //! \{
+
+  ASMJIT_INST_3x(fmlal, Fmlal_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(fmlal2, Fmlal2_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(fmlsl, Fmlsl_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(fmlsl2, Fmlsl2_v, Vec, Vec, Vec);
+
+
+  //! \}
+
+  //! \name SHA3 Instructions (ARMv8.4-A, optional in ARMv8.2-A)
+  //! \{
+
+  ASMJIT_INST_4x(bcax, Bcax_v, Vec, Vec, Vec, Vec);
+  ASMJIT_INST_4x(eor3, Eor3_v, Vec, Vec, Vec, Vec);
+  ASMJIT_INST_3x(rax1, Rax1_v, Vec, Vec, Vec);
+  ASMJIT_INST_4x(xar, Xar_v, Vec, Vec, Vec, Imm);
+
+  //! \}
+
+  //! \name SHA512 Instructions (ARMv8.4-A)
+  //! \{
+
+  ASMJIT_INST_3x(sha512h, Sha512h_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(sha512h2, Sha512h2_v, Vec, Vec, Vec);
+  ASMJIT_INST_2x(sha512su0, Sha512su0_v, Vec, Vec);
+  ASMJIT_INST_3x(sha512su1, Sha512su1_v, Vec, Vec, Vec);
+
+  //! \}
+
+  //! \name SM3 Instructions (ARMv8.4-A)
+  //! \{
+
+  ASMJIT_INST_3x(sm3partw1, Sm3partw1_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(sm3partw2, Sm3partw2_v, Vec, Vec, Vec);
+  ASMJIT_INST_4x(sm3ss1, Sm3ss1_v, Vec, Vec, Vec, Vec);
+  ASMJIT_INST_3x(sm3tt1a, Sm3tt1a_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(sm3tt1b, Sm3tt1b_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(sm3tt2a, Sm3tt2a_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(sm3tt2b, Sm3tt2b_v, Vec, Vec, Vec);
+
+  //! \}
+
+  //! \name SM4 Instructions (ARMv8.4-A)
+  //! \{
+
+  ASMJIT_INST_2x(sm4e, Sm4e_v, Vec, Vec);
+  ASMJIT_INST_3x(sm4ekey, Sm4ekey_v, Vec, Vec, Vec);
+
+  //! \}
+
+  //! \name DOTPROD Instructions (ARMv8.4-A, optional in ARMv8.2-A)
+  //! \{
+
+  ASMJIT_INST_3x(sdot, Sdot_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(udot, Udot_v, Vec, Vec, Vec);
+
+  //! \}
+
+  //! \name BF16 Instructions (ARMv8.6-A)
+  //! \{
+
+  ASMJIT_INST_2x(bfcvt, Bfcvt_v, Vec, Vec);
+  ASMJIT_INST_2x(bfcvtn, Bfcvtn_v, Vec, Vec);
+  ASMJIT_INST_2x(bfcvtn2, Bfcvtn2_v, Vec, Vec);
+  ASMJIT_INST_3x(bfmlalb, Bfmlalb_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(bfmlalt, Bfmlalt_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(bfmmla, Bfmmla_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(bfdot, Bfdot_v, Vec, Vec, Vec);
+
+  //! \}
+
+  //! \name I8MM Instructions (ARMv8.6-A)
+  //! \{
+
+  ASMJIT_INST_3x(smmla, Smmla_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(sudot, Sudot_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(ummla, Ummla_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(usdot, Usdot_v, Vec, Vec, Vec);
+  ASMJIT_INST_3x(usmmla, Usmmla_v, Vec, Vec, Vec);
+
+  //! \}
+};
+
+//! Emitter (AArch64).
+//!
+//! \note This class cannot be instantiated; you can only cast to it and use it as an emitter that emits to
+//! either `a64::Assembler`, `a64::Builder`, or `a64::Compiler` (use it with caution with `a64::Compiler`, as
+//! that requires virtual registers).
+class Emitter : public BaseEmitter, public EmitterExplicitT<Emitter> {
+  ASMJIT_NONCONSTRUCTIBLE(Emitter)
+};
+
+//! \}
+
+#undef ASMJIT_INST_0x
+#undef ASMJIT_INST_1x
+#undef ASMJIT_INST_2x
+#undef ASMJIT_INST_3x
+#undef ASMJIT_INST_4x
+#undef ASMJIT_INST_5x
+#undef ASMJIT_INST_6x
+#undef ASMJIT_INST_1cc
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // ASMJIT_ARM_A64EMITTER_H_INCLUDED
diff --git a/lib/lepton/asmjit/arm/a64formatter.cpp b/lib/lepton/asmjit/arm/a64formatter.cpp
new file mode 100644
index 0000000000..bccb68b99b
--- /dev/null
+++ b/lib/lepton/asmjit/arm/a64formatter.cpp
@@ -0,0 +1,298 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#ifndef ASMJIT_NO_LOGGING
+
+#include "../core/misc_p.h"
+#include "../core/support.h"
+#include "../arm/a64formatter_p.h"
+#include "../arm/a64instapi_p.h"
+#include "../arm/a64instdb_p.h"
+#include "../arm/a64operand.h"
+
+#ifndef ASMJIT_NO_COMPILER
+  #include "../core/compiler.h"
+#endif
+
+ASMJIT_BEGIN_SUB_NAMESPACE(a64)
+
+// a64::FormatterInternal - Format Register
+// ========================================
+
+ASMJIT_FAVOR_SIZE Error FormatterInternal::formatRegister(
+  String& sb,                       // Destination string; text is appended.
+  FormatFlags flags,                // Unused here.
+  const BaseEmitter* emitter,       // May be null; only used to look up virtual register names.
+  Arch arch,                        // Unused here.
+  RegType regType,                  // Selects the register name prefix.
+  uint32_t rId,                     // Physical or virtual register id.
+  uint32_t elementType,             // Vector element type; 0 appends no element suffix.
+  uint32_t elementIndex) noexcept { // 0xFFFFFFFF appends ".<count><letter>" instead of ".<letter>[index]".
+  // Appends the textual form of a register, plus an optional vector element suffix, to `sb`.
+  DebugUtils::unused(flags);
+  DebugUtils::unused(arch);
+  // Prefix letters for the VecB..VecV register types, indexed by (regType - kARM_VecB).
+  static const char bhsdq[] = "bhsdq";
+
+  bool virtRegFormatted = false;
+
+#ifndef ASMJIT_NO_COMPILER
+  if (Operand::isVirtId(rId)) {
+    if (emitter && emitter->isCompiler()) {
+      const BaseCompiler* cc = static_cast<const BaseCompiler*>(emitter);
+      if (cc->isVirtIdValid(rId)) {
+        VirtReg* vReg = cc->virtRegById(rId);
+        ASMJIT_ASSERT(vReg != nullptr);
+
+        const char* name = vReg->name();
+        if (name && name[0] != '\0') // A non-empty name is printed as is.
+          ASMJIT_PROPAGATE(sb.append(name));
+        else                         // Unnamed virtual registers are printed as "%<index>".
+          ASMJIT_PROPAGATE(sb.appendFormat("%%%u", unsigned(Operand::virtIdToIndex(rId))));
+
+        virtRegFormatted = true;
+      }
+    }
+  }
+#else
+  DebugUtils::unused(emitter, flags);
+#endif
+  // Physical registers, and virtual ids that were not resolved above, are printed by type and id.
+  if (!virtRegFormatted) {
+    char letter = '\0';
+    switch (regType) {
+      case RegType::kARM_GpW:
+        if (rId == Gp::kIdZr)
+          return sb.append("wzr");
+        if (rId == Gp::kIdSp)
+          return sb.append("wsp");
+
+        letter = 'w';
+        break;
+
+      case RegType::kARM_GpX:
+        if (rId == Gp::kIdZr)
+          return sb.append("xzr");
+        if (rId == Gp::kIdSp)
+          return sb.append("sp");
+
+        letter = 'x';
+        break;
+
+      case RegType::kARM_VecB:
+      case RegType::kARM_VecH:
+      case RegType::kARM_VecS:
+      case RegType::kARM_VecD:
+      case RegType::kARM_VecV:
+        letter = bhsdq[uint32_t(regType) - uint32_t(RegType::kARM_VecB)];
+        if (elementType) // With an element suffix the register itself is always named "v<id>".
+          letter = 'v';
+        break;
+
+      default: // Unknown register type: emit a "<Reg-TYPE>?ID" placeholder (both values are printed).
+        ASMJIT_PROPAGATE(sb.appendFormat("<Reg-%u>?%u", uint32_t(regType), rId));
+        break;
+    }
+
+    if (letter)
+      ASMJIT_PROPAGATE(sb.appendFormat("%c%u", letter, rId));
+  }
+  // Optional element suffix; `elementCount` is used only when no element index is given.
+  if (elementType) {
+    char elementLetter = '\0';
+    uint32_t elementCount = 0;
+
+    switch (elementType) {
+      case Vec::kElementTypeB:
+        elementLetter = 'b';
+        elementCount = 16;
+        break;
+
+      case Vec::kElementTypeH:
+        elementLetter = 'h';
+        elementCount = 8;
+        break;
+
+      case Vec::kElementTypeS:
+        elementLetter = 's';
+        elementCount = 4;
+        break;
+
+      case Vec::kElementTypeD:
+        elementLetter = 'd';
+        elementCount = 2;
+        break;
+
+      default:
+        return sb.append(".<Unknown>");
+    }
+
+    if (elementLetter) {
+      if (elementIndex == 0xFFFFFFFFu) {
+        if (regType == RegType::kARM_VecD) // The counts above are halved for D registers.
+          elementCount /= 2u;
+        ASMJIT_PROPAGATE(sb.appendFormat(".%u%c", elementCount, elementLetter));
+      }
+      else {
+        ASMJIT_PROPAGATE(sb.appendFormat(".%c[%u]", elementLetter, elementIndex));
+      }
+    }
+  }
+
+  return kErrorOk;
+}
+
+// a64::FormatterInternal - Format Operand
+// =======================================
+
+ASMJIT_FAVOR_SIZE Error FormatterInternal::formatOperand(
+  String& sb,
+  FormatFlags flags,
+  const BaseEmitter* emitter,
+  Arch arch,
+  const Operand_& op) noexcept {
+  // Appends the textual form of `op` to `sb`: a register, memory operand, immediate, or label; else "<None>".
+  if (op.isReg()) {
+    const BaseReg& reg = op.as<BaseReg>();
+    // NOTE(review): every register operand is read through `Vec` here - presumably harmless for GP registers; confirm.
+    uint32_t elementType = op.as<Vec>().elementType();
+    uint32_t elementIndex = op.as<Vec>().elementIndex();
+
+    if (!op.as<Vec>().hasElementIndex())
+      elementIndex = 0xFFFFFFFFu; // Sentinel checked by formatRegister(): print no "[index]" part.
+
+    return formatRegister(sb, flags, emitter, arch, reg.type(), reg.id(), elementType, elementIndex);
+  }
+
+  if (op.isMem()) {
+    const Mem& m = op.as<Mem>();
+    ASMJIT_PROPAGATE(sb.append('['));
+
+    if (m.hasBase()) {
+      if (m.hasBaseLabel()) {
+        ASMJIT_PROPAGATE(Formatter::formatLabel(sb, flags, emitter, m.baseId()));
+      }
+      else {
+        FormatFlags modifiedFlags = flags;
+        if (m.isRegHome()) {
+          ASMJIT_PROPAGATE(sb.append('&')); // A register-home base is prefixed with '&'.
+          modifiedFlags &= ~FormatFlags::kRegCasts;
+        }
+        ASMJIT_PROPAGATE(formatRegister(sb, modifiedFlags, emitter, arch, m.baseType(), m.baseId()));
+      }
+    }
+    else {
+      // ARM really requires a base, so a placeholder is printed when only an index or offset is present.
+      if (m.hasIndex() || m.hasOffset()) {
+        ASMJIT_PROPAGATE(sb.append("<None>"));
+      }
+    }
+
+    // The post-index form closes the bracket right after the base, which makes it look like there
+    // is another operand, but it is still part of AsmJit's `arm::Mem` operand, so it stays consistent
+    // with other architectures.
+    if (m.isPostIndex())
+      ASMJIT_PROPAGATE(sb.append(']'));
+
+    if (m.hasIndex()) {
+      ASMJIT_PROPAGATE(sb.append(", "));
+      ASMJIT_PROPAGATE(formatRegister(sb, flags, emitter, arch, m.indexType(), m.indexId()));
+    }
+
+    if (m.hasOffset()) {
+      ASMJIT_PROPAGATE(sb.append(", "));
+
+      int64_t off = int64_t(m.offset());
+      uint32_t base = 10;
+      // Hex is only requested above 9; a negative offset converts to a large unsigned value and is printed in hex.
+      if (Support::test(flags, FormatFlags::kHexOffsets) && uint64_t(off) > 9)
+        base = 16;
+
+      if (base == 10) {
+        ASMJIT_PROPAGATE(sb.appendInt(off, base));
+      }
+      else {
+        ASMJIT_PROPAGATE(sb.append("0x"));
+        ASMJIT_PROPAGATE(sb.appendUInt(uint64_t(off), base));
+      }
+    }
+
+    if (m.hasShift()) {
+      ASMJIT_PROPAGATE(sb.append(' '));
+      if (!m.isPreOrPost()) // Pre/post-index forms print only the shift amount, not the shift operation.
+        ASMJIT_PROPAGATE(formatShiftOp(sb, (ShiftOp)m.predicate()));
+      ASMJIT_PROPAGATE(sb.appendFormat(" %u", m.shift()));
+    }
+
+    if (!m.isPostIndex()) // The post-index form already closed its bracket above.
+      ASMJIT_PROPAGATE(sb.append(']'));
+
+    if (m.isPreIndex())
+      ASMJIT_PROPAGATE(sb.append('!'));
+
+    return kErrorOk;
+  }
+
+  if (op.isImm()) {
+    const Imm& i = op.as<Imm>();
+    int64_t val = i.value();
+    // Same rule as for offsets: hex only above 9, and negative values are printed as unsigned hex.
+    if (Support::test(flags, FormatFlags::kHexImms) && uint64_t(val) > 9) {
+      ASMJIT_PROPAGATE(sb.append("0x"));
+      return sb.appendUInt(uint64_t(val), 16);
+    }
+    else {
+      return sb.appendInt(val, 10);
+    }
+  }
+
+  if (op.isLabel()) {
+    return Formatter::formatLabel(sb, flags, emitter, op.id());
+  }
+
+  return sb.append("<None>");
+}
+
+// a64::FormatterInternal - Format Instruction
+// ===========================================
+
+ASMJIT_FAVOR_SIZE Error FormatterInternal::formatInstruction(
+  String& sb,
+  FormatFlags flags,
+  const BaseEmitter* emitter,
+  Arch arch,
+  const BaseInst& inst, const Operand_* operands, size_t opCount) noexcept {
+
+  DebugUtils::unused(arch);
+
+  // Mnemonic: ids outside the instruction table are printed as a numeric placeholder.
+  const InstId id = inst.realId();
+  if (id >= Inst::_kIdCount)
+    ASMJIT_PROPAGATE(sb.appendFormat("[InstId=#%u]", unsigned(id)));
+  else
+    ASMJIT_PROPAGATE(InstInternal::instIdToString(arch, id, sb));
+
+  // Any condition code other than CondCode::kAL becomes a ".<cc>" suffix.
+  const CondCode condCode = inst.armCondCode();
+  if (condCode != CondCode::kAL) {
+    ASMJIT_PROPAGATE(sb.append('.'));
+    ASMJIT_PROPAGATE(formatCondCode(sb, condCode));
+  }
+
+  // Operands: " " before the first, ", " before the rest; stop at the first none-operand.
+  const char* separator = " ";
+  for (uint32_t i = 0; i < opCount && !operands[i].isNone(); i++) {
+    ASMJIT_PROPAGATE(sb.append(separator));
+    ASMJIT_PROPAGATE(formatOperand(sb, flags, emitter, arch, operands[i]));
+    separator = ", ";
+  }
+
+  return kErrorOk;
+}
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // !ASMJIT_NO_LOGGING
diff --git a/lib/lepton/asmjit/arm/a64formatter_p.h b/lib/lepton/asmjit/arm/a64formatter_p.h
new file mode 100644
index 0000000000..bd7a1440cb
--- /dev/null
+++ b/lib/lepton/asmjit/arm/a64formatter_p.h
@@ -0,0 +1,59 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_ARM_A64FORMATTER_P_H_INCLUDED
+#define ASMJIT_ARM_A64FORMATTER_P_H_INCLUDED
+
+#include "../core/api-config.h"
+#ifndef ASMJIT_NO_LOGGING
+
+#include "../core/formatter.h"
+#include "../core/string.h"
+#include "../arm/armformatter_p.h"
+#include "../arm/a64globals.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(a64)
+
+//! \cond INTERNAL
+//! \addtogroup asmjit_a64
+//! \{
+
+namespace FormatterInternal {
+
+using namespace arm::FormatterInternal; // Make the arm::FormatterInternal helpers visible in this namespace.
+//! Appends the name of register `regId` of type `regType`, plus an optional vector element suffix, to `sb`.
+Error ASMJIT_CDECL formatRegister(
+  String& sb,
+  FormatFlags flags,
+  const BaseEmitter* emitter,
+  Arch arch,
+  RegType regType,
+  uint32_t regId,
+  uint32_t elementType = 0,                       // Default: no element suffix is appended.
+  uint32_t elementIndex = 0xFFFFFFFFu) noexcept;  // Default: no "[index]" part in the element suffix.
+//! Appends the textual form of operand `op` to `sb`.
+Error ASMJIT_CDECL formatOperand(
+  String& sb,
+  FormatFlags flags,
+  const BaseEmitter* emitter,
+  Arch arch,
+  const Operand_& op) noexcept;
+//! Appends the mnemonic of `inst` followed by up to `opCount` entries of `operands` to `sb`.
+Error ASMJIT_CDECL formatInstruction(
+  String& sb,
+  FormatFlags flags,
+  const BaseEmitter* emitter,
+  Arch arch,
+  const BaseInst& inst, const Operand_* operands, size_t opCount) noexcept;
+
+} // {FormatterInternal}
+
+//! \}
+//! \endcond
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // !ASMJIT_NO_LOGGING
+#endif // ASMJIT_ARM_A64FORMATTER_P_H_INCLUDED
diff --git a/lib/lepton/asmjit/arm/a64func.cpp b/lib/lepton/asmjit/arm/a64func.cpp
new file mode 100644
index 0000000000..55e3f2e71e
--- /dev/null
+++ b/lib/lepton/asmjit/arm/a64func.cpp
@@ -0,0 +1,189 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#if !defined(ASMJIT_NO_AARCH64)
+
+#include "../arm/a64func_p.h"
+#include "../arm/a64operand.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(a64)
+
+namespace FuncInternal {
+
+static inline bool shouldThreatAsCDecl(CallConvId ccId) noexcept {
+  // Calling convention ids for which this predicate answers true.
+  static const CallConvId kCDeclLike[] = {
+    CallConvId::kCDecl   , CallConvId::kStdCall , CallConvId::kFastCall, CallConvId::kVectorCall,
+    CallConvId::kThisCall, CallConvId::kRegParm1, CallConvId::kRegParm2, CallConvId::kRegParm3
+  };
+  for (CallConvId candidate : kCDeclLike)
+    if (candidate == ccId) return true;
+  return false;
+}
+
+static RegType regTypeFromFpOrVecTypeId(TypeId typeId) noexcept {
+  // Scalar floating point types are checked before the vector classifications.
+  switch (typeId) {
+    case TypeId::kFloat32: return RegType::kARM_VecS;
+    case TypeId::kFloat64: return RegType::kARM_VecD;
+    default: break;
+  }
+
+  // Vector types map by their isVec32/isVec64/isVec128 classification; anything else yields kNone.
+  if (TypeUtils::isVec32(typeId)) return RegType::kARM_VecS;
+  if (TypeUtils::isVec64(typeId)) return RegType::kARM_VecD;
+  if (TypeUtils::isVec128(typeId)) return RegType::kARM_VecV;
+  return RegType::kNone;
+}
+
+ASMJIT_FAVOR_SIZE Error initCallConv(CallConv& cc, CallConvId ccId, const Environment& environment) noexcept {
+  // Settings applied to every calling convention id before the id-specific part below.
+  cc.setArch(environment.arch());
+  cc.setSaveRestoreRegSize(RegGroup::kGp, 8);
+  cc.setSaveRestoreRegSize(RegGroup::kVec, 8);
+  cc.setSaveRestoreAlignment(RegGroup::kGp, 16);
+  cc.setSaveRestoreAlignment(RegGroup::kVec, 16);
+  cc.setSaveRestoreAlignment(RegGroup::kExtraVirt2, 1);
+  cc.setSaveRestoreAlignment(RegGroup::kExtraVirt3, 1);
+  cc.setPassedOrder(RegGroup::kGp, 0, 1, 2, 3, 4, 5, 6, 7);
+  cc.setPassedOrder(RegGroup::kVec, 0, 1, 2, 3, 4, 5, 6, 7);
+  cc.setNaturalStackAlignment(16);
+
+  // ARM doesn't have as many calling conventions as the X86 world, so most conventions are treated as __cdecl.
+  const bool cdeclLike = shouldThreatAsCDecl(ccId);
+  cc.setId(cdeclLike ? CallConvId::kCDecl : ccId);
+
+  if (!cdeclLike) {
+    // Any other id is kept as is, uses a vector save/restore size of 16, and preserves registers 4 and up.
+    cc.setSaveRestoreRegSize(RegGroup::kVec, 16);
+    cc.setPreservedRegs(RegGroup::kGp, Support::bitMask(4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30));
+    cc.setPreservedRegs(RegGroup::kVec, Support::bitMask(4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31));
+    return kErrorOk;
+  }
+  cc.setPreservedRegs(RegGroup::kGp, Support::bitMask(Gp::kIdOs, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30));
+  cc.setPreservedRegs(RegGroup::kVec, Support::bitMask(8, 9, 10, 11, 12, 13, 14, 15));
+  return kErrorOk;
+}
+
+ASMJIT_FAVOR_SIZE Error initFuncDetail(FuncDetail& func, const FuncSignature& signature, uint32_t registerSize) noexcept {
+  DebugUtils::unused(signature); // Types are read from `func` itself; the signature is not consulted.
+  // Assigns every return value and argument of `func` to a register or a stack offset.
+  const CallConv& cc = func.callConv();
+  uint32_t stackOffset = 0; // Running offset handed to the next stack-passed argument.
+
+  uint32_t i;
+  uint32_t argCount = func.argCount();
+  // Return values: value N of the pack is placed in register id N of the matching register type.
+  if (func.hasRet()) {
+    for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
+      TypeId typeId = func._rets[valueIndex].typeId();
+
+      // Terminate at the first void type (end of the pack).
+      if (typeId == TypeId::kVoid)
+        break;
+
+      switch (typeId) {
+        case TypeId::kInt8:
+        case TypeId::kInt16:
+        case TypeId::kInt32: { // Signed integers up to 32 bits are recorded as Int32 in a W register.
+          func._rets[valueIndex].initReg(RegType::kARM_GpW, valueIndex, TypeId::kInt32);
+          break;
+        }
+
+        case TypeId::kUInt8:
+        case TypeId::kUInt16:
+        case TypeId::kUInt32: { // Unsigned integers up to 32 bits are recorded as UInt32 in a W register.
+          func._rets[valueIndex].initReg(RegType::kARM_GpW, valueIndex, TypeId::kUInt32);
+          break;
+        }
+
+        case TypeId::kInt64:
+        case TypeId::kUInt64: {
+          func._rets[valueIndex].initReg(RegType::kARM_GpX, valueIndex, typeId);
+          break;
+        }
+
+        default: { // Everything else must map to a vector register type, otherwise it is an error.
+          RegType regType = regTypeFromFpOrVecTypeId(typeId);
+          if (regType == RegType::kNone)
+            return DebugUtils::errored(kErrorInvalidRegType);
+
+          func._rets[valueIndex].initReg(regType, valueIndex, typeId);
+          break;
+        }
+      }
+    }
+  }
+  // Arguments: only the kDefault strategy is implemented; any other strategy is reported as an invalid state.
+  switch (cc.strategy()) {
+    case CallConvStrategy::kDefault: {
+      uint32_t gpzPos = 0; // Next position in the GP passed-order table.
+      uint32_t vecPos = 0; // Next position in the vector passed-order table.
+
+      for (i = 0; i < argCount; i++) {
+        FuncValue& arg = func._args[i][0]; // Only the first value of each argument pack is assigned here.
+        TypeId typeId = arg.typeId();
+        // Integer arguments take the next GP register of the passed order, or a stack slot once none is left.
+        if (TypeUtils::isInt(typeId)) {
+          uint32_t regId = BaseReg::kIdBad;
+
+          if (gpzPos < CallConv::kMaxRegArgsPerGroup)
+            regId = cc._passedOrder[RegGroup::kGp].id[gpzPos];
+
+          if (regId != BaseReg::kIdBad) {
+            RegType regType = typeId <= TypeId::kUInt32 ? RegType::kARM_GpW : RegType::kARM_GpX;
+            arg.assignRegData(regType, regId);
+            func.addUsedRegs(RegGroup::kGp, Support::bitMask(regId));
+            gpzPos++;
+          }
+          else {
+            uint32_t size = Support::max<uint32_t>(TypeUtils::sizeOf(typeId), registerSize); // At least `registerSize`.
+            arg.assignStackOffset(int32_t(stackOffset));
+            stackOffset += size;
+          }
+          continue;
+        }
+        // Float and vector arguments take the next vector register, or a stack slot once none is left.
+        if (TypeUtils::isFloat(typeId) || TypeUtils::isVec(typeId)) {
+          uint32_t regId = BaseReg::kIdBad;
+
+          if (vecPos < CallConv::kMaxRegArgsPerGroup)
+            regId = cc._passedOrder[RegGroup::kVec].id[vecPos];
+
+          if (regId != BaseReg::kIdBad) {
+            RegType regType = regTypeFromFpOrVecTypeId(typeId);
+            if (regType == RegType::kNone)
+              return DebugUtils::errored(kErrorInvalidRegType);
+
+            arg.initTypeId(typeId);
+            arg.assignRegData(regType, regId);
+            func.addUsedRegs(RegGroup::kVec, Support::bitMask(regId));
+            vecPos++;
+          }
+          else {
+            uint32_t size = TypeUtils::sizeOf(typeId); // Unlike integers, not rounded up to `registerSize`.
+            arg.assignStackOffset(int32_t(stackOffset));
+            stackOffset += size;
+          }
+          continue;
+        }
+      } // Arguments that are neither int nor float/vector are left untouched by this loop.
+      break;
+    }
+
+    default:
+      return DebugUtils::errored(kErrorInvalidState);
+  }
+
+  func._argStackSize = stackOffset;
+  return kErrorOk;
+}
+
+} // {FuncInternal}
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // !ASMJIT_NO_AARCH64
diff --git a/lib/lepton/asmjit/arm/a64func_p.h b/lib/lepton/asmjit/arm/a64func_p.h
new file mode 100644
index 0000000000..9f531fc5a2
--- /dev/null
+++ b/lib/lepton/asmjit/arm/a64func_p.h
@@ -0,0 +1,33 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_ARM_A64FUNC_P_H_INCLUDED
+#define ASMJIT_ARM_A64FUNC_P_H_INCLUDED
+
+#include "../core/func.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(a64)
+
+//! \cond INTERNAL
+//! \addtogroup asmjit_a64
+//! \{
+
+//! AArch64-specific function API (calling conventions and other utilities).
+namespace FuncInternal {
+
+//! Initializes `cc` for the calling convention `ccId`, taking the architecture from `environment` (AArch64 specific).
+Error initCallConv(CallConv& cc, CallConvId ccId, const Environment& environment) noexcept;
+
+//! Initializes the return value and argument assignments of `func` (AArch64 specific).
+Error initFuncDetail(FuncDetail& func, const FuncSignature& signature, uint32_t registerSize) noexcept;
+
+} // {FuncInternal}
+
+//! \}
+//! \endcond
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // ASMJIT_ARM_A64FUNC_P_H_INCLUDED
diff --git a/lib/lepton/asmjit/arm/a64globals.h b/lib/lepton/asmjit/arm/a64globals.h
new file mode 100644
index 0000000000..2b6b6f0ce9
--- /dev/null
+++ b/lib/lepton/asmjit/arm/a64globals.h
@@ -0,0 +1,1894 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_ARM_A64GLOBALS_H_INCLUDED
+#define ASMJIT_ARM_A64GLOBALS_H_INCLUDED
+
+#include "../arm/armglobals.h"
+
+//! \namespace asmjit::a64
+//! \ingroup asmjit_a64
+//!
+//! AArch64 backend.
+
+ASMJIT_BEGIN_SUB_NAMESPACE(a64)
+
+// a64 uses everything from arm namespace and adds into it.
+using namespace arm;
+
+//! \addtogroup asmjit_a64
+//! \{
+
+//! AArch64 instruction.
+//!
+//! \note Only used to hold ARM-specific enumerations and static functions.
+struct Inst {
+  //! Instruction id.
+  enum Id : uint32_t {
+    // ${InstId:Begin}
+    kIdNone = 0,                         //!< Instruction ''.
+    kIdAdc,                              //!< Instruction 'adc'.
+    kIdAdcs,                             //!< Instruction 'adcs'.
+    kIdAdd,                              //!< Instruction 'add'.
+    kIdAddg,                             //!< Instruction 'addg'.
+    kIdAdds,                             //!< Instruction 'adds'.
+    kIdAdr,                              //!< Instruction 'adr'.
+    kIdAdrp,                             //!< Instruction 'adrp'.
+    kIdAnd,                              //!< Instruction 'and'.
+    kIdAnds,                             //!< Instruction 'ands'.
+    kIdAsr,                              //!< Instruction 'asr'.
+    kIdAsrv,                             //!< Instruction 'asrv'.
+    kIdAt,                               //!< Instruction 'at'.
+    kIdAutda,                            //!< Instruction 'autda'.
+    kIdAutdza,                           //!< Instruction 'autdza'.
+    kIdAutdb,                            //!< Instruction 'autdb'.
+    kIdAutdzb,                           //!< Instruction 'autdzb'.
+    kIdAutia,                            //!< Instruction 'autia'.
+    kIdAutia1716,                        //!< Instruction 'autia1716'.
+    kIdAutiasp,                          //!< Instruction 'autiasp'.
+    kIdAutiaz,                           //!< Instruction 'autiaz'.
+    kIdAutib,                            //!< Instruction 'autib'.
+    kIdAutib1716,                        //!< Instruction 'autib1716'.
+    kIdAutibsp,                          //!< Instruction 'autibsp'.
+    kIdAutibz,                           //!< Instruction 'autibz'.
+    kIdAutiza,                           //!< Instruction 'autiza'.
+    kIdAutizb,                           //!< Instruction 'autizb'.
+    kIdAxflag,                           //!< Instruction 'axflag'.
+    kIdB,                                //!< Instruction 'b'.
+    kIdBfc,                              //!< Instruction 'bfc'.
+    kIdBfi,                              //!< Instruction 'bfi'.
+    kIdBfm,                              //!< Instruction 'bfm'.
+    kIdBfxil,                            //!< Instruction 'bfxil'.
+    kIdBic,                              //!< Instruction 'bic'.
+    kIdBics,                             //!< Instruction 'bics'.
+    kIdBl,                               //!< Instruction 'bl'.
+    kIdBlr,                              //!< Instruction 'blr'.
+    kIdBr,                               //!< Instruction 'br'.
+    kIdBrk,                              //!< Instruction 'brk'.
+    kIdCas,                              //!< Instruction 'cas'.
+    kIdCasa,                             //!< Instruction 'casa'.
+    kIdCasab,                            //!< Instruction 'casab'.
+    kIdCasah,                            //!< Instruction 'casah'.
+    kIdCasal,                            //!< Instruction 'casal'.
+    kIdCasalb,                           //!< Instruction 'casalb'.
+    kIdCasalh,                           //!< Instruction 'casalh'.
+    kIdCasb,                             //!< Instruction 'casb'.
+    kIdCash,                             //!< Instruction 'cash'.
+    kIdCasl,                             //!< Instruction 'casl'.
+    kIdCaslb,                            //!< Instruction 'caslb'.
+    kIdCaslh,                            //!< Instruction 'caslh'.
+    kIdCasp,                             //!< Instruction 'casp'.
+    kIdCaspa,                            //!< Instruction 'caspa'.
+    kIdCaspal,                           //!< Instruction 'caspal'.
+    kIdCaspl,                            //!< Instruction 'caspl'.
+    kIdCbnz,                             //!< Instruction 'cbnz'.
+    kIdCbz,                              //!< Instruction 'cbz'.
+    kIdCcmn,                             //!< Instruction 'ccmn'.
+    kIdCcmp,                             //!< Instruction 'ccmp'.
+    kIdCfinv,                            //!< Instruction 'cfinv'.
+    kIdCinc,                             //!< Instruction 'cinc'.
+    kIdCinv,                             //!< Instruction 'cinv'.
+    kIdClrex,                            //!< Instruction 'clrex'.
+    kIdCls,                              //!< Instruction 'cls'.
+    kIdClz,                              //!< Instruction 'clz'.
+    kIdCmn,                              //!< Instruction 'cmn'.
+    kIdCmp,                              //!< Instruction 'cmp'.
+    kIdCmpp,                             //!< Instruction 'cmpp'.
+    kIdCneg,                             //!< Instruction 'cneg'.
+    kIdCrc32b,                           //!< Instruction 'crc32b'.
+    kIdCrc32cb,                          //!< Instruction 'crc32cb'.
+    kIdCrc32ch,                          //!< Instruction 'crc32ch'.
+    kIdCrc32cw,                          //!< Instruction 'crc32cw'.
+    kIdCrc32cx,                          //!< Instruction 'crc32cx'.
+    kIdCrc32h,                           //!< Instruction 'crc32h'.
+    kIdCrc32w,                           //!< Instruction 'crc32w'.
+    kIdCrc32x,                           //!< Instruction 'crc32x'.
+    kIdCsdb,                             //!< Instruction 'csdb'.
+    kIdCsel,                             //!< Instruction 'csel'.
+    kIdCset,                             //!< Instruction 'cset'.
+    kIdCsetm,                            //!< Instruction 'csetm'.
+    kIdCsinc,                            //!< Instruction 'csinc'.
+    kIdCsinv,                            //!< Instruction 'csinv'.
+    kIdCsneg,                            //!< Instruction 'csneg'.
+    kIdDc,                               //!< Instruction 'dc'.
+    kIdDcps1,                            //!< Instruction 'dcps1'.
+    kIdDcps2,                            //!< Instruction 'dcps2'.
+    kIdDcps3,                            //!< Instruction 'dcps3'.
+    kIdDgh,                              //!< Instruction 'dgh'.
+    kIdDmb,                              //!< Instruction 'dmb'.
+    kIdDrps,                             //!< Instruction 'drps'.
+    kIdDsb,                              //!< Instruction 'dsb'.
+    kIdEon,                              //!< Instruction 'eon'.
+    kIdEor,                              //!< Instruction 'eor'.
+    kIdEsb,                              //!< Instruction 'esb'.
+    kIdExtr,                             //!< Instruction 'extr'.
+    kIdEret,                             //!< Instruction 'eret'.
+    kIdGmi,                              //!< Instruction 'gmi'.
+    kIdHint,                             //!< Instruction 'hint'.
+    kIdHlt,                              //!< Instruction 'hlt'.
+    kIdHvc,                              //!< Instruction 'hvc'.
+    kIdIc,                               //!< Instruction 'ic'.
+    kIdIsb,                              //!< Instruction 'isb'.
+    kIdLdadd,                            //!< Instruction 'ldadd'.
+    kIdLdadda,                           //!< Instruction 'ldadda'.
+    kIdLdaddab,                          //!< Instruction 'ldaddab'.
+    kIdLdaddah,                          //!< Instruction 'ldaddah'.
+    kIdLdaddal,                          //!< Instruction 'ldaddal'.
+    kIdLdaddalb,                         //!< Instruction 'ldaddalb'.
+    kIdLdaddalh,                         //!< Instruction 'ldaddalh'.
+    kIdLdaddb,                           //!< Instruction 'ldaddb'.
+    kIdLdaddh,                           //!< Instruction 'ldaddh'.
+    kIdLdaddl,                           //!< Instruction 'ldaddl'.
+    kIdLdaddlb,                          //!< Instruction 'ldaddlb'.
+    kIdLdaddlh,                          //!< Instruction 'ldaddlh'.
+    kIdLdar,                             //!< Instruction 'ldar'.
+    kIdLdarb,                            //!< Instruction 'ldarb'.
+    kIdLdarh,                            //!< Instruction 'ldarh'.
+    kIdLdaxp,                            //!< Instruction 'ldaxp'.
+    kIdLdaxr,                            //!< Instruction 'ldaxr'.
+    kIdLdaxrb,                           //!< Instruction 'ldaxrb'.
+    kIdLdaxrh,                           //!< Instruction 'ldaxrh'.
+    kIdLdclr,                            //!< Instruction 'ldclr'.
+    kIdLdclra,                           //!< Instruction 'ldclra'.
+    kIdLdclrab,                          //!< Instruction 'ldclrab'.
+    kIdLdclrah,                          //!< Instruction 'ldclrah'.
+    kIdLdclral,                          //!< Instruction 'ldclral'.
+    kIdLdclralb,                         //!< Instruction 'ldclralb'.
+    kIdLdclralh,                         //!< Instruction 'ldclralh'.
+    kIdLdclrb,                           //!< Instruction 'ldclrb'.
+    kIdLdclrh,                           //!< Instruction 'ldclrh'.
+    kIdLdclrl,                           //!< Instruction 'ldclrl'.
+    kIdLdclrlb,                          //!< Instruction 'ldclrlb'.
+    kIdLdclrlh,                          //!< Instruction 'ldclrlh'.
+    kIdLdeor,                            //!< Instruction 'ldeor'.
+    kIdLdeora,                           //!< Instruction 'ldeora'.
+    kIdLdeorab,                          //!< Instruction 'ldeorab'.
+    kIdLdeorah,                          //!< Instruction 'ldeorah'.
+    kIdLdeoral,                          //!< Instruction 'ldeoral'.
+    kIdLdeoralb,                         //!< Instruction 'ldeoralb'.
+    kIdLdeoralh,                         //!< Instruction 'ldeoralh'.
+    kIdLdeorb,                           //!< Instruction 'ldeorb'.
+    kIdLdeorh,                           //!< Instruction 'ldeorh'.
+    kIdLdeorl,                           //!< Instruction 'ldeorl'.
+    kIdLdeorlb,                          //!< Instruction 'ldeorlb'.
+    kIdLdeorlh,                          //!< Instruction 'ldeorlh'.
+    kIdLdg,                              //!< Instruction 'ldg'.
+    kIdLdgm,                             //!< Instruction 'ldgm'.
+    kIdLdlar,                            //!< Instruction 'ldlar'.
+    kIdLdlarb,                           //!< Instruction 'ldlarb'.
+    kIdLdlarh,                           //!< Instruction 'ldlarh'.
+    kIdLdnp,                             //!< Instruction 'ldnp'.
+    kIdLdp,                              //!< Instruction 'ldp'.
+    kIdLdpsw,                            //!< Instruction 'ldpsw'.
+    kIdLdr,                              //!< Instruction 'ldr'.
+    kIdLdraa,                            //!< Instruction 'ldraa'.
+    kIdLdrab,                            //!< Instruction 'ldrab'.
+    kIdLdrb,                             //!< Instruction 'ldrb'.
+    kIdLdrh,                             //!< Instruction 'ldrh'.
+    kIdLdrsb,                            //!< Instruction 'ldrsb'.
+    kIdLdrsh,                            //!< Instruction 'ldrsh'.
+    kIdLdrsw,                            //!< Instruction 'ldrsw'.
+    kIdLdset,                            //!< Instruction 'ldset'.
+    kIdLdseta,                           //!< Instruction 'ldseta'.
+    kIdLdsetab,                          //!< Instruction 'ldsetab'.
+    kIdLdsetah,                          //!< Instruction 'ldsetah'.
+    kIdLdsetal,                          //!< Instruction 'ldsetal'.
+    kIdLdsetalb,                         //!< Instruction 'ldsetalb'.
+    kIdLdsetalh,                         //!< Instruction 'ldsetalh'.
+    kIdLdsetb,                           //!< Instruction 'ldsetb'.
+    kIdLdseth,                           //!< Instruction 'ldseth'.
+    kIdLdsetl,                           //!< Instruction 'ldsetl'.
+    kIdLdsetlb,                          //!< Instruction 'ldsetlb'.
+    kIdLdsetlh,                          //!< Instruction 'ldsetlh'.
+    kIdLdsmax,                           //!< Instruction 'ldsmax'.
+    kIdLdsmaxa,                          //!< Instruction 'ldsmaxa'.
+    kIdLdsmaxab,                         //!< Instruction 'ldsmaxab'.
+    kIdLdsmaxah,                         //!< Instruction 'ldsmaxah'.
+    kIdLdsmaxal,                         //!< Instruction 'ldsmaxal'.
+    kIdLdsmaxalb,                        //!< Instruction 'ldsmaxalb'.
+    kIdLdsmaxalh,                        //!< Instruction 'ldsmaxalh'.
+    kIdLdsmaxb,                          //!< Instruction 'ldsmaxb'.
+    kIdLdsmaxh,                          //!< Instruction 'ldsmaxh'.
+    kIdLdsmaxl,                          //!< Instruction 'ldsmaxl'.
+    kIdLdsmaxlb,                         //!< Instruction 'ldsmaxlb'.
+    kIdLdsmaxlh,                         //!< Instruction 'ldsmaxlh'.
+    kIdLdsmin,                           //!< Instruction 'ldsmin'.
+    kIdLdsmina,                          //!< Instruction 'ldsmina'.
+    kIdLdsminab,                         //!< Instruction 'ldsminab'.
+    kIdLdsminah,                         //!< Instruction 'ldsminah'.
+    kIdLdsminal,                         //!< Instruction 'ldsminal'.
+    kIdLdsminalb,                        //!< Instruction 'ldsminalb'.
+    kIdLdsminalh,                        //!< Instruction 'ldsminalh'.
+    kIdLdsminb,                          //!< Instruction 'ldsminb'.
+    kIdLdsminh,                          //!< Instruction 'ldsminh'.
+    kIdLdsminl,                          //!< Instruction 'ldsminl'.
+    kIdLdsminlb,                         //!< Instruction 'ldsminlb'.
+    kIdLdsminlh,                         //!< Instruction 'ldsminlh'.
+    kIdLdtr,                             //!< Instruction 'ldtr'.
+    kIdLdtrb,                            //!< Instruction 'ldtrb'.
+    kIdLdtrh,                            //!< Instruction 'ldtrh'.
+    kIdLdtrsb,                           //!< Instruction 'ldtrsb'.
+    kIdLdtrsh,                           //!< Instruction 'ldtrsh'.
+    kIdLdtrsw,                           //!< Instruction 'ldtrsw'.
+    kIdLdumax,                           //!< Instruction 'ldumax'.
+    kIdLdumaxa,                          //!< Instruction 'ldumaxa'.
+    kIdLdumaxab,                         //!< Instruction 'ldumaxab'.
+    kIdLdumaxah,                         //!< Instruction 'ldumaxah'.
+    kIdLdumaxal,                         //!< Instruction 'ldumaxal'.
+    kIdLdumaxalb,                        //!< Instruction 'ldumaxalb'.
+    kIdLdumaxalh,                        //!< Instruction 'ldumaxalh'.
+    kIdLdumaxb,                          //!< Instruction 'ldumaxb'.
+    kIdLdumaxh,                          //!< Instruction 'ldumaxh'.
+    kIdLdumaxl,                          //!< Instruction 'ldumaxl'.
+    kIdLdumaxlb,                         //!< Instruction 'ldumaxlb'.
+    kIdLdumaxlh,                         //!< Instruction 'ldumaxlh'.
+    kIdLdumin,                           //!< Instruction 'ldumin'.
+    kIdLdumina,                          //!< Instruction 'ldumina'.
+    kIdLduminab,                         //!< Instruction 'lduminab'.
+    kIdLduminah,                         //!< Instruction 'lduminah'.
+    kIdLduminal,                         //!< Instruction 'lduminal'.
+    kIdLduminalb,                        //!< Instruction 'lduminalb'.
+    kIdLduminalh,                        //!< Instruction 'lduminalh'.
+    kIdLduminb,                          //!< Instruction 'lduminb'.
+    kIdLduminh,                          //!< Instruction 'lduminh'.
+    kIdLduminl,                          //!< Instruction 'lduminl'.
+    kIdLduminlb,                         //!< Instruction 'lduminlb'.
+    kIdLduminlh,                         //!< Instruction 'lduminlh'.
+    kIdLdur,                             //!< Instruction 'ldur'.
+    kIdLdurb,                            //!< Instruction 'ldurb'.
+    kIdLdurh,                            //!< Instruction 'ldurh'.
+    kIdLdursb,                           //!< Instruction 'ldursb'.
+    kIdLdursh,                           //!< Instruction 'ldursh'.
+    kIdLdursw,                           //!< Instruction 'ldursw'.
+    kIdLdxp,                             //!< Instruction 'ldxp'.
+    kIdLdxr,                             //!< Instruction 'ldxr'.
+    kIdLdxrb,                            //!< Instruction 'ldxrb'.
+    kIdLdxrh,                            //!< Instruction 'ldxrh'.
+    kIdLsl,                              //!< Instruction 'lsl'.
+    kIdLslv,                             //!< Instruction 'lslv'.
+    kIdLsr,                              //!< Instruction 'lsr'.
+    kIdLsrv,                             //!< Instruction 'lsrv'.
+    kIdMadd,                             //!< Instruction 'madd'.
+    kIdMneg,                             //!< Instruction 'mneg'.
+    kIdMov,                              //!< Instruction 'mov'.
+    kIdMovk,                             //!< Instruction 'movk'.
+    kIdMovn,                             //!< Instruction 'movn'.
+    kIdMovz,                             //!< Instruction 'movz'.
+    kIdMrs,                              //!< Instruction 'mrs'.
+    kIdMsr,                              //!< Instruction 'msr'.
+    kIdMsub,                             //!< Instruction 'msub'.
+    kIdMul,                              //!< Instruction 'mul'.
+    kIdMvn,                              //!< Instruction 'mvn'.
+    kIdNeg,                              //!< Instruction 'neg'.
+    kIdNegs,                             //!< Instruction 'negs'.
+    kIdNgc,                              //!< Instruction 'ngc'.
+    kIdNgcs,                             //!< Instruction 'ngcs'.
+    kIdNop,                              //!< Instruction 'nop'.
+    kIdOrn,                              //!< Instruction 'orn'.
+    kIdOrr,                              //!< Instruction 'orr'.
+    kIdPacda,                            //!< Instruction 'pacda'.
+    kIdPacdb,                            //!< Instruction 'pacdb'.
+    kIdPacdza,                           //!< Instruction 'pacdza'.
+    kIdPacdzb,                           //!< Instruction 'pacdzb'.
+    kIdPacga,                            //!< Instruction 'pacga'.
+    kIdPssbb,                            //!< Instruction 'pssbb'.
+    kIdRbit,                             //!< Instruction 'rbit'.
+    kIdRet,                              //!< Instruction 'ret'.
+    kIdRev,                              //!< Instruction 'rev'.
+    kIdRev16,                            //!< Instruction 'rev16'.
+    kIdRev32,                            //!< Instruction 'rev32'.
+    kIdRev64,                            //!< Instruction 'rev64'.
+    kIdRor,                              //!< Instruction 'ror'.
+    kIdRorv,                             //!< Instruction 'rorv'.
+    kIdSbc,                              //!< Instruction 'sbc'.
+    kIdSbcs,                             //!< Instruction 'sbcs'.
+    kIdSbfiz,                            //!< Instruction 'sbfiz'.
+    kIdSbfm,                             //!< Instruction 'sbfm'.
+    kIdSbfx,                             //!< Instruction 'sbfx'.
+    kIdSdiv,                             //!< Instruction 'sdiv'.
+    kIdSetf8,                            //!< Instruction 'setf8'.
+    kIdSetf16,                           //!< Instruction 'setf16'.
+    kIdSev,                              //!< Instruction 'sev'.
+    kIdSevl,                             //!< Instruction 'sevl'.
+    kIdSmaddl,                           //!< Instruction 'smaddl'.
+    kIdSmc,                              //!< Instruction 'smc'.
+    kIdSmnegl,                           //!< Instruction 'smnegl'.
+    kIdSmsubl,                           //!< Instruction 'smsubl'.
+    kIdSmulh,                            //!< Instruction 'smulh'.
+    kIdSmull,                            //!< Instruction 'smull'.
+    kIdSsbb,                             //!< Instruction 'ssbb'.
+    kIdSt2g,                             //!< Instruction 'st2g'.
+    kIdStadd,                            //!< Instruction 'stadd'.
+    kIdStaddl,                           //!< Instruction 'staddl'.
+    kIdStaddb,                           //!< Instruction 'staddb'.
+    kIdStaddlb,                          //!< Instruction 'staddlb'.
+    kIdStaddh,                           //!< Instruction 'staddh'.
+    kIdStaddlh,                          //!< Instruction 'staddlh'.
+    kIdStclr,                            //!< Instruction 'stclr'.
+    kIdStclrl,                           //!< Instruction 'stclrl'.
+    kIdStclrb,                           //!< Instruction 'stclrb'.
+    kIdStclrlb,                          //!< Instruction 'stclrlb'.
+    kIdStclrh,                           //!< Instruction 'stclrh'.
+    kIdStclrlh,                          //!< Instruction 'stclrlh'.
+    kIdSteor,                            //!< Instruction 'steor'.
+    kIdSteorl,                           //!< Instruction 'steorl'.
+    kIdSteorb,                           //!< Instruction 'steorb'.
+    kIdSteorlb,                          //!< Instruction 'steorlb'.
+    kIdSteorh,                           //!< Instruction 'steorh'.
+    kIdSteorlh,                          //!< Instruction 'steorlh'.
+    kIdStg,                              //!< Instruction 'stg'.
+    kIdStgm,                             //!< Instruction 'stgm'.
+    kIdStgp,                             //!< Instruction 'stgp'.
+    kIdStllr,                            //!< Instruction 'stllr'.
+    kIdStllrb,                           //!< Instruction 'stllrb'.
+    kIdStllrh,                           //!< Instruction 'stllrh'.
+    kIdStlr,                             //!< Instruction 'stlr'.
+    kIdStlrb,                            //!< Instruction 'stlrb'.
+    kIdStlrh,                            //!< Instruction 'stlrh'.
+    kIdStlxp,                            //!< Instruction 'stlxp'.
+    kIdStlxr,                            //!< Instruction 'stlxr'.
+    kIdStlxrb,                           //!< Instruction 'stlxrb'.
+    kIdStlxrh,                           //!< Instruction 'stlxrh'.
+    kIdStnp,                             //!< Instruction 'stnp'.
+    kIdStp,                              //!< Instruction 'stp'.
+    kIdStr,                              //!< Instruction 'str'.
+    kIdStrb,                             //!< Instruction 'strb'.
+    kIdStrh,                             //!< Instruction 'strh'.
+    kIdStset,                            //!< Instruction 'stset'.
+    kIdStsetl,                           //!< Instruction 'stsetl'.
+    kIdStsetb,                           //!< Instruction 'stsetb'.
+    kIdStsetlb,                          //!< Instruction 'stsetlb'.
+    kIdStseth,                           //!< Instruction 'stseth'.
+    kIdStsetlh,                          //!< Instruction 'stsetlh'.
+    kIdStsmax,                           //!< Instruction 'stsmax'.
+    kIdStsmaxl,                          //!< Instruction 'stsmaxl'.
+    kIdStsmaxb,                          //!< Instruction 'stsmaxb'.
+    kIdStsmaxlb,                         //!< Instruction 'stsmaxlb'.
+    kIdStsmaxh,                          //!< Instruction 'stsmaxh'.
+    kIdStsmaxlh,                         //!< Instruction 'stsmaxlh'.
+    kIdStsmin,                           //!< Instruction 'stsmin'.
+    kIdStsminl,                          //!< Instruction 'stsminl'.
+    kIdStsminb,                          //!< Instruction 'stsminb'.
+    kIdStsminlb,                         //!< Instruction 'stsminlb'.
+    kIdStsminh,                          //!< Instruction 'stsminh'.
+    kIdStsminlh,                         //!< Instruction 'stsminlh'.
+    kIdSttr,                             //!< Instruction 'sttr'.
+    kIdSttrb,                            //!< Instruction 'sttrb'.
+    kIdSttrh,                            //!< Instruction 'sttrh'.
+    kIdStumax,                           //!< Instruction 'stumax'.
+    kIdStumaxl,                          //!< Instruction 'stumaxl'.
+    kIdStumaxb,                          //!< Instruction 'stumaxb'.
+    kIdStumaxlb,                         //!< Instruction 'stumaxlb'.
+    kIdStumaxh,                          //!< Instruction 'stumaxh'.
+    kIdStumaxlh,                         //!< Instruction 'stumaxlh'.
+    kIdStumin,                           //!< Instruction 'stumin'.
+    kIdStuminl,                          //!< Instruction 'stuminl'.
+    kIdStuminb,                          //!< Instruction 'stuminb'.
+    kIdStuminlb,                         //!< Instruction 'stuminlb'.
+    kIdStuminh,                          //!< Instruction 'stuminh'.
+    kIdStuminlh,                         //!< Instruction 'stuminlh'.
+    kIdStur,                             //!< Instruction 'stur'.
+    kIdSturb,                            //!< Instruction 'sturb'.
+    kIdSturh,                            //!< Instruction 'sturh'.
+    kIdStxp,                             //!< Instruction 'stxp'.
+    kIdStxr,                             //!< Instruction 'stxr'.
+    kIdStxrb,                            //!< Instruction 'stxrb'.
+    kIdStxrh,                            //!< Instruction 'stxrh'.
+    kIdStz2g,                            //!< Instruction 'stz2g'.
+    kIdStzg,                             //!< Instruction 'stzg'.
+    kIdStzgm,                            //!< Instruction 'stzgm'.
+    kIdSub,                              //!< Instruction 'sub'.
+    kIdSubg,                             //!< Instruction 'subg'.
+    kIdSubp,                             //!< Instruction 'subp'.
+    kIdSubps,                            //!< Instruction 'subps'.
+    kIdSubs,                             //!< Instruction 'subs'.
+    kIdSvc,                              //!< Instruction 'svc'.
+    kIdSwp,                              //!< Instruction 'swp'.
+    kIdSwpa,                             //!< Instruction 'swpa'.
+    kIdSwpab,                            //!< Instruction 'swpab'.
+    kIdSwpah,                            //!< Instruction 'swpah'.
+    kIdSwpal,                            //!< Instruction 'swpal'.
+    kIdSwpalb,                           //!< Instruction 'swpalb'.
+    kIdSwpalh,                           //!< Instruction 'swpalh'.
+    kIdSwpb,                             //!< Instruction 'swpb'.
+    kIdSwph,                             //!< Instruction 'swph'.
+    kIdSwpl,                             //!< Instruction 'swpl'.
+    kIdSwplb,                            //!< Instruction 'swplb'.
+    kIdSwplh,                            //!< Instruction 'swplh'.
+    kIdSxtb,                             //!< Instruction 'sxtb'.
+    kIdSxth,                             //!< Instruction 'sxth'.
+    kIdSxtw,                             //!< Instruction 'sxtw'.
+    kIdSys,                              //!< Instruction 'sys'.
+    kIdTlbi,                             //!< Instruction 'tlbi'.
+    kIdTst,                              //!< Instruction 'tst'.
+    kIdTbnz,                             //!< Instruction 'tbnz'.
+    kIdTbz,                              //!< Instruction 'tbz'.
+    kIdUbfiz,                            //!< Instruction 'ubfiz'.
+    kIdUbfm,                             //!< Instruction 'ubfm'.
+    kIdUbfx,                             //!< Instruction 'ubfx'.
+    kIdUdf,                              //!< Instruction 'udf'.
+    kIdUdiv,                             //!< Instruction 'udiv'.
+    kIdUmaddl,                           //!< Instruction 'umaddl'.
+    kIdUmnegl,                           //!< Instruction 'umnegl'.
+    kIdUmull,                            //!< Instruction 'umull'.
+    kIdUmulh,                            //!< Instruction 'umulh'.
+    kIdUmsubl,                           //!< Instruction 'umsubl'.
+    kIdUxtb,                             //!< Instruction 'uxtb'.
+    kIdUxth,                             //!< Instruction 'uxth'.
+    kIdWfe,                              //!< Instruction 'wfe'.
+    kIdWfi,                              //!< Instruction 'wfi'.
+    kIdXaflag,                           //!< Instruction 'xaflag'.
+    kIdXpacd,                            //!< Instruction 'xpacd'.
+    kIdXpaci,                            //!< Instruction 'xpaci'.
+    kIdXpaclri,                          //!< Instruction 'xpaclri'.
+    kIdYield,                            //!< Instruction 'yield'.
+    kIdAbs_v,                            //!< Instruction 'abs' {ASIMD}.
+    kIdAdd_v,                            //!< Instruction 'add' {ASIMD}.
+    kIdAddhn_v,                          //!< Instruction 'addhn' {ASIMD}.
+    kIdAddhn2_v,                         //!< Instruction 'addhn2' {ASIMD}.
+    kIdAddp_v,                           //!< Instruction 'addp' {ASIMD}.
+    kIdAddv_v,                           //!< Instruction 'addv' {ASIMD}.
+    kIdAesd_v,                           //!< Instruction 'aesd' {ASIMD}.
+    kIdAese_v,                           //!< Instruction 'aese' {ASIMD}.
+    kIdAesimc_v,                         //!< Instruction 'aesimc' {ASIMD}.
+    kIdAesmc_v,                          //!< Instruction 'aesmc' {ASIMD}.
+    kIdAnd_v,                            //!< Instruction 'and' {ASIMD}.
+    kIdBcax_v,                           //!< Instruction 'bcax' {ASIMD}.
+    kIdBfcvt_v,                          //!< Instruction 'bfcvt' {ASIMD}.
+    kIdBfcvtn_v,                         //!< Instruction 'bfcvtn' {ASIMD}.
+    kIdBfcvtn2_v,                        //!< Instruction 'bfcvtn2' {ASIMD}.
+    kIdBfdot_v,                          //!< Instruction 'bfdot' {ASIMD}.
+    kIdBfmlalb_v,                        //!< Instruction 'bfmlalb' {ASIMD}.
+    kIdBfmlalt_v,                        //!< Instruction 'bfmlalt' {ASIMD}.
+    kIdBfmmla_v,                         //!< Instruction 'bfmmla' {ASIMD}.
+    kIdBic_v,                            //!< Instruction 'bic' {ASIMD}.
+    kIdBif_v,                            //!< Instruction 'bif' {ASIMD}.
+    kIdBit_v,                            //!< Instruction 'bit' {ASIMD}.
+    kIdBsl_v,                            //!< Instruction 'bsl' {ASIMD}.
+    kIdCls_v,                            //!< Instruction 'cls' {ASIMD}.
+    kIdClz_v,                            //!< Instruction 'clz' {ASIMD}.
+    kIdCmeq_v,                           //!< Instruction 'cmeq' {ASIMD}.
+    kIdCmge_v,                           //!< Instruction 'cmge' {ASIMD}.
+    kIdCmgt_v,                           //!< Instruction 'cmgt' {ASIMD}.
+    kIdCmhi_v,                           //!< Instruction 'cmhi' {ASIMD}.
+    kIdCmhs_v,                           //!< Instruction 'cmhs' {ASIMD}.
+    kIdCmle_v,                           //!< Instruction 'cmle' {ASIMD}.
+    kIdCmlt_v,                           //!< Instruction 'cmlt' {ASIMD}.
+    kIdCmtst_v,                          //!< Instruction 'cmtst' {ASIMD}.
+    kIdCnt_v,                            //!< Instruction 'cnt' {ASIMD}.
+    kIdDup_v,                            //!< Instruction 'dup' {ASIMD}.
+    kIdEor_v,                            //!< Instruction 'eor' {ASIMD}.
+    kIdEor3_v,                           //!< Instruction 'eor3' {ASIMD}.
+    kIdExt_v,                            //!< Instruction 'ext' {ASIMD}.
+    kIdFabd_v,                           //!< Instruction 'fabd' {ASIMD}.
+    kIdFabs_v,                           //!< Instruction 'fabs' {ASIMD}.
+    kIdFacge_v,                          //!< Instruction 'facge' {ASIMD}.
+    kIdFacgt_v,                          //!< Instruction 'facgt' {ASIMD}.
+    kIdFadd_v,                           //!< Instruction 'fadd' {ASIMD}.
+    kIdFaddp_v,                          //!< Instruction 'faddp' {ASIMD}.
+    kIdFcadd_v,                          //!< Instruction 'fcadd' {ASIMD}.
+    kIdFccmp_v,                          //!< Instruction 'fccmp' {ASIMD}.
+    kIdFccmpe_v,                         //!< Instruction 'fccmpe' {ASIMD}.
+    kIdFcmeq_v,                          //!< Instruction 'fcmeq' {ASIMD}.
+    kIdFcmge_v,                          //!< Instruction 'fcmge' {ASIMD}.
+    kIdFcmgt_v,                          //!< Instruction 'fcmgt' {ASIMD}.
+    kIdFcmla_v,                          //!< Instruction 'fcmla' {ASIMD}.
+    kIdFcmle_v,                          //!< Instruction 'fcmle' {ASIMD}.
+    kIdFcmlt_v,                          //!< Instruction 'fcmlt' {ASIMD}.
+    kIdFcmp_v,                           //!< Instruction 'fcmp' {ASIMD}.
+    kIdFcmpe_v,                          //!< Instruction 'fcmpe' {ASIMD}.
+    kIdFcsel_v,                          //!< Instruction 'fcsel' {ASIMD}.
+    kIdFcvt_v,                           //!< Instruction 'fcvt' {ASIMD}.
+    kIdFcvtas_v,                         //!< Instruction 'fcvtas' {ASIMD}.
+    kIdFcvtau_v,                         //!< Instruction 'fcvtau' {ASIMD}.
+    kIdFcvtl_v,                          //!< Instruction 'fcvtl' {ASIMD}.
+    kIdFcvtl2_v,                         //!< Instruction 'fcvtl2' {ASIMD}.
+    kIdFcvtms_v,                         //!< Instruction 'fcvtms' {ASIMD}.
+    kIdFcvtmu_v,                         //!< Instruction 'fcvtmu' {ASIMD}.
+    kIdFcvtn_v,                          //!< Instruction 'fcvtn' {ASIMD}.
+    kIdFcvtn2_v,                         //!< Instruction 'fcvtn2' {ASIMD}.
+    kIdFcvtns_v,                         //!< Instruction 'fcvtns' {ASIMD}.
+    kIdFcvtnu_v,                         //!< Instruction 'fcvtnu' {ASIMD}.
+    kIdFcvtps_v,                         //!< Instruction 'fcvtps' {ASIMD}.
+    kIdFcvtpu_v,                         //!< Instruction 'fcvtpu' {ASIMD}.
+    kIdFcvtxn_v,                         //!< Instruction 'fcvtxn' {ASIMD}.
+    kIdFcvtxn2_v,                        //!< Instruction 'fcvtxn2' {ASIMD}.
+    kIdFcvtzs_v,                         //!< Instruction 'fcvtzs' {ASIMD}.
+    kIdFcvtzu_v,                         //!< Instruction 'fcvtzu' {ASIMD}.
+    kIdFdiv_v,                           //!< Instruction 'fdiv' {ASIMD}.
+    kIdFjcvtzs_v,                        //!< Instruction 'fjcvtzs' {ASIMD}.
+    kIdFmadd_v,                          //!< Instruction 'fmadd' {ASIMD}.
+    kIdFmax_v,                           //!< Instruction 'fmax' {ASIMD}.
+    kIdFmaxnm_v,                         //!< Instruction 'fmaxnm' {ASIMD}.
+    kIdFmaxnmp_v,                        //!< Instruction 'fmaxnmp' {ASIMD}.
+    kIdFmaxnmv_v,                        //!< Instruction 'fmaxnmv' {ASIMD}.
+    kIdFmaxp_v,                          //!< Instruction 'fmaxp' {ASIMD}.
+    kIdFmaxv_v,                          //!< Instruction 'fmaxv' {ASIMD}.
+    kIdFmin_v,                           //!< Instruction 'fmin' {ASIMD}.
+    kIdFminnm_v,                         //!< Instruction 'fminnm' {ASIMD}.
+    kIdFminnmp_v,                        //!< Instruction 'fminnmp' {ASIMD}.
+    kIdFminnmv_v,                        //!< Instruction 'fminnmv' {ASIMD}.
+    kIdFminp_v,                          //!< Instruction 'fminp' {ASIMD}.
+    kIdFminv_v,                          //!< Instruction 'fminv' {ASIMD}.
+    kIdFmla_v,                           //!< Instruction 'fmla' {ASIMD}.
+    kIdFmlal_v,                          //!< Instruction 'fmlal' {ASIMD}.
+    kIdFmlal2_v,                         //!< Instruction 'fmlal2' {ASIMD}.
+    kIdFmls_v,                           //!< Instruction 'fmls' {ASIMD}.
+    kIdFmlsl_v,                          //!< Instruction 'fmlsl' {ASIMD}.
+    kIdFmlsl2_v,                         //!< Instruction 'fmlsl2' {ASIMD}.
+    kIdFmov_v,                           //!< Instruction 'fmov' {ASIMD}.
+    kIdFmsub_v,                          //!< Instruction 'fmsub' {ASIMD}.
+    kIdFmul_v,                           //!< Instruction 'fmul' {ASIMD}.
+    kIdFmulx_v,                          //!< Instruction 'fmulx' {ASIMD}.
+    kIdFneg_v,                           //!< Instruction 'fneg' {ASIMD}.
+    kIdFnmadd_v,                         //!< Instruction 'fnmadd' {ASIMD}.
+    kIdFnmsub_v,                         //!< Instruction 'fnmsub' {ASIMD}.
+    kIdFnmul_v,                          //!< Instruction 'fnmul' {ASIMD}.
+    kIdFrecpe_v,                         //!< Instruction 'frecpe' {ASIMD}.
+    kIdFrecps_v,                         //!< Instruction 'frecps' {ASIMD}.
+    kIdFrecpx_v,                         //!< Instruction 'frecpx' {ASIMD}.
+    kIdFrint32x_v,                       //!< Instruction 'frint32x' {ASIMD}.
+    kIdFrint32z_v,                       //!< Instruction 'frint32z' {ASIMD}.
+    kIdFrint64x_v,                       //!< Instruction 'frint64x' {ASIMD}.
+    kIdFrint64z_v,                       //!< Instruction 'frint64z' {ASIMD}.
+    kIdFrinta_v,                         //!< Instruction 'frinta' {ASIMD}.
+    kIdFrinti_v,                         //!< Instruction 'frinti' {ASIMD}.
+    kIdFrintm_v,                         //!< Instruction 'frintm' {ASIMD}.
+    kIdFrintn_v,                         //!< Instruction 'frintn' {ASIMD}.
+    kIdFrintp_v,                         //!< Instruction 'frintp' {ASIMD}.
+    kIdFrintx_v,                         //!< Instruction 'frintx' {ASIMD}.
+    kIdFrintz_v,                         //!< Instruction 'frintz' {ASIMD}.
+    kIdFrsqrte_v,                        //!< Instruction 'frsqrte' {ASIMD}.
+    kIdFrsqrts_v,                        //!< Instruction 'frsqrts' {ASIMD}.
+    kIdFsqrt_v,                          //!< Instruction 'fsqrt' {ASIMD}.
+    kIdFsub_v,                           //!< Instruction 'fsub' {ASIMD}.
+    kIdIns_v,                            //!< Instruction 'ins' {ASIMD}.
+    kIdLd1_v,                            //!< Instruction 'ld1' {ASIMD}.
+    kIdLd1r_v,                           //!< Instruction 'ld1r' {ASIMD}.
+    kIdLd2_v,                            //!< Instruction 'ld2' {ASIMD}.
+    kIdLd2r_v,                           //!< Instruction 'ld2r' {ASIMD}.
+    kIdLd3_v,                            //!< Instruction 'ld3' {ASIMD}.
+    kIdLd3r_v,                           //!< Instruction 'ld3r' {ASIMD}.
+    kIdLd4_v,                            //!< Instruction 'ld4' {ASIMD}.
+    kIdLd4r_v,                           //!< Instruction 'ld4r' {ASIMD}.
+    kIdLdnp_v,                           //!< Instruction 'ldnp' {ASIMD}.
+    kIdLdp_v,                            //!< Instruction 'ldp' {ASIMD}.
+    kIdLdr_v,                            //!< Instruction 'ldr' {ASIMD}.
+    kIdLdur_v,                           //!< Instruction 'ldur' {ASIMD}.
+    kIdMla_v,                            //!< Instruction 'mla' {ASIMD}.
+    kIdMls_v,                            //!< Instruction 'mls' {ASIMD}.
+    kIdMov_v,                            //!< Instruction 'mov' {ASIMD}.
+    kIdMovi_v,                           //!< Instruction 'movi' {ASIMD}.
+    kIdMul_v,                            //!< Instruction 'mul' {ASIMD}.
+    kIdMvn_v,                            //!< Instruction 'mvn' {ASIMD}.
+    kIdMvni_v,                           //!< Instruction 'mvni' {ASIMD}.
+    kIdNeg_v,                            //!< Instruction 'neg' {ASIMD}.
+    kIdNot_v,                            //!< Instruction 'not' {ASIMD}.
+    kIdOrn_v,                            //!< Instruction 'orn' {ASIMD}.
+    kIdOrr_v,                            //!< Instruction 'orr' {ASIMD}.
+    kIdPmul_v,                           //!< Instruction 'pmul' {ASIMD}.
+    kIdPmull_v,                          //!< Instruction 'pmull' {ASIMD}.
+    kIdPmull2_v,                         //!< Instruction 'pmull2' {ASIMD}.
+    kIdRaddhn_v,                         //!< Instruction 'raddhn' {ASIMD}.
+    kIdRaddhn2_v,                        //!< Instruction 'raddhn2' {ASIMD}.
+    kIdRax1_v,                           //!< Instruction 'rax1' {ASIMD}.
+    kIdRbit_v,                           //!< Instruction 'rbit' {ASIMD}.
+    kIdRev16_v,                          //!< Instruction 'rev16' {ASIMD}.
+    kIdRev32_v,                          //!< Instruction 'rev32' {ASIMD}.
+    kIdRev64_v,                          //!< Instruction 'rev64' {ASIMD}.
+    kIdRshrn_v,                          //!< Instruction 'rshrn' {ASIMD}.
+    kIdRshrn2_v,                         //!< Instruction 'rshrn2' {ASIMD}.
+    kIdRsubhn_v,                         //!< Instruction 'rsubhn' {ASIMD}.
+    kIdRsubhn2_v,                        //!< Instruction 'rsubhn2' {ASIMD}.
+    kIdSaba_v,                           //!< Instruction 'saba' {ASIMD}.
+    kIdSabal_v,                          //!< Instruction 'sabal' {ASIMD}.
+    kIdSabal2_v,                         //!< Instruction 'sabal2' {ASIMD}.
+    kIdSabd_v,                           //!< Instruction 'sabd' {ASIMD}.
+    kIdSabdl_v,                          //!< Instruction 'sabdl' {ASIMD}.
+    kIdSabdl2_v,                         //!< Instruction 'sabdl2' {ASIMD}.
+    kIdSadalp_v,                         //!< Instruction 'sadalp' {ASIMD}.
+    kIdSaddl_v,                          //!< Instruction 'saddl' {ASIMD}.
+    kIdSaddl2_v,                         //!< Instruction 'saddl2' {ASIMD}.
+    kIdSaddlp_v,                         //!< Instruction 'saddlp' {ASIMD}.
+    kIdSaddlv_v,                         //!< Instruction 'saddlv' {ASIMD}.
+    kIdSaddw_v,                          //!< Instruction 'saddw' {ASIMD}.
+    kIdSaddw2_v,                         //!< Instruction 'saddw2' {ASIMD}.
+    kIdScvtf_v,                          //!< Instruction 'scvtf' {ASIMD}.
+    kIdSdot_v,                           //!< Instruction 'sdot' {ASIMD}.
+    kIdSha1c_v,                          //!< Instruction 'sha1c' {ASIMD}.
+    kIdSha1h_v,                          //!< Instruction 'sha1h' {ASIMD}.
+    kIdSha1m_v,                          //!< Instruction 'sha1m' {ASIMD}.
+    kIdSha1p_v,                          //!< Instruction 'sha1p' {ASIMD}.
+    kIdSha1su0_v,                        //!< Instruction 'sha1su0' {ASIMD}.
+    kIdSha1su1_v,                        //!< Instruction 'sha1su1' {ASIMD}.
+    kIdSha256h_v,                        //!< Instruction 'sha256h' {ASIMD}.
+    kIdSha256h2_v,                       //!< Instruction 'sha256h2' {ASIMD}.
+    kIdSha256su0_v,                      //!< Instruction 'sha256su0' {ASIMD}.
+    kIdSha256su1_v,                      //!< Instruction 'sha256su1' {ASIMD}.
+    kIdSha512h_v,                        //!< Instruction 'sha512h' {ASIMD}.
+    kIdSha512h2_v,                       //!< Instruction 'sha512h2' {ASIMD}.
+    kIdSha512su0_v,                      //!< Instruction 'sha512su0' {ASIMD}.
+    kIdSha512su1_v,                      //!< Instruction 'sha512su1' {ASIMD}.
+    kIdShadd_v,                          //!< Instruction 'shadd' {ASIMD}.
+    kIdShl_v,                            //!< Instruction 'shl' {ASIMD}.
+    kIdShll_v,                           //!< Instruction 'shll' {ASIMD}.
+    kIdShll2_v,                          //!< Instruction 'shll2' {ASIMD}.
+    kIdShrn_v,                           //!< Instruction 'shrn' {ASIMD}.
+    kIdShrn2_v,                          //!< Instruction 'shrn2' {ASIMD}.
+    kIdShsub_v,                          //!< Instruction 'shsub' {ASIMD}.
+    kIdSli_v,                            //!< Instruction 'sli' {ASIMD}.
+    kIdSm3partw1_v,                      //!< Instruction 'sm3partw1' {ASIMD}.
+    kIdSm3partw2_v,                      //!< Instruction 'sm3partw2' {ASIMD}.
+    kIdSm3ss1_v,                         //!< Instruction 'sm3ss1' {ASIMD}.
+    kIdSm3tt1a_v,                        //!< Instruction 'sm3tt1a' {ASIMD}.
+    kIdSm3tt1b_v,                        //!< Instruction 'sm3tt1b' {ASIMD}.
+    kIdSm3tt2a_v,                        //!< Instruction 'sm3tt2a' {ASIMD}.
+    kIdSm3tt2b_v,                        //!< Instruction 'sm3tt2b' {ASIMD}.
+    kIdSm4e_v,                           //!< Instruction 'sm4e' {ASIMD}.
+    kIdSm4ekey_v,                        //!< Instruction 'sm4ekey' {ASIMD}.
+    kIdSmax_v,                           //!< Instruction 'smax' {ASIMD}.
+    kIdSmaxp_v,                          //!< Instruction 'smaxp' {ASIMD}.
+    kIdSmaxv_v,                          //!< Instruction 'smaxv' {ASIMD}.
+    kIdSmin_v,                           //!< Instruction 'smin' {ASIMD}.
+    kIdSminp_v,                          //!< Instruction 'sminp' {ASIMD}.
+    kIdSminv_v,                          //!< Instruction 'sminv' {ASIMD}.
+    kIdSmlal_v,                          //!< Instruction 'smlal' {ASIMD}.
+    kIdSmlal2_v,                         //!< Instruction 'smlal2' {ASIMD}.
+    kIdSmlsl_v,                          //!< Instruction 'smlsl' {ASIMD}.
+    kIdSmlsl2_v,                         //!< Instruction 'smlsl2' {ASIMD}.
+    kIdSmmla_v,                          //!< Instruction 'smmla' {ASIMD}.
+    kIdSmov_v,                           //!< Instruction 'smov' {ASIMD}.
+    kIdSmull_v,                          //!< Instruction 'smull' {ASIMD}.
+    kIdSmull2_v,                         //!< Instruction 'smull2' {ASIMD}.
+    kIdSqabs_v,                          //!< Instruction 'sqabs' {ASIMD}.
+    kIdSqadd_v,                          //!< Instruction 'sqadd' {ASIMD}.
+    kIdSqdmlal_v,                        //!< Instruction 'sqdmlal' {ASIMD}.
+    kIdSqdmlal2_v,                       //!< Instruction 'sqdmlal2' {ASIMD}.
+    kIdSqdmlsl_v,                        //!< Instruction 'sqdmlsl' {ASIMD}.
+    kIdSqdmlsl2_v,                       //!< Instruction 'sqdmlsl2' {ASIMD}.
+    kIdSqdmulh_v,                        //!< Instruction 'sqdmulh' {ASIMD}.
+    kIdSqdmull_v,                        //!< Instruction 'sqdmull' {ASIMD}.
+    kIdSqdmull2_v,                       //!< Instruction 'sqdmull2' {ASIMD}.
+    kIdSqneg_v,                          //!< Instruction 'sqneg' {ASIMD}.
+    kIdSqrdmlah_v,                       //!< Instruction 'sqrdmlah' {ASIMD}.
+    kIdSqrdmlsh_v,                       //!< Instruction 'sqrdmlsh' {ASIMD}.
+    kIdSqrdmulh_v,                       //!< Instruction 'sqrdmulh' {ASIMD}.
+    kIdSqrshl_v,                         //!< Instruction 'sqrshl' {ASIMD}.
+    kIdSqrshrn_v,                        //!< Instruction 'sqrshrn' {ASIMD}.
+    kIdSqrshrn2_v,                       //!< Instruction 'sqrshrn2' {ASIMD}.
+    kIdSqrshrun_v,                       //!< Instruction 'sqrshrun' {ASIMD}.
+    kIdSqrshrun2_v,                      //!< Instruction 'sqrshrun2' {ASIMD}.
+    kIdSqshl_v,                          //!< Instruction 'sqshl' {ASIMD}.
+    kIdSqshlu_v,                         //!< Instruction 'sqshlu' {ASIMD}.
+    kIdSqshrn_v,                         //!< Instruction 'sqshrn' {ASIMD}.
+    kIdSqshrn2_v,                        //!< Instruction 'sqshrn2' {ASIMD}.
+    kIdSqshrun_v,                        //!< Instruction 'sqshrun' {ASIMD}.
+    kIdSqshrun2_v,                       //!< Instruction 'sqshrun2' {ASIMD}.
+    kIdSqsub_v,                          //!< Instruction 'sqsub' {ASIMD}.
+    kIdSqxtn_v,                          //!< Instruction 'sqxtn' {ASIMD}.
+    kIdSqxtn2_v,                         //!< Instruction 'sqxtn2' {ASIMD}.
+    kIdSqxtun_v,                         //!< Instruction 'sqxtun' {ASIMD}.
+    kIdSqxtun2_v,                        //!< Instruction 'sqxtun2' {ASIMD}.
+    kIdSrhadd_v,                         //!< Instruction 'srhadd' {ASIMD}.
+    kIdSri_v,                            //!< Instruction 'sri' {ASIMD}.
+    kIdSrshl_v,                          //!< Instruction 'srshl' {ASIMD}.
+    kIdSrshr_v,                          //!< Instruction 'srshr' {ASIMD}.
+    kIdSrsra_v,                          //!< Instruction 'srsra' {ASIMD}.
+    kIdSshl_v,                           //!< Instruction 'sshl' {ASIMD}.
+    kIdSshll_v,                          //!< Instruction 'sshll' {ASIMD}.
+    kIdSshll2_v,                         //!< Instruction 'sshll2' {ASIMD}.
+    kIdSshr_v,                           //!< Instruction 'sshr' {ASIMD}.
+    kIdSsra_v,                           //!< Instruction 'ssra' {ASIMD}.
+    kIdSsubl_v,                          //!< Instruction 'ssubl' {ASIMD}.
+    kIdSsubl2_v,                         //!< Instruction 'ssubl2' {ASIMD}.
+    kIdSsubw_v,                          //!< Instruction 'ssubw' {ASIMD}.
+    kIdSsubw2_v,                         //!< Instruction 'ssubw2' {ASIMD}.
+    kIdSt1_v,                            //!< Instruction 'st1' {ASIMD}.
+    kIdSt2_v,                            //!< Instruction 'st2' {ASIMD}.
+    kIdSt3_v,                            //!< Instruction 'st3' {ASIMD}.
+    kIdSt4_v,                            //!< Instruction 'st4' {ASIMD}.
+    kIdStnp_v,                           //!< Instruction 'stnp' {ASIMD}.
+    kIdStp_v,                            //!< Instruction 'stp' {ASIMD}.
+    kIdStr_v,                            //!< Instruction 'str' {ASIMD}.
+    kIdStur_v,                           //!< Instruction 'stur' {ASIMD}.
+    kIdSub_v,                            //!< Instruction 'sub' {ASIMD}.
+    kIdSubhn_v,                          //!< Instruction 'subhn' {ASIMD}.
+    kIdSubhn2_v,                         //!< Instruction 'subhn2' {ASIMD}.
+    kIdSudot_v,                          //!< Instruction 'sudot' {ASIMD}.
+    kIdSuqadd_v,                         //!< Instruction 'suqadd' {ASIMD}.
+    kIdSxtl_v,                           //!< Instruction 'sxtl' {ASIMD}.
+    kIdSxtl2_v,                          //!< Instruction 'sxtl2' {ASIMD}.
+    kIdTbl_v,                            //!< Instruction 'tbl' {ASIMD}.
+    kIdTbx_v,                            //!< Instruction 'tbx' {ASIMD}.
+    kIdTrn1_v,                           //!< Instruction 'trn1' {ASIMD}.
+    kIdTrn2_v,                           //!< Instruction 'trn2' {ASIMD}.
+    kIdUaba_v,                           //!< Instruction 'uaba' {ASIMD}.
+    kIdUabal_v,                          //!< Instruction 'uabal' {ASIMD}.
+    kIdUabal2_v,                         //!< Instruction 'uabal2' {ASIMD}.
+    kIdUabd_v,                           //!< Instruction 'uabd' {ASIMD}.
+    kIdUabdl_v,                          //!< Instruction 'uabdl' {ASIMD}.
+    kIdUabdl2_v,                         //!< Instruction 'uabdl2' {ASIMD}.
+    kIdUadalp_v,                         //!< Instruction 'uadalp' {ASIMD}.
+    kIdUaddl_v,                          //!< Instruction 'uaddl' {ASIMD}.
+    kIdUaddl2_v,                         //!< Instruction 'uaddl2' {ASIMD}.
+    kIdUaddlp_v,                         //!< Instruction 'uaddlp' {ASIMD}.
+    kIdUaddlv_v,                         //!< Instruction 'uaddlv' {ASIMD}.
+    kIdUaddw_v,                          //!< Instruction 'uaddw' {ASIMD}.
+    kIdUaddw2_v,                         //!< Instruction 'uaddw2' {ASIMD}.
+    kIdUcvtf_v,                          //!< Instruction 'ucvtf' {ASIMD}.
+    kIdUdot_v,                           //!< Instruction 'udot' {ASIMD}.
+    kIdUhadd_v,                          //!< Instruction 'uhadd' {ASIMD}.
+    kIdUhsub_v,                          //!< Instruction 'uhsub' {ASIMD}.
+    kIdUmax_v,                           //!< Instruction 'umax' {ASIMD}.
+    kIdUmaxp_v,                          //!< Instruction 'umaxp' {ASIMD}.
+    kIdUmaxv_v,                          //!< Instruction 'umaxv' {ASIMD}.
+    kIdUmin_v,                           //!< Instruction 'umin' {ASIMD}.
+    kIdUminp_v,                          //!< Instruction 'uminp' {ASIMD}.
+    kIdUminv_v,                          //!< Instruction 'uminv' {ASIMD}.
+    kIdUmlal_v,                          //!< Instruction 'umlal' {ASIMD}.
+    kIdUmlal2_v,                         //!< Instruction 'umlal2' {ASIMD}.
+    kIdUmlsl_v,                          //!< Instruction 'umlsl' {ASIMD}.
+    kIdUmlsl2_v,                         //!< Instruction 'umlsl2' {ASIMD}.
+    kIdUmmla_v,                          //!< Instruction 'ummla' {ASIMD}.
+    kIdUmov_v,                           //!< Instruction 'umov' {ASIMD}.
+    kIdUmull_v,                          //!< Instruction 'umull' {ASIMD}.
+    kIdUmull2_v,                         //!< Instruction 'umull2' {ASIMD}.
+    kIdUqadd_v,                          //!< Instruction 'uqadd' {ASIMD}.
+    kIdUqrshl_v,                         //!< Instruction 'uqrshl' {ASIMD}.
+    kIdUqrshrn_v,                        //!< Instruction 'uqrshrn' {ASIMD}.
+    kIdUqrshrn2_v,                       //!< Instruction 'uqrshrn2' {ASIMD}.
+    kIdUqshl_v,                          //!< Instruction 'uqshl' {ASIMD}.
+    kIdUqshrn_v,                         //!< Instruction 'uqshrn' {ASIMD}.
+    kIdUqshrn2_v,                        //!< Instruction 'uqshrn2' {ASIMD}.
+    kIdUqsub_v,                          //!< Instruction 'uqsub' {ASIMD}.
+    kIdUqxtn_v,                          //!< Instruction 'uqxtn' {ASIMD}.
+    kIdUqxtn2_v,                         //!< Instruction 'uqxtn2' {ASIMD}.
+    kIdUrecpe_v,                         //!< Instruction 'urecpe' {ASIMD}.
+    kIdUrhadd_v,                         //!< Instruction 'urhadd' {ASIMD}.
+    kIdUrshl_v,                          //!< Instruction 'urshl' {ASIMD}.
+    kIdUrshr_v,                          //!< Instruction 'urshr' {ASIMD}.
+    kIdUrsqrte_v,                        //!< Instruction 'ursqrte' {ASIMD}.
+    kIdUrsra_v,                          //!< Instruction 'ursra' {ASIMD}.
+    kIdUsdot_v,                          //!< Instruction 'usdot' {ASIMD}.
+    kIdUshl_v,                           //!< Instruction 'ushl' {ASIMD}.
+    kIdUshll_v,                          //!< Instruction 'ushll' {ASIMD}.
+    kIdUshll2_v,                         //!< Instruction 'ushll2' {ASIMD}.
+    kIdUshr_v,                           //!< Instruction 'ushr' {ASIMD}.
+    kIdUsmmla_v,                         //!< Instruction 'usmmla' {ASIMD}.
+    kIdUsqadd_v,                         //!< Instruction 'usqadd' {ASIMD}.
+    kIdUsra_v,                           //!< Instruction 'usra' {ASIMD}.
+    kIdUsubl_v,                          //!< Instruction 'usubl' {ASIMD}.
+    kIdUsubl2_v,                         //!< Instruction 'usubl2' {ASIMD}.
+    kIdUsubw_v,                          //!< Instruction 'usubw' {ASIMD}.
+    kIdUsubw2_v,                         //!< Instruction 'usubw2' {ASIMD}.
+    kIdUxtl_v,                           //!< Instruction 'uxtl' {ASIMD}.
+    kIdUxtl2_v,                          //!< Instruction 'uxtl2' {ASIMD}.
+    kIdUzp1_v,                           //!< Instruction 'uzp1' {ASIMD}.
+    kIdUzp2_v,                           //!< Instruction 'uzp2' {ASIMD}.
+    kIdXar_v,                            //!< Instruction 'xar' {ASIMD}.
+    kIdXtn_v,                            //!< Instruction 'xtn' {ASIMD}.
+    kIdXtn2_v,                           //!< Instruction 'xtn2' {ASIMD}.
+    kIdZip1_v,                           //!< Instruction 'zip1' {ASIMD}.
+    kIdZip2_v,                           //!< Instruction 'zip2' {ASIMD}.
+    _kIdCount
+    // ${InstId:End}
+  };
+
+  //! Tests whether the part of `instId` selected by `InstIdParts::kRealId` is below `_kIdCount`; `Inst::kIdNone`, which must be zero, therefore also counts as defined.
+  static inline bool isDefinedId(InstId instId) noexcept { return (instId & uint32_t(InstIdParts::kRealId)) < _kIdCount; }
+};
+
+namespace Predicate {
+
+//! Address translate options (AT), expressed as immediates packed from `op1`, `cRn`, `cRm`, and `op2` fields.
+namespace AT {
+  static inline constexpr uint32_t encode(uint32_t op1, uint32_t cRn, uint32_t cRm, uint32_t op2) noexcept { // Packs the four fields into one value; arguments are not masked.
+    return (op1 << 11) | (cRn << 7) | (cRm << 3) | (op2 << 0); // op1 starts at bit 11, cRn at bit 7, cRm at bit 3, op2 at bit 0.
+  }
+
+  enum Value : uint32_t { // Named AT operations; the arguments of each entry are (op1, cRn, cRm, op2).
+    kS1E1R  = encode(0b000, 0b0111, 0b1000, 0b000),
+    kS1E2R  = encode(0b100, 0b0111, 0b1000, 0b000),
+    kS1E3R  = encode(0b110, 0b0111, 0b1000, 0b000),
+    kS1E1W  = encode(0b000, 0b0111, 0b1000, 0b001),
+    kS1E2W  = encode(0b100, 0b0111, 0b1000, 0b001),
+    kS1E3W  = encode(0b110, 0b0111, 0b1000, 0b001),
+    kS1E0R  = encode(0b000, 0b0111, 0b1000, 0b010),
+    kS1E0W  = encode(0b000, 0b0111, 0b1000, 0b011),
+    kS12E1R = encode(0b100, 0b0111, 0b1000, 0b100),
+    kS12E1W = encode(0b100, 0b0111, 0b1000, 0b101),
+    kS12E0R = encode(0b100, 0b0111, 0b1000, 0b110),
+    kS12E0W = encode(0b100, 0b0111, 0b1000, 0b111),
+    kS1E1RP = encode(0b000, 0b0111, 0b1001, 0b000),
+    kS1E1WP = encode(0b000, 0b0111, 0b1001, 0b001)
+  };
+}
+
+//! Data barrier options (DMB/DSB).
+namespace DB {
+  //! Data barrier immediate values. As read off the table below, bits [1:0] select the access type (0b01 loads, 0b10 stores, 0b11 both) and bits [3:2] select the scope (0b00 outer shareable, 0b01 point of unification, 0b10 inner shareable, 0b11 full system).
+  enum Value : uint32_t {
+    //! Waits only for loads to complete, and only applies to the outer shareable domain.
+    kOSHLD = 0x01u,
+    //! Waits only for stores to complete, and only applies to the outer shareable domain.
+    kOSHST = 0x02u,
+    //! Only applies to the outer shareable domain.
+    kOSH = 0x03u,
+
+    //! Waits only for loads to complete and only applies out to the point of unification.
+    kNSHLD = 0x05u,
+    //! Waits only for stores to complete and only applies out to the point of unification.
+    kNSHST = 0x06u,
+    //! Only applies out to the point of unification.
+    kNSH = 0x07u,
+
+    //! Waits only for loads to complete, and only applies to the inner shareable domain.
+    kISHLD = 0x09u,
+    //! Waits only for stores to complete, and only applies to the inner shareable domain.
+    kISHST = 0x0Au,
+    //! Only applies to the inner shareable domain.
+    kISH = 0x0Bu,
+
+    //! Waits only for loads to complete.
+    kLD = 0x0Du,
+    //! Waits only for stores to complete.
+    kST = 0x0Eu,
+    //! Full system memory barrier operation.
+    kSY = 0x0Fu
+  };
+}
+
+//! Data cache maintenance options, expressed as immediates packed from `op1`, `cRn`, `cRm`, and `op2` fields.
+namespace DC {
+  static inline constexpr uint32_t encode(uint32_t op1, uint32_t cRn, uint32_t cRm, uint32_t op2) noexcept { // Packs the four fields into one value; arguments are not masked.
+    return (op1 << 11) | (cRn << 7) | (cRm << 3) | (op2 << 0); // op1 starts at bit 11, cRn at bit 7, cRm at bit 3, op2 at bit 0.
+  }
+
+  //! Data cache maintenance immediate values; the arguments of each entry are (op1, cRn, cRm, op2).
+  enum Value : uint32_t {
+    kZVA     = encode(0b011, 0b0111, 0b0100, 0b001),
+    kIVAC    = encode(0b000, 0b0111, 0b0110, 0b001),
+    kISW     = encode(0b000, 0b0111, 0b0110, 0b010),
+    kCVAC    = encode(0b011, 0b0111, 0b1010, 0b001),
+    kCSW     = encode(0b000, 0b0111, 0b1010, 0b010),
+    kCVAU    = encode(0b011, 0b0111, 0b1011, 0b001),
+    kCIVAC   = encode(0b011, 0b0111, 0b1110, 0b001),
+    kCISW    = encode(0b000, 0b0111, 0b1110, 0b010),
+    kCVAP    = encode(0b011, 0b0111, 0b1100, 0b001),
+    kCVADP   = encode(0b011, 0b0111, 0b1101, 0b001),
+    kIGVAC   = encode(0b000, 0b0111, 0b0110, 0b011),
+    kIGSW    = encode(0b000, 0b0111, 0b0110, 0b100),
+    kCGSW    = encode(0b000, 0b0111, 0b1010, 0b100),
+    kCIGSW   = encode(0b000, 0b0111, 0b1110, 0b100),
+    kCGVAC   = encode(0b011, 0b0111, 0b1010, 0b011),
+    kCGVAP   = encode(0b011, 0b0111, 0b1100, 0b011),
+    kCGVADP  = encode(0b011, 0b0111, 0b1101, 0b011),
+    kCIGVAC  = encode(0b011, 0b0111, 0b1110, 0b011),
+    kGVA     = encode(0b011, 0b0111, 0b0100, 0b011),
+    kIGDVAC  = encode(0b000, 0b0111, 0b0110, 0b101),
+    kIGDSW   = encode(0b000, 0b0111, 0b0110, 0b110),
+    kCGDSW   = encode(0b000, 0b0111, 0b1010, 0b110),
+    kCIGDSW  = encode(0b000, 0b0111, 0b1110, 0b110),
+    kCGDVAC  = encode(0b011, 0b0111, 0b1010, 0b101),
+    kCGDVAP  = encode(0b011, 0b0111, 0b1100, 0b101),
+    kCGDVADP = encode(0b011, 0b0111, 0b1101, 0b101),
+    kCIGDVAC = encode(0b011, 0b0111, 0b1110, 0b101),
+    kGZVA    = encode(0b011, 0b0111, 0b0100, 0b100)
+  };
+}
+
+//! Instruction cache maintenance options, expressed as immediates packed from `op1`, `cRn`, `cRm`, and `op2` fields.
+namespace IC {
+  static inline constexpr uint32_t encode(uint32_t op1, uint32_t cRn, uint32_t cRm, uint32_t op2) noexcept { // Packs the four fields into one value; arguments are not masked.
+    return (op1 << 11) | (cRn << 7) | (cRm << 3) | (op2 << 0); // op1 starts at bit 11, cRn at bit 7, cRm at bit 3, op2 at bit 0.
+  }
+
+  //! Instruction cache maintenance immediate values; the arguments of each entry are (op1, cRn, cRm, op2).
+  enum Value : uint32_t {
+    kIALLUIS = encode(0b000, 0b0111, 0b0001, 0b000),
+    kIALLU   = encode(0b000, 0b0111, 0b0101, 0b000),
+    kIVAU    = encode(0b011, 0b0111, 0b0101, 0b001)
+  };
+}
+
+//! Instruction-fetch barrier options.
+namespace ISB {
+  //! Instruction-fetch barrier immediate values (a single option is defined here).
+  enum Value : uint32_t {
+    kSY = 0xF // Same numeric value as `DB::kSY` (0x0F).
+  };
+}
+
+//! Prefetch options.
+namespace PRFOp {
+  //! Prefetch immediate values. As read off the table below, bits [4:3] select the PLD/PLI/PST group (0/1/2), bits [2:1] select L1/L2/L3 (0/1/2), and bit 0 selects KEEP (0) or STRM (1).
+  enum Value : uint32_t {
+    kPLDL1KEEP = 0x00,
+    kPLDL1STRM = 0x01,
+    kPLDL2KEEP = 0x02,
+    kPLDL2STRM = 0x03,
+    kPLDL3KEEP = 0x04,
+    kPLDL3STRM = 0x05,
+    kPLIL1KEEP = 0x08,
+    kPLIL1STRM = 0x09,
+    kPLIL2KEEP = 0x0A,
+    kPLIL2STRM = 0x0B,
+    kPLIL3KEEP = 0x0C,
+    kPLIL3STRM = 0x0D,
+    kPSTL1KEEP = 0x10,
+    kPSTL1STRM = 0x11,
+    kPSTL2KEEP = 0x12,
+    kPSTL2STRM = 0x13,
+    kPSTL3KEEP = 0x14,
+    kPSTL3STRM = 0x15
+  };
+}
+
+//! PSB instruction options.
+namespace PSB {
+  //! PSB immediate values (a single option is defined here).
+  enum Value : uint32_t {
+    kCSYNC = 0x11u // Note: differs from `TSB::kCSYNC`, which is 0.
+  };
+}
+
+namespace TLBI { // TLBI operation immediates packed from `op1`, `cRn`, `cRm`, and `op2` fields (presumably the operand of the TLB-invalidate instruction; confirm against the Arm ARM).
+  static inline constexpr uint32_t encode(uint32_t op1, uint32_t cRn, uint32_t cRm, uint32_t op2) noexcept { // Packs the four fields into one value; arguments are not masked.
+    return (op1 << 11) | (cRn << 7) | (cRm << 3) | (op2 << 0); // op1 starts at bit 11, cRn at bit 7, cRm at bit 3, op2 at bit 0.
+  }
+
+  enum Value : uint32_t { // Named TLBI operations; the arguments of each entry are (op1, cRn, cRm, op2), and cRn is 0b1000 throughout.
+    kIPAS2E1IS    = encode(0b100, 0b1000, 0b0000, 0b001),
+    kIPAS2LE1IS   = encode(0b100, 0b1000, 0b0000, 0b101),
+    kVMALLE1IS    = encode(0b000, 0b1000, 0b0011, 0b000),
+    kALLE2IS      = encode(0b100, 0b1000, 0b0011, 0b000),
+    kALLE3IS      = encode(0b110, 0b1000, 0b0011, 0b000),
+    kVAE1IS       = encode(0b000, 0b1000, 0b0011, 0b001),
+    kVAE2IS       = encode(0b100, 0b1000, 0b0011, 0b001),
+    kVAE3IS       = encode(0b110, 0b1000, 0b0011, 0b001),
+    kASIDE1IS     = encode(0b000, 0b1000, 0b0011, 0b010),
+    kVAAE1IS      = encode(0b000, 0b1000, 0b0011, 0b011),
+    kALLE1IS      = encode(0b100, 0b1000, 0b0011, 0b100),
+    kVALE1IS      = encode(0b000, 0b1000, 0b0011, 0b101),
+    kVALE2IS      = encode(0b100, 0b1000, 0b0011, 0b101),
+    kVALE3IS      = encode(0b110, 0b1000, 0b0011, 0b101),
+    kVMALLS12E1IS = encode(0b100, 0b1000, 0b0011, 0b110),
+    kVAALE1IS     = encode(0b000, 0b1000, 0b0011, 0b111),
+    kIPAS2E1      = encode(0b100, 0b1000, 0b0100, 0b001),
+    kIPAS2LE1     = encode(0b100, 0b1000, 0b0100, 0b101),
+    kVMALLE1      = encode(0b000, 0b1000, 0b0111, 0b000),
+    kALLE2        = encode(0b100, 0b1000, 0b0111, 0b000),
+    kALLE3        = encode(0b110, 0b1000, 0b0111, 0b000),
+    kVAE1         = encode(0b000, 0b1000, 0b0111, 0b001),
+    kVAE2         = encode(0b100, 0b1000, 0b0111, 0b001),
+    kVAE3         = encode(0b110, 0b1000, 0b0111, 0b001),
+    kASIDE1       = encode(0b000, 0b1000, 0b0111, 0b010),
+    kVAAE1        = encode(0b000, 0b1000, 0b0111, 0b011),
+    kALLE1        = encode(0b100, 0b1000, 0b0111, 0b100),
+    kVALE1        = encode(0b000, 0b1000, 0b0111, 0b101),
+    kVALE2        = encode(0b100, 0b1000, 0b0111, 0b101),
+    kVALE3        = encode(0b110, 0b1000, 0b0111, 0b101),
+    kVMALLS12E1   = encode(0b100, 0b1000, 0b0111, 0b110),
+    kVAALE1       = encode(0b000, 0b1000, 0b0111, 0b111),
+    // Operations whose names end in `OS` (presumably the outer-shareable variants; confirm).
+    kVMALLE1OS    = encode(0b000, 0b1000, 0b0001, 0b000),
+    kVAE1OS       = encode(0b000, 0b1000, 0b0001, 0b001),
+    kASIDE1OS     = encode(0b000, 0b1000, 0b0001, 0b010),
+    kVAAE1OS      = encode(0b000, 0b1000, 0b0001, 0b011),
+    kVALE1OS      = encode(0b000, 0b1000, 0b0001, 0b101),
+    kVAALE1OS     = encode(0b000, 0b1000, 0b0001, 0b111),
+    kIPAS2E1OS    = encode(0b100, 0b1000, 0b0100, 0b000),
+    kIPAS2LE1OS   = encode(0b100, 0b1000, 0b0100, 0b100),
+    kVAE2OS       = encode(0b100, 0b1000, 0b0001, 0b001),
+    kVALE2OS      = encode(0b100, 0b1000, 0b0001, 0b101),
+    kVMALLS12E1OS = encode(0b100, 0b1000, 0b0001, 0b110),
+    kVAE3OS       = encode(0b110, 0b1000, 0b0001, 0b001),
+    kVALE3OS      = encode(0b110, 0b1000, 0b0001, 0b101),
+    kALLE2OS      = encode(0b100, 0b1000, 0b0001, 0b000),
+    kALLE1OS      = encode(0b100, 0b1000, 0b0001, 0b100),
+    kALLE3OS      = encode(0b110, 0b1000, 0b0001, 0b000),
+    // Operations whose names start with `R` (presumably the range-based variants; confirm).
+    kRVAE1        = encode(0b000, 0b1000, 0b0110, 0b001),
+    kRVAAE1       = encode(0b000, 0b1000, 0b0110, 0b011),
+    kRVALE1       = encode(0b000, 0b1000, 0b0110, 0b101),
+    kRVAALE1      = encode(0b000, 0b1000, 0b0110, 0b111),
+    kRVAE1IS      = encode(0b000, 0b1000, 0b0010, 0b001),
+    kRVAAE1IS     = encode(0b000, 0b1000, 0b0010, 0b011),
+    kRVALE1IS     = encode(0b000, 0b1000, 0b0010, 0b101),
+    kRVAALE1IS    = encode(0b000, 0b1000, 0b0010, 0b111),
+    kRVAE1OS      = encode(0b000, 0b1000, 0b0101, 0b001),
+    kRVAAE1OS     = encode(0b000, 0b1000, 0b0101, 0b011),
+    kRVALE1OS     = encode(0b000, 0b1000, 0b0101, 0b101),
+    kRVAALE1OS    = encode(0b000, 0b1000, 0b0101, 0b111),
+    kRIPAS2E1IS   = encode(0b100, 0b1000, 0b0000, 0b010),
+    kRIPAS2LE1IS  = encode(0b100, 0b1000, 0b0000, 0b110),
+    kRIPAS2E1     = encode(0b100, 0b1000, 0b0100, 0b010),
+    kRIPAS2LE1    = encode(0b100, 0b1000, 0b0100, 0b110),
+    kRIPAS2E1OS   = encode(0b100, 0b1000, 0b0100, 0b011),
+    kRIPAS2LE1OS  = encode(0b100, 0b1000, 0b0100, 0b111),
+    kRVAE2        = encode(0b100, 0b1000, 0b0110, 0b001),
+    kRVALE2       = encode(0b100, 0b1000, 0b0110, 0b101),
+    kRVAE2IS      = encode(0b100, 0b1000, 0b0010, 0b001),
+    kRVALE2IS     = encode(0b100, 0b1000, 0b0010, 0b101),
+    kRVAE2OS      = encode(0b100, 0b1000, 0b0101, 0b001),
+    kRVALE2OS     = encode(0b100, 0b1000, 0b0101, 0b101),
+    kRVAE3        = encode(0b110, 0b1000, 0b0110, 0b001),
+    kRVALE3       = encode(0b110, 0b1000, 0b0110, 0b101),
+    kRVAE3IS      = encode(0b110, 0b1000, 0b0010, 0b001),
+    kRVALE3IS     = encode(0b110, 0b1000, 0b0010, 0b101),
+    kRVAE3OS      = encode(0b110, 0b1000, 0b0101, 0b001),
+    kRVALE3OS     = encode(0b110, 0b1000, 0b0101, 0b101),
+  };
+}
+
+//! Trace synchronization barrier options.
+namespace TSB {
+  //! Trace synchronization immediate values (a single option is defined here).
+  enum Value : uint32_t {
+    kCSYNC = 0 // Note: differs from `PSB::kCSYNC`, which is 0x11.
+  };
+}
+
+//! Processor state access through MSR.
+namespace PState {
+  //! Encodes a pstate field from `op1` and `op2` (the field names of the MSR (immediate) encoding): `op1` is shifted left by 3 and OR-ed with `op2`; arguments are not masked.
+  static inline constexpr uint32_t encode(uint32_t op1, uint32_t op2) noexcept {
+    return (op1 << 3) | (op2 << 0);
+  }
+
+  //! Processor state access immediates; the arguments of each entry are (op1, op2).
+  enum Value : uint32_t {
+    kSPSel   = encode(0b000, 0b101),
+    kDAIFSet = encode(0b011, 0b110),
+    kDAIFClr = encode(0b011, 0b111),
+    kPAN     = encode(0b000, 0b100),
+    kUAO     = encode(0b000, 0b011),
+    kDIT     = encode(0b011, 0b010),
+    kSSBS    = encode(0b011, 0b001),
+    kTCO     = encode(0b011, 0b100)
+  };
+}
+
+//! System register identifiers and utilities (MSR/MRS).
+namespace SysReg {
+  //! System register fields, as produced by `decode()` and consumed by `encode(const Fields&)`.
+  struct Fields {
+    uint8_t op0; // decode() fills this from id bits [15:14] (2 bits).
+    uint8_t op1; // decode() fills this from id bits [13:11] (3 bits).
+    uint8_t cRn; // decode() fills this from id bits [10:7] (4 bits).
+    uint8_t cRm; // decode() fills this from id bits [6:3] (4 bits).
+    uint8_t op2; // decode() fills this from id bits [2:0] (3 bits).
+  };
+
+  //! Encodes a system register from `op0`, `op1`, `cRn`, `cRm`, and `op2` fields by shifting each to its position and OR-ing them together; arguments are not masked.
+  static inline constexpr uint32_t encode(uint32_t op0, uint32_t op1, uint32_t cRn, uint32_t cRm, uint32_t op2) noexcept {
+    return (op0 << 14) | (op1 << 11) | (cRn << 7) | (cRm << 3) | (op2 << 0); // op0 starts at bit 14, op1 at bit 11, cRn at bit 7, cRm at bit 3, op2 at bit 0.
+  }
+
+  //! Encodes a system register from `fields` by forwarding its five members to the five-argument `encode()`.
+  static inline constexpr uint32_t encode(const Fields& fields) noexcept {
+    return encode(fields.op0, fields.op1, fields.cRn, fields.cRm, fields.op2); // Each uint8_t member is passed as a uint32_t argument.
+  }
+
+  //! Decodes a system register `id` to \ref Fields by shifting and masking each field; bits of `id` above bit 15 are ignored.
+  static inline constexpr Fields decode(uint32_t id) noexcept {
+    return Fields {
+      uint8_t((id >> 14) & 0x3u), // op0: bits [15:14].
+      uint8_t((id >> 11) & 0x7u), // op1: bits [13:11].
+      uint8_t((id >>  7) & 0xFu), // cRn: bits [10:7].
+      uint8_t((id >>  3) & 0xFu), // cRm: bits [6:3].
+      uint8_t((id >>  0) & 0x7u)  // op2: bits [2:0].
+    };
+  }
+
+  //! System register identifiers.
+  enum Id : uint32_t {
+    kACTLR_EL1            = encode(0b11, 0b000, 0b0001, 0b0000, 0b001), // RW
+    kACTLR_EL2            = encode(0b11, 0b100, 0b0001, 0b0000, 0b001), // RW
+    kACTLR_EL3            = encode(0b11, 0b110, 0b0001, 0b0000, 0b001), // RW
+    kAFSR0_EL1            = encode(0b11, 0b000, 0b0101, 0b0001, 0b000), // RW
+    kAFSR0_EL12           = encode(0b11, 0b101, 0b0101, 0b0001, 0b000), // RW
+    kAFSR0_EL2            = encode(0b11, 0b100, 0b0101, 0b0001, 0b000), // RW
+    kAFSR0_EL3            = encode(0b11, 0b110, 0b0101, 0b0001, 0b000), // RW
+    kAFSR1_EL1            = encode(0b11, 0b000, 0b0101, 0b0001, 0b001), // RW
+    kAFSR1_EL12           = encode(0b11, 0b101, 0b0101, 0b0001, 0b001), // RW
+    kAFSR1_EL2            = encode(0b11, 0b100, 0b0101, 0b0001, 0b001), // RW
+    kAFSR1_EL3            = encode(0b11, 0b110, 0b0101, 0b0001, 0b001), // RW
+    kAIDR_EL1             = encode(0b11, 0b001, 0b0000, 0b0000, 0b111), // RO
+    kAMAIR_EL1            = encode(0b11, 0b000, 0b1010, 0b0011, 0b000), // RW
+    kAMAIR_EL12           = encode(0b11, 0b101, 0b1010, 0b0011, 0b000), // RW
+    kAMAIR_EL2            = encode(0b11, 0b100, 0b1010, 0b0011, 0b000), // RW
+    kAMAIR_EL3            = encode(0b11, 0b110, 0b1010, 0b0011, 0b000), // RW
+    kAMCFGR_EL0           = encode(0b11, 0b011, 0b1101, 0b0010, 0b001), // RO
+    kAMCGCR_EL0           = encode(0b11, 0b011, 0b1101, 0b0010, 0b010), // RO
+    kAMCNTENCLR0_EL0      = encode(0b11, 0b011, 0b1101, 0b0010, 0b100), // RW
+    kAMCNTENCLR1_EL0      = encode(0b11, 0b011, 0b1101, 0b0011, 0b000), // RW
+    kAMCNTENSET0_EL0      = encode(0b11, 0b011, 0b1101, 0b0010, 0b101), // RW
+    kAMCNTENSET1_EL0      = encode(0b11, 0b011, 0b1101, 0b0011, 0b001), // RW
+    kAMCR_EL0             = encode(0b11, 0b011, 0b1101, 0b0010, 0b000), // RW
+    kAMEVCNTR00_EL0       = encode(0b11, 0b011, 0b1101, 0b0100, 0b000), // RW
+    kAMEVCNTR01_EL0       = encode(0b11, 0b011, 0b1101, 0b0100, 0b001), // RW
+    kAMEVCNTR02_EL0       = encode(0b11, 0b011, 0b1101, 0b0100, 0b010), // RW
+    kAMEVCNTR03_EL0       = encode(0b11, 0b011, 0b1101, 0b0100, 0b011), // RW
+    kAMEVCNTR10_EL0       = encode(0b11, 0b011, 0b1101, 0b1100, 0b000), // RW
+    kAMEVCNTR110_EL0      = encode(0b11, 0b011, 0b1101, 0b1101, 0b010), // RW
+    kAMEVCNTR111_EL0      = encode(0b11, 0b011, 0b1101, 0b1101, 0b011), // RW
+    kAMEVCNTR112_EL0      = encode(0b11, 0b011, 0b1101, 0b1101, 0b100), // RW
+    kAMEVCNTR113_EL0      = encode(0b11, 0b011, 0b1101, 0b1101, 0b101), // RW
+    kAMEVCNTR114_EL0      = encode(0b11, 0b011, 0b1101, 0b1101, 0b110), // RW
+    kAMEVCNTR115_EL0      = encode(0b11, 0b011, 0b1101, 0b1101, 0b111), // RW
+    kAMEVCNTR11_EL0       = encode(0b11, 0b011, 0b1101, 0b1100, 0b001), // RW
+    kAMEVCNTR12_EL0       = encode(0b11, 0b011, 0b1101, 0b1100, 0b010), // RW
+    kAMEVCNTR13_EL0       = encode(0b11, 0b011, 0b1101, 0b1100, 0b011), // RW
+    kAMEVCNTR14_EL0       = encode(0b11, 0b011, 0b1101, 0b1100, 0b100), // RW
+    kAMEVCNTR15_EL0       = encode(0b11, 0b011, 0b1101, 0b1100, 0b101), // RW
+    kAMEVCNTR16_EL0       = encode(0b11, 0b011, 0b1101, 0b1100, 0b110), // RW
+    kAMEVCNTR17_EL0       = encode(0b11, 0b011, 0b1101, 0b1100, 0b111), // RW
+    kAMEVCNTR18_EL0       = encode(0b11, 0b011, 0b1101, 0b1101, 0b000), // RW
+    kAMEVCNTR19_EL0       = encode(0b11, 0b011, 0b1101, 0b1101, 0b001), // RW
+    kAMEVTYPER00_EL0      = encode(0b11, 0b011, 0b1101, 0b0110, 0b000), // RO
+    kAMEVTYPER01_EL0      = encode(0b11, 0b011, 0b1101, 0b0110, 0b001), // RO
+    kAMEVTYPER02_EL0      = encode(0b11, 0b011, 0b1101, 0b0110, 0b010), // RO
+    kAMEVTYPER03_EL0      = encode(0b11, 0b011, 0b1101, 0b0110, 0b011), // RO
+    kAMEVTYPER10_EL0      = encode(0b11, 0b011, 0b1101, 0b1110, 0b000), // RW
+    kAMEVTYPER110_EL0     = encode(0b11, 0b011, 0b1101, 0b1111, 0b010), // RW
+    kAMEVTYPER111_EL0     = encode(0b11, 0b011, 0b1101, 0b1111, 0b011), // RW
+    kAMEVTYPER112_EL0     = encode(0b11, 0b011, 0b1101, 0b1111, 0b100), // RW
+    kAMEVTYPER113_EL0     = encode(0b11, 0b011, 0b1101, 0b1111, 0b101), // RW
+    kAMEVTYPER114_EL0     = encode(0b11, 0b011, 0b1101, 0b1111, 0b110), // RW
+    kAMEVTYPER115_EL0     = encode(0b11, 0b011, 0b1101, 0b1111, 0b111), // RW
+    kAMEVTYPER11_EL0      = encode(0b11, 0b011, 0b1101, 0b1110, 0b001), // RW
+    kAMEVTYPER12_EL0      = encode(0b11, 0b011, 0b1101, 0b1110, 0b010), // RW
+    kAMEVTYPER13_EL0      = encode(0b11, 0b011, 0b1101, 0b1110, 0b011), // RW
+    kAMEVTYPER14_EL0      = encode(0b11, 0b011, 0b1101, 0b1110, 0b100), // RW
+    kAMEVTYPER15_EL0      = encode(0b11, 0b011, 0b1101, 0b1110, 0b101), // RW
+    kAMEVTYPER16_EL0      = encode(0b11, 0b011, 0b1101, 0b1110, 0b110), // RW
+    kAMEVTYPER17_EL0      = encode(0b11, 0b011, 0b1101, 0b1110, 0b111), // RW
+    kAMEVTYPER18_EL0      = encode(0b11, 0b011, 0b1101, 0b1111, 0b000), // RW
+    kAMEVTYPER19_EL0      = encode(0b11, 0b011, 0b1101, 0b1111, 0b001), // RW
+    kAMUSERENR_EL0        = encode(0b11, 0b011, 0b1101, 0b0010, 0b011), // RW
+    kAPDAKeyHi_EL1        = encode(0b11, 0b000, 0b0010, 0b0010, 0b001), // RW
+    kAPDAKeyLo_EL1        = encode(0b11, 0b000, 0b0010, 0b0010, 0b000), // RW
+    kAPDBKeyHi_EL1        = encode(0b11, 0b000, 0b0010, 0b0010, 0b011), // RW
+    kAPDBKeyLo_EL1        = encode(0b11, 0b000, 0b0010, 0b0010, 0b010), // RW
+    kAPGAKeyHi_EL1        = encode(0b11, 0b000, 0b0010, 0b0011, 0b001), // RW
+    kAPGAKeyLo_EL1        = encode(0b11, 0b000, 0b0010, 0b0011, 0b000), // RW
+    kAPIAKeyHi_EL1        = encode(0b11, 0b000, 0b0010, 0b0001, 0b001), // RW
+    kAPIAKeyLo_EL1        = encode(0b11, 0b000, 0b0010, 0b0001, 0b000), // RW
+    kAPIBKeyHi_EL1        = encode(0b11, 0b000, 0b0010, 0b0001, 0b011), // RW
+    kAPIBKeyLo_EL1        = encode(0b11, 0b000, 0b0010, 0b0001, 0b010), // RW
+    kCCSIDR2_EL1          = encode(0b11, 0b001, 0b0000, 0b0000, 0b010), // RO
+    kCCSIDR_EL1           = encode(0b11, 0b001, 0b0000, 0b0000, 0b000), // RO
+    kCLIDR_EL1            = encode(0b11, 0b001, 0b0000, 0b0000, 0b001), // RO
+    kCNTFRQ_EL0           = encode(0b11, 0b011, 0b1110, 0b0000, 0b000), // RW
+    kCNTHCTL_EL2          = encode(0b11, 0b100, 0b1110, 0b0001, 0b000), // RW
+    kCNTHPS_CTL_EL2       = encode(0b11, 0b100, 0b1110, 0b0101, 0b001), // RW
+    kCNTHPS_CVAL_EL2      = encode(0b11, 0b100, 0b1110, 0b0101, 0b010), // RW
+    kCNTHPS_TVAL_EL2      = encode(0b11, 0b100, 0b1110, 0b0101, 0b000), // RW
+    kCNTHP_CTL_EL2        = encode(0b11, 0b100, 0b1110, 0b0010, 0b001), // RW
+    kCNTHP_CVAL_EL2       = encode(0b11, 0b100, 0b1110, 0b0010, 0b010), // RW
+    kCNTHP_TVAL_EL2       = encode(0b11, 0b100, 0b1110, 0b0010, 0b000), // RW
+    kCNTHVS_CTL_EL2       = encode(0b11, 0b100, 0b1110, 0b0100, 0b001), // RW
+    kCNTHVS_CVAL_EL2      = encode(0b11, 0b100, 0b1110, 0b0100, 0b010), // RW
+    kCNTHVS_TVAL_EL2      = encode(0b11, 0b100, 0b1110, 0b0100, 0b000), // RW
+    kCNTHV_CTL_EL2        = encode(0b11, 0b100, 0b1110, 0b0011, 0b001), // RW
+    kCNTHV_CVAL_EL2       = encode(0b11, 0b100, 0b1110, 0b0011, 0b010), // RW
+    kCNTHV_TVAL_EL2       = encode(0b11, 0b100, 0b1110, 0b0011, 0b000), // RW
+    kCNTISCALE_EL2        = encode(0b11, 0b100, 0b1110, 0b0000, 0b101), // RW
+    kCNTKCTL_EL1          = encode(0b11, 0b000, 0b1110, 0b0001, 0b000), // RW
+    kCNTKCTL_EL12         = encode(0b11, 0b101, 0b1110, 0b0001, 0b000), // RW
+    kCNTPCTSS_EL0         = encode(0b11, 0b011, 0b1110, 0b0000, 0b101), // RW
+    kCNTPCT_EL0           = encode(0b11, 0b011, 0b1110, 0b0000, 0b001), // RO
+    kCNTPOFF_EL2          = encode(0b11, 0b100, 0b1110, 0b0000, 0b110), // RW
+    kCNTPS_CTL_EL1        = encode(0b11, 0b111, 0b1110, 0b0010, 0b001), // RW
+    kCNTPS_CVAL_EL1       = encode(0b11, 0b111, 0b1110, 0b0010, 0b010), // RW
+    kCNTPS_TVAL_EL1       = encode(0b11, 0b111, 0b1110, 0b0010, 0b000), // RW
+    kCNTP_CTL_EL0         = encode(0b11, 0b011, 0b1110, 0b0010, 0b001), // RW
+    kCNTP_CTL_EL02        = encode(0b11, 0b101, 0b1110, 0b0010, 0b001), // RW
+    kCNTP_CVAL_EL0        = encode(0b11, 0b011, 0b1110, 0b0010, 0b010), // RW
+    kCNTP_CVAL_EL02       = encode(0b11, 0b101, 0b1110, 0b0010, 0b010), // RW
+    kCNTP_TVAL_EL0        = encode(0b11, 0b011, 0b1110, 0b0010, 0b000), // RW
+    kCNTP_TVAL_EL02       = encode(0b11, 0b101, 0b1110, 0b0010, 0b000), // RW
+    kCNTSCALE_EL2         = encode(0b11, 0b100, 0b1110, 0b0000, 0b100), // RW
+    kCNTVCTSS_EL0         = encode(0b11, 0b011, 0b1110, 0b0000, 0b110), // RW
+    kCNTVCT_EL0           = encode(0b11, 0b011, 0b1110, 0b0000, 0b010), // RO
+    kCNTVFRQ_EL2          = encode(0b11, 0b100, 0b1110, 0b0000, 0b111), // RW
+    kCNTVOFF_EL2          = encode(0b11, 0b100, 0b1110, 0b0000, 0b011), // RW
+    kCNTV_CTL_EL0         = encode(0b11, 0b011, 0b1110, 0b0011, 0b001), // RW
+    kCNTV_CTL_EL02        = encode(0b11, 0b101, 0b1110, 0b0011, 0b001), // RW
+    kCNTV_CVAL_EL0        = encode(0b11, 0b011, 0b1110, 0b0011, 0b010), // RW
+    kCNTV_CVAL_EL02       = encode(0b11, 0b101, 0b1110, 0b0011, 0b010), // RW
+    kCNTV_TVAL_EL0        = encode(0b11, 0b011, 0b1110, 0b0011, 0b000), // RW
+    kCNTV_TVAL_EL02       = encode(0b11, 0b101, 0b1110, 0b0011, 0b000), // RW
+    kCONTEXTIDR_EL1       = encode(0b11, 0b000, 0b1101, 0b0000, 0b001), // RW
+    kCONTEXTIDR_EL12      = encode(0b11, 0b101, 0b1101, 0b0000, 0b001), // RW
+    kCONTEXTIDR_EL2       = encode(0b11, 0b100, 0b1101, 0b0000, 0b001), // RW
+    kCPACR_EL1            = encode(0b11, 0b000, 0b0001, 0b0000, 0b010), // RW
+    kCPACR_EL12           = encode(0b11, 0b101, 0b0001, 0b0000, 0b010), // RW
+    kCPM_IOACC_CTL_EL3    = encode(0b11, 0b111, 0b1111, 0b0010, 0b000), // RW
+    kCPTR_EL2             = encode(0b11, 0b100, 0b0001, 0b0001, 0b010), // RW
+    kCPTR_EL3             = encode(0b11, 0b110, 0b0001, 0b0001, 0b010), // RW
+    kCSSELR_EL1           = encode(0b11, 0b010, 0b0000, 0b0000, 0b000), // RW
+    kCTR_EL0              = encode(0b11, 0b011, 0b0000, 0b0000, 0b001), // RO
+    kCurrentEL            = encode(0b11, 0b000, 0b0100, 0b0010, 0b010), // RO
+    kDACR32_EL2           = encode(0b11, 0b100, 0b0011, 0b0000, 0b000), // RW
+    kDAIF                 = encode(0b11, 0b011, 0b0100, 0b0010, 0b001), // RW
+    kDBGAUTHSTATUS_EL1    = encode(0b10, 0b000, 0b0111, 0b1110, 0b110), // RO
+    kDBGBCR0_EL1          = encode(0b10, 0b000, 0b0000, 0b0000, 0b101), // RW
+    kDBGBCR10_EL1         = encode(0b10, 0b000, 0b0000, 0b1010, 0b101), // RW
+    kDBGBCR11_EL1         = encode(0b10, 0b000, 0b0000, 0b1011, 0b101), // RW
+    kDBGBCR12_EL1         = encode(0b10, 0b000, 0b0000, 0b1100, 0b101), // RW
+    kDBGBCR13_EL1         = encode(0b10, 0b000, 0b0000, 0b1101, 0b101), // RW
+    kDBGBCR14_EL1         = encode(0b10, 0b000, 0b0000, 0b1110, 0b101), // RW
+    kDBGBCR15_EL1         = encode(0b10, 0b000, 0b0000, 0b1111, 0b101), // RW
+    kDBGBCR1_EL1          = encode(0b10, 0b000, 0b0000, 0b0001, 0b101), // RW
+    kDBGBCR2_EL1          = encode(0b10, 0b000, 0b0000, 0b0010, 0b101), // RW
+    kDBGBCR3_EL1          = encode(0b10, 0b000, 0b0000, 0b0011, 0b101), // RW
+    kDBGBCR4_EL1          = encode(0b10, 0b000, 0b0000, 0b0100, 0b101), // RW
+    kDBGBCR5_EL1          = encode(0b10, 0b000, 0b0000, 0b0101, 0b101), // RW
+    kDBGBCR6_EL1          = encode(0b10, 0b000, 0b0000, 0b0110, 0b101), // RW
+    kDBGBCR7_EL1          = encode(0b10, 0b000, 0b0000, 0b0111, 0b101), // RW
+    kDBGBCR8_EL1          = encode(0b10, 0b000, 0b0000, 0b1000, 0b101), // RW
+    kDBGBCR9_EL1          = encode(0b10, 0b000, 0b0000, 0b1001, 0b101), // RW
+    kDBGBVR0_EL1          = encode(0b10, 0b000, 0b0000, 0b0000, 0b100), // RW
+    kDBGBVR10_EL1         = encode(0b10, 0b000, 0b0000, 0b1010, 0b100), // RW
+    kDBGBVR11_EL1         = encode(0b10, 0b000, 0b0000, 0b1011, 0b100), // RW
+    kDBGBVR12_EL1         = encode(0b10, 0b000, 0b0000, 0b1100, 0b100), // RW
+    kDBGBVR13_EL1         = encode(0b10, 0b000, 0b0000, 0b1101, 0b100), // RW
+    kDBGBVR14_EL1         = encode(0b10, 0b000, 0b0000, 0b1110, 0b100), // RW
+    kDBGBVR15_EL1         = encode(0b10, 0b000, 0b0000, 0b1111, 0b100), // RW
+    kDBGBVR1_EL1          = encode(0b10, 0b000, 0b0000, 0b0001, 0b100), // RW
+    kDBGBVR2_EL1          = encode(0b10, 0b000, 0b0000, 0b0010, 0b100), // RW
+    kDBGBVR3_EL1          = encode(0b10, 0b000, 0b0000, 0b0011, 0b100), // RW
+    kDBGBVR4_EL1          = encode(0b10, 0b000, 0b0000, 0b0100, 0b100), // RW
+    kDBGBVR5_EL1          = encode(0b10, 0b000, 0b0000, 0b0101, 0b100), // RW
+    kDBGBVR6_EL1          = encode(0b10, 0b000, 0b0000, 0b0110, 0b100), // RW
+    kDBGBVR7_EL1          = encode(0b10, 0b000, 0b0000, 0b0111, 0b100), // RW
+    kDBGBVR8_EL1          = encode(0b10, 0b000, 0b0000, 0b1000, 0b100), // RW
+    kDBGBVR9_EL1          = encode(0b10, 0b000, 0b0000, 0b1001, 0b100), // RW
+    kDBGCLAIMCLR_EL1      = encode(0b10, 0b000, 0b0111, 0b1001, 0b110), // RW
+    kDBGCLAIMSET_EL1      = encode(0b10, 0b000, 0b0111, 0b1000, 0b110), // RW
+    kDBGDTRRX_EL0         = encode(0b10, 0b011, 0b0000, 0b0101, 0b000), // RO
+    kDBGDTRTX_EL0         = encode(0b10, 0b011, 0b0000, 0b0101, 0b000), // WO
+    kDBGDTR_EL0           = encode(0b10, 0b011, 0b0000, 0b0100, 0b000), // RW
+    kDBGPRCR_EL1          = encode(0b10, 0b000, 0b0001, 0b0100, 0b100), // RW
+    kDBGVCR32_EL2         = encode(0b10, 0b100, 0b0000, 0b0111, 0b000), // RW
+    kDBGWCR0_EL1          = encode(0b10, 0b000, 0b0000, 0b0000, 0b111), // RW
+    kDBGWCR10_EL1         = encode(0b10, 0b000, 0b0000, 0b1010, 0b111), // RW
+    kDBGWCR11_EL1         = encode(0b10, 0b000, 0b0000, 0b1011, 0b111), // RW
+    kDBGWCR12_EL1         = encode(0b10, 0b000, 0b0000, 0b1100, 0b111), // RW
+    kDBGWCR13_EL1         = encode(0b10, 0b000, 0b0000, 0b1101, 0b111), // RW
+    kDBGWCR14_EL1         = encode(0b10, 0b000, 0b0000, 0b1110, 0b111), // RW
+    kDBGWCR15_EL1         = encode(0b10, 0b000, 0b0000, 0b1111, 0b111), // RW
+    kDBGWCR1_EL1          = encode(0b10, 0b000, 0b0000, 0b0001, 0b111), // RW
+    kDBGWCR2_EL1          = encode(0b10, 0b000, 0b0000, 0b0010, 0b111), // RW
+    kDBGWCR3_EL1          = encode(0b10, 0b000, 0b0000, 0b0011, 0b111), // RW
+    kDBGWCR4_EL1          = encode(0b10, 0b000, 0b0000, 0b0100, 0b111), // RW
+    kDBGWCR5_EL1          = encode(0b10, 0b000, 0b0000, 0b0101, 0b111), // RW
+    kDBGWCR6_EL1          = encode(0b10, 0b000, 0b0000, 0b0110, 0b111), // RW
+    kDBGWCR7_EL1          = encode(0b10, 0b000, 0b0000, 0b0111, 0b111), // RW
+    kDBGWCR8_EL1          = encode(0b10, 0b000, 0b0000, 0b1000, 0b111), // RW
+    kDBGWCR9_EL1          = encode(0b10, 0b000, 0b0000, 0b1001, 0b111), // RW
+    kDBGWVR0_EL1          = encode(0b10, 0b000, 0b0000, 0b0000, 0b110), // RW
+    kDBGWVR10_EL1         = encode(0b10, 0b000, 0b0000, 0b1010, 0b110), // RW
+    kDBGWVR11_EL1         = encode(0b10, 0b000, 0b0000, 0b1011, 0b110), // RW
+    kDBGWVR12_EL1         = encode(0b10, 0b000, 0b0000, 0b1100, 0b110), // RW
+    kDBGWVR13_EL1         = encode(0b10, 0b000, 0b0000, 0b1101, 0b110), // RW
+    kDBGWVR14_EL1         = encode(0b10, 0b000, 0b0000, 0b1110, 0b110), // RW
+    kDBGWVR15_EL1         = encode(0b10, 0b000, 0b0000, 0b1111, 0b110), // RW
+    kDBGWVR1_EL1          = encode(0b10, 0b000, 0b0000, 0b0001, 0b110), // RW
+    kDBGWVR2_EL1          = encode(0b10, 0b000, 0b0000, 0b0010, 0b110), // RW
+    kDBGWVR3_EL1          = encode(0b10, 0b000, 0b0000, 0b0011, 0b110), // RW
+    kDBGWVR4_EL1          = encode(0b10, 0b000, 0b0000, 0b0100, 0b110), // RW
+    kDBGWVR5_EL1          = encode(0b10, 0b000, 0b0000, 0b0101, 0b110), // RW
+    kDBGWVR6_EL1          = encode(0b10, 0b000, 0b0000, 0b0110, 0b110), // RW
+    kDBGWVR7_EL1          = encode(0b10, 0b000, 0b0000, 0b0111, 0b110), // RW
+    kDBGWVR8_EL1          = encode(0b10, 0b000, 0b0000, 0b1000, 0b110), // RW
+    kDBGWVR9_EL1          = encode(0b10, 0b000, 0b0000, 0b1001, 0b110), // RW
+    kDCZID_EL0            = encode(0b11, 0b011, 0b0000, 0b0000, 0b111), // RO
+    kDISR_EL1             = encode(0b11, 0b000, 0b1100, 0b0001, 0b001), // RW
+    kDIT                  = encode(0b11, 0b011, 0b0100, 0b0010, 0b101), // RW
+    kDLR_EL0              = encode(0b11, 0b011, 0b0100, 0b0101, 0b001), // RW
+    kDSPSR_EL0            = encode(0b11, 0b011, 0b0100, 0b0101, 0b000), // RW
+    kELR_EL1              = encode(0b11, 0b000, 0b0100, 0b0000, 0b001), // RW
+    kELR_EL12             = encode(0b11, 0b101, 0b0100, 0b0000, 0b001), // RW
+    kELR_EL2              = encode(0b11, 0b100, 0b0100, 0b0000, 0b001), // RW
+    kELR_EL3              = encode(0b11, 0b110, 0b0100, 0b0000, 0b001), // RW
+    kERRIDR_EL1           = encode(0b11, 0b000, 0b0101, 0b0011, 0b000), // RO
+    kERRSELR_EL1          = encode(0b11, 0b000, 0b0101, 0b0011, 0b001), // RW
+    kERXADDR_EL1          = encode(0b11, 0b000, 0b0101, 0b0100, 0b011), // RW
+    kERXCTLR_EL1          = encode(0b11, 0b000, 0b0101, 0b0100, 0b001), // RW
+    kERXFR_EL1            = encode(0b11, 0b000, 0b0101, 0b0100, 0b000), // RO
+    kERXMISC0_EL1         = encode(0b11, 0b000, 0b0101, 0b0101, 0b000), // RW
+    kERXMISC1_EL1         = encode(0b11, 0b000, 0b0101, 0b0101, 0b001), // RW
+    kERXMISC2_EL1         = encode(0b11, 0b000, 0b0101, 0b0101, 0b010), // RW
+    kERXMISC3_EL1         = encode(0b11, 0b000, 0b0101, 0b0101, 0b011), // RW
+    kERXPFGCDN_EL1        = encode(0b11, 0b000, 0b0101, 0b0100, 0b110), // RW
+    kERXPFGCTL_EL1        = encode(0b11, 0b000, 0b0101, 0b0100, 0b101), // RW
+    kERXPFGF_EL1          = encode(0b11, 0b000, 0b0101, 0b0100, 0b100), // RO
+    kERXSTATUS_EL1        = encode(0b11, 0b000, 0b0101, 0b0100, 0b010), // RW
+    kESR_EL1              = encode(0b11, 0b000, 0b0101, 0b0010, 0b000), // RW
+    kESR_EL12             = encode(0b11, 0b101, 0b0101, 0b0010, 0b000), // RW
+    kESR_EL2              = encode(0b11, 0b100, 0b0101, 0b0010, 0b000), // RW
+    kESR_EL3              = encode(0b11, 0b110, 0b0101, 0b0010, 0b000), // RW
+    kFAR_EL1              = encode(0b11, 0b000, 0b0110, 0b0000, 0b000), // RW
+    kFAR_EL12             = encode(0b11, 0b101, 0b0110, 0b0000, 0b000), // RW
+    kFAR_EL2              = encode(0b11, 0b100, 0b0110, 0b0000, 0b000), // RW
+    kFAR_EL3              = encode(0b11, 0b110, 0b0110, 0b0000, 0b000), // RW
+    kFPCR                 = encode(0b11, 0b011, 0b0100, 0b0100, 0b000), // RW
+    kFPEXC32_EL2          = encode(0b11, 0b100, 0b0101, 0b0011, 0b000), // RW
+    kFPSR                 = encode(0b11, 0b011, 0b0100, 0b0100, 0b001), // RW
+    kGCR_EL1              = encode(0b11, 0b000, 0b0001, 0b0000, 0b110), // RW
+    kGMID_EL1             = encode(0b11, 0b001, 0b0000, 0b0000, 0b100), // RO
+    kHACR_EL2             = encode(0b11, 0b100, 0b0001, 0b0001, 0b111), // RW
+    kHCR_EL2              = encode(0b11, 0b100, 0b0001, 0b0001, 0b000), // RW
+    kHDFGRTR_EL2          = encode(0b11, 0b100, 0b0011, 0b0001, 0b100), // RW
+    kHDFGWTR_EL2          = encode(0b11, 0b100, 0b0011, 0b0001, 0b101), // RW
+    kHFGITR_EL2           = encode(0b11, 0b100, 0b0001, 0b0001, 0b110), // RW
+    kHFGRTR_EL2           = encode(0b11, 0b100, 0b0001, 0b0001, 0b100), // RW
+    kHFGWTR_EL2           = encode(0b11, 0b100, 0b0001, 0b0001, 0b101), // RW
+    kHPFAR_EL2            = encode(0b11, 0b100, 0b0110, 0b0000, 0b100), // RW
+    kHSTR_EL2             = encode(0b11, 0b100, 0b0001, 0b0001, 0b011), // RW
+    kICC_AP0R0_EL1        = encode(0b11, 0b000, 0b1100, 0b1000, 0b100), // RW
+    kICC_AP0R1_EL1        = encode(0b11, 0b000, 0b1100, 0b1000, 0b101), // RW
+    kICC_AP0R2_EL1        = encode(0b11, 0b000, 0b1100, 0b1000, 0b110), // RW
+    kICC_AP0R3_EL1        = encode(0b11, 0b000, 0b1100, 0b1000, 0b111), // RW
+    kICC_AP1R0_EL1        = encode(0b11, 0b000, 0b1100, 0b1001, 0b000), // RW
+    kICC_AP1R1_EL1        = encode(0b11, 0b000, 0b1100, 0b1001, 0b001), // RW
+    kICC_AP1R2_EL1        = encode(0b11, 0b000, 0b1100, 0b1001, 0b010), // RW
+    kICC_AP1R3_EL1        = encode(0b11, 0b000, 0b1100, 0b1001, 0b011), // RW
+    kICC_ASGI1R_EL1       = encode(0b11, 0b000, 0b1100, 0b1011, 0b110), // WO
+    kICC_BPR0_EL1         = encode(0b11, 0b000, 0b1100, 0b1000, 0b011), // RW
+    kICC_BPR1_EL1         = encode(0b11, 0b000, 0b1100, 0b1100, 0b011), // RW
+    kICC_CTLR_EL1         = encode(0b11, 0b000, 0b1100, 0b1100, 0b100), // RW
+    kICC_CTLR_EL3         = encode(0b11, 0b110, 0b1100, 0b1100, 0b100), // RW
+    kICC_DIR_EL1          = encode(0b11, 0b000, 0b1100, 0b1011, 0b001), // WO
+    kICC_EOIR0_EL1        = encode(0b11, 0b000, 0b1100, 0b1000, 0b001), // WO
+    kICC_EOIR1_EL1        = encode(0b11, 0b000, 0b1100, 0b1100, 0b001), // WO
+    kICC_HPPIR0_EL1       = encode(0b11, 0b000, 0b1100, 0b1000, 0b010), // RO
+    kICC_HPPIR1_EL1       = encode(0b11, 0b000, 0b1100, 0b1100, 0b010), // RO
+    kICC_IAR0_EL1         = encode(0b11, 0b000, 0b1100, 0b1000, 0b000), // RO
+    kICC_IAR1_EL1         = encode(0b11, 0b000, 0b1100, 0b1100, 0b000), // RO
+    kICC_IGRPEN0_EL1      = encode(0b11, 0b000, 0b1100, 0b1100, 0b110), // RW
+    kICC_IGRPEN1_EL1      = encode(0b11, 0b000, 0b1100, 0b1100, 0b111), // RW
+    kICC_IGRPEN1_EL3      = encode(0b11, 0b110, 0b1100, 0b1100, 0b111), // RW
+    kICC_PMR_EL1          = encode(0b11, 0b000, 0b0100, 0b0110, 0b000), // RW
+    kICC_RPR_EL1          = encode(0b11, 0b000, 0b1100, 0b1011, 0b011), // RO
+    kICC_SGI0R_EL1        = encode(0b11, 0b000, 0b1100, 0b1011, 0b111), // WO
+    kICC_SGI1R_EL1        = encode(0b11, 0b000, 0b1100, 0b1011, 0b101), // WO
+    kICC_SRE_EL1          = encode(0b11, 0b000, 0b1100, 0b1100, 0b101), // RW
+    kICC_SRE_EL2          = encode(0b11, 0b100, 0b1100, 0b1001, 0b101), // RW
+    kICC_SRE_EL3          = encode(0b11, 0b110, 0b1100, 0b1100, 0b101), // RW
+    kICH_AP0R0_EL2        = encode(0b11, 0b100, 0b1100, 0b1000, 0b000), // RW
+    kICH_AP0R1_EL2        = encode(0b11, 0b100, 0b1100, 0b1000, 0b001), // RW
+    kICH_AP0R2_EL2        = encode(0b11, 0b100, 0b1100, 0b1000, 0b010), // RW
+    kICH_AP0R3_EL2        = encode(0b11, 0b100, 0b1100, 0b1000, 0b011), // RW
+    kICH_AP1R0_EL2        = encode(0b11, 0b100, 0b1100, 0b1001, 0b000), // RW
+    kICH_AP1R1_EL2        = encode(0b11, 0b100, 0b1100, 0b1001, 0b001), // RW
+    kICH_AP1R2_EL2        = encode(0b11, 0b100, 0b1100, 0b1001, 0b010), // RW
+    kICH_AP1R3_EL2        = encode(0b11, 0b100, 0b1100, 0b1001, 0b011), // RW
+    kICH_EISR_EL2         = encode(0b11, 0b100, 0b1100, 0b1011, 0b011), // RO
+    kICH_ELRSR_EL2        = encode(0b11, 0b100, 0b1100, 0b1011, 0b101), // RO
+    kICH_HCR_EL2          = encode(0b11, 0b100, 0b1100, 0b1011, 0b000), // RW
+    kICH_LR0_EL2          = encode(0b11, 0b100, 0b1100, 0b1100, 0b000), // RW
+    kICH_LR10_EL2         = encode(0b11, 0b100, 0b1100, 0b1101, 0b010), // RW
+    kICH_LR11_EL2         = encode(0b11, 0b100, 0b1100, 0b1101, 0b011), // RW
+    kICH_LR12_EL2         = encode(0b11, 0b100, 0b1100, 0b1101, 0b100), // RW
+    kICH_LR13_EL2         = encode(0b11, 0b100, 0b1100, 0b1101, 0b101), // RW
+    kICH_LR14_EL2         = encode(0b11, 0b100, 0b1100, 0b1101, 0b110), // RW
+    kICH_LR15_EL2         = encode(0b11, 0b100, 0b1100, 0b1101, 0b111), // RW
+    kICH_LR1_EL2          = encode(0b11, 0b100, 0b1100, 0b1100, 0b001), // RW
+    kICH_LR2_EL2          = encode(0b11, 0b100, 0b1100, 0b1100, 0b010), // RW
+    kICH_LR3_EL2          = encode(0b11, 0b100, 0b1100, 0b1100, 0b011), // RW
+    kICH_LR4_EL2          = encode(0b11, 0b100, 0b1100, 0b1100, 0b100), // RW
+    kICH_LR5_EL2          = encode(0b11, 0b100, 0b1100, 0b1100, 0b101), // RW
+    kICH_LR6_EL2          = encode(0b11, 0b100, 0b1100, 0b1100, 0b110), // RW
+    kICH_LR7_EL2          = encode(0b11, 0b100, 0b1100, 0b1100, 0b111), // RW
+    kICH_LR8_EL2          = encode(0b11, 0b100, 0b1100, 0b1101, 0b000), // RW
+    kICH_LR9_EL2          = encode(0b11, 0b100, 0b1100, 0b1101, 0b001), // RW
+    kICH_MISR_EL2         = encode(0b11, 0b100, 0b1100, 0b1011, 0b010), // RO
+    kICH_VMCR_EL2         = encode(0b11, 0b100, 0b1100, 0b1011, 0b111), // RW
+    kICH_VTR_EL2          = encode(0b11, 0b100, 0b1100, 0b1011, 0b001), // RO
+    kID_AA64AFR0_EL1      = encode(0b11, 0b000, 0b0000, 0b0101, 0b100), // RO
+    kID_AA64AFR1_EL1      = encode(0b11, 0b000, 0b0000, 0b0101, 0b101), // RO
+    kID_AA64DFR0_EL1      = encode(0b11, 0b000, 0b0000, 0b0101, 0b000), // RO
+    kID_AA64DFR1_EL1      = encode(0b11, 0b000, 0b0000, 0b0101, 0b001), // RO
+    kID_AA64ISAR0_EL1     = encode(0b11, 0b000, 0b0000, 0b0110, 0b000), // RO
+    kID_AA64ISAR1_EL1     = encode(0b11, 0b000, 0b0000, 0b0110, 0b001), // RO
+    kID_AA64MMFR0_EL1     = encode(0b11, 0b000, 0b0000, 0b0111, 0b000), // RO
+    kID_AA64MMFR1_EL1     = encode(0b11, 0b000, 0b0000, 0b0111, 0b001), // RO
+    kID_AA64MMFR2_EL1     = encode(0b11, 0b000, 0b0000, 0b0111, 0b010), // RO
+    kID_AA64PFR0_EL1      = encode(0b11, 0b000, 0b0000, 0b0100, 0b000), // RO
+    kID_AA64PFR1_EL1      = encode(0b11, 0b000, 0b0000, 0b0100, 0b001), // RO
+    kID_AA64ZFR0_EL1      = encode(0b11, 0b000, 0b0000, 0b0100, 0b100), // RO
+    kID_AFR0_EL1          = encode(0b11, 0b000, 0b0000, 0b0001, 0b011), // RO
+    kID_DFR0_EL1          = encode(0b11, 0b000, 0b0000, 0b0001, 0b010), // RO
+    kID_ISAR0_EL1         = encode(0b11, 0b000, 0b0000, 0b0010, 0b000), // RO
+    kID_ISAR1_EL1         = encode(0b11, 0b000, 0b0000, 0b0010, 0b001), // RO
+    kID_ISAR2_EL1         = encode(0b11, 0b000, 0b0000, 0b0010, 0b010), // RO
+    kID_ISAR3_EL1         = encode(0b11, 0b000, 0b0000, 0b0010, 0b011), // RO
+    kID_ISAR4_EL1         = encode(0b11, 0b000, 0b0000, 0b0010, 0b100), // RO
+    kID_ISAR5_EL1         = encode(0b11, 0b000, 0b0000, 0b0010, 0b101), // RO
+    kID_ISAR6_EL1         = encode(0b11, 0b000, 0b0000, 0b0010, 0b111), // RO
+    kID_MMFR0_EL1         = encode(0b11, 0b000, 0b0000, 0b0001, 0b100), // RO
+    kID_MMFR1_EL1         = encode(0b11, 0b000, 0b0000, 0b0001, 0b101), // RO
+    kID_MMFR2_EL1         = encode(0b11, 0b000, 0b0000, 0b0001, 0b110), // RO
+    kID_MMFR3_EL1         = encode(0b11, 0b000, 0b0000, 0b0001, 0b111), // RO
+    kID_MMFR4_EL1         = encode(0b11, 0b000, 0b0000, 0b0010, 0b110), // RO
+    kID_MMFR5_EL1         = encode(0b11, 0b000, 0b0000, 0b0011, 0b110), // RO
+    kID_PFR0_EL1          = encode(0b11, 0b000, 0b0000, 0b0001, 0b000), // RO
+    kID_PFR1_EL1          = encode(0b11, 0b000, 0b0000, 0b0001, 0b001), // RO
+    kID_PFR2_EL1          = encode(0b11, 0b000, 0b0000, 0b0011, 0b100), // RO
+    kIFSR32_EL2           = encode(0b11, 0b100, 0b0101, 0b0000, 0b001), // RW
+    kISR_EL1              = encode(0b11, 0b000, 0b1100, 0b0001, 0b000), // RO
+    kLORC_EL1             = encode(0b11, 0b000, 0b1010, 0b0100, 0b011), // RW
+    kLOREA_EL1            = encode(0b11, 0b000, 0b1010, 0b0100, 0b001), // RW
+    kLORID_EL1            = encode(0b11, 0b000, 0b1010, 0b0100, 0b111), // RO
+    kLORN_EL1             = encode(0b11, 0b000, 0b1010, 0b0100, 0b010), // RW
+    kLORSA_EL1            = encode(0b11, 0b000, 0b1010, 0b0100, 0b000), // RW
+    kMAIR_EL1             = encode(0b11, 0b000, 0b1010, 0b0010, 0b000), // RW
+    kMAIR_EL12            = encode(0b11, 0b101, 0b1010, 0b0010, 0b000), // RW
+    kMAIR_EL2             = encode(0b11, 0b100, 0b1010, 0b0010, 0b000), // RW
+    kMAIR_EL3             = encode(0b11, 0b110, 0b1010, 0b0010, 0b000), // RW
+    kMDCCINT_EL1          = encode(0b10, 0b000, 0b0000, 0b0010, 0b000), // RW
+    kMDCCSR_EL0           = encode(0b10, 0b011, 0b0000, 0b0001, 0b000), // RO
+    kMDCR_EL2             = encode(0b11, 0b100, 0b0001, 0b0001, 0b001), // RW
+    kMDCR_EL3             = encode(0b11, 0b110, 0b0001, 0b0011, 0b001), // RW
+    kMDRAR_EL1            = encode(0b10, 0b000, 0b0001, 0b0000, 0b000), // RO
+    kMDSCR_EL1            = encode(0b10, 0b000, 0b0000, 0b0010, 0b010), // RW
+    kMIDR_EL1             = encode(0b11, 0b000, 0b0000, 0b0000, 0b000), // RO
+    kMPAM0_EL1            = encode(0b11, 0b000, 0b1010, 0b0101, 0b001), // RW
+    kMPAM1_EL1            = encode(0b11, 0b000, 0b1010, 0b0101, 0b000), // RW
+    kMPAM1_EL12           = encode(0b11, 0b101, 0b1010, 0b0101, 0b000), // RW
+    kMPAM2_EL2            = encode(0b11, 0b100, 0b1010, 0b0101, 0b000), // RW
+    kMPAM3_EL3            = encode(0b11, 0b110, 0b1010, 0b0101, 0b000), // RW
+    kMPAMHCR_EL2          = encode(0b11, 0b100, 0b1010, 0b0100, 0b000), // RW
+    kMPAMIDR_EL1          = encode(0b11, 0b000, 0b1010, 0b0100, 0b100), // RO
+    kMPAMVPM0_EL2         = encode(0b11, 0b100, 0b1010, 0b0110, 0b000), // RW
+    kMPAMVPM1_EL2         = encode(0b11, 0b100, 0b1010, 0b0110, 0b001), // RW
+    kMPAMVPM2_EL2         = encode(0b11, 0b100, 0b1010, 0b0110, 0b010), // RW
+    kMPAMVPM3_EL2         = encode(0b11, 0b100, 0b1010, 0b0110, 0b011), // RW
+    kMPAMVPM4_EL2         = encode(0b11, 0b100, 0b1010, 0b0110, 0b100), // RW
+    kMPAMVPM5_EL2         = encode(0b11, 0b100, 0b1010, 0b0110, 0b101), // RW
+    kMPAMVPM6_EL2         = encode(0b11, 0b100, 0b1010, 0b0110, 0b110), // RW
+    kMPAMVPM7_EL2         = encode(0b11, 0b100, 0b1010, 0b0110, 0b111), // RW
+    kMPAMVPMV_EL2         = encode(0b11, 0b100, 0b1010, 0b0100, 0b001), // RW
+    kMPIDR_EL1            = encode(0b11, 0b000, 0b0000, 0b0000, 0b101), // RO
+    kMVFR0_EL1            = encode(0b11, 0b000, 0b0000, 0b0011, 0b000), // RO
+    kMVFR1_EL1            = encode(0b11, 0b000, 0b0000, 0b0011, 0b001), // RO
+    kMVFR2_EL1            = encode(0b11, 0b000, 0b0000, 0b0011, 0b010), // RO
+    kNZCV                 = encode(0b11, 0b011, 0b0100, 0b0010, 0b000), // RW
+    kOSDLR_EL1            = encode(0b10, 0b000, 0b0001, 0b0011, 0b100), // RW
+    kOSDTRRX_EL1          = encode(0b10, 0b000, 0b0000, 0b0000, 0b010), // RW
+    kOSDTRTX_EL1          = encode(0b10, 0b000, 0b0000, 0b0011, 0b010), // RW
+    kOSECCR_EL1           = encode(0b10, 0b000, 0b0000, 0b0110, 0b010), // RW
+    kOSLAR_EL1            = encode(0b10, 0b000, 0b0001, 0b0000, 0b100), // WO
+    kOSLSR_EL1            = encode(0b10, 0b000, 0b0001, 0b0001, 0b100), // RO
+    kPAN                  = encode(0b11, 0b000, 0b0100, 0b0010, 0b011), // RW
+    kPAR_EL1              = encode(0b11, 0b000, 0b0111, 0b0100, 0b000), // RW
+    kPMBIDR_EL1           = encode(0b11, 0b000, 0b1001, 0b1010, 0b111), // RO
+    kPMBLIMITR_EL1        = encode(0b11, 0b000, 0b1001, 0b1010, 0b000), // RW
+    kPMBPTR_EL1           = encode(0b11, 0b000, 0b1001, 0b1010, 0b001), // RW
+    kPMBSR_EL1            = encode(0b11, 0b000, 0b1001, 0b1010, 0b011), // RW
+    kPMCCFILTR_EL0        = encode(0b11, 0b011, 0b1110, 0b1111, 0b111), // RW
+    kPMCCNTR_EL0          = encode(0b11, 0b011, 0b1001, 0b1101, 0b000), // RW
+    kPMCEID0_EL0          = encode(0b11, 0b011, 0b1001, 0b1100, 0b110), // RO
+    kPMCEID1_EL0          = encode(0b11, 0b011, 0b1001, 0b1100, 0b111), // RO
+    kPMCNTENCLR_EL0       = encode(0b11, 0b011, 0b1001, 0b1100, 0b010), // RW
+    kPMCNTENSET_EL0       = encode(0b11, 0b011, 0b1001, 0b1100, 0b001), // RW
+    kPMCR_EL0             = encode(0b11, 0b011, 0b1001, 0b1100, 0b000), // RW
+    kPMEVCNTR0_EL0        = encode(0b11, 0b011, 0b1110, 0b1000, 0b000), // RW
+    kPMEVCNTR10_EL0       = encode(0b11, 0b011, 0b1110, 0b1001, 0b010), // RW
+    kPMEVCNTR11_EL0       = encode(0b11, 0b011, 0b1110, 0b1001, 0b011), // RW
+    kPMEVCNTR12_EL0       = encode(0b11, 0b011, 0b1110, 0b1001, 0b100), // RW
+    kPMEVCNTR13_EL0       = encode(0b11, 0b011, 0b1110, 0b1001, 0b101), // RW
+    kPMEVCNTR14_EL0       = encode(0b11, 0b011, 0b1110, 0b1001, 0b110), // RW
+    kPMEVCNTR15_EL0       = encode(0b11, 0b011, 0b1110, 0b1001, 0b111), // RW
+    kPMEVCNTR16_EL0       = encode(0b11, 0b011, 0b1110, 0b1010, 0b000), // RW
+    kPMEVCNTR17_EL0       = encode(0b11, 0b011, 0b1110, 0b1010, 0b001), // RW
+    kPMEVCNTR18_EL0       = encode(0b11, 0b011, 0b1110, 0b1010, 0b010), // RW
+    kPMEVCNTR19_EL0       = encode(0b11, 0b011, 0b1110, 0b1010, 0b011), // RW
+    kPMEVCNTR1_EL0        = encode(0b11, 0b011, 0b1110, 0b1000, 0b001), // RW
+    kPMEVCNTR20_EL0       = encode(0b11, 0b011, 0b1110, 0b1010, 0b100), // RW
+    kPMEVCNTR21_EL0       = encode(0b11, 0b011, 0b1110, 0b1010, 0b101), // RW
+    kPMEVCNTR22_EL0       = encode(0b11, 0b011, 0b1110, 0b1010, 0b110), // RW
+    kPMEVCNTR23_EL0       = encode(0b11, 0b011, 0b1110, 0b1010, 0b111), // RW
+    kPMEVCNTR24_EL0       = encode(0b11, 0b011, 0b1110, 0b1011, 0b000), // RW
+    kPMEVCNTR25_EL0       = encode(0b11, 0b011, 0b1110, 0b1011, 0b001), // RW
+    kPMEVCNTR26_EL0       = encode(0b11, 0b011, 0b1110, 0b1011, 0b010), // RW
+    kPMEVCNTR27_EL0       = encode(0b11, 0b011, 0b1110, 0b1011, 0b011), // RW
+    kPMEVCNTR28_EL0       = encode(0b11, 0b011, 0b1110, 0b1011, 0b100), // RW
+    kPMEVCNTR29_EL0       = encode(0b11, 0b011, 0b1110, 0b1011, 0b101), // RW
+    kPMEVCNTR2_EL0        = encode(0b11, 0b011, 0b1110, 0b1000, 0b010), // RW
+    kPMEVCNTR30_EL0       = encode(0b11, 0b011, 0b1110, 0b1011, 0b110), // RW
+    kPMEVCNTR3_EL0        = encode(0b11, 0b011, 0b1110, 0b1000, 0b011), // RW
+    kPMEVCNTR4_EL0        = encode(0b11, 0b011, 0b1110, 0b1000, 0b100), // RW
+    kPMEVCNTR5_EL0        = encode(0b11, 0b011, 0b1110, 0b1000, 0b101), // RW
+    kPMEVCNTR6_EL0        = encode(0b11, 0b011, 0b1110, 0b1000, 0b110), // RW
+    kPMEVCNTR7_EL0        = encode(0b11, 0b011, 0b1110, 0b1000, 0b111), // RW
+    kPMEVCNTR8_EL0        = encode(0b11, 0b011, 0b1110, 0b1001, 0b000), // RW
+    kPMEVCNTR9_EL0        = encode(0b11, 0b011, 0b1110, 0b1001, 0b001), // RW
+    kPMEVTYPER0_EL0       = encode(0b11, 0b011, 0b1110, 0b1100, 0b000), // RW
+    kPMEVTYPER10_EL0      = encode(0b11, 0b011, 0b1110, 0b1101, 0b010), // RW
+    kPMEVTYPER11_EL0      = encode(0b11, 0b011, 0b1110, 0b1101, 0b011), // RW
+    kPMEVTYPER12_EL0      = encode(0b11, 0b011, 0b1110, 0b1101, 0b100), // RW
+    kPMEVTYPER13_EL0      = encode(0b11, 0b011, 0b1110, 0b1101, 0b101), // RW
+    kPMEVTYPER14_EL0      = encode(0b11, 0b011, 0b1110, 0b1101, 0b110), // RW
+    kPMEVTYPER15_EL0      = encode(0b11, 0b011, 0b1110, 0b1101, 0b111), // RW
+    kPMEVTYPER16_EL0      = encode(0b11, 0b011, 0b1110, 0b1110, 0b000), // RW
+    kPMEVTYPER17_EL0      = encode(0b11, 0b011, 0b1110, 0b1110, 0b001), // RW
+    kPMEVTYPER18_EL0      = encode(0b11, 0b011, 0b1110, 0b1110, 0b010), // RW
+    kPMEVTYPER19_EL0      = encode(0b11, 0b011, 0b1110, 0b1110, 0b011), // RW
+    kPMEVTYPER1_EL0       = encode(0b11, 0b011, 0b1110, 0b1100, 0b001), // RW
+    kPMEVTYPER20_EL0      = encode(0b11, 0b011, 0b1110, 0b1110, 0b100), // RW
+    kPMEVTYPER21_EL0      = encode(0b11, 0b011, 0b1110, 0b1110, 0b101), // RW
+    kPMEVTYPER22_EL0      = encode(0b11, 0b011, 0b1110, 0b1110, 0b110), // RW
+    kPMEVTYPER23_EL0      = encode(0b11, 0b011, 0b1110, 0b1110, 0b111), // RW
+    kPMEVTYPER24_EL0      = encode(0b11, 0b011, 0b1110, 0b1111, 0b000), // RW
+    kPMEVTYPER25_EL0      = encode(0b11, 0b011, 0b1110, 0b1111, 0b001), // RW
+    kPMEVTYPER26_EL0      = encode(0b11, 0b011, 0b1110, 0b1111, 0b010), // RW
+    kPMEVTYPER27_EL0      = encode(0b11, 0b011, 0b1110, 0b1111, 0b011), // RW
+    kPMEVTYPER28_EL0      = encode(0b11, 0b011, 0b1110, 0b1111, 0b100), // RW
+    kPMEVTYPER29_EL0      = encode(0b11, 0b011, 0b1110, 0b1111, 0b101), // RW
+    kPMEVTYPER2_EL0       = encode(0b11, 0b011, 0b1110, 0b1100, 0b010), // RW
+    kPMEVTYPER30_EL0      = encode(0b11, 0b011, 0b1110, 0b1111, 0b110), // RW
+    kPMEVTYPER3_EL0       = encode(0b11, 0b011, 0b1110, 0b1100, 0b011), // RW
+    kPMEVTYPER4_EL0       = encode(0b11, 0b011, 0b1110, 0b1100, 0b100), // RW
+    kPMEVTYPER5_EL0       = encode(0b11, 0b011, 0b1110, 0b1100, 0b101), // RW
+    kPMEVTYPER6_EL0       = encode(0b11, 0b011, 0b1110, 0b1100, 0b110), // RW
+    kPMEVTYPER7_EL0       = encode(0b11, 0b011, 0b1110, 0b1100, 0b111), // RW
+    kPMEVTYPER8_EL0       = encode(0b11, 0b011, 0b1110, 0b1101, 0b000), // RW
+    kPMEVTYPER9_EL0       = encode(0b11, 0b011, 0b1110, 0b1101, 0b001), // RW
+    kPMINTENCLR_EL1       = encode(0b11, 0b000, 0b1001, 0b1110, 0b010), // RW
+    kPMINTENSET_EL1       = encode(0b11, 0b000, 0b1001, 0b1110, 0b001), // RW
+    kPMMIR_EL1            = encode(0b11, 0b000, 0b1001, 0b1110, 0b110), // RO
+    kPMOVSCLR_EL0         = encode(0b11, 0b011, 0b1001, 0b1100, 0b011), // RW
+    kPMOVSSET_EL0         = encode(0b11, 0b011, 0b1001, 0b1110, 0b011), // RW
+    kPMSCR_EL1            = encode(0b11, 0b000, 0b1001, 0b1001, 0b000), // RW
+    kPMSCR_EL12           = encode(0b11, 0b101, 0b1001, 0b1001, 0b000), // RW
+    kPMSCR_EL2            = encode(0b11, 0b100, 0b1001, 0b1001, 0b000), // RW
+    kPMSELR_EL0           = encode(0b11, 0b011, 0b1001, 0b1100, 0b101), // RW
+    kPMSEVFR_EL1          = encode(0b11, 0b000, 0b1001, 0b1001, 0b101), // RW
+    kPMSFCR_EL1           = encode(0b11, 0b000, 0b1001, 0b1001, 0b100), // RW
+    kPMSICR_EL1           = encode(0b11, 0b000, 0b1001, 0b1001, 0b010), // RW
+    kPMSIDR_EL1           = encode(0b11, 0b000, 0b1001, 0b1001, 0b111), // RO
+    kPMSIRR_EL1           = encode(0b11, 0b000, 0b1001, 0b1001, 0b011), // RW
+    kPMSLATFR_EL1         = encode(0b11, 0b000, 0b1001, 0b1001, 0b110), // RW
+    kPMSWINC_EL0          = encode(0b11, 0b011, 0b1001, 0b1100, 0b100), // WO
+    kPMUSERENR_EL0        = encode(0b11, 0b011, 0b1001, 0b1110, 0b000), // RW
+    kPMXEVCNTR_EL0        = encode(0b11, 0b011, 0b1001, 0b1101, 0b010), // RW
+    kPMXEVTYPER_EL0       = encode(0b11, 0b011, 0b1001, 0b1101, 0b001), // RW
+    kREVIDR_EL1           = encode(0b11, 0b000, 0b0000, 0b0000, 0b110), // RO
+    kRGSR_EL1             = encode(0b11, 0b000, 0b0001, 0b0000, 0b101), // RW
+    kRMR_EL1              = encode(0b11, 0b000, 0b1100, 0b0000, 0b010), // RW
+    kRMR_EL2              = encode(0b11, 0b100, 0b1100, 0b0000, 0b010), // RW
+    kRMR_EL3              = encode(0b11, 0b110, 0b1100, 0b0000, 0b010), // RW
+    kRNDR                 = encode(0b11, 0b011, 0b0010, 0b0100, 0b000), // RO
+    kRNDRRS               = encode(0b11, 0b011, 0b0010, 0b0100, 0b001), // RO
+    kRVBAR_EL1            = encode(0b11, 0b000, 0b1100, 0b0000, 0b001), // RO
+    kRVBAR_EL2            = encode(0b11, 0b100, 0b1100, 0b0000, 0b001), // RO
+    kRVBAR_EL3            = encode(0b11, 0b110, 0b1100, 0b0000, 0b001), // RO
+    kSCR_EL3              = encode(0b11, 0b110, 0b0001, 0b0001, 0b000), // RW
+    kSCTLR_EL1            = encode(0b11, 0b000, 0b0001, 0b0000, 0b000), // RW
+    kSCTLR_EL12           = encode(0b11, 0b101, 0b0001, 0b0000, 0b000), // RW
+    kSCTLR_EL2            = encode(0b11, 0b100, 0b0001, 0b0000, 0b000), // RW
+    kSCTLR_EL3            = encode(0b11, 0b110, 0b0001, 0b0000, 0b000), // RW
+    kSCXTNUM_EL0          = encode(0b11, 0b011, 0b1101, 0b0000, 0b111), // RW
+    kSCXTNUM_EL1          = encode(0b11, 0b000, 0b1101, 0b0000, 0b111), // RW
+    kSCXTNUM_EL12         = encode(0b11, 0b101, 0b1101, 0b0000, 0b111), // RW
+    kSCXTNUM_EL2          = encode(0b11, 0b100, 0b1101, 0b0000, 0b111), // RW
+    kSCXTNUM_EL3          = encode(0b11, 0b110, 0b1101, 0b0000, 0b111), // RW
+    kSDER32_EL2           = encode(0b11, 0b100, 0b0001, 0b0011, 0b001), // RW
+    kSDER32_EL3           = encode(0b11, 0b110, 0b0001, 0b0001, 0b001), // RW
+    kSPSR_EL1             = encode(0b11, 0b000, 0b0100, 0b0000, 0b000), // RW
+    kSPSR_EL12            = encode(0b11, 0b101, 0b0100, 0b0000, 0b000), // RW
+    kSPSR_EL2             = encode(0b11, 0b100, 0b0100, 0b0000, 0b000), // RW
+    kSPSR_EL3             = encode(0b11, 0b110, 0b0100, 0b0000, 0b000), // RW
+    kSPSR_abt             = encode(0b11, 0b100, 0b0100, 0b0011, 0b001), // RW
+    kSPSR_fiq             = encode(0b11, 0b100, 0b0100, 0b0011, 0b011), // RW
+    kSPSR_irq             = encode(0b11, 0b100, 0b0100, 0b0011, 0b000), // RW
+    kSPSR_und             = encode(0b11, 0b100, 0b0100, 0b0011, 0b010), // RW
+    kSPSel                = encode(0b11, 0b000, 0b0100, 0b0010, 0b000), // RW
+    kSP_EL0               = encode(0b11, 0b000, 0b0100, 0b0001, 0b000), // RW
+    kSP_EL1               = encode(0b11, 0b100, 0b0100, 0b0001, 0b000), // RW
+    kSP_EL2               = encode(0b11, 0b110, 0b0100, 0b0001, 0b000), // RW
+    kSSBS                 = encode(0b11, 0b011, 0b0100, 0b0010, 0b110), // RW
+    kTCO                  = encode(0b11, 0b011, 0b0100, 0b0010, 0b111), // RW
+    kTCR_EL1              = encode(0b11, 0b000, 0b0010, 0b0000, 0b010), // RW
+    kTCR_EL12             = encode(0b11, 0b101, 0b0010, 0b0000, 0b010), // RW
+    kTCR_EL2              = encode(0b11, 0b100, 0b0010, 0b0000, 0b010), // RW
+    kTCR_EL3              = encode(0b11, 0b110, 0b0010, 0b0000, 0b010), // RW
+    kTEECR32_EL1          = encode(0b10, 0b010, 0b0000, 0b0000, 0b000), // RW
+    kTEEHBR32_EL1         = encode(0b10, 0b010, 0b0001, 0b0000, 0b000), // RW
+    kTFSRE0_EL1           = encode(0b11, 0b000, 0b0101, 0b0110, 0b001), // RW
+    kTFSR_EL1             = encode(0b11, 0b000, 0b0101, 0b0110, 0b000), // RW
+    kTFSR_EL12            = encode(0b11, 0b101, 0b0101, 0b0110, 0b000), // RW
+    kTFSR_EL2             = encode(0b11, 0b100, 0b0101, 0b0110, 0b000), // RW
+    kTFSR_EL3             = encode(0b11, 0b110, 0b0101, 0b0110, 0b000), // RW
+    kTPIDRRO_EL0          = encode(0b11, 0b011, 0b1101, 0b0000, 0b011), // RW
+    kTPIDR_EL0            = encode(0b11, 0b011, 0b1101, 0b0000, 0b010), // RW
+    kTPIDR_EL1            = encode(0b11, 0b000, 0b1101, 0b0000, 0b100), // RW
+    kTPIDR_EL2            = encode(0b11, 0b100, 0b1101, 0b0000, 0b010), // RW
+    kTPIDR_EL3            = encode(0b11, 0b110, 0b1101, 0b0000, 0b010), // RW
+    kTRBBASER_EL1         = encode(0b11, 0b000, 0b1001, 0b1011, 0b010), // RW
+    kTRBIDR_EL1           = encode(0b11, 0b000, 0b1001, 0b1011, 0b111), // RO
+    kTRBLIMITR_EL1        = encode(0b11, 0b000, 0b1001, 0b1011, 0b000), // RW
+    kTRBMAR_EL1           = encode(0b11, 0b000, 0b1001, 0b1011, 0b100), // RW
+    kTRBPTR_EL1           = encode(0b11, 0b000, 0b1001, 0b1011, 0b001), // RW
+    kTRBSR_EL1            = encode(0b11, 0b000, 0b1001, 0b1011, 0b011), // RW
+    kTRBTRG_EL1           = encode(0b11, 0b000, 0b1001, 0b1011, 0b110), // RW
+    kTRCACATR0            = encode(0b10, 0b001, 0b0010, 0b0000, 0b010), // RW
+    kTRCACATR1            = encode(0b10, 0b001, 0b0010, 0b0010, 0b010), // RW
+    kTRCACATR10           = encode(0b10, 0b001, 0b0010, 0b0100, 0b011), // RW
+    kTRCACATR11           = encode(0b10, 0b001, 0b0010, 0b0110, 0b011), // RW
+    kTRCACATR12           = encode(0b10, 0b001, 0b0010, 0b1000, 0b011), // RW
+    kTRCACATR13           = encode(0b10, 0b001, 0b0010, 0b1010, 0b011), // RW
+    kTRCACATR14           = encode(0b10, 0b001, 0b0010, 0b1100, 0b011), // RW
+    kTRCACATR15           = encode(0b10, 0b001, 0b0010, 0b1110, 0b011), // RW
+    kTRCACATR2            = encode(0b10, 0b001, 0b0010, 0b0100, 0b010), // RW
+    kTRCACATR3            = encode(0b10, 0b001, 0b0010, 0b0110, 0b010), // RW
+    kTRCACATR4            = encode(0b10, 0b001, 0b0010, 0b1000, 0b010), // RW
+    kTRCACATR5            = encode(0b10, 0b001, 0b0010, 0b1010, 0b010), // RW
+    kTRCACATR6            = encode(0b10, 0b001, 0b0010, 0b1100, 0b010), // RW
+    kTRCACATR7            = encode(0b10, 0b001, 0b0010, 0b1110, 0b010), // RW
+    kTRCACATR8            = encode(0b10, 0b001, 0b0010, 0b0000, 0b011), // RW
+    kTRCACATR9            = encode(0b10, 0b001, 0b0010, 0b0010, 0b011), // RW
+    kTRCACVR0             = encode(0b10, 0b001, 0b0010, 0b0000, 0b000), // RW
+    kTRCACVR1             = encode(0b10, 0b001, 0b0010, 0b0010, 0b000), // RW
+    kTRCACVR10            = encode(0b10, 0b001, 0b0010, 0b0100, 0b001), // RW
+    kTRCACVR11            = encode(0b10, 0b001, 0b0010, 0b0110, 0b001), // RW
+    kTRCACVR12            = encode(0b10, 0b001, 0b0010, 0b1000, 0b001), // RW
+    kTRCACVR13            = encode(0b10, 0b001, 0b0010, 0b1010, 0b001), // RW
+    kTRCACVR14            = encode(0b10, 0b001, 0b0010, 0b1100, 0b001), // RW
+    kTRCACVR15            = encode(0b10, 0b001, 0b0010, 0b1110, 0b001), // RW
+    kTRCACVR2             = encode(0b10, 0b001, 0b0010, 0b0100, 0b000), // RW
+    kTRCACVR3             = encode(0b10, 0b001, 0b0010, 0b0110, 0b000), // RW
+    kTRCACVR4             = encode(0b10, 0b001, 0b0010, 0b1000, 0b000), // RW
+    kTRCACVR5             = encode(0b10, 0b001, 0b0010, 0b1010, 0b000), // RW
+    kTRCACVR6             = encode(0b10, 0b001, 0b0010, 0b1100, 0b000), // RW
+    kTRCACVR7             = encode(0b10, 0b001, 0b0010, 0b1110, 0b000), // RW
+    kTRCACVR8             = encode(0b10, 0b001, 0b0010, 0b0000, 0b001), // RW
+    kTRCACVR9             = encode(0b10, 0b001, 0b0010, 0b0010, 0b001), // RW
+    kTRCAUTHSTATUS        = encode(0b10, 0b001, 0b0111, 0b1110, 0b110), // RO
+    kTRCAUXCTLR           = encode(0b10, 0b001, 0b0000, 0b0110, 0b000), // RW
+    kTRCBBCTLR            = encode(0b10, 0b001, 0b0000, 0b1111, 0b000), // RW
+    kTRCCCCTLR            = encode(0b10, 0b001, 0b0000, 0b1110, 0b000), // RW
+    kTRCCIDCCTLR0         = encode(0b10, 0b001, 0b0011, 0b0000, 0b010), // RW
+    kTRCCIDCCTLR1         = encode(0b10, 0b001, 0b0011, 0b0001, 0b010), // RW
+    kTRCCIDCVR0           = encode(0b10, 0b001, 0b0011, 0b0000, 0b000), // RW
+    kTRCCIDCVR1           = encode(0b10, 0b001, 0b0011, 0b0010, 0b000), // RW
+    kTRCCIDCVR2           = encode(0b10, 0b001, 0b0011, 0b0100, 0b000), // RW
+    kTRCCIDCVR3           = encode(0b10, 0b001, 0b0011, 0b0110, 0b000), // RW
+    kTRCCIDCVR4           = encode(0b10, 0b001, 0b0011, 0b1000, 0b000), // RW
+    kTRCCIDCVR5           = encode(0b10, 0b001, 0b0011, 0b1010, 0b000), // RW
+    kTRCCIDCVR6           = encode(0b10, 0b001, 0b0011, 0b1100, 0b000), // RW
+    kTRCCIDCVR7           = encode(0b10, 0b001, 0b0011, 0b1110, 0b000), // RW
+    kTRCCIDR0             = encode(0b10, 0b001, 0b0111, 0b1100, 0b111), // RO
+    kTRCCIDR1             = encode(0b10, 0b001, 0b0111, 0b1101, 0b111), // RO
+    kTRCCIDR2             = encode(0b10, 0b001, 0b0111, 0b1110, 0b111), // RO
+    kTRCCIDR3             = encode(0b10, 0b001, 0b0111, 0b1111, 0b111), // RO
+    kTRCCLAIMCLR          = encode(0b10, 0b001, 0b0111, 0b1001, 0b110), // RW
+    kTRCCLAIMSET          = encode(0b10, 0b001, 0b0111, 0b1000, 0b110), // RW
+    kTRCCNTCTLR0          = encode(0b10, 0b001, 0b0000, 0b0100, 0b101), // RW
+    kTRCCNTCTLR1          = encode(0b10, 0b001, 0b0000, 0b0101, 0b101), // RW
+    kTRCCNTCTLR2          = encode(0b10, 0b001, 0b0000, 0b0110, 0b101), // RW
+    kTRCCNTCTLR3          = encode(0b10, 0b001, 0b0000, 0b0111, 0b101), // RW
+    kTRCCNTRLDVR0         = encode(0b10, 0b001, 0b0000, 0b0000, 0b101), // RW
+    kTRCCNTRLDVR1         = encode(0b10, 0b001, 0b0000, 0b0001, 0b101), // RW
+    kTRCCNTRLDVR2         = encode(0b10, 0b001, 0b0000, 0b0010, 0b101), // RW
+    kTRCCNTRLDVR3         = encode(0b10, 0b001, 0b0000, 0b0011, 0b101), // RW
+    kTRCCNTVR0            = encode(0b10, 0b001, 0b0000, 0b1000, 0b101), // RW
+    kTRCCNTVR1            = encode(0b10, 0b001, 0b0000, 0b1001, 0b101), // RW
+    kTRCCNTVR2            = encode(0b10, 0b001, 0b0000, 0b1010, 0b101), // RW
+    kTRCCNTVR3            = encode(0b10, 0b001, 0b0000, 0b1011, 0b101), // RW
+    kTRCCONFIGR           = encode(0b10, 0b001, 0b0000, 0b0100, 0b000), // RW
+    kTRCDEVAFF0           = encode(0b10, 0b001, 0b0111, 0b1010, 0b110), // RO
+    kTRCDEVAFF1           = encode(0b10, 0b001, 0b0111, 0b1011, 0b110), // RO
+    kTRCDEVARCH           = encode(0b10, 0b001, 0b0111, 0b1111, 0b110), // RO
+    kTRCDEVID             = encode(0b10, 0b001, 0b0111, 0b0010, 0b111), // RO
+    kTRCDEVTYPE           = encode(0b10, 0b001, 0b0111, 0b0011, 0b111), // RO
+    kTRCDVCMR0            = encode(0b10, 0b001, 0b0010, 0b0000, 0b110), // RW
+    kTRCDVCMR1            = encode(0b10, 0b001, 0b0010, 0b0100, 0b110), // RW
+    kTRCDVCMR2            = encode(0b10, 0b001, 0b0010, 0b1000, 0b110), // RW
+    kTRCDVCMR3            = encode(0b10, 0b001, 0b0010, 0b1100, 0b110), // RW
+    kTRCDVCMR4            = encode(0b10, 0b001, 0b0010, 0b0000, 0b111), // RW
+    kTRCDVCMR5            = encode(0b10, 0b001, 0b0010, 0b0100, 0b111), // RW
+    kTRCDVCMR6            = encode(0b10, 0b001, 0b0010, 0b1000, 0b111), // RW
+    kTRCDVCMR7            = encode(0b10, 0b001, 0b0010, 0b1100, 0b111), // RW
+    kTRCDVCVR0            = encode(0b10, 0b001, 0b0010, 0b0000, 0b100), // RW
+    kTRCDVCVR1            = encode(0b10, 0b001, 0b0010, 0b0100, 0b100), // RW
+    kTRCDVCVR2            = encode(0b10, 0b001, 0b0010, 0b1000, 0b100), // RW
+    kTRCDVCVR3            = encode(0b10, 0b001, 0b0010, 0b1100, 0b100), // RW
+    kTRCDVCVR4            = encode(0b10, 0b001, 0b0010, 0b0000, 0b101), // RW
+    kTRCDVCVR5            = encode(0b10, 0b001, 0b0010, 0b0100, 0b101), // RW
+    kTRCDVCVR6            = encode(0b10, 0b001, 0b0010, 0b1000, 0b101), // RW
+    kTRCDVCVR7            = encode(0b10, 0b001, 0b0010, 0b1100, 0b101), // RW
+    kTRCEVENTCTL0R        = encode(0b10, 0b001, 0b0000, 0b1000, 0b000), // RW
+    kTRCEVENTCTL1R        = encode(0b10, 0b001, 0b0000, 0b1001, 0b000), // RW
+    kTRCEXTINSELR         = encode(0b10, 0b001, 0b0000, 0b1000, 0b100), // RW
+    kTRCEXTINSELR0        = encode(0b10, 0b001, 0b0000, 0b1000, 0b100), // RW
+    kTRCEXTINSELR1        = encode(0b10, 0b001, 0b0000, 0b1001, 0b100), // RW
+    kTRCEXTINSELR2        = encode(0b10, 0b001, 0b0000, 0b1010, 0b100), // RW
+    kTRCEXTINSELR3        = encode(0b10, 0b001, 0b0000, 0b1011, 0b100), // RW
+    kTRCIDR0              = encode(0b10, 0b001, 0b0000, 0b1000, 0b111), // RO
+    kTRCIDR1              = encode(0b10, 0b001, 0b0000, 0b1001, 0b111), // RO
+    kTRCIDR10             = encode(0b10, 0b001, 0b0000, 0b0010, 0b110), // RO
+    kTRCIDR11             = encode(0b10, 0b001, 0b0000, 0b0011, 0b110), // RO
+    kTRCIDR12             = encode(0b10, 0b001, 0b0000, 0b0100, 0b110), // RO
+    kTRCIDR13             = encode(0b10, 0b001, 0b0000, 0b0101, 0b110), // RO
+    kTRCIDR2              = encode(0b10, 0b001, 0b0000, 0b1010, 0b111), // RO
+    kTRCIDR3              = encode(0b10, 0b001, 0b0000, 0b1011, 0b111), // RO
+    kTRCIDR4              = encode(0b10, 0b001, 0b0000, 0b1100, 0b111), // RO
+    kTRCIDR5              = encode(0b10, 0b001, 0b0000, 0b1101, 0b111), // RO
+    kTRCIDR6              = encode(0b10, 0b001, 0b0000, 0b1110, 0b111), // RO
+    kTRCIDR7              = encode(0b10, 0b001, 0b0000, 0b1111, 0b111), // RO
+    kTRCIDR8              = encode(0b10, 0b001, 0b0000, 0b0000, 0b110), // RO
+    kTRCIDR9              = encode(0b10, 0b001, 0b0000, 0b0001, 0b110), // RO
+    kTRCIMSPEC0           = encode(0b10, 0b001, 0b0000, 0b0000, 0b111), // RW
+    kTRCIMSPEC1           = encode(0b10, 0b001, 0b0000, 0b0001, 0b111), // RW
+    kTRCIMSPEC2           = encode(0b10, 0b001, 0b0000, 0b0010, 0b111), // RW
+    kTRCIMSPEC3           = encode(0b10, 0b001, 0b0000, 0b0011, 0b111), // RW
+    kTRCIMSPEC4           = encode(0b10, 0b001, 0b0000, 0b0100, 0b111), // RW
+    kTRCIMSPEC5           = encode(0b10, 0b001, 0b0000, 0b0101, 0b111), // RW
+    kTRCIMSPEC6           = encode(0b10, 0b001, 0b0000, 0b0110, 0b111), // RW
+    kTRCIMSPEC7           = encode(0b10, 0b001, 0b0000, 0b0111, 0b111), // RW
+    kTRCITCTRL            = encode(0b10, 0b001, 0b0111, 0b0000, 0b100), // RW
+    kTRCLAR               = encode(0b10, 0b001, 0b0111, 0b1100, 0b110), // WO
+    kTRCLSR               = encode(0b10, 0b001, 0b0111, 0b1101, 0b110), // RO
+    kTRCOSLAR             = encode(0b10, 0b001, 0b0001, 0b0000, 0b100), // WO
+    kTRCOSLSR             = encode(0b10, 0b001, 0b0001, 0b0001, 0b100), // RO
+    kTRCPDCR              = encode(0b10, 0b001, 0b0001, 0b0100, 0b100), // RW
+    kTRCPDSR              = encode(0b10, 0b001, 0b0001, 0b0101, 0b100), // RO
+    kTRCPIDR0             = encode(0b10, 0b001, 0b0111, 0b1000, 0b111), // RO
+    kTRCPIDR1             = encode(0b10, 0b001, 0b0111, 0b1001, 0b111), // RO
+    kTRCPIDR2             = encode(0b10, 0b001, 0b0111, 0b1010, 0b111), // RO
+    kTRCPIDR3             = encode(0b10, 0b001, 0b0111, 0b1011, 0b111), // RO
+    kTRCPIDR4             = encode(0b10, 0b001, 0b0111, 0b0100, 0b111), // RO
+    kTRCPIDR5             = encode(0b10, 0b001, 0b0111, 0b0101, 0b111), // RO
+    kTRCPIDR6             = encode(0b10, 0b001, 0b0111, 0b0110, 0b111), // RO
+    kTRCPIDR7             = encode(0b10, 0b001, 0b0111, 0b0111, 0b111), // RO
+    kTRCPRGCTLR           = encode(0b10, 0b001, 0b0000, 0b0001, 0b000), // RW
+    kTRCPROCSELR          = encode(0b10, 0b001, 0b0000, 0b0010, 0b000), // RW
+    kTRCQCTLR             = encode(0b10, 0b001, 0b0000, 0b0001, 0b001), // RW
+    kTRCRSCTLR10          = encode(0b10, 0b001, 0b0001, 0b1010, 0b000), // RW
+    kTRCRSCTLR11          = encode(0b10, 0b001, 0b0001, 0b1011, 0b000), // RW
+    kTRCRSCTLR12          = encode(0b10, 0b001, 0b0001, 0b1100, 0b000), // RW
+    kTRCRSCTLR13          = encode(0b10, 0b001, 0b0001, 0b1101, 0b000), // RW
+    kTRCRSCTLR14          = encode(0b10, 0b001, 0b0001, 0b1110, 0b000), // RW
+    kTRCRSCTLR15          = encode(0b10, 0b001, 0b0001, 0b1111, 0b000), // RW
+    kTRCRSCTLR16          = encode(0b10, 0b001, 0b0001, 0b0000, 0b001), // RW
+    kTRCRSCTLR17          = encode(0b10, 0b001, 0b0001, 0b0001, 0b001), // RW
+    kTRCRSCTLR18          = encode(0b10, 0b001, 0b0001, 0b0010, 0b001), // RW
+    kTRCRSCTLR19          = encode(0b10, 0b001, 0b0001, 0b0011, 0b001), // RW
+    kTRCRSCTLR2           = encode(0b10, 0b001, 0b0001, 0b0010, 0b000), // RW
+    kTRCRSCTLR20          = encode(0b10, 0b001, 0b0001, 0b0100, 0b001), // RW
+    kTRCRSCTLR21          = encode(0b10, 0b001, 0b0001, 0b0101, 0b001), // RW
+    kTRCRSCTLR22          = encode(0b10, 0b001, 0b0001, 0b0110, 0b001), // RW
+    kTRCRSCTLR23          = encode(0b10, 0b001, 0b0001, 0b0111, 0b001), // RW
+    kTRCRSCTLR24          = encode(0b10, 0b001, 0b0001, 0b1000, 0b001), // RW
+    kTRCRSCTLR25          = encode(0b10, 0b001, 0b0001, 0b1001, 0b001), // RW
+    kTRCRSCTLR26          = encode(0b10, 0b001, 0b0001, 0b1010, 0b001), // RW
+    kTRCRSCTLR27          = encode(0b10, 0b001, 0b0001, 0b1011, 0b001), // RW
+    kTRCRSCTLR28          = encode(0b10, 0b001, 0b0001, 0b1100, 0b001), // RW
+    kTRCRSCTLR29          = encode(0b10, 0b001, 0b0001, 0b1101, 0b001), // RW
+    kTRCRSCTLR3           = encode(0b10, 0b001, 0b0001, 0b0011, 0b000), // RW
+    kTRCRSCTLR30          = encode(0b10, 0b001, 0b0001, 0b1110, 0b001), // RW
+    kTRCRSCTLR31          = encode(0b10, 0b001, 0b0001, 0b1111, 0b001), // RW
+    kTRCRSCTLR4           = encode(0b10, 0b001, 0b0001, 0b0100, 0b000), // RW
+    kTRCRSCTLR5           = encode(0b10, 0b001, 0b0001, 0b0101, 0b000), // RW
+    kTRCRSCTLR6           = encode(0b10, 0b001, 0b0001, 0b0110, 0b000), // RW
+    kTRCRSCTLR7           = encode(0b10, 0b001, 0b0001, 0b0111, 0b000), // RW
+    kTRCRSCTLR8           = encode(0b10, 0b001, 0b0001, 0b1000, 0b000), // RW
+    kTRCRSCTLR9           = encode(0b10, 0b001, 0b0001, 0b1001, 0b000), // RW
+    kTRCRSR               = encode(0b10, 0b001, 0b0000, 0b1010, 0b000), // RW
+    kTRCSEQEVR0           = encode(0b10, 0b001, 0b0000, 0b0000, 0b100), // RW
+    kTRCSEQEVR1           = encode(0b10, 0b001, 0b0000, 0b0001, 0b100), // RW
+    kTRCSEQEVR2           = encode(0b10, 0b001, 0b0000, 0b0010, 0b100), // RW
+    kTRCSEQRSTEVR         = encode(0b10, 0b001, 0b0000, 0b0110, 0b100), // RW
+    kTRCSEQSTR            = encode(0b10, 0b001, 0b0000, 0b0111, 0b100), // RW
+    kTRCSSCCR0            = encode(0b10, 0b001, 0b0001, 0b0000, 0b010), // RW
+    kTRCSSCCR1            = encode(0b10, 0b001, 0b0001, 0b0001, 0b010), // RW
+    kTRCSSCCR2            = encode(0b10, 0b001, 0b0001, 0b0010, 0b010), // RW
+    kTRCSSCCR3            = encode(0b10, 0b001, 0b0001, 0b0011, 0b010), // RW
+    kTRCSSCCR4            = encode(0b10, 0b001, 0b0001, 0b0100, 0b010), // RW
+    kTRCSSCCR5            = encode(0b10, 0b001, 0b0001, 0b0101, 0b010), // RW
+    kTRCSSCCR6            = encode(0b10, 0b001, 0b0001, 0b0110, 0b010), // RW
+    kTRCSSCCR7            = encode(0b10, 0b001, 0b0001, 0b0111, 0b010), // RW
+    kTRCSSCSR0            = encode(0b10, 0b001, 0b0001, 0b1000, 0b010), // RW
+    kTRCSSCSR1            = encode(0b10, 0b001, 0b0001, 0b1001, 0b010), // RW
+    kTRCSSCSR2            = encode(0b10, 0b001, 0b0001, 0b1010, 0b010), // RW
+    kTRCSSCSR3            = encode(0b10, 0b001, 0b0001, 0b1011, 0b010), // RW
+    kTRCSSCSR4            = encode(0b10, 0b001, 0b0001, 0b1100, 0b010), // RW
+    kTRCSSCSR5            = encode(0b10, 0b001, 0b0001, 0b1101, 0b010), // RW
+    kTRCSSCSR6            = encode(0b10, 0b001, 0b0001, 0b1110, 0b010), // RW
+    kTRCSSCSR7            = encode(0b10, 0b001, 0b0001, 0b1111, 0b010), // RW
+    kTRCSSPCICR0          = encode(0b10, 0b001, 0b0001, 0b0000, 0b011), // RW
+    kTRCSSPCICR1          = encode(0b10, 0b001, 0b0001, 0b0001, 0b011), // RW
+    kTRCSSPCICR2          = encode(0b10, 0b001, 0b0001, 0b0010, 0b011), // RW
+    kTRCSSPCICR3          = encode(0b10, 0b001, 0b0001, 0b0011, 0b011), // RW
+    kTRCSSPCICR4          = encode(0b10, 0b001, 0b0001, 0b0100, 0b011), // RW
+    kTRCSSPCICR5          = encode(0b10, 0b001, 0b0001, 0b0101, 0b011), // RW
+    kTRCSSPCICR6          = encode(0b10, 0b001, 0b0001, 0b0110, 0b011), // RW
+    kTRCSSPCICR7          = encode(0b10, 0b001, 0b0001, 0b0111, 0b011), // RW
+    kTRCSTALLCTLR         = encode(0b10, 0b001, 0b0000, 0b1011, 0b000), // RW
+    kTRCSTATR             = encode(0b10, 0b001, 0b0000, 0b0011, 0b000), // RO
+    kTRCSYNCPR            = encode(0b10, 0b001, 0b0000, 0b1101, 0b000), // RW
+    kTRCTRACEIDR          = encode(0b10, 0b001, 0b0000, 0b0000, 0b001), // RW
+    kTRCTSCTLR            = encode(0b10, 0b001, 0b0000, 0b1100, 0b000), // RW
+    kTRCVDARCCTLR         = encode(0b10, 0b001, 0b0000, 0b1010, 0b010), // RW
+    kTRCVDCTLR            = encode(0b10, 0b001, 0b0000, 0b1000, 0b010), // RW
+    kTRCVDSACCTLR         = encode(0b10, 0b001, 0b0000, 0b1001, 0b010), // RW
+    kTRCVICTLR            = encode(0b10, 0b001, 0b0000, 0b0000, 0b010), // RW
+    kTRCVIIECTLR          = encode(0b10, 0b001, 0b0000, 0b0001, 0b010), // RW
+    kTRCVIPCSSCTLR        = encode(0b10, 0b001, 0b0000, 0b0011, 0b010), // RW
+    kTRCVISSCTLR          = encode(0b10, 0b001, 0b0000, 0b0010, 0b010), // RW
+    kTRCVMIDCCTLR0        = encode(0b10, 0b001, 0b0011, 0b0010, 0b010), // RW
+    kTRCVMIDCCTLR1        = encode(0b10, 0b001, 0b0011, 0b0011, 0b010), // RW
+    kTRCVMIDCVR0          = encode(0b10, 0b001, 0b0011, 0b0000, 0b001), // RW
+    kTRCVMIDCVR1          = encode(0b10, 0b001, 0b0011, 0b0010, 0b001), // RW
+    kTRCVMIDCVR2          = encode(0b10, 0b001, 0b0011, 0b0100, 0b001), // RW
+    kTRCVMIDCVR3          = encode(0b10, 0b001, 0b0011, 0b0110, 0b001), // RW
+    kTRCVMIDCVR4          = encode(0b10, 0b001, 0b0011, 0b1000, 0b001), // RW
+    kTRCVMIDCVR5          = encode(0b10, 0b001, 0b0011, 0b1010, 0b001), // RW
+    kTRCVMIDCVR6          = encode(0b10, 0b001, 0b0011, 0b1100, 0b001), // RW
+    kTRCVMIDCVR7          = encode(0b10, 0b001, 0b0011, 0b1110, 0b001), // RW
+    kTRFCR_EL1            = encode(0b11, 0b000, 0b0001, 0b0010, 0b001), // RW
+    kTRFCR_EL12           = encode(0b11, 0b101, 0b0001, 0b0010, 0b001), // RW
+    kTRFCR_EL2            = encode(0b11, 0b100, 0b0001, 0b0010, 0b001), // RW
+    kTTBR0_EL1            = encode(0b11, 0b000, 0b0010, 0b0000, 0b000), // RW
+    kTTBR0_EL12           = encode(0b11, 0b101, 0b0010, 0b0000, 0b000), // RW
+    kTTBR0_EL2            = encode(0b11, 0b100, 0b0010, 0b0000, 0b000), // RW
+    kTTBR0_EL3            = encode(0b11, 0b110, 0b0010, 0b0000, 0b000), // RW
+    kTTBR1_EL1            = encode(0b11, 0b000, 0b0010, 0b0000, 0b001), // RW
+    kTTBR1_EL12           = encode(0b11, 0b101, 0b0010, 0b0000, 0b001), // RW
+    kTTBR1_EL2            = encode(0b11, 0b100, 0b0010, 0b0000, 0b001), // RW
+    kUAO                  = encode(0b11, 0b000, 0b0100, 0b0010, 0b100), // RW
+    kVBAR_EL1             = encode(0b11, 0b000, 0b1100, 0b0000, 0b000), // RW
+    kVBAR_EL12            = encode(0b11, 0b101, 0b1100, 0b0000, 0b000), // RW
+    kVBAR_EL2             = encode(0b11, 0b100, 0b1100, 0b0000, 0b000), // RW
+    kVBAR_EL3             = encode(0b11, 0b110, 0b1100, 0b0000, 0b000), // RW
+    kVDISR_EL2            = encode(0b11, 0b100, 0b1100, 0b0001, 0b001), // RW
+    kVMPIDR_EL2           = encode(0b11, 0b100, 0b0000, 0b0000, 0b101), // RW
+    kVNCR_EL2             = encode(0b11, 0b100, 0b0010, 0b0010, 0b000), // RW
+    kVPIDR_EL2            = encode(0b11, 0b100, 0b0000, 0b0000, 0b000), // RW
+    kVSESR_EL2            = encode(0b11, 0b100, 0b0101, 0b0010, 0b011), // RW
+    kVSTCR_EL2            = encode(0b11, 0b100, 0b0010, 0b0110, 0b010), // RW
+    kVSTTBR_EL2           = encode(0b11, 0b100, 0b0010, 0b0110, 0b000), // RW
+    kVTCR_EL2             = encode(0b11, 0b100, 0b0010, 0b0001, 0b010), // RW
+    kVTTBR_EL2            = encode(0b11, 0b100, 0b0010, 0b0001, 0b000), // RW
+    kZCR_EL1              = encode(0b11, 0b000, 0b0001, 0b0010, 0b000), // RW
+    kZCR_EL12             = encode(0b11, 0b101, 0b0001, 0b0010, 0b000), // RW
+    kZCR_EL2              = encode(0b11, 0b100, 0b0001, 0b0010, 0b000), // RW
+    kZCR_EL3              = encode(0b11, 0b110, 0b0001, 0b0010, 0b000)  // RW
+  };
+};
+
+} // {Predicate}
+
+//! \}
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // ASMJIT_ARM_A64GLOBALS_H_INCLUDED
diff --git a/lib/lepton/asmjit/arm/a64instapi.cpp b/lib/lepton/asmjit/arm/a64instapi.cpp
new file mode 100644
index 0000000000..d933d4bd7f
--- /dev/null
+++ b/lib/lepton/asmjit/arm/a64instapi.cpp
@@ -0,0 +1,278 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#if !defined(ASMJIT_NO_AARCH64)
+
+#include "../core/cpuinfo.h"
+#include "../core/misc_p.h"
+#include "../core/support.h"
+#include "../arm/a64instapi_p.h"
+#include "../arm/a64instdb_p.h"
+#include "../arm/a64operand.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(a64)
+
+// a64::InstInternal - Text
+// ========================
+
+#ifndef ASMJIT_NO_TEXT
+Error InstInternal::instIdToString(Arch arch, InstId instId, String& output) noexcept { // Appends the name of instruction `instId` to `output`.
+  uint32_t realId = instId & uint32_t(InstIdParts::kRealId); // Keep only the bits selected by InstIdParts::kRealId.
+  DebugUtils::unused(arch); // `arch` is not consulted by this lookup.
+
+  if (ASMJIT_UNLIKELY(!Inst::isDefinedId(realId))) // Ids rejected by Inst::isDefinedId() produce kErrorInvalidInstruction.
+    return DebugUtils::errored(kErrorInvalidInstruction);
+
+  const InstDB::InstInfo& info = InstDB::infoById(realId);
+  return output.append(InstDB::_nameData + info._nameDataIndex); // The name starts at offset `_nameDataIndex` in `_nameData`; the result of append() is returned.
+}
+
+InstId InstInternal::stringToInstId(Arch arch, const char* s, size_t len) noexcept { // Finds the instruction whose name matches `s` (length `len`); returns Inst::kIdNone when nothing matches.
+  DebugUtils::unused(arch);
+
+  if (ASMJIT_UNLIKELY(!s)) // A null string never matches.
+    return Inst::kIdNone;
+
+  if (len == SIZE_MAX) // SIZE_MAX requests that the length be measured with strlen().
+    len = strlen(s);
+
+  if (ASMJIT_UNLIKELY(len == 0 || len > InstDB::kMaxNameSize)) // Empty, or longer than InstDB::kMaxNameSize.
+    return Inst::kIdNone;
+
+  uint32_t prefix = uint32_t(s[0]) - 'a'; // First character as an index into `instNameIndex`.
+  if (ASMJIT_UNLIKELY(prefix > 'z' - 'a')) // Unsigned wrap-around makes this reject characters below 'a' as well.
+    return Inst::kIdNone;
+
+  uint32_t index = InstDB::instNameIndex[prefix].start;
+  if (ASMJIT_UNLIKELY(!index)) // A zero `start` for this letter yields kIdNone.
+    return Inst::kIdNone;
+
+  const char* nameData = InstDB::_nameData;
+  const InstDB::InstInfo* table = InstDB::_instInfoTable;
+
+  const InstDB::InstInfo* base = table + index; // Search window is [base, end) inside `_instInfoTable`.
+  const InstDB::InstInfo* end  = table + InstDB::instNameIndex[prefix].end;
+
+  for (size_t lim = (size_t)(end - base); lim != 0; lim >>= 1) { // Halving search; `lim` is the number of candidates left.
+    const InstDB::InstInfo* cur = base + (lim >> 1);
+    int result = Support::cmpInstName(nameData + cur[0]._nameDataIndex, s, len);
+
+    if (result < 0) { // Continue after `cur`.
+      base = cur + 1;
+      lim--; // Together with the loop's `lim >>= 1` this leaves exactly the count of entries after `cur`.
+      continue;
+    }
+
+    if (result > 0) // Continue before `cur`; the loop's `lim >>= 1` gives the count of entries in front of it.
+      continue;
+
+    return uint32_t((size_t)(cur - table)); // Match: the id is the entry's position in `_instInfoTable`.
+  }
+
+  return Inst::kIdNone;
+}
+#endif // !ASMJIT_NO_TEXT
+
+// a64::InstInternal - Validate
+// ============================
+
+#ifndef ASMJIT_NO_VALIDATION
+ASMJIT_FAVOR_SIZE Error InstInternal::validate(Arch arch, const BaseInst& inst, const Operand_* operands, size_t opCount, ValidationFlags validationFlags) noexcept {
+  // TODO: Validation is not implemented here - every argument is ignored and kErrorOk is always returned.
+  DebugUtils::unused(arch, inst, operands, opCount, validationFlags);
+  return kErrorOk;
+}
+#endif // !ASMJIT_NO_VALIDATION
+
+// a64::InstInternal - QueryRWInfo
+// ===============================
+
+#ifndef ASMJIT_NO_INTROSPECTION
+struct InstRWInfoData { // One row of per-operand access flags used by queryRWInfo().
+  uint8_t rwx[Globals::kMaxOpCount]; // OpRWFlags values stored as uint8_t, one per operand position.
+};
+
+static const InstRWInfoData instRWInfoData[] = { // Rows are selected by InstInfo::rwInfoIndex() in queryRWInfo(); columns are operand positions.
+  #define R uint8_t(OpRWFlags::kRead)
+  #define W uint8_t(OpRWFlags::kWrite)
+  #define X uint8_t(OpRWFlags::kRW)
+
+  {{ R, R, R, R, R, R }}, // kRWI_R
+  {{ R, W, R, R, R, R }}, // kRWI_RW
+  {{ R, X, R, R, R, R }}, // kRWI_RX
+  {{ R, R, W, R, R, R }}, // kRWI_RRW
+  {{ R, W, X, R, R, R }}, // kRWI_RWX
+  {{ W, R, R, R, R, R }}, // kRWI_W
+  {{ W, R, W, R, R, R }}, // kRWI_WRW
+  {{ W, R, X, R, R, R }}, // kRWI_WRX
+  {{ W, R, R, W, R, R }}, // kRWI_WRRW
+  {{ W, R, R, X, R, R }}, // kRWI_WRRX
+  {{ W, W, R, R, R, R }}, // kRWI_WW
+  {{ X, R, R, R, R, R }}, // kRWI_X
+  {{ X, R, X, R, R, R }}, // kRWI_XRX
+  {{ X, X, R, R, X, R }}, // kRWI_XXRRX
+
+  {{ W, R, R, R, R, R }}, // kRWI_LDn
+  {{ R, W, R, R, R, R }}, // kRWI_STn
+  {{ R, R, R, R, R, R }}  // kRWI_TODO
+
+  #undef R
+  #undef W
+  #undef X
+};
+
+static const uint8_t elementTypeSize[8] = { 0, 1, 2, 4, 8, 4, 4, 0 }; // Indexed by Vec::elementType() in queryRWInfo() and used as a byte count for byte masks; NOTE(review): confirm the index order against the element type enum.
+
+Error InstInternal::queryRWInfo(Arch arch, const BaseInst& inst, const Operand_* operands, size_t opCount, InstRWInfo* out) noexcept { // Fills `out` with per-operand read/write info for `inst`; kErrorInvalidInstruction for an undefined id, else kErrorOk.
+  // Unused in Release configuration as the assert is not compiled in.
+  DebugUtils::unused(arch);
+
+  // Only called when `arch` matches the ARM family (see the assert below).
+  ASMJIT_ASSERT(Environment::isFamilyARM(arch));
+
+  // Get the instruction data.
+  uint32_t realId = inst.id() & uint32_t(InstIdParts::kRealId);
+
+  if (ASMJIT_UNLIKELY(!Inst::isDefinedId(realId)))
+    return DebugUtils::errored(kErrorInvalidInstruction);
+
+  out->_instFlags = InstRWFlags::kNone;
+  out->_opCount = uint8_t(opCount); // NOTE(review): narrowing cast; nothing here checks opCount against Globals::kMaxOpCount - confirm callers guarantee it.
+  out->_rmFeature = 0;
+  out->_extraReg.reset();
+  out->_readFlags = CpuRWFlags::kNone; // TODO: [ARM] Read PSTATUS.
+  out->_writeFlags = CpuRWFlags::kNone; // TODO: [ARM] Write PSTATUS
+
+  const InstDB::InstInfo& instInfo = InstDB::_instInfoTable[realId];
+  const InstRWInfoData& rwInfo = instRWInfoData[instInfo.rwInfoIndex()];
+
+  if (instInfo.hasFlag(InstDB::kInstFlagConsecutive) && opCount > 2) { // Instructions flagged kInstFlagConsecutive with more than two operands.
+    for (uint32_t i = 0; i < opCount; i++) {
+      OpRWInfo& op = out->_operands[i];
+      const Operand_& srcOp = operands[i];
+
+      if (!srcOp.isRegOrMem()) { // Operands that are neither register nor memory get a reset entry.
+        op.reset();
+        continue;
+      }
+
+      OpRWFlags rwFlags = i < opCount - 1 ? (OpRWFlags)rwInfo.rwx[0] : (OpRWFlags)rwInfo.rwx[1]; // Every operand but the last uses rwx[0]; the last one uses rwx[1].
+
+      op._opFlags = rwFlags & ~(OpRWFlags::kZExt); // Table flags with kZExt cleared.
+      op._physId = BaseReg::kIdBad;
+      op._rmSize = 0;
+      op._resetReserved();
+
+      uint64_t rByteMask = op.isRead() ? 0xFFFFFFFFFFFFFFFFu : 0x0000000000000000u; // All bytes when read, none otherwise.
+      uint64_t wByteMask = op.isWrite() ? 0xFFFFFFFFFFFFFFFFu : 0x0000000000000000u; // All bytes when written, none otherwise.
+
+      op._readByteMask = rByteMask;
+      op._writeByteMask = wByteMask;
+      op._extendByteMask = 0;
+      op._consecutiveLeadCount = 0;
+
+      if (srcOp.isReg()) {
+        if (i == 0) // The first operand records how many operands follow it.
+          op._consecutiveLeadCount = uint8_t(opCount - 1);
+        else // Every later register operand is tagged kConsecutive.
+          op.addOpFlags(OpRWFlags::kConsecutive);
+      }
+      else {
+        const Mem& memOp = srcOp.as<Mem>();
+
+        if (memOp.hasBase()) {
+          op.addOpFlags(OpRWFlags::kMemBaseRead);
+        }
+
+        if (memOp.hasIndex()) {
+          op.addOpFlags(OpRWFlags::kMemIndexRead);
+          op.addOpFlags(memOp.isPreOrPost() ? OpRWFlags::kMemIndexWrite : OpRWFlags::kNone); // NOTE(review): AArch64 pre/post-index write-back updates the base register - confirm an index-write flag is what is intended.
+        }
+      }
+    }
+  }
+  else { // General case: operand `i` takes its flags from rwx[i].
+    for (uint32_t i = 0; i < opCount; i++) {
+      OpRWInfo& op = out->_operands[i];
+      const Operand_& srcOp = operands[i];
+
+      if (!srcOp.isRegOrMem()) { // Operands that are neither register nor memory get a reset entry.
+        op.reset();
+        continue;
+      }
+
+      OpRWFlags rwFlags = (OpRWFlags)rwInfo.rwx[i]; // NOTE(review): `rwx` holds Globals::kMaxOpCount entries and `i` is bounded only by opCount - confirm callers cap it.
+
+      op._opFlags = rwFlags & ~(OpRWFlags::kZExt); // Table flags with kZExt cleared.
+      op._physId = BaseReg::kIdBad;
+      op._rmSize = 0;
+      op._resetReserved();
+
+      uint64_t rByteMask = op.isRead() ? 0xFFFFFFFFFFFFFFFFu : 0x0000000000000000u; // All bytes when read, none otherwise.
+      uint64_t wByteMask = op.isWrite() ? 0xFFFFFFFFFFFFFFFFu : 0x0000000000000000u; // All bytes when written, none otherwise.
+
+      op._readByteMask = rByteMask;
+      op._writeByteMask = wByteMask;
+      op._extendByteMask = 0;
+      op._consecutiveLeadCount = 0;
+
+      if (srcOp.isReg()) {
+        if (srcOp.as<Vec>().hasElementIndex()) {
+          // Only part of the vector is accessed if element index [] is used.
+          uint32_t elementType = srcOp.as<Vec>().elementType();
+          uint32_t elementIndex = srcOp.as<Vec>().elementIndex();
+
+          uint32_t elementSize = elementTypeSize[elementType];
+          uint64_t accessMask = uint64_t(Support::lsbMask<uint32_t>(elementSize)) << (elementIndex * elementSize); // `elementSize` low bits, shifted to the selected element's position in the byte mask.
+
+          op._readByteMask &= accessMask; // Narrow the read/write byte masks to the selected element.
+          op._writeByteMask &= accessMask;
+        }
+
+        // TODO: [ARM] RW info is not finished.
+      }
+      else {
+        const Mem& memOp = srcOp.as<Mem>();
+
+        if (memOp.hasBase()) {
+          op.addOpFlags(OpRWFlags::kMemBaseRead);
+        }
+
+        if (memOp.hasIndex()) {
+          op.addOpFlags(OpRWFlags::kMemIndexRead);
+          op.addOpFlags(memOp.isPreOrPost() ? OpRWFlags::kMemIndexWrite : OpRWFlags::kNone); // NOTE(review): AArch64 pre/post-index write-back updates the base register - confirm an index-write flag is what is intended.
+        }
+      }
+    }
+  }
+
+  return kErrorOk;
+}
+#endif // !ASMJIT_NO_INTROSPECTION
+
+// a64::InstInternal - QueryFeatures
+// =================================
+
+#ifndef ASMJIT_NO_INTROSPECTION
+Error InstInternal::queryFeatures(Arch arch, const BaseInst& inst, const Operand_* operands, size_t opCount, CpuFeatures* out) noexcept {
+  // TODO: [ARM] QueryFeatures not implemented yet - all arguments are ignored, `out` is left untouched and kErrorOk is returned.
+  DebugUtils::unused(arch, inst, operands, opCount, out);
+  return kErrorOk;
+}
+#endif // !ASMJIT_NO_INTROSPECTION
+
+// a64::InstInternal - Unit
+// ========================
+
+#if defined(ASMJIT_TEST)
+UNIT(arm_inst_api_text) {
+  // TODO: Empty placeholder - this unit currently checks nothing.
+}
+#endif
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // !ASMJIT_NO_AARCH64
diff --git a/lib/lepton/asmjit/arm/a64instapi_p.h b/lib/lepton/asmjit/arm/a64instapi_p.h
new file mode 100644
index 0000000000..320a3e881d
--- /dev/null
+++ b/lib/lepton/asmjit/arm/a64instapi_p.h
@@ -0,0 +1,41 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_ARM_A64INSTAPI_P_H_INCLUDED
+#define ASMJIT_ARM_A64INSTAPI_P_H_INCLUDED
+
+#include "../core/inst.h"
+#include "../core/operand.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(a64)
+
+//! \cond INTERNAL
+//! \addtogroup asmjit_a64
+//! \{
+
+namespace InstInternal { // Declarations of the a64 instruction API; each group is compiled out by its ASMJIT_NO_* macro.
+
+#ifndef ASMJIT_NO_TEXT
+Error ASMJIT_CDECL instIdToString(Arch arch, InstId instId, String& output) noexcept; // Appends the instruction name to `output`.
+InstId ASMJIT_CDECL stringToInstId(Arch arch, const char* s, size_t len) noexcept; // `len` may be SIZE_MAX for a NUL-terminated `s`; Inst::kIdNone when not found.
+#endif // !ASMJIT_NO_TEXT
+
+#ifndef ASMJIT_NO_VALIDATION
+Error ASMJIT_CDECL validate(Arch arch, const BaseInst& inst, const Operand_* operands, size_t opCount, ValidationFlags validationFlags) noexcept;
+#endif // !ASMJIT_NO_VALIDATION
+
+#ifndef ASMJIT_NO_INTROSPECTION
+Error ASMJIT_CDECL queryRWInfo(Arch arch, const BaseInst& inst, const Operand_* operands, size_t opCount, InstRWInfo* out) noexcept; // Writes per-operand read/write info to `out`.
+Error ASMJIT_CDECL queryFeatures(Arch arch, const BaseInst& inst, const Operand_* operands, size_t opCount, CpuFeatures* out) noexcept;
+#endif // !ASMJIT_NO_INTROSPECTION
+
+} // {InstInternal}
+
+//! \}
+//! \endcond
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // ASMJIT_ARM_A64INSTAPI_P_H_INCLUDED
diff --git a/lib/lepton/asmjit/arm/a64instdb.cpp b/lib/lepton/asmjit/arm/a64instdb.cpp
new file mode 100644
index 0000000000..64709b5db0
--- /dev/null
+++ b/lib/lepton/asmjit/arm/a64instdb.cpp
@@ -0,0 +1,1957 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#if !defined(ASMJIT_NO_AARCH64)
+
+#include "../core/codeholder.h"
+#include "../core/support.h"
+#include "../arm/a64instdb_p.h"
+#include "../arm/a64operand.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(a64)
+
+namespace InstDB {
+
+// a64::InstDB - InstInfoTable
+// ===========================
+
+// When instruction names are disabled (ASMJIT_NO_TEXT), NAME_DATA_INDEX(x) expands to 0 instead of `x`, so
+// `_nameDataIndex` is not stored. Some APIs can still use `_nameDataIndex`, so a zero value is the safer choice.
+#if defined(ASMJIT_NO_TEXT)
+  #define NAME_DATA_INDEX(x) 0
+#else
+  #define NAME_DATA_INDEX(x) x
+#endif
+
+// Defines an ARM/AArch64 instruction (one table row). `id` and `opcodeData` are not used by the expansion; the third field is always 0.
+#define INST(id, opcodeEncoding, opcodeData, rwInfoIndex, flags, opcodeDataIndex, nameDataIndex) { \
+  uint32_t(kEncoding##opcodeEncoding),      \
+  uint32_t(opcodeDataIndex),                \
+  0,                                        \
+  uint32_t(NAME_DATA_INDEX(nameDataIndex)), \
+  uint16_t(rwInfoIndex),                    \
+  uint16_t(flags)                           \
+}
+
+#define F(flag) kInstFlag##flag // Shorthand for the table below: F(Cond) expands to kInstFlagCond.
+
+// TODO: [ARM] Missing Instructions:
+/*
+BLRAA, BLRAAZ, BLRAB, BLRABZ: Branch with Link to Register, with pointer authentication.
+BRAA, BRAAZ, BRAB, BRABZ: Branch to Register, with pointer authentication.
+
+CFP: Control Flow Prediction Restriction by Context: an alias of SYS.
+CPP: Cache Prefetch Prediction Restriction by Context: an alias of SYS.
+DVP: Data Value Prediction Restriction by Context: an alias of SYS.
+PSB CSYNC: Profiling Synchronization Barrier.
+
+ERETAA, ERETAB: Exception Return, with pointer authentication.
+LDAPxxx
+PACIA, PACIA1716, PACIASP, PACIAZ, PACIZA: Pointer Authentication Code for Instruction address, using key A.
+PACIB, PACIB1716, PACIBSP, PACIBZ, PACIZB: Pointer Authentication Code for Instruction address, using key B.
+PRFM (immediate): Prefetch Memory (immediate).
+PRFM (literal): Prefetch Memory (literal).
+PRFM (register): Prefetch Memory (register).
+PRFUM: Prefetch Memory (unscaled offset).
+RETAA, RETAB: Return from subroutine, with pointer authentication.
+RMIF: Rotate, Mask Insert Flags.
+SYSL
+IRG: Insert Random Tag.
+INST_(Irg              , BaseRRR            , (0b1001101011000000000100, kX , kSP, kX , kSP, kX , kZR, true)                        , kRWI_W    , 0                         , 0  , 1   ), // #1
+*/
+const InstInfo _instInfoTable[] = {
+  // +------------------+---------------------+--------------------------------------------------------------------------------------+-----------+---------------------------+----+-----+
+  // | Instruction Id   | Encoding            | Opcode Data                                                                          | RW Info   | Instruction Flags         |DatX|NameX|
+  // +------------------+---------------------+--------------------------------------------------------------------------------------+-----------+---------------------------+----+-----+
+  // ${InstInfo:Begin}
+  INST(None             , None               , (_)                                                                                   , 0         , 0                         , 0  , 0   ), // #0
+  INST(Adc              , BaseRRR            , (0b0001101000000000000000, kWX, kZR, kWX, kZR, kWX, kZR, true)                        , kRWI_W    , 0                         , 0  , 1   ), // #1
+  INST(Adcs             , BaseRRR            , (0b0011101000000000000000, kWX, kZR, kWX, kZR, kWX, kZR, true)                        , kRWI_W    , 0                         , 1  , 5   ), // #2
+  INST(Add              , BaseAddSub         , (0b0001011000, 0b0001011001, 0b0010001)                                               , kRWI_W    , 0                         , 0  , 978 ), // #3
+  INST(Addg             , BaseRRII           , (0b1001000110000000000000, kX, kSP, kX, kSP, 6, 4, 16, 4, 0, 10)                      , kRWI_W    , 0                         , 0  , 10  ), // #4
+  INST(Adds             , BaseAddSub         , (0b0101011000, 0b0101011001, 0b0110001)                                               , kRWI_W    , 0                         , 1  , 15  ), // #5
+  INST(Adr              , BaseAdr            , (0b0001000000000000000000, OffsetType::kAArch64_ADR)                                  , kRWI_W    , 0                         , 0  , 25  ), // #6
+  INST(Adrp             , BaseAdr            , (0b1001000000000000000000, OffsetType::kAArch64_ADRP)                                 , kRWI_W    , 0                         , 1  , 29  ), // #7
+  INST(And              , BaseLogical        , (0b0001010000, 0b00100100, 0)                                                         , kRWI_W    , 0                         , 0  , 57  ), // #8
+  INST(Ands             , BaseLogical        , (0b1101010000, 0b11100100, 0)                                                         , kRWI_W    , 0                         , 1  , 61  ), // #9
+  INST(Asr              , BaseShift          , (0b0001101011000000001010, 0b0001001100000000011111, 0)                               , kRWI_W    , 0                         , 0  , 66  ), // #10
+  INST(Asrv             , BaseShift          , (0b0001101011000000001010, 0b0000000000000000000000, 0)                               , kRWI_W    , 0                         , 1  , 70  ), // #11
+  INST(At               , BaseAtDcIcTlbi     , (0b00011111110000, 0b00001111000000, true)                                            , kRWI_RX   , 0                         , 0  , 75  ), // #12
+  INST(Autda            , BaseRR             , (0b11011010110000010001100000000000, kX, kZR, 0, kX, kSP, 5, true)                    , kRWI_X    , 0                         , 0  , 78  ), // #13
+  INST(Autdza           , BaseR              , (0b11011010110000010011101111100000, kX, kZR, 0)                                      , kRWI_X    , 0                         , 0  , 90  ), // #14
+  INST(Autdb            , BaseRR             , (0b11011010110000010001110000000000, kX, kZR, 0, kX, kSP, 5, true)                    , kRWI_X    , 0                         , 1  , 84  ), // #15
+  INST(Autdzb           , BaseR              , (0b11011010110000010011111111100000, kX, kZR, 0)                                      , kRWI_X    , 0                         , 1  , 97  ), // #16
+  INST(Autia            , BaseRR             , (0b11011010110000010001000000000000, kX, kZR, 0, kX, kSP, 5, true)                    , kRWI_X    , 0                         , 2  , 104 ), // #17
+  INST(Autia1716        , BaseOp             , (0b11010101000000110010000110011111)                                                  , 0         , 0                         , 0  , 110 ), // #18
+  INST(Autiasp          , BaseOp             , (0b11010101000000110010001110111111)                                                  , 0         , 0                         , 1  , 120 ), // #19
+  INST(Autiaz           , BaseOp             , (0b11010101000000110010001110011111)                                                  , 0         , 0                         , 2  , 128 ), // #20
+  INST(Autib            , BaseRR             , (0b11011010110000010001010000000000, kX, kZR, 0, kX, kSP, 5, true)                    , kRWI_X    , 0                         , 3  , 135 ), // #21
+  INST(Autib1716        , BaseOp             , (0b11010101000000110010000111011111)                                                  , 0         , 0                         , 3  , 141 ), // #22
+  INST(Autibsp          , BaseOp             , (0b11010101000000110010001111111111)                                                  , 0         , 0                         , 4  , 151 ), // #23
+  INST(Autibz           , BaseOp             , (0b11010101000000110010001111011111)                                                  , 0         , 0                         , 5  , 159 ), // #24
+  INST(Autiza           , BaseR              , (0b11011010110000010011001111100000, kX, kZR, 0)                                      , kRWI_X    , 0                         , 2  , 166 ), // #25
+  INST(Autizb           , BaseR              , (0b11011010110000010011011111100000, kX, kZR, 0)                                      , kRWI_X    , 0                         , 3  , 173 ), // #26
+  INST(Axflag           , BaseOp             , (0b11010101000000000100000001011111)                                                  , 0         , 0                         , 6  , 180 ), // #27
+  INST(B                , BaseBranchRel      , (0b00010100000000000000000000000000)                                                  , 0         , F(Cond)                   , 0  , 1738), // #28
+  INST(Bfc              , BaseBfc            , (0b00110011000000000000001111100000)                                                  , kRWI_X    , 0                         , 0  , 192 ), // #29
+  INST(Bfi              , BaseBfi            , (0b00110011000000000000000000000000)                                                  , kRWI_X    , 0                         , 0  , 223 ), // #30
+  INST(Bfm              , BaseBfm            , (0b00110011000000000000000000000000)                                                  , kRWI_X    , 0                         , 0  , 2514), // #31
+  INST(Bfxil            , BaseBfx            , (0b00110011000000000000000000000000)                                                  , kRWI_X    , 0                         , 0  , 250 ), // #32
+  INST(Bic              , BaseLogical        , (0b0001010001, 0b00100100, 1)                                                         , kRWI_W    , 0                         , 2  , 256 ), // #33
+  INST(Bics             , BaseLogical        , (0b1101010001, 0b11100100, 1)                                                         , kRWI_W    , 0                         , 3  , 260 ), // #34
+  INST(Bl               , BaseBranchRel      , (0b10010100000000000000000000000000)                                                  , 0         , 0                         , 1  , 2831), // #35
+  INST(Blr              , BaseBranchReg      , (0b11010110001111110000000000000000)                                                  , kRWI_R    , 0                         , 0  , 269 ), // #36
+  INST(Br               , BaseBranchReg      , (0b11010110000111110000000000000000)                                                  , kRWI_R    , 0                         , 1  , 273 ), // #37
+  INST(Brk              , BaseOpImm          , (0b11010100001000000000000000000000, 16, 5)                                           , 0         , 0                         , 0  , 276 ), // #38
+  INST(Cas              , BaseAtomicOp       , (0b1000100010100000011111, kWX, 30, 0)                                                , kRWI_XRX  , 0                         , 0  , 284 ), // #39
+  INST(Casa             , BaseAtomicOp       , (0b1000100011100000011111, kWX, 30, 1)                                                , kRWI_XRX  , 0                         , 1  , 288 ), // #40
+  INST(Casab            , BaseAtomicOp       , (0b0000100011100000011111, kW , 0 , 1)                                                , kRWI_XRX  , 0                         , 2  , 293 ), // #41
+  INST(Casah            , BaseAtomicOp       , (0b0100100011100000011111, kW , 0 , 1)                                                , kRWI_XRX  , 0                         , 3  , 299 ), // #42
+  INST(Casal            , BaseAtomicOp       , (0b1000100011100000111111, kWX, 30, 1)                                                , kRWI_XRX  , 0                         , 4  , 305 ), // #43
+  INST(Casalb           , BaseAtomicOp       , (0b0000100011100000111111, kW , 0 , 1)                                                , kRWI_XRX  , 0                         , 5  , 311 ), // #44
+  INST(Casalh           , BaseAtomicOp       , (0b0100100011100000111111, kW , 0 , 1)                                                , kRWI_XRX  , 0                         , 6  , 318 ), // #45
+  INST(Casb             , BaseAtomicOp       , (0b0000100010100000011111, kW , 0 , 0)                                                , kRWI_XRX  , 0                         , 7  , 325 ), // #46
+  INST(Cash             , BaseAtomicOp       , (0b0100100010100000011111, kW , 0 , 0)                                                , kRWI_XRX  , 0                         , 8  , 330 ), // #47
+  INST(Casl             , BaseAtomicOp       , (0b1000100010100000111111, kWX, 30, 0)                                                , kRWI_XRX  , 0                         , 9  , 335 ), // #48
+  INST(Caslb            , BaseAtomicOp       , (0b0000100010100000111111, kW , 0 , 0)                                                , kRWI_XRX  , 0                         , 10 , 340 ), // #49
+  INST(Caslh            , BaseAtomicOp       , (0b0100100010100000111111, kW , 0 , 0)                                                , kRWI_XRX  , 0                         , 11 , 346 ), // #50
+  INST(Casp             , BaseAtomicCasp     , (0b0000100000100000011111, kWX, 30)                                                   , kRWI_XXRRX, 0                         , 0  , 352 ), // #51
+  INST(Caspa            , BaseAtomicCasp     , (0b0000100001100000011111, kWX, 30)                                                   , kRWI_XXRRX, 0                         , 1  , 357 ), // #52
+  INST(Caspal           , BaseAtomicCasp     , (0b0000100001100000111111, kWX, 30)                                                   , kRWI_XXRRX, 0                         , 2  , 363 ), // #53
+  INST(Caspl            , BaseAtomicCasp     , (0b0000100000100000111111, kWX, 30)                                                   , kRWI_XXRRX, 0                         , 3  , 370 ), // #54
+  INST(Cbnz             , BaseBranchCmp      , (0b00110101000000000000000000000000)                                                  , kRWI_R    , 0                         , 0  , 376 ), // #55
+  INST(Cbz              , BaseBranchCmp      , (0b00110100000000000000000000000000)                                                  , kRWI_R    , 0                         , 1  , 381 ), // #56
+  INST(Ccmn             , BaseCCmp           , (0b00111010010000000000000000000000)                                                  , kRWI_R    , 0                         , 0  , 385 ), // #57
+  INST(Ccmp             , BaseCCmp           , (0b01111010010000000000000000000000)                                                  , kRWI_R    , 0                         , 1  , 650 ), // #58
+  INST(Cfinv            , BaseOp             , (0b11010101000000000100000000011111)                                                  , 0         , 0                         , 7  , 390 ), // #59
+  INST(Cinc             , BaseCInc           , (0b00011010100000000000010000000000)                                                  , kRWI_W    , 0                         , 0  , 396 ), // #60
+  INST(Cinv             , BaseCInc           , (0b01011010100000000000000000000000)                                                  , kRWI_W    , 0                         , 1  , 401 ), // #61
+  INST(Clrex            , BaseOpImm          , (0b11010101000000110011000001011111, 4, 8)                                            , 0         , 0                         , 1  , 406 ), // #62
+  INST(Cls              , BaseRR             , (0b01011010110000000001010000000000, kWX, kZR, 0, kWX, kZR, 5, true)                  , kRWI_W    , 0                         , 4  , 412 ), // #63
+  INST(Clz              , BaseRR             , (0b01011010110000000001000000000000, kWX, kZR, 0, kWX, kZR, 5, true)                  , kRWI_W    , 0                         , 5  , 416 ), // #64
+  INST(Cmn              , BaseCmpCmn         , (0b0101011000, 0b0101011001, 0b0110001)                                               , kRWI_R    , 0                         , 0  , 386 ), // #65
+  INST(Cmp              , BaseCmpCmn         , (0b1101011000, 0b1101011001, 0b1110001)                                               , kRWI_R    , 0                         , 1  , 651 ), // #66
+  INST(Cmpp             , BaseRR             , (0b10111010110000000000000000011111, kX, kSP, 5, kX, kSP, 16, true)                   , kRWI_R    , 0                         , 6  , 430 ), // #67
+  INST(Cneg             , BaseCInc           , (0b01011010100000000000010000000000)                                                  , kRWI_W    , 0                         , 2  , 441 ), // #68
+  INST(Crc32b           , BaseRRR            , (0b0001101011000000010000, kW, kZR, kW, kZR, kW, kZR, false)                          , kRWI_W    , 0                         , 2  , 450 ), // #69
+  INST(Crc32cb          , BaseRRR            , (0b0001101011000000010100, kW, kZR, kW, kZR, kW, kZR, false)                          , kRWI_W    , 0                         , 3  , 457 ), // #70
+  INST(Crc32ch          , BaseRRR            , (0b0001101011000000010101, kW, kZR, kW, kZR, kW, kZR, false)                          , kRWI_W    , 0                         , 4  , 465 ), // #71
+  INST(Crc32cw          , BaseRRR            , (0b0001101011000000010110, kW, kZR, kW, kZR, kW, kZR, false)                          , kRWI_W    , 0                         , 5  , 473 ), // #72
+  INST(Crc32cx          , BaseRRR            , (0b1001101011000000010111, kW, kZR, kW, kZR, kX, kZR, false)                          , kRWI_W    , 0                         , 6  , 481 ), // #73
+  INST(Crc32h           , BaseRRR            , (0b0001101011000000010001, kW, kZR, kW, kZR, kW, kZR, false)                          , kRWI_W    , 0                         , 7  , 489 ), // #74
+  INST(Crc32w           , BaseRRR            , (0b0001101011000000010010, kW, kZR, kW, kZR, kW, kZR, false)                          , kRWI_W    , 0                         , 8  , 496 ), // #75
+  INST(Crc32x           , BaseRRR            , (0b1001101011000000010011, kW, kZR, kW, kZR, kX, kZR, false)                          , kRWI_W    , 0                         , 9  , 503 ), // #76
+  INST(Csdb             , BaseOp             , (0b11010101000000110010001010011111)                                                  , 0         , 0                         , 8  , 510 ), // #77
+  INST(Csel             , BaseCSel           , (0b00011010100000000000000000000000)                                                  , kRWI_W    , 0                         , 0  , 710 ), // #78
+  INST(Cset             , BaseCSet           , (0b00011010100111110000011111100000)                                                  , kRWI_W    , 0                         , 0  , 515 ), // #79
+  INST(Csetm            , BaseCSet           , (0b01011010100111110000001111100000)                                                  , kRWI_W    , 0                         , 1  , 520 ), // #80
+  INST(Csinc            , BaseCSel           , (0b00011010100000000000010000000000)                                                  , kRWI_W    , 0                         , 1  , 526 ), // #81
+  INST(Csinv            , BaseCSel           , (0b01011010100000000000000000000000)                                                  , kRWI_W    , 0                         , 2  , 532 ), // #82
+  INST(Csneg            , BaseCSel           , (0b01011010100000000000010000000000)                                                  , kRWI_W    , 0                         , 3  , 538 ), // #83
+  INST(Dc               , BaseAtDcIcTlbi     , (0b00011110000000, 0b00001110000000, true)                                            , kRWI_RX   , 0                         , 1  , 2   ), // #84
+  INST(Dcps1            , BaseOpImm          , (0b11010100101000000000000000000001, 16, 5)                                           , 0         , 0                         , 2  , 544 ), // #85
+  INST(Dcps2            , BaseOpImm          , (0b11010100101000000000000000000010, 16, 5)                                           , 0         , 0                         , 3  , 550 ), // #86
+  INST(Dcps3            , BaseOpImm          , (0b11010100101000000000000000000011, 16, 5)                                           , 0         , 0                         , 4  , 556 ), // #87
+  INST(Dgh              , BaseOp             , (0b11010101000000110010000011011111)                                                  , 0         , 0                         , 9  , 562 ), // #88
+  INST(Dmb              , BaseOpImm          , (0b11010101000000110011000010111111, 4, 8)                                            , 0         , 0                         , 5  , 566 ), // #89
+  INST(Drps             , BaseOp             , (0b11010110101111110000001111100000)                                                  , 0         , 0                         , 10 , 570 ), // #90
+  INST(Dsb              , BaseOpImm          , (0b11010101000000110011000010011111, 4, 8)                                            , 0         , 0                         , 6  , 575 ), // #91
+  INST(Eon              , BaseLogical        , (0b1001010001, 0b10100100, 1)                                                         , kRWI_W    , 0                         , 4  , 583 ), // #92
+  INST(Eor              , BaseLogical        , (0b1001010000, 0b10100100, 0)                                                         , kRWI_W    , 0                         , 5  , 1418), // #93
+  INST(Esb              , BaseOp             , (0b11010101000000110010001000011111)                                                  , 0         , 0                         , 11 , 597 ), // #94
+  INST(Extr             , BaseExtract        , (0b00010011100000000000000000000000)                                                  , kRWI_W    , 0                         , 0  , 605 ), // #95
+  INST(Eret             , BaseOp             , (0b11010110100111110000001111100000)                                                  , 0         , 0                         , 12 , 592 ), // #96
+  INST(Gmi              , BaseRRR            , (0b1001101011000000000101, kX , kZR, kX , kSP, kX , kZR, true)                        , kRWI_W    , 0                         , 10 , 1128), // #97
+  INST(Hint             , BaseOpImm          , (0b11010101000000110010000000011111, 7, 5)                                            , 0         , 0                         , 7  , 1132), // #98
+  INST(Hlt              , BaseOpImm          , (0b11010100010000000000000000000000, 16, 5)                                           , 0         , 0                         , 8  , 1137), // #99
+  INST(Hvc              , BaseOpImm          , (0b11010100000000000000000000000010, 16, 5)                                           , 0         , 0                         , 9  , 1141), // #100
+  INST(Ic               , BaseAtDcIcTlbi     , (0b00011110000000, 0b00001110000000, false)                                           , kRWI_RX   , 0                         , 2  , 257 ), // #101
+  INST(Isb              , BaseOpImm          , (0b11010101000000110011000011011111, 4, 8)                                            , 0         , 0                         , 10 , 1149), // #102
+  INST(Ldadd            , BaseAtomicOp       , (0b1011100000100000000000, kWX, 30, 0)                                                , kRWI_WRX  , 0                         , 12 , 1189), // #103
+  INST(Ldadda           , BaseAtomicOp       , (0b1011100010100000000000, kWX, 30, 1)                                                , kRWI_WRX  , 0                         , 13 , 1195), // #104
+  INST(Ldaddab          , BaseAtomicOp       , (0b0011100010100000000000, kW , 0 , 1)                                                , kRWI_WRX  , 0                         , 14 , 1202), // #105
+  INST(Ldaddah          , BaseAtomicOp       , (0b0111100010100000000000, kW , 0 , 1)                                                , kRWI_WRX  , 0                         , 15 , 1210), // #106
+  INST(Ldaddal          , BaseAtomicOp       , (0b1011100011100000000000, kWX, 30, 1)                                                , kRWI_WRX  , 0                         , 16 , 1218), // #107
+  INST(Ldaddalb         , BaseAtomicOp       , (0b0011100011100000000000, kW , 0 , 1)                                                , kRWI_WRX  , 0                         , 17 , 1226), // #108
+  INST(Ldaddalh         , BaseAtomicOp       , (0b0111100011100000000000, kW , 0 , 1)                                                , kRWI_WRX  , 0                         , 18 , 1235), // #109
+  INST(Ldaddb           , BaseAtomicOp       , (0b0011100000100000000000, kW , 0 , 0)                                                , kRWI_WRX  , 0                         , 19 , 1244), // #110
+  INST(Ldaddh           , BaseAtomicOp       , (0b0111100000100000000000, kW , 0 , 0)                                                , kRWI_WRX  , 0                         , 20 , 1251), // #111
+  INST(Ldaddl           , BaseAtomicOp       , (0b1011100001100000000000, kWX, 30, 0)                                                , kRWI_WRX  , 0                         , 21 , 1258), // #112
+  INST(Ldaddlb          , BaseAtomicOp       , (0b0011100001100000000000, kW , 0 , 0)                                                , kRWI_WRX  , 0                         , 22 , 1265), // #113
+  INST(Ldaddlh          , BaseAtomicOp       , (0b0111100001100000000000, kW , 0 , 0)                                                , kRWI_WRX  , 0                         , 23 , 1273), // #114
+  INST(Ldar             , BaseRM_NoImm       , (0b1000100011011111111111, kWX, kZR, 30)                                              , kRWI_W    , 0                         , 0  , 1281), // #115
+  INST(Ldarb            , BaseRM_NoImm       , (0b0000100011011111111111, kW , kZR, 0 )                                              , kRWI_W    , 0                         , 1  , 1286), // #116
+  INST(Ldarh            , BaseRM_NoImm       , (0b0100100011011111111111, kW , kZR, 0 )                                              , kRWI_W    , 0                         , 2  , 1292), // #117
+  INST(Ldaxp            , BaseLdxp           , (0b1000100001111111100000, kWX, 30)                                                   , kRWI_WW   , 0                         , 0  , 1298), // #118
+  INST(Ldaxr            , BaseRM_NoImm       , (0b1000100001011111111111, kWX, kZR, 30)                                              , kRWI_W    , 0                         , 3  , 1304), // #119
+  INST(Ldaxrb           , BaseRM_NoImm       , (0b0000100001011111111111, kW , kZR, 0 )                                              , kRWI_W    , 0                         , 4  , 1310), // #120
+  INST(Ldaxrh           , BaseRM_NoImm       , (0b0100100001011111111111, kW , kZR, 0 )                                              , kRWI_W    , 0                         , 5  , 1317), // #121
+  INST(Ldclr            , BaseAtomicOp       , (0b1011100000100000000100, kWX, 30, 0)                                                , kRWI_WRX  , 0                         , 24 , 1324), // #122
+  INST(Ldclra           , BaseAtomicOp       , (0b1011100010100000000100, kWX, 30, 1)                                                , kRWI_WRX  , 0                         , 25 , 1330), // #123
+  INST(Ldclrab          , BaseAtomicOp       , (0b0011100010100000000100, kW , 0 , 1)                                                , kRWI_WRX  , 0                         , 26 , 1337), // #124
+  INST(Ldclrah          , BaseAtomicOp       , (0b0111100010100000000100, kW , 0 , 1)                                                , kRWI_WRX  , 0                         , 27 , 1345), // #125
+  INST(Ldclral          , BaseAtomicOp       , (0b1011100011100000000100, kWX, 30, 1)                                                , kRWI_WRX  , 0                         , 28 , 1353), // #126
+  INST(Ldclralb         , BaseAtomicOp       , (0b0011100011100000000100, kW , 0 , 1)                                                , kRWI_WRX  , 0                         , 29 , 1361), // #127
+  INST(Ldclralh         , BaseAtomicOp       , (0b0111100011100000000100, kW , 0 , 1)                                                , kRWI_WRX  , 0                         , 30 , 1370), // #128
+  INST(Ldclrb           , BaseAtomicOp       , (0b0011100000100000000100, kW , 0 , 0)                                                , kRWI_WRX  , 0                         , 31 , 1379), // #129
+  INST(Ldclrh           , BaseAtomicOp       , (0b0111100000100000000100, kW , 0 , 0)                                                , kRWI_WRX  , 0                         , 32 , 1386), // #130
+  INST(Ldclrl           , BaseAtomicOp       , (0b1011100001100000000100, kWX, 30, 0)                                                , kRWI_WRX  , 0                         , 33 , 1393), // #131
+  INST(Ldclrlb          , BaseAtomicOp       , (0b0011100001100000000100, kW , 0 , 0)                                                , kRWI_WRX  , 0                         , 34 , 1400), // #132
+  INST(Ldclrlh          , BaseAtomicOp       , (0b0111100001100000000100, kW , 0 , 0)                                                , kRWI_WRX  , 0                         , 35 , 1408), // #133
+  INST(Ldeor            , BaseAtomicOp       , (0b1011100000100000001000, kWX, 30, 0)                                                , kRWI_WRX  , 0                         , 36 , 1416), // #134
+  INST(Ldeora           , BaseAtomicOp       , (0b1011100010100000001000, kWX, 30, 1)                                                , kRWI_WRX  , 0                         , 37 , 1422), // #135
+  INST(Ldeorab          , BaseAtomicOp       , (0b0011100010100000001000, kW , 0 , 1)                                                , kRWI_WRX  , 0                         , 38 , 1429), // #136
+  INST(Ldeorah          , BaseAtomicOp       , (0b0111100010100000001000, kW , 0 , 1)                                                , kRWI_WRX  , 0                         , 39 , 1437), // #137
+  INST(Ldeoral          , BaseAtomicOp       , (0b1011100011100000001000, kWX, 30, 1)                                                , kRWI_WRX  , 0                         , 40 , 1445), // #138
+  INST(Ldeoralb         , BaseAtomicOp       , (0b0011100011100000001000, kW , 0 , 1)                                                , kRWI_WRX  , 0                         , 41 , 1453), // #139
+  INST(Ldeoralh         , BaseAtomicOp       , (0b0111100011100000001000, kW , 0 , 1)                                                , kRWI_WRX  , 0                         , 42 , 1462), // #140
+  INST(Ldeorb           , BaseAtomicOp       , (0b0011100000100000001000, kW , 0 , 0)                                                , kRWI_WRX  , 0                         , 43 , 1471), // #141
+  INST(Ldeorh           , BaseAtomicOp       , (0b0111100000100000001000, kW , 0 , 0)                                                , kRWI_WRX  , 0                         , 44 , 1478), // #142
+  INST(Ldeorl           , BaseAtomicOp       , (0b1011100001100000001000, kWX, 30, 0)                                                , kRWI_WRX  , 0                         , 45 , 1485), // #143
+  INST(Ldeorlb          , BaseAtomicOp       , (0b0011100001100000001000, kW , 0 , 0)                                                , kRWI_WRX  , 0                         , 46 , 1492), // #144
+  INST(Ldeorlh          , BaseAtomicOp       , (0b0111100001100000001000, kW , 0 , 0)                                                , kRWI_WRX  , 0                         , 47 , 1500), // #145
+  INST(Ldg              , BaseRM_SImm9       , (0b1101100101100000000000, 0b0000000000000000000000, kX , kZR, 0, 4)                  , kRWI_W    , 0                         , 0  , 1508), // #146
+  INST(Ldgm             , BaseRM_NoImm       , (0b1101100111100000000000, kX , kZR, 0 )                                              , kRWI_W    , 0                         , 6  , 1512), // #147
+  INST(Ldlar            , BaseRM_NoImm       , (0b1000100011011111011111, kWX, kZR, 30)                                              , kRWI_W    , 0                         , 7  , 1517), // #148
+  INST(Ldlarb           , BaseRM_NoImm       , (0b0000100011011111011111, kW , kZR, 0 )                                              , kRWI_W    , 0                         , 8  , 1523), // #149
+  INST(Ldlarh           , BaseRM_NoImm       , (0b0100100011011111011111, kW , kZR, 0 )                                              , kRWI_W    , 0                         , 9  , 1530), // #150
+  INST(Ldnp             , BaseLdpStp         , (0b0010100001, 0           , kWX, 31, 2)                                              , kRWI_WW   , 0                         , 0  , 1537), // #151
+  INST(Ldp              , BaseLdpStp         , (0b0010100101, 0b0010100011, kWX, 31, 2)                                              , kRWI_WW   , 0                         , 1  , 1542), // #152
+  INST(Ldpsw            , BaseLdpStp         , (0b0110100101, 0b0110100011, kX , 0 , 2)                                              , kRWI_WW   , 0                         , 2  , 1546), // #153
+  INST(Ldr              , BaseLdSt           , (0b1011100101, 0b10111000010, 0b10111000011, 0b00011000, kWX, 30, 2, Inst::kIdLdur)   , kRWI_W    , 0                         , 0  , 1552), // #154
+  INST(Ldraa            , BaseRM_SImm10      , (0b1111100000100000000001, kX , kZR, 0, 3)                                            , kRWI_W    , 0                         , 0  , 1556), // #155
+  INST(Ldrab            , BaseRM_SImm10      , (0b1111100010100000000001, kX , kZR, 0, 3)                                            , kRWI_W    , 0                         , 1  , 1562), // #156
+  INST(Ldrb             , BaseLdSt           , (0b0011100101, 0b00111000010, 0b00111000011, 0         , kW , 0 , 0, Inst::kIdLdurb)  , kRWI_W    , 0                         , 1  , 1568), // #157
+  INST(Ldrh             , BaseLdSt           , (0b0111100101, 0b01111000010, 0b01111000011, 0         , kW , 0 , 1, Inst::kIdLdurh)  , kRWI_W    , 0                         , 2  , 1573), // #158
+  INST(Ldrsb            , BaseLdSt           , (0b0011100111, 0b00111000100, 0b00111000101, 0         , kWX, 22, 0, Inst::kIdLdursb) , kRWI_W    , 0                         , 3  , 1578), // #159
+  INST(Ldrsh            , BaseLdSt           , (0b0111100110, 0b01111000100, 0b01111000101, 0         , kWX, 22, 1, Inst::kIdLdursh) , kRWI_W    , 0                         , 4  , 1584), // #160
+  INST(Ldrsw            , BaseLdSt           , (0b1011100110, 0b10111000100, 0b10111000101, 0b10011000, kX , 0 , 2, Inst::kIdLdursw) , kRWI_W    , 0                         , 5  , 1590), // #161
+  INST(Ldset            , BaseAtomicOp       , (0b1011100000100000001100, kWX, 30, 0)                                                , kRWI_WRX  , 0                         , 48 , 1596), // #162
+  INST(Ldseta           , BaseAtomicOp       , (0b1011100010100000001100, kWX, 30, 1)                                                , kRWI_WRX  , 0                         , 49 , 1602), // #163
+  INST(Ldsetab          , BaseAtomicOp       , (0b0011100010100000001100, kW , 0 , 1)                                                , kRWI_WRX  , 0                         , 50 , 1609), // #164
+  INST(Ldsetah          , BaseAtomicOp       , (0b0111100010100000001100, kW , 0 , 1)                                                , kRWI_WRX  , 0                         , 51 , 1617), // #165
+  INST(Ldsetal          , BaseAtomicOp       , (0b1011100011100000001100, kWX, 30, 1)                                                , kRWI_WRX  , 0                         , 52 , 1625), // #166
+  INST(Ldsetalb         , BaseAtomicOp       , (0b0011100011100000001100, kW , 0 , 1)                                                , kRWI_WRX  , 0                         , 53 , 1633), // #167
+  INST(Ldsetalh         , BaseAtomicOp       , (0b0111100011100000001100, kW , 0 , 1)                                                , kRWI_WRX  , 0                         , 54 , 1642), // #168
+  INST(Ldsetb           , BaseAtomicOp       , (0b0011100000100000001100, kW , 0 , 0)                                                , kRWI_WRX  , 0                         , 55 , 1651), // #169
+  INST(Ldseth           , BaseAtomicOp       , (0b0111100000100000001100, kW , 0 , 0)                                                , kRWI_WRX  , 0                         , 56 , 1658), // #170
+  INST(Ldsetl           , BaseAtomicOp       , (0b1011100001100000001100, kWX, 30, 0)                                                , kRWI_WRX  , 0                         , 57 , 1665), // #171
+  INST(Ldsetlb          , BaseAtomicOp       , (0b0011100001100000001100, kW , 0 , 0)                                                , kRWI_WRX  , 0                         , 58 , 1672), // #172
+  INST(Ldsetlh          , BaseAtomicOp       , (0b0111100001100000001100, kW , 0 , 0)                                                , kRWI_WRX  , 0                         , 59 , 1680), // #173
+  INST(Ldsmax           , BaseAtomicOp       , (0b1011100000100000010000, kWX, 30, 0)                                                , kRWI_WRX  , 0                         , 60 , 1688), // #174
+  INST(Ldsmaxa          , BaseAtomicOp       , (0b1011100010100000010000, kWX, 30, 1)                                                , kRWI_WRX  , 0                         , 61 , 1695), // #175
+  INST(Ldsmaxab         , BaseAtomicOp       , (0b0011100010100000010000, kW , 0 , 1)                                                , kRWI_WRX  , 0                         , 62 , 1703), // #176
+  INST(Ldsmaxah         , BaseAtomicOp       , (0b0111100010100000010000, kW , 0 , 1)                                                , kRWI_WRX  , 0                         , 63 , 1712), // #177
+  INST(Ldsmaxal         , BaseAtomicOp       , (0b1011100011100000010000, kWX, 30, 1)                                                , kRWI_WRX  , 0                         , 64 , 1721), // #178
+  INST(Ldsmaxalb        , BaseAtomicOp       , (0b0011100011100000010000, kW , 0 , 1)                                                , kRWI_WRX  , 0                         , 65 , 1730), // #179
+  INST(Ldsmaxalh        , BaseAtomicOp       , (0b0111100011100000010000, kW , 0 , 1)                                                , kRWI_WRX  , 0                         , 66 , 1740), // #180
+  INST(Ldsmaxb          , BaseAtomicOp       , (0b0011100000100000010000, kW , 0 , 0)                                                , kRWI_WRX  , 0                         , 67 , 1750), // #181
+  INST(Ldsmaxh          , BaseAtomicOp       , (0b0111100000100000010000, kW , 0 , 0)                                                , kRWI_WRX  , 0                         , 68 , 1758), // #182
+  INST(Ldsmaxl          , BaseAtomicOp       , (0b1011100001100000010000, kWX, 30, 0)                                                , kRWI_WRX  , 0                         , 69 , 1766), // #183
+  INST(Ldsmaxlb         , BaseAtomicOp       , (0b0011100001100000010000, kW , 0 , 0)                                                , kRWI_WRX  , 0                         , 70 , 1774), // #184
+  INST(Ldsmaxlh         , BaseAtomicOp       , (0b0111100001100000010000, kW , 0 , 0)                                                , kRWI_WRX  , 0                         , 71 , 1783), // #185
+  INST(Ldsmin           , BaseAtomicOp       , (0b1011100000100000010100, kWX, 30, 0)                                                , kRWI_WRX  , 0                         , 72 , 1792), // #186
+  INST(Ldsmina          , BaseAtomicOp       , (0b1011100010100000010100, kWX, 30, 1)                                                , kRWI_WRX  , 0                         , 73 , 1799), // #187
+  INST(Ldsminab         , BaseAtomicOp       , (0b0011100010100000010100, kW , 0 , 1)                                                , kRWI_WRX  , 0                         , 74 , 1807), // #188
+  INST(Ldsminah         , BaseAtomicOp       , (0b0111100010100000010100, kW , 0 , 1)                                                , kRWI_WRX  , 0                         , 75 , 1816), // #189
+  INST(Ldsminal         , BaseAtomicOp       , (0b1011100011100000010100, kWX, 30, 1)                                                , kRWI_WRX  , 0                         , 76 , 1825), // #190
+  INST(Ldsminalb        , BaseAtomicOp       , (0b0011100011100000010100, kW , 0 , 1)                                                , kRWI_WRX  , 0                         , 77 , 1834), // #191
+  INST(Ldsminalh        , BaseAtomicOp       , (0b0111100011100000010100, kW , 0 , 1)                                                , kRWI_WRX  , 0                         , 78 , 1844), // #192
+  INST(Ldsminb          , BaseAtomicOp       , (0b0011100000100000010100, kW , 0 , 0)                                                , kRWI_WRX  , 0                         , 79 , 1854), // #193
+  INST(Ldsminh          , BaseAtomicOp       , (0b0111100000100000010100, kW , 0 , 0)                                                , kRWI_WRX  , 0                         , 80 , 1862), // #194
+  INST(Ldsminl          , BaseAtomicOp       , (0b1011100001100000010100, kWX, 30, 0)                                                , kRWI_WRX  , 0                         , 81 , 1870), // #195
+  INST(Ldsminlb         , BaseAtomicOp       , (0b0011100001100000010100, kW , 0 , 0)                                                , kRWI_WRX  , 0                         , 82 , 1878), // #196
+  INST(Ldsminlh         , BaseAtomicOp       , (0b0111100001100000010100, kW , 0 , 0)                                                , kRWI_WRX  , 0                         , 83 , 1887), // #197
+  INST(Ldtr             , BaseRM_SImm9       , (0b1011100001000000000010, 0b0000000000000000000000, kWX, kZR, 30, 0)                 , kRWI_W    , 0                         , 1  , 1896), // #198
+  INST(Ldtrb            , BaseRM_SImm9       , (0b0011100001000000000010, 0b0000000000000000000000, kW , kZR, 0 , 0)                 , kRWI_W    , 0                         , 2  , 1901), // #199
+  INST(Ldtrh            , BaseRM_SImm9       , (0b0111100001000000000010, 0b0000000000000000000000, kW , kZR, 0 , 0)                 , kRWI_W    , 0                         , 3  , 1907), // #200
+  INST(Ldtrsb           , BaseRM_SImm9       , (0b0011100011000000000010, 0b0000000000000000000000, kWX, kZR, 22, 0)                 , kRWI_W    , 0                         , 4  , 1913), // #201
+  INST(Ldtrsh           , BaseRM_SImm9       , (0b0111100011000000000010, 0b0000000000000000000000, kWX, kZR, 22, 0)                 , kRWI_W    , 0                         , 5  , 1920), // #202
+  INST(Ldtrsw           , BaseRM_SImm9       , (0b1011100010000000000010, 0b0000000000000000000000, kX , kZR, 0 , 0)                 , kRWI_W    , 0                         , 6  , 1927), // #203
+  INST(Ldumax           , BaseAtomicOp       , (0b1011100000100000011000, kWX, 30, 0)                                                , kRWI_WRX  , 0                         , 84 , 1934), // #204
+  INST(Ldumaxa          , BaseAtomicOp       , (0b1011100010100000011000, kWX, 30, 1)                                                , kRWI_WRX  , 0                         , 85 , 1941), // #205
+  INST(Ldumaxab         , BaseAtomicOp       , (0b0011100010100000011000, kW , 0 , 1)                                                , kRWI_WRX  , 0                         , 86 , 1949), // #206
+  INST(Ldumaxah         , BaseAtomicOp       , (0b0111100010100000011000, kW , 0 , 1)                                                , kRWI_WRX  , 0                         , 87 , 1958), // #207
+  INST(Ldumaxal         , BaseAtomicOp       , (0b1011100011100000011000, kWX, 30, 1)                                                , kRWI_WRX  , 0                         , 88 , 1967), // #208
+  INST(Ldumaxalb        , BaseAtomicOp       , (0b0011100011100000011000, kW , 0 , 1)                                                , kRWI_WRX  , 0                         , 89 , 1976), // #209
+  INST(Ldumaxalh        , BaseAtomicOp       , (0b0111100011100000011000, kW , 0 , 1)                                                , kRWI_WRX  , 0                         , 90 , 1986), // #210
+  INST(Ldumaxb          , BaseAtomicOp       , (0b0011100000100000011000, kW , 0 , 0)                                                , kRWI_WRX  , 0                         , 91 , 1996), // #211
+  INST(Ldumaxh          , BaseAtomicOp       , (0b0111100000100000011000, kW , 0 , 0)                                                , kRWI_WRX  , 0                         , 92 , 2004), // #212
+  INST(Ldumaxl          , BaseAtomicOp       , (0b1011100001100000011000, kWX, 30, 0)                                                , kRWI_WRX  , 0                         , 93 , 2012), // #213
+  INST(Ldumaxlb         , BaseAtomicOp       , (0b0011100001100000011000, kW , 0 , 0)                                                , kRWI_WRX  , 0                         , 94 , 2020), // #214
+  INST(Ldumaxlh         , BaseAtomicOp       , (0b0111100001100000011000, kW , 0 , 0)                                                , kRWI_WRX  , 0                         , 95 , 2029), // #215
+  INST(Ldumin           , BaseAtomicOp       , (0b1011100000100000011100, kWX, 30, 0)                                                , kRWI_WRX  , 0                         , 96 , 2038), // #216
+  INST(Ldumina          , BaseAtomicOp       , (0b1011100010100000011100, kWX, 30, 1)                                                , kRWI_WRX  , 0                         , 97 , 2045), // #217
+  INST(Lduminab         , BaseAtomicOp       , (0b0011100010100000011100, kW , 0 , 1)                                                , kRWI_WRX  , 0                         , 98 , 2053), // #218
+  INST(Lduminah         , BaseAtomicOp       , (0b0111100010100000011100, kW , 0 , 1)                                                , kRWI_WRX  , 0                         , 99 , 2062), // #219
+  INST(Lduminal         , BaseAtomicOp       , (0b1011100011100000011100, kWX, 30, 1)                                                , kRWI_WRX  , 0                         , 100, 2071), // #220
+  INST(Lduminalb        , BaseAtomicOp       , (0b0011100011100000011100, kW , 0 , 1)                                                , kRWI_WRX  , 0                         , 101, 2080), // #221
+  INST(Lduminalh        , BaseAtomicOp       , (0b0111100011100000011100, kW , 0 , 1)                                                , kRWI_WRX  , 0                         , 102, 2090), // #222
+  INST(Lduminb          , BaseAtomicOp       , (0b0011100000100000011100, kW , 0 , 0)                                                , kRWI_WRX  , 0                         , 103, 2100), // #223
+  INST(Lduminh          , BaseAtomicOp       , (0b0111100000100000011100, kW , 0 , 0)                                                , kRWI_WRX  , 0                         , 104, 2108), // #224
+  INST(Lduminl          , BaseAtomicOp       , (0b1011100001100000011100, kWX, 30, 0)                                                , kRWI_WRX  , 0                         , 105, 2116), // #225
+  INST(Lduminlb         , BaseAtomicOp       , (0b0011100001100000011100, kW , 0 , 0)                                                , kRWI_WRX  , 0                         , 106, 2124), // #226
+  INST(Lduminlh         , BaseAtomicOp       , (0b0111100001100000011100, kW , 0 , 0)                                                , kRWI_WRX  , 0                         , 107, 2133), // #227
+  INST(Ldur             , BaseRM_SImm9       , (0b1011100001000000000000, 0b0000000000000000000000, kWX, kZR, 30, 0)                 , kRWI_W    , 0                         , 7  , 2142), // #228
+  INST(Ldurb            , BaseRM_SImm9       , (0b0011100001000000000000, 0b0000000000000000000000, kW , kZR, 0 , 0)                 , kRWI_W    , 0                         , 8  , 2147), // #229
+  INST(Ldurh            , BaseRM_SImm9       , (0b0111100001000000000000, 0b0000000000000000000000, kW , kZR, 0 , 0)                 , kRWI_W    , 0                         , 9  , 2153), // #230
+  INST(Ldursb           , BaseRM_SImm9       , (0b0011100011000000000000, 0b0000000000000000000000, kWX, kZR, 22, 0)                 , kRWI_W    , 0                         , 10 , 2159), // #231
+  INST(Ldursh           , BaseRM_SImm9       , (0b0111100011000000000000, 0b0000000000000000000000, kWX, kZR, 22, 0)                 , kRWI_W    , 0                         , 11 , 2166), // #232
+  INST(Ldursw           , BaseRM_SImm9       , (0b1011100010000000000000, 0b0000000000000000000000, kX , kZR, 0 , 0)                 , kRWI_W    , 0                         , 12 , 2173), // #233
+  INST(Ldxp             , BaseLdxp           , (0b1000100001111111000000, kWX, 30)                                                   , kRWI_WW   , 0                         , 1  , 2180), // #234
+  INST(Ldxr             , BaseRM_NoImm       , (0b1000100001011111011111, kWX, kZR, 30)                                              , kRWI_W    , 0                         , 10 , 2185), // #235
+  INST(Ldxrb            , BaseRM_NoImm       , (0b0000100001011111011111, kW , kZR, 0 )                                              , kRWI_W    , 0                         , 11 , 2190), // #236
+  INST(Ldxrh            , BaseRM_NoImm       , (0b0100100001011111011111, kW , kZR, 0 )                                              , kRWI_W    , 0                         , 12 , 2196), // #237
+  INST(Lsl              , BaseShift          , (0b0001101011000000001000, 0b0101001100000000000000, 0)                               , kRWI_W    , 0                         , 2  , 2880), // #238
+  INST(Lslv             , BaseShift          , (0b0001101011000000001000, 0b0000000000000000000000, 0)                               , kRWI_W    , 0                         , 3  , 2202), // #239
+  INST(Lsr              , BaseShift          , (0b0001101011000000001001, 0b0101001100000000011111, 0)                               , kRWI_W    , 0                         , 4  , 2207), // #240
+  INST(Lsrv             , BaseShift          , (0b0001101011000000001001, 0b0000000000000000000000, 0)                               , kRWI_W    , 0                         , 5  , 2211), // #241
+  INST(Madd             , BaseRRRR           , (0b0001101100000000000000, kWX, kZR, kWX, kZR, kWX, kZR, kWX, kZR, true)              , kRWI_W    , 0                         , 0  , 977 ), // #242
+  INST(Mneg             , BaseRRR            , (0b0001101100000000111111, kWX, kZR, kWX, kZR, kWX, kZR, true)                        , kRWI_W    , 0                         , 11 , 2216), // #243
+  INST(Mov              , BaseMov            , (_)                                                                                   , kRWI_W    , 0                         , 0  , 949 ), // #244
+  INST(Movk             , BaseMovKNZ         , (0b01110010100000000000000000000000)                                                  , kRWI_X    , 0                         , 0  , 2226), // #245
+  INST(Movn             , BaseMovKNZ         , (0b00010010100000000000000000000000)                                                  , kRWI_W    , 0                         , 1  , 2231), // #246
+  INST(Movz             , BaseMovKNZ         , (0b01010010100000000000000000000000)                                                  , kRWI_W    , 0                         , 2  , 2236), // #247
+  INST(Mrs              , BaseMrs            , (_)                                                                                   , kRWI_W    , 0                         , 0  , 2241), // #248
+  INST(Msr              , BaseMsr            , (_)                                                                                   , kRWI_W    , 0                         , 0  , 2245), // #249
+  INST(Msub             , BaseRRRR           , (0b0001101100000000100000, kWX, kZR, kWX, kZR, kWX, kZR, kWX, kZR, true)              , kRWI_W    , 0                         , 1  , 984 ), // #250
+  INST(Mul              , BaseRRR            , (0b0001101100000000011111, kWX, kZR, kWX, kZR, kWX, kZR, true)                        , kRWI_W    , 0                         , 12 , 991 ), // #251
+  INST(Mvn              , BaseMvnNeg         , (0b00101010001000000000001111100000)                                                  , kRWI_W    , 0                         , 0  , 2249), // #252
+  INST(Neg              , BaseMvnNeg         , (0b01001011000000000000001111100000)                                                  , kRWI_W    , 0                         , 1  , 540 ), // #253
+  INST(Negs             , BaseMvnNeg         , (0b01101011000000000000001111100000)                                                  , kRWI_W    , 0                         , 2  , 2258), // #254
+  INST(Ngc              , BaseRR             , (0b01011010000000000000001111100000, kWX, kZR, 0, kWX, kZR, 16, true)                 , kRWI_W    , 0                         , 7  , 2263), // #255
+  INST(Ngcs             , BaseRR             , (0b01111010000000000000001111100000, kWX, kZR, 0, kWX, kZR, 16, true)                 , kRWI_W    , 0                         , 8  , 2267), // #256
+  INST(Nop              , BaseOp             , (0b11010101000000110010000000011111)                                                  , 0         , 0                         , 13 , 2272), // #257
+  INST(Orn              , BaseLogical        , (0b0101010001, 0b01100100, 1)                                                         , kRWI_W    , 0                         , 6  , 2280), // #258
+  INST(Orr              , BaseLogical        , (0b0101010000, 0b01100100, 0)                                                         , kRWI_W    , 0                         , 7  , 2284), // #259
+  INST(Pacda            , BaseRR             , (0b11011010110000010000100000000000, kX, kZR, 0, kX, kSP, 5, true)                    , kRWI_X    , 0                         , 9  , 2288), // #260
+  INST(Pacdb            , BaseRR             , (0b11011010110000010000110000000000, kX, kZR, 0, kX, kSP, 5, true)                    , kRWI_X    , 0                         , 10 , 2294), // #261
+  INST(Pacdza           , BaseR              , (0b11011010110000010010101111100000, kX, kZR, 0)                                      , kRWI_X    , 0                         , 4  , 2300), // #262
+  INST(Pacdzb           , BaseR              , (0b11011010110000010010111111100000, kX, kZR, 0)                                      , kRWI_X    , 0                         , 5  , 2307), // #263
+  INST(Pacga            , BaseRRR            , (0b1001101011000000001100, kX, kZR, kX, kZR, kX, kSP, false)                          , kRWI_W    , 0                         , 13 , 2314), // #264
+  INST(Pssbb            , BaseOp             , (0b11010101000000110011010010011111)                                                  , 0         , 0                         , 14 , 2338), // #265
+  INST(Rbit             , BaseRR             , (0b01011010110000000000000000000000, kWX, kZR, 0, kWX, kZR, 5, true)                  , kRWI_W    , 0                         , 11 , 2364), // #266
+  INST(Ret              , BaseBranchReg      , (0b11010110010111110000000000000000)                                                  , kRWI_R    , 0                         , 2  , 593 ), // #267
+  INST(Rev              , BaseRev            , (_)                                                                                   , kRWI_W    , 0                         , 0  , 2369), // #268
+  INST(Rev16            , BaseRR             , (0b01011010110000000000010000000000, kWX, kZR, 0, kWX, kZR, 5, true)                  , kRWI_W    , 0                         , 12 , 2373), // #269
+  INST(Rev32            , BaseRR             , (0b11011010110000000000100000000000, kWX, kZR, 0, kWX, kZR, 5, true)                  , kRWI_W    , 0                         , 13 , 2379), // #270
+  INST(Rev64            , BaseRR             , (0b11011010110000000000110000000000, kWX, kZR, 0, kWX, kZR, 5, true)                  , kRWI_W    , 0                         , 14 , 2385), // #271
+  INST(Ror              , BaseShift          , (0b0001101011000000001011, 0b0001001110000000000000, 1)                               , kRWI_W    , 0                         , 6  , 2391), // #272
+  INST(Rorv             , BaseShift          , (0b0001101011000000001011, 0b0000000000000000000000, 1)                               , kRWI_W    , 0                         , 7  , 2395), // #273
+  INST(Sbc              , BaseRRR            , (0b0101101000000000000000, kWX, kZR, kWX, kZR, kWX, kZR, true)                        , kRWI_W    , 0                         , 14 , 2498), // #274
+  INST(Sbcs             , BaseRRR            , (0b0111101000000000000000, kWX, kZR, kWX, kZR, kWX, kZR, true)                        , kRWI_W    , 0                         , 15 , 2502), // #275
+  INST(Sbfiz            , BaseBfi            , (0b00010011000000000000000000000000)                                                  , kRWI_W    , 0                         , 1  , 2507), // #276
+  INST(Sbfm             , BaseBfm            , (0b00010011000000000000000000000000)                                                  , kRWI_W    , 0                         , 1  , 2513), // #277
+  INST(Sbfx             , BaseBfx            , (0b00010011000000000000000000000000)                                                  , kRWI_W    , 0                         , 1  , 2518), // #278
+  INST(Sdiv             , BaseRRR            , (0b0001101011000000000011, kWX, kZR, kWX, kZR, kWX, kZR, true)                        , kRWI_W    , 0                         , 16 , 2529), // #279
+  INST(Setf8            , BaseR              , (0b00111010000000000000100000001101, kW, kZR, 5)                                      , 0         , 0                         , 6  , 2541), // #280
+  INST(Setf16           , BaseR              , (0b00111010000000000100100000001101, kW, kZR, 5)                                      , 0         , 0                         , 7  , 2534), // #281
+  INST(Sev              , BaseOp             , (0b11010101000000110010000010011111)                                                  , 0         , 0                         , 15 , 2547), // #282
+  INST(Sevl             , BaseOp             , (0b11010101000000110010000010111111)                                                  , 0         , 0                         , 16 , 2551), // #283
+  INST(Smaddl           , BaseRRRR           , (0b1001101100100000000000, kX , kZR, kW , kZR, kW , kZR, kX , kZR, false)             , kRWI_W    , 0                         , 2  , 2758), // #284
+  INST(Smc              , BaseOpImm          , (0b11010100000000000000000000000011, 16, 5)                                           , 0         , 0                         , 11 , 53  ), // #285
+  INST(Smnegl           , BaseRRR            , (0b1001101100100000111111, kX , kZR, kW , kZR, kW , kZR, false)                       , kRWI_W    , 0                         , 17 , 2815), // #286
+  INST(Smsubl           , BaseRRRR           , (0b1001101100100000100000, kX , kZR, kW , kZR, kW , kZR, kX , kZR, false)             , kRWI_W    , 0                         , 3  , 2827), // #287
+  INST(Smulh            , BaseRRR            , (0b1001101101000000011111, kX , kZR, kX , kZR, kX , kZR, true)                        , kRWI_W    , 0                         , 18 , 2834), // #288
+  INST(Smull            , BaseRRR            , (0b1001101100100000011111, kX , kZR, kW , kZR, kW , kZR, false)                       , kRWI_W    , 0                         , 19 , 2840), // #289
+  INST(Ssbb             , BaseOp             , (0b11010101000000110011000010011111)                                                  , 0         , 0                         , 17 , 2339), // #290
+  INST(St2g             , BaseRM_SImm9       , (0b1101100110100000000010, 0b1101100110100000000001, kX, kSP, 0, 4)                   , kRWI_RW   , 0                         , 13 , 3164), // #291
+  INST(Stadd            , BaseAtomicSt       , (0b1011100000100000000000, kWX, 30)                                                   , kRWI_RX   , 0                         , 0  , 3177), // #292
+  INST(Staddl           , BaseAtomicSt       , (0b1011100001100000000000, kWX, 30)                                                   , kRWI_RX   , 0                         , 1  , 3197), // #293
+  INST(Staddb           , BaseAtomicSt       , (0b0011100000100000000000, kW , 0 )                                                   , kRWI_RX   , 0                         , 2  , 3183), // #294
+  INST(Staddlb          , BaseAtomicSt       , (0b0011100001100000000000, kW , 0 )                                                   , kRWI_RX   , 0                         , 3  , 3204), // #295
+  INST(Staddh           , BaseAtomicSt       , (0b0111100000100000000000, kW , 0 )                                                   , kRWI_RX   , 0                         , 4  , 3190), // #296
+  INST(Staddlh          , BaseAtomicSt       , (0b0111100001100000000000, kW , 0 )                                                   , kRWI_RX   , 0                         , 5  , 3212), // #297
+  INST(Stclr            , BaseAtomicSt       , (0b1011100000100000000100, kWX, 30)                                                   , kRWI_RX   , 0                         , 6  , 3220), // #298
+  INST(Stclrl           , BaseAtomicSt       , (0b1011100001100000000100, kWX, 30)                                                   , kRWI_RX   , 0                         , 7  , 3240), // #299
+  INST(Stclrb           , BaseAtomicSt       , (0b0011100000100000000100, kW , 0 )                                                   , kRWI_RX   , 0                         , 8  , 3226), // #300
+  INST(Stclrlb          , BaseAtomicSt       , (0b0011100001100000000100, kW , 0 )                                                   , kRWI_RX   , 0                         , 9  , 3247), // #301
+  INST(Stclrh           , BaseAtomicSt       , (0b0111100000100000000100, kW , 0 )                                                   , kRWI_RX   , 0                         , 10 , 3233), // #302
+  INST(Stclrlh          , BaseAtomicSt       , (0b0111100001100000000100, kW , 0 )                                                   , kRWI_RX   , 0                         , 11 , 3255), // #303
+  INST(Steor            , BaseAtomicSt       , (0b1011100000100000001000, kWX, 30)                                                   , kRWI_RX   , 0                         , 12 , 3263), // #304
+  INST(Steorl           , BaseAtomicSt       , (0b1011100001100000001000, kWX, 30)                                                   , kRWI_RX   , 0                         , 13 , 3283), // #305
+  INST(Steorb           , BaseAtomicSt       , (0b0011100000100000001000, kW , 0 )                                                   , kRWI_RX   , 0                         , 14 , 3269), // #306
+  INST(Steorlb          , BaseAtomicSt       , (0b0011100001100000001000, kW , 0 )                                                   , kRWI_RX   , 0                         , 15 , 3290), // #307
+  INST(Steorh           , BaseAtomicSt       , (0b0111100000100000001000, kW , 0 )                                                   , kRWI_RX   , 0                         , 16 , 3276), // #308
+  INST(Steorlh          , BaseAtomicSt       , (0b0111100001100000001000, kW , 0 )                                                   , kRWI_RX   , 0                         , 17 , 3298), // #309
+  INST(Stg              , BaseRM_SImm9       , (0b1101100100100000000010, 0b1101100100100000000001, kX, kSP, 0, 4)                   , kRWI_RW   , 0                         , 14 , 3306), // #310
+  INST(Stgm             , BaseRM_NoImm       , (0b1101100110100000000000, kX , kZR, 0 )                                              , kRWI_RW   , 0                         , 13 , 3310), // #311
+  INST(Stgp             , BaseLdpStp         , (0b0110100100, 0b0110100010, kX, 0, 4)                                                , kRWI_RRW  , 0                         , 3  , 3315), // #312
+  INST(Stllr            , BaseRM_NoImm       , (0b1000100010011111011111, kWX, kZR, 30)                                              , kRWI_RW   , 0                         , 14 , 3320), // #313
+  INST(Stllrb           , BaseRM_NoImm       , (0b0000100010011111011111, kW , kZR, 0 )                                              , kRWI_RW   , 0                         , 15 , 3326), // #314
+  INST(Stllrh           , BaseRM_NoImm       , (0b0100100010011111011111, kW , kZR, 0 )                                              , kRWI_RW   , 0                         , 16 , 3333), // #315
+  INST(Stlr             , BaseRM_NoImm       , (0b1000100010011111111111, kWX, kZR, 30)                                              , kRWI_RW   , 0                         , 17 , 3340), // #316
+  INST(Stlrb            , BaseRM_NoImm       , (0b0000100010011111111111, kW , kZR, 0 )                                              , kRWI_RW   , 0                         , 18 , 3345), // #317
+  INST(Stlrh            , BaseRM_NoImm       , (0b0100100010011111111111, kW , kZR, 0 )                                              , kRWI_RW   , 0                         , 19 , 3351), // #318
+  INST(Stlxp            , BaseStxp           , (0b1000100000100000100000, kWX, 30)                                                   , kRWI_WRRX , 0                         , 0  , 3357), // #319
+  INST(Stlxr            , BaseAtomicOp       , (0b1000100000000000111111, kWX, 30, 1)                                                , kRWI_WRX  , 0                         , 108, 3363), // #320
+  INST(Stlxrb           , BaseAtomicOp       , (0b0000100000000000111111, kW , 0 , 1)                                                , kRWI_WRX  , 0                         , 109, 3369), // #321
+  INST(Stlxrh           , BaseAtomicOp       , (0b0100100000000000111111, kW , 0 , 1)                                                , kRWI_WRX  , 0                         , 110, 3376), // #322
+  INST(Stnp             , BaseLdpStp         , (0b0010100000, 0           , kWX, 31, 2)                                              , kRWI_RRW  , 0                         , 4  , 3383), // #323
+  INST(Stp              , BaseLdpStp         , (0b0010100100, 0b0010100010, kWX, 31, 2)                                              , kRWI_RRW  , 0                         , 5  , 3388), // #324
+  INST(Str              , BaseLdSt           , (0b1011100100, 0b10111000000, 0b10111000001, 0         , kWX, 30, 2, Inst::kIdStur)   , kRWI_RW   , 0                         , 6  , 3392), // #325
+  INST(Strb             , BaseLdSt           , (0b0011100100, 0b00111000000, 0b00111000001, 0         , kW , 30, 0, Inst::kIdSturb)  , kRWI_RW   , 0                         , 7  , 3396), // #326
+  INST(Strh             , BaseLdSt           , (0b0111100100, 0b01111000000, 0b01111000001, 0         , kWX, 30, 1, Inst::kIdSturh)  , kRWI_RW   , 0                         , 8  , 3401), // #327
+  INST(Stset            , BaseAtomicSt       , (0b1011100000100000001100, kWX, 30)                                                   , kRWI_RX   , 0                         , 18 , 3406), // #328
+  INST(Stsetl           , BaseAtomicSt       , (0b1011100001100000001100, kWX, 30)                                                   , kRWI_RX   , 0                         , 19 , 3426), // #329
+  INST(Stsetb           , BaseAtomicSt       , (0b0011100000100000001100, kW , 0 )                                                   , kRWI_RX   , 0                         , 20 , 3412), // #330
+  INST(Stsetlb          , BaseAtomicSt       , (0b0011100001100000001100, kW , 0 )                                                   , kRWI_RX   , 0                         , 21 , 3433), // #331
+  INST(Stseth           , BaseAtomicSt       , (0b0111100000100000001100, kW , 0 )                                                   , kRWI_RX   , 0                         , 22 , 3419), // #332
+  INST(Stsetlh          , BaseAtomicSt       , (0b0111100001100000001100, kW , 0 )                                                   , kRWI_RX   , 0                         , 23 , 3441), // #333
+  INST(Stsmax           , BaseAtomicSt       , (0b1011100000100000010000, kWX, 30)                                                   , kRWI_RX   , 0                         , 24 , 3449), // #334
+  INST(Stsmaxl          , BaseAtomicSt       , (0b1011100001100000010000, kWX, 30)                                                   , kRWI_RX   , 0                         , 25 , 3472), // #335
+  INST(Stsmaxb          , BaseAtomicSt       , (0b0011100000100000010000, kW , 0 )                                                   , kRWI_RX   , 0                         , 26 , 3456), // #336
+  INST(Stsmaxlb         , BaseAtomicSt       , (0b0011100001100000010000, kW , 0 )                                                   , kRWI_RX   , 0                         , 27 , 3480), // #337
+  INST(Stsmaxh          , BaseAtomicSt       , (0b0111100000100000010000, kW , 0 )                                                   , kRWI_RX   , 0                         , 28 , 3464), // #338
+  INST(Stsmaxlh         , BaseAtomicSt       , (0b0111100001100000010000, kW , 0 )                                                   , kRWI_RX   , 0                         , 29 , 3489), // #339
+  INST(Stsmin           , BaseAtomicSt       , (0b1011100000100000010100, kWX, 30)                                                   , kRWI_RX   , 0                         , 30 , 3498), // #340
+  INST(Stsminl          , BaseAtomicSt       , (0b1011100001100000010100, kWX, 30)                                                   , kRWI_RX   , 0                         , 31 , 3521), // #341
+  INST(Stsminb          , BaseAtomicSt       , (0b0011100000100000010100, kW , 0 )                                                   , kRWI_RX   , 0                         , 32 , 3505), // #342
+  INST(Stsminlb         , BaseAtomicSt       , (0b0011100001100000010100, kW , 0 )                                                   , kRWI_RX   , 0                         , 33 , 3529), // #343
+  INST(Stsminh          , BaseAtomicSt       , (0b0111100000100000010100, kW , 0 )                                                   , kRWI_RX   , 0                         , 34 , 3513), // #344
+  INST(Stsminlh         , BaseAtomicSt       , (0b0111100001100000010100, kW , 0 )                                                   , kRWI_RX   , 0                         , 35 , 3538), // #345
+  INST(Sttr             , BaseRM_SImm9       , (0b1011100000000000000010, 0b0000000000000000000000, kWX, kZR, 30, 0)                 , kRWI_RW   , 0                         , 15 , 3547), // #346
+  INST(Sttrb            , BaseRM_SImm9       , (0b0011100000000000000010, 0b0000000000000000000000, kW , kZR, 0 , 0)                 , kRWI_RW   , 0                         , 16 , 3552), // #347
+  INST(Sttrh            , BaseRM_SImm9       , (0b0111100000000000000010, 0b0000000000000000000000, kW , kZR, 0 , 0)                 , kRWI_RW   , 0                         , 17 , 3558), // #348
+  INST(Stumax           , BaseAtomicSt       , (0b1011100000100000011000, kWX, 30)                                                   , kRWI_RX   , 0                         , 36 , 3564), // #349
+  INST(Stumaxl          , BaseAtomicSt       , (0b1011100001100000011000, kWX, 30)                                                   , kRWI_RX   , 0                         , 37 , 3587), // #350
+  INST(Stumaxb          , BaseAtomicSt       , (0b0011100000100000011000, kW , 0 )                                                   , kRWI_RX   , 0                         , 38 , 3571), // #351
+  INST(Stumaxlb         , BaseAtomicSt       , (0b0011100001100000011000, kW , 0 )                                                   , kRWI_RX   , 0                         , 39 , 3595), // #352
+  INST(Stumaxh          , BaseAtomicSt       , (0b0111100000100000011000, kW , 0 )                                                   , kRWI_RX   , 0                         , 40 , 3579), // #353
+  INST(Stumaxlh         , BaseAtomicSt       , (0b0111100001100000011000, kW , 0 )                                                   , kRWI_RX   , 0                         , 41 , 3604), // #354
+  INST(Stumin           , BaseAtomicSt       , (0b1011100000100000011100, kWX, 30)                                                   , kRWI_RX   , 0                         , 42 , 3613), // #355
+  INST(Stuminl          , BaseAtomicSt       , (0b1011100001100000011100, kWX, 30)                                                   , kRWI_RX   , 0                         , 43 , 3636), // #356
+  INST(Stuminb          , BaseAtomicSt       , (0b0011100000100000011100, kW , 0 )                                                   , kRWI_RX   , 0                         , 44 , 3620), // #357
+  INST(Stuminlb         , BaseAtomicSt       , (0b0011100001100000011100, kW , 0 )                                                   , kRWI_RX   , 0                         , 45 , 3644), // #358
+  INST(Stuminh          , BaseAtomicSt       , (0b0111100000100000011100, kW , 0 )                                                   , kRWI_RX   , 0                         , 46 , 3628), // #359
+  INST(Stuminlh         , BaseAtomicSt       , (0b0111100001100000011100, kW , 0 )                                                   , kRWI_RX   , 0                         , 47 , 3653), // #360
+  INST(Stur             , BaseRM_SImm9       , (0b1011100000000000000000, 0b0000000000000000000000, kWX, kZR, 30, 0)                 , kRWI_RW   , 0                         , 18 , 3662), // #361
+  INST(Sturb            , BaseRM_SImm9       , (0b0011100000000000000000, 0b0000000000000000000000, kW , kZR, 0 , 0)                 , kRWI_RW   , 0                         , 19 , 3667), // #362
+  INST(Sturh            , BaseRM_SImm9       , (0b0111100000000000000000, 0b0000000000000000000000, kW , kZR, 0 , 0)                 , kRWI_RW   , 0                         , 20 , 3673), // #363
+  INST(Stxp             , BaseStxp           , (0b1000100000100000000000, kWX, 30)                                                   , kRWI_WRRW , 0                         , 1  , 3679), // #364
+  INST(Stxr             , BaseStx            , (0b1000100000000000011111, kWX, 30)                                                   , kRWI_WRW  , 0                         , 0  , 3684), // #365
+  INST(Stxrb            , BaseStx            , (0b0000100000000000011111, kW , 0 )                                                   , kRWI_WRW  , 0                         , 1  , 3689), // #366
+  INST(Stxrh            , BaseStx            , (0b0100100000000000011111, kW , 0 )                                                   , kRWI_WRW  , 0                         , 2  , 3695), // #367
+  INST(Stz2g            , BaseRM_SImm9       , (0b1101100111100000000010, 0b1101100111100000000001, kX , kSP, 0, 4)                  , kRWI_RW   , 0                         , 21 , 3701), // #368
+  INST(Stzg             , BaseRM_SImm9       , (0b1101100101100000000010, 0b1101100101100000000001, kX , kSP, 0, 4)                  , kRWI_RW   , 0                         , 22 , 3707), // #369
+  INST(Stzgm            , BaseRM_NoImm       , (0b1101100100100000000000, kX , kZR, 0)                                               , kRWI_RW   , 0                         , 20 , 3712), // #370
+  INST(Sub              , BaseAddSub         , (0b1001011000, 0b1001011001, 0b1010001)                                               , kRWI_X    , 0                         , 2  , 985 ), // #371
+  INST(Subg             , BaseRRII           , (0b1101000110000000000000, kX, kSP, kX, kSP, 6, 4, 16, 4, 0, 10)                      , kRWI_W    , 0                         , 1  , 3718), // #372
+  INST(Subp             , BaseRRR            , (0b1001101011000000000000, kX, kZR, kX, kSP, kX, kSP, false)                          , kRWI_W    , 0                         , 20 , 3723), // #373
+  INST(Subps            , BaseRRR            , (0b1011101011000000000000, kX, kZR, kX, kSP, kX, kSP, false)                          , kRWI_W    , 0                         , 21 , 3728), // #374
+  INST(Subs             , BaseAddSub         , (0b1101011000, 0b1101011001, 0b1110001)                                               , kRWI_X    , 0                         , 3  , 3734), // #375
+  INST(Svc              , BaseOpImm          , (0b11010100000000000000000000000001, 16, 5)                                           , 0         , 0                         , 12 , 3752), // #376
+  INST(Swp              , BaseAtomicOp       , (0b1011100000100000100000, kWX, 30, 1)                                                , kRWI_RWX  , 0                         , 111, 3756), // #377
+  INST(Swpa             , BaseAtomicOp       , (0b1011100010100000100000, kWX, 30, 1)                                                , kRWI_RWX  , 0                         , 112, 3760), // #378
+  INST(Swpab            , BaseAtomicOp       , (0b0011100010100000100000, kW , 0 , 1)                                                , kRWI_RWX  , 0                         , 113, 3765), // #379
+  INST(Swpah            , BaseAtomicOp       , (0b0111100010100000100000, kW , 0 , 1)                                                , kRWI_RWX  , 0                         , 114, 3771), // #380
+  INST(Swpal            , BaseAtomicOp       , (0b1011100011100000100000, kWX, 30, 1)                                                , kRWI_RWX  , 0                         , 115, 3777), // #381
+  INST(Swpalb           , BaseAtomicOp       , (0b0011100011100000100000, kW , 0 , 1)                                                , kRWI_RWX  , 0                         , 116, 3783), // #382
+  INST(Swpalh           , BaseAtomicOp       , (0b0111100011100000100000, kW , 0 , 1)                                                , kRWI_RWX  , 0                         , 117, 3790), // #383
+  INST(Swpb             , BaseAtomicOp       , (0b0011100000100000100000, kW , 0 , 1)                                                , kRWI_RWX  , 0                         , 118, 3797), // #384
+  INST(Swph             , BaseAtomicOp       , (0b0111100000100000100000, kW , 0 , 1)                                                , kRWI_RWX  , 0                         , 119, 3802), // #385
+  INST(Swpl             , BaseAtomicOp       , (0b1011100001100000100000, kWX, 30, 1)                                                , kRWI_RWX  , 0                         , 120, 3807), // #386
+  INST(Swplb            , BaseAtomicOp       , (0b0011100001100000100000, kW , 0 , 1)                                                , kRWI_RWX  , 0                         , 121, 3812), // #387
+  INST(Swplh            , BaseAtomicOp       , (0b0111100001100000100000, kW , 0 , 1)                                                , kRWI_RWX  , 0                         , 122, 3818), // #388
+  INST(Sxtb             , BaseExtend         , (0b0001001100000000000111, kWX, 0)                                                    , kRWI_W    , 0                         , 0  , 3824), // #389
+  INST(Sxth             , BaseExtend         , (0b0001001100000000001111, kWX, 0)                                                    , kRWI_W    , 0                         , 1  , 3829), // #390
+  INST(Sxtw             , BaseExtend         , (0b1001001101000000011111, kX , 0)                                                    , kRWI_W    , 0                         , 2  , 3845), // #391
+  INST(Sys              , BaseSys            , (_)                                                                                   , kRWI_W    , 0                         , 0  , 3850), // #392
+  INST(Tlbi             , BaseAtDcIcTlbi     , (0b00011110000000, 0b00010000000000, false)                                           , kRWI_RX   , 0                         , 3  , 3871), // #393
+  INST(Tst              , BaseTst            , (0b1101010000, 0b111001000)                                                           , kRWI_R    , 0                         , 0  , 437 ), // #394
+  INST(Tbnz             , BaseBranchTst      , (0b00110111000000000000000000000000)                                                  , kRWI_R    , 0                         , 0  , 3858), // #395
+  INST(Tbz              , BaseBranchTst      , (0b00110110000000000000000000000000)                                                  , kRWI_R    , 0                         , 1  , 3867), // #396
+  INST(Ubfiz            , BaseBfi            , (0b01010011000000000000000000000000)                                                  , kRWI_W    , 0                         , 2  , 3969), // #397
+  INST(Ubfm             , BaseBfm            , (0b01010011000000000000000000000000)                                                  , kRWI_W    , 0                         , 2  , 3975), // #398
+  INST(Ubfx             , BaseBfx            , (0b01010011000000000000000000000000)                                                  , kRWI_W    , 0                         , 2  , 3980), // #399
+  INST(Udf              , BaseOpImm          , (0b00000000000000000000000000000000, 16, 0)                                           , 0         , 0                         , 13 , 3991), // #400
+  INST(Udiv             , BaseRRR            , (0b0001101011000000000010, kWX, kZR, kWX, kZR, kWX, kZR, true)                        , kRWI_W    , 0                         , 22 , 3995), // #401
+  INST(Umaddl           , BaseRRRR           , (0b1001101110100000000000, kX , kZR, kW , kZR, kW , kZR, kX , kZR, false)             , kRWI_W    , 0                         , 4  , 4012), // #402
+  INST(Umnegl           , BaseRRR            , (0b1001101110100000111111, kX , kZR, kW , kZR, kW , kZR, false)                       , kRWI_W    , 0                         , 23 , 4075), // #403
+  INST(Umull            , BaseRRR            , (0b1001101110100000011111, kX , kZR, kW , kZR, kW , kZR, false)                       , kRWI_W    , 0                         , 24 , 4100), // #404
+  INST(Umulh            , BaseRRR            , (0b1001101111000000011111, kX , kZR, kX , kZR, kX , kZR, false)                       , kRWI_W    , 0                         , 25 , 4094), // #405
+  INST(Umsubl           , BaseRRRR           , (0b1001101110100000100000, kX , kZR, kW , kZR, kW , kZR, kX , kZR, false)             , kRWI_W    , 0                         , 5  , 4087), // #406
+  INST(Uxtb             , BaseExtend         , (0b0101001100000000000111, kW, 1)                                                     , kRWI_W    , 0                         , 3  , 4291), // #407
+  INST(Uxth             , BaseExtend         , (0b0101001100000000001111, kW, 1)                                                     , kRWI_W    , 0                         , 4  , 4296), // #408
+  INST(Wfe              , BaseOp             , (0b11010101000000110010000001011111)                                                  , 0         , 0                         , 18 , 4322), // #409
+  INST(Wfi              , BaseOp             , (0b11010101000000110010000001111111)                                                  , 0         , 0                         , 19 , 4326), // #410
+  INST(Xaflag           , BaseOp             , (0b11010101000000000100000000111111)                                                  , 0         , 0                         , 20 , 4330), // #411
+  INST(Xpacd            , BaseR              , (0b11011010110000010100011111100000, kX, kZR, 0)                                      , kRWI_X    , 0                         , 8  , 4341), // #412
+  INST(Xpaci            , BaseR              , (0b11011010110000010100001111100000, kX, kZR, 0)                                      , kRWI_X    , 0                         , 9  , 4347), // #413
+  INST(Xpaclri          , BaseOp             , (0b11010101000000110010000011111111)                                                  , kRWI_X    , 0                         , 21 , 4353), // #414
+  INST(Yield            , BaseOp             , (0b11010101000000110010000000111111)                                                  , 0         , 0                         , 22 , 4361), // #415
+  INST(Abs_v            , ISimdVV            , (0b0000111000100000101110, kVO_V_Any)                                                 , kRWI_W    , 0                         , 0  , 2855), // #416
+  INST(Add_v            , ISimdVVV           , (0b0000111000100000100001, kVO_V_Any)                                                 , kRWI_W    , 0                         , 0  , 978 ), // #417
+  INST(Addhn_v          , ISimdVVV           , (0b0000111000100000010000, kVO_V_B8H4S2)                                              , kRWI_W    , F(Narrow)                 , 1  , 2345), // #418
+  INST(Addhn2_v         , ISimdVVV           , (0b0100111000100000010000, kVO_V_B16H8S4)                                             , kRWI_W    , F(Narrow)                 , 2  , 2352), // #419
+  INST(Addp_v           , ISimdPair          , (0b0101111000110001101110, 0b0000111000100000101111, kVO_V_Any)                       , kRWI_W    , F(Pair)                   , 0  , 638 ), // #420
+  INST(Addv_v           , ISimdSV            , (0b0000111000110001101110, kVO_V_BH_4S)                                               , kRWI_W    , 0                         , 0  , 20  ), // #421
+  INST(Aesd_v           , ISimdVVx           , (0b0100111000101000010110, kOp_V16B, kOp_V16B)                                        , kRWI_X    , 0                         , 0  , 34  ), // #422
+  INST(Aese_v           , ISimdVVx           , (0b0100111000101000010010, kOp_V16B, kOp_V16B)                                        , kRWI_X    , 0                         , 1  , 39  ), // #423
+  INST(Aesimc_v         , ISimdVVx           , (0b0100111000101000011110, kOp_V16B, kOp_V16B)                                        , kRWI_W    , 0                         , 2  , 44  ), // #424
+  INST(Aesmc_v          , ISimdVVx           , (0b0100111000101000011010, kOp_V16B, kOp_V16B)                                        , kRWI_W    , 0                         , 3  , 51  ), // #425
+  INST(And_v            , ISimdVVV           , (0b0000111000100000000111, kVO_V_B)                                                   , kRWI_W    , 0                         , 3  , 57  ), // #426
+  INST(Bcax_v           , ISimdVVVV          , (0b1100111000100000000000, kVO_V_B16)                                                 , kRWI_W    , 0                         , 0  , 187 ), // #427
+  INST(Bfcvt_v          , ISimdVVx           , (0b0001111001100011010000, kOp_H, kOp_S)                                              , kRWI_W    , 0                         , 4  , 196 ), // #428
+  INST(Bfcvtn_v         , ISimdVVx           , (0b0000111010100001011010, kOp_V4H, kOp_V4S)                                          , kRWI_W    , F(Narrow)                 , 5  , 202 ), // #429
+  INST(Bfcvtn2_v        , ISimdVVx           , (0b0100111010100001011010, kOp_V8H, kOp_V4S)                                          , kRWI_W    , F(Narrow)                 , 6  , 209 ), // #430
+  INST(Bfdot_v          , SimdDot            , (0b0010111001000000111111, 0b0000111101000000111100, kET_S, kET_H, kET_2H)            , kRWI_X    , 0                         , 0  , 217 ), // #431
+  INST(Bfmlalb_v        , SimdFmlal          , (0b0010111011000000111111, 0b0000111111000000111100, 0, kET_S, kET_H, kET_H)          , kRWI_X    , F(VH0_15)                 , 0  , 227 ), // #432
+  INST(Bfmlalt_v        , SimdFmlal          , (0b0110111011000000111111, 0b0100111111000000111100, 0, kET_S, kET_H, kET_H)          , kRWI_X    , F(VH0_15)                 , 1  , 235 ), // #433
+  INST(Bfmmla_v         , ISimdVVVx          , (0b0110111001000000111011, kOp_V4S, kOp_V8H, kOp_V8H)                                 , kRWI_X    , F(Long)                   , 0  , 243 ), // #434
+  INST(Bic_v            , SimdBicOrr         , (0b0000111001100000000111, 0b0010111100000000000001)                                  , kRWI_W    , 0                         , 0  , 256 ), // #435
+  INST(Bif_v            , ISimdVVV           , (0b0010111011100000000111, kVO_V_B)                                                   , kRWI_X    , 0                         , 4  , 265 ), // #436
+  INST(Bit_v            , ISimdVVV           , (0b0010111010100000000111, kVO_V_B)                                                   , kRWI_X    , 0                         , 5  , 2365), // #437
+  INST(Bsl_v            , ISimdVVV           , (0b0010111001100000000111, kVO_V_B)                                                   , kRWI_X    , 0                         , 6  , 280 ), // #438
+  INST(Cls_v            , ISimdVV            , (0b0000111000100000010010, kVO_V_BHS)                                                 , kRWI_W    , 0                         , 1  , 412 ), // #439
+  INST(Clz_v            , ISimdVV            , (0b0010111000100000010010, kVO_V_BHS)                                                 , kRWI_W    , 0                         , 2  , 416 ), // #440
+  INST(Cmeq_v           , SimdCmp            , (0b0010111000100000100011, 0b0000111000100000100110, kVO_V_Any)                       , kRWI_W    , 0                         , 0  , 663 ), // #441
+  INST(Cmge_v           , SimdCmp            , (0b0000111000100000001111, 0b0010111000100000100010, kVO_V_Any)                       , kRWI_W    , 0                         , 1  , 669 ), // #442
+  INST(Cmgt_v           , SimdCmp            , (0b0000111000100000001101, 0b0000111000100000100010, kVO_V_Any)                       , kRWI_W    , 0                         , 2  , 675 ), // #443
+  INST(Cmhi_v           , SimdCmp            , (0b0010111000100000001101, 0b0000000000000000000000, kVO_V_Any)                       , kRWI_W    , 0                         , 3  , 420 ), // #444
+  INST(Cmhs_v           , SimdCmp            , (0b0010111000100000001111, 0b0000000000000000000000, kVO_V_Any)                       , kRWI_W    , 0                         , 4  , 425 ), // #445
+  INST(Cmle_v           , SimdCmp            , (0b0000000000000000000000, 0b0010111000100000100110, kVO_V_Any)                       , kRWI_W    , 0                         , 5  , 687 ), // #446
+  INST(Cmlt_v           , SimdCmp            , (0b0000000000000000000000, 0b0000111000100000101010, kVO_V_Any)                       , kRWI_W    , 0                         , 6  , 693 ), // #447
+  INST(Cmtst_v          , ISimdVVV           , (0b0000111000100000100011, kVO_V_Any)                                                 , kRWI_W    , 0                         , 7  , 435 ), // #448
+  INST(Cnt_v            , ISimdVV            , (0b0000111000100000010110, kVO_V_B)                                                   , kRWI_W    , 0                         , 3  , 446 ), // #449
+  INST(Dup_v            , SimdDup            , (_)                                                                                   , kRWI_W    , 0                         , 0  , 579 ), // #450
+  INST(Eor_v            , ISimdVVV           , (0b0010111000100000000111, kVO_V_B)                                                   , kRWI_W    , 0                         , 8  , 1418), // #451
+  INST(Eor3_v           , ISimdVVVV          , (0b1100111000000000000000, kVO_V_B16)                                                 , kRWI_W    , 0                         , 1  , 587 ), // #452
+  INST(Ext_v            , ISimdVVVI          , (0b0010111000000000000000, kVO_V_B, 4, 11, 1)                                         , kRWI_W    , 0                         , 0  , 601 ), // #453
+  INST(Fabd_v           , FSimdVVV           , (0b0111111010100000110101, kHF_C, 0b0010111010100000110101, kHF_C)                    , kRWI_W    , 0                         , 0  , 610 ), // #454
+  INST(Fabs_v           , FSimdVV            , (0b0001111000100000110000, kHF_A, 0b0000111010100000111110, kHF_B)                    , kRWI_W    , 0                         , 0  , 615 ), // #455
+  INST(Facge_v          , FSimdVVV           , (0b0111111000100000111011, kHF_C, 0b0010111000100000111011, kHF_C)                    , kRWI_W    , 0                         , 1  , 620 ), // #456
+  INST(Facgt_v          , FSimdVVV           , (0b0111111010100000111011, kHF_C, 0b0010111010100000111011, kHF_C)                    , kRWI_W    , 0                         , 2  , 626 ), // #457
+  INST(Fadd_v           , FSimdVVV           , (0b0001111000100000001010, kHF_A, 0b0000111000100000110101, kHF_C)                    , kRWI_W    , 0                         , 3  , 632 ), // #458
+  INST(Faddp_v          , FSimdPair          , (0b0111111000110000110110, 0b0010111000100000110101)                                  , kRWI_W    , 0                         , 0  , 637 ), // #459
+  INST(Fcadd_v          , SimdFcadd          , (0b0010111000000000111001)                                                            , kRWI_W    , 0                         , 0  , 643 ), // #460
+  INST(Fccmp_v          , SimdFccmpFccmpe    , (0b00011110001000000000010000000000)                                                  , kRWI_R    , 0                         , 0  , 649 ), // #461
+  INST(Fccmpe_v         , SimdFccmpFccmpe    , (0b00011110001000000000010000010000)                                                  , kRWI_R    , 0                         , 1  , 655 ), // #462
+  INST(Fcmeq_v          , SimdFcm            , (0b0000111000100000111001, kHF_C, 0b0000111010100000110110)                           , kRWI_W    , 0                         , 0  , 662 ), // #463
+  INST(Fcmge_v          , SimdFcm            , (0b0010111000100000111001, kHF_C, 0b0010111010100000110010)                           , kRWI_W    , 0                         , 1  , 668 ), // #464
+  INST(Fcmgt_v          , SimdFcm            , (0b0010111010100000111001, kHF_C, 0b0000111010100000110010)                           , kRWI_W    , 0                         , 2  , 674 ), // #465
+  INST(Fcmla_v          , SimdFcmla          , (0b0010111000000000110001, 0b0010111100000000000100)                                  , kRWI_X    , 0                         , 0  , 680 ), // #466
+  INST(Fcmle_v          , SimdFcm            , (0b0000000000000000000000, kHF_C, 0b0010111010100000110110)                           , kRWI_W    , 0                         , 3  , 686 ), // #467
+  INST(Fcmlt_v          , SimdFcm            , (0b0000000000000000000000, kHF_C, 0b0000111010100000111010)                           , kRWI_W    , 0                         , 4  , 692 ), // #468
+  INST(Fcmp_v           , SimdFcmpFcmpe      , (0b00011110001000000010000000000000)                                                  , kRWI_R    , 0                         , 0  , 698 ), // #469
+  INST(Fcmpe_v          , SimdFcmpFcmpe      , (0b00011110001000000010000000010000)                                                  , kRWI_R    , 0                         , 1  , 703 ), // #470
+  INST(Fcsel_v          , SimdFcsel          , (_)                                                                                   , kRWI_W    , 0                         , 0  , 709 ), // #471
+  INST(Fcvt_v           , SimdFcvt           , (_)                                                                                   , kRWI_W    , 0                         , 0  , 197 ), // #472
+  INST(Fcvtas_v         , SimdFcvtSV         , (0b0000111000100001110010, 0b0000000000000000000000, 0b0001111000100100000000, 1)     , kRWI_W    , 0                         , 0  , 715 ), // #473
+  INST(Fcvtau_v         , SimdFcvtSV         , (0b0010111000100001110010, 0b0000000000000000000000, 0b0001111000100101000000, 1)     , kRWI_W    , 0                         , 1  , 722 ), // #474
+  INST(Fcvtl_v          , SimdFcvtLN         , (0b0000111000100001011110, 0, 0)                                                      , kRWI_W    , F(Long)                   , 0  , 729 ), // #475
+  INST(Fcvtl2_v         , SimdFcvtLN         , (0b0100111000100001011110, 0, 0)                                                      , kRWI_W    , F(Long)                   , 1  , 735 ), // #476
+  INST(Fcvtms_v         , SimdFcvtSV         , (0b0000111000100001101110, 0b0000000000000000000000, 0b0001111000110000000000, 1)     , kRWI_W    , 0                         , 2  , 742 ), // #477
+  INST(Fcvtmu_v         , SimdFcvtSV         , (0b0010111000100001101110, 0b0000000000000000000000, 0b0001111000110001000000, 1)     , kRWI_W    , 0                         , 3  , 749 ), // #478
+  INST(Fcvtn_v          , SimdFcvtLN         , (0b0000111000100001011010, 0, 0)                                                      , kRWI_W    , F(Narrow)                 , 2  , 203 ), // #479
+  INST(Fcvtn2_v         , SimdFcvtLN         , (0b0100111000100001011010, 0, 0)                                                      , kRWI_X    , F(Narrow)                 , 3  , 210 ), // #480
+  INST(Fcvtns_v         , SimdFcvtSV         , (0b0000111000100001101010, 0b0000000000000000000000, 0b0001111000100000000000, 1)     , kRWI_W    , 0                         , 4  , 756 ), // #481
+  INST(Fcvtnu_v         , SimdFcvtSV         , (0b0010111000100001101010, 0b0000000000000000000000, 0b0001111000100001000000, 1)     , kRWI_W    , 0                         , 5  , 763 ), // #482
+  INST(Fcvtps_v         , SimdFcvtSV         , (0b0000111010100001101010, 0b0000000000000000000000, 0b0001111000101000000000, 1)     , kRWI_W    , 0                         , 6  , 770 ), // #483
+  INST(Fcvtpu_v         , SimdFcvtSV         , (0b0010111010100001101010, 0b0000000000000000000000, 0b0001111000101001000000, 1)     , kRWI_W    , 0                         , 7  , 777 ), // #484
+  INST(Fcvtxn_v         , SimdFcvtLN         , (0b0010111000100001011010, 1, 1)                                                      , kRWI_W    , F(Narrow)                 , 4  , 784 ), // #485
+  INST(Fcvtxn2_v        , SimdFcvtLN         , (0b0110111000100001011010, 1, 0)                                                      , kRWI_X    , F(Narrow)                 , 5  , 791 ), // #486
+  INST(Fcvtzs_v         , SimdFcvtSV         , (0b0000111010100001101110, 0b0000111100000000111111, 0b0001111000111000000000, 1)     , kRWI_W    , 0                         , 8  , 799 ), // #487
+  INST(Fcvtzu_v         , SimdFcvtSV         , (0b0010111010100001101110, 0b0010111100000000111111, 0b0001111000111001000000, 1)     , kRWI_W    , 0                         , 9  , 806 ), // #488
+  INST(Fdiv_v           , FSimdVVV           , (0b0001111000100000000110, kHF_A, 0b0010111000100000111111, kHF_C)                    , kRWI_W    , 0                         , 4  , 813 ), // #489
+  INST(Fjcvtzs_v        , ISimdVVx           , (0b0001111001111110000000, kOp_GpW, kOp_D)                                            , kRWI_W    , 0                         , 7  , 818 ), // #490
+  INST(Fmadd_v          , FSimdVVVV          , (0b0001111100000000000000, kHF_A, 0b0000000000000000000000, kHF_N)                    , kRWI_W    , 0                         , 0  , 826 ), // #491
+  INST(Fmax_v           , FSimdVVV           , (0b0001111000100000010010, kHF_A, 0b0000111000100000111101, kHF_C)                    , kRWI_W    , 0                         , 5  , 832 ), // #492
+  INST(Fmaxnm_v         , FSimdVVV           , (0b0001111000100000011010, kHF_A, 0b0000111000100000110001, kHF_C)                    , kRWI_W    , 0                         , 6  , 837 ), // #493
+  INST(Fmaxnmp_v        , FSimdPair          , (0b0111111000110000110010, 0b0010111000100000110001)                                  , kRWI_W    , 0                         , 1  , 844 ), // #494
+  INST(Fmaxnmv_v        , FSimdSV            , (0b0010111000110000110010)                                                            , kRWI_W    , 0                         , 0  , 852 ), // #495
+  INST(Fmaxp_v          , FSimdPair          , (0b0111111000110000111110, 0b0010111000100000111101)                                  , kRWI_W    , 0                         , 2  , 860 ), // #496
+  INST(Fmaxv_v          , FSimdSV            , (0b0010111000110000111110)                                                            , kRWI_W    , 0                         , 1  , 866 ), // #497
+  INST(Fmin_v           , FSimdVVV           , (0b0001111000100000010110, kHF_A, 0b0000111010100000111101, kHF_C)                    , kRWI_W    , 0                         , 7  , 872 ), // #498
+  INST(Fminnm_v         , FSimdVVV           , (0b0001111000100000011110, kHF_A, 0b0000111010100000110001, kHF_C)                    , kRWI_W    , 0                         , 8  , 877 ), // #499
+  INST(Fminnmp_v        , FSimdPair          , (0b0111111010110000110010, 0b0010111010100000110001)                                  , kRWI_W    , 0                         , 3  , 884 ), // #500
+  INST(Fminnmv_v        , FSimdSV            , (0b0010111010110000110010)                                                            , kRWI_W    , 0                         , 2  , 892 ), // #501
+  INST(Fminp_v          , FSimdPair          , (0b0111111010110000111110, 0b0010111010100000111101)                                  , kRWI_W    , 0                         , 4  , 900 ), // #502
+  INST(Fminv_v          , FSimdSV            , (0b0010111010110000111110)                                                            , kRWI_W    , 0                         , 3  , 906 ), // #503
+  INST(Fmla_v           , FSimdVVVe          , (0b0000000000000000000000, kHF_N, 0b0000111000100000110011, 0b0000111110000000000100) , kRWI_X    , F(VH0_15)                 , 0  , 912 ), // #504
+  INST(Fmlal_v          , SimdFmlal          , (0b0000111000100000111011, 0b0000111110000000000000, 1, kET_S, kET_H, kET_H)          , kRWI_X    , F(VH0_15)                 , 2  , 917 ), // #505
+  INST(Fmlal2_v         , SimdFmlal          , (0b0010111000100000110011, 0b0010111110000000100000, 1, kET_S, kET_H, kET_H)          , kRWI_X    , F(VH0_15)                 , 3  , 923 ), // #506
+  INST(Fmls_v           , FSimdVVVe          , (0b0000000000000000000000, kHF_N, 0b0000111010100000110011, 0b0000111110000000010100) , kRWI_X    , F(VH0_15)                 , 1  , 930 ), // #507
+  INST(Fmlsl_v          , SimdFmlal          , (0b0000111010100000111011, 0b0000111110000000010000, 1, kET_S, kET_H, kET_H)          , kRWI_X    , F(VH0_15)                 , 4  , 935 ), // #508
+  INST(Fmlsl2_v         , SimdFmlal          , (0b0010111010100000110011, 0b0010111110000000110000, 1, kET_S, kET_H, kET_H)          , kRWI_X    , F(VH0_15)                 , 5  , 941 ), // #509
+  INST(Fmov_v           , SimdFmov           , (_)                                                                                   , kRWI_W    , 0                         , 0  , 948 ), // #510
+  INST(Fmsub_v          , FSimdVVVV          , (0b0001111100000000100000, kHF_A, 0b0000000000000000000000, kHF_N)                    , kRWI_W    , 0                         , 1  , 953 ), // #511
+  INST(Fmul_v           , FSimdVVVe          , (0b0001111000100000000010, kHF_A, 0b0010111000100000110111, 0b0000111110000000100100) , kRWI_W    , F(VH0_15)                 , 2  , 959 ), // #512
+  INST(Fmulx_v          , FSimdVVVe          , (0b0101111000100000110111, kHF_C, 0b0000111000100000110111, 0b0010111110000000100100) , kRWI_W    , F(VH0_15)                 , 3  , 964 ), // #513
+  INST(Fneg_v           , FSimdVV            , (0b0001111000100001010000, kHF_A, 0b0010111010100000111110, kHF_B)                    , kRWI_W    , 0                         , 1  , 970 ), // #514
+  INST(Fnmadd_v         , FSimdVVVV          , (0b0001111100100000000000, kHF_A, 0b0000000000000000000000, kHF_N)                    , kRWI_W    , 0                         , 2  , 975 ), // #515
+  INST(Fnmsub_v         , FSimdVVVV          , (0b0001111100100000100000, kHF_A, 0b0000000000000000000000, kHF_N)                    , kRWI_W    , 0                         , 3  , 982 ), // #516
+  INST(Fnmul_v          , FSimdVVV           , (0b0001111000100000100010, kHF_A, 0b0000000000000000000000, kHF_N)                    , kRWI_W    , 0                         , 9  , 989 ), // #517
+  INST(Frecpe_v         , FSimdVV            , (0b0101111010100001110110, kHF_B, 0b0000111010100001110110, kHF_B)                    , kRWI_W    , 0                         , 2  , 995 ), // #518
+  INST(Frecps_v         , FSimdVVV           , (0b0101111000100000111111, kHF_C, 0b0000111000100000111111, kHF_C)                    , kRWI_W    , 0                         , 10 , 1002), // #519
+  INST(Frecpx_v         , FSimdVV            , (0b0101111010100001111110, kHF_B, 0b0000000000000000000000, kHF_N)                    , kRWI_W    , 0                         , 3  , 1009), // #520
+  INST(Frint32x_v       , FSimdVV            , (0b0001111000101000110000, kHF_N, 0b0010111000100001111010, kHF_N)                    , kRWI_W    , 0                         , 4  , 1016), // #521
+  INST(Frint32z_v       , FSimdVV            , (0b0001111000101000010000, kHF_N, 0b0000111000100001111010, kHF_N)                    , kRWI_W    , 0                         , 5  , 1025), // #522
+  INST(Frint64x_v       , FSimdVV            , (0b0001111000101001110000, kHF_N, 0b0010111000100001111110, kHF_N)                    , kRWI_W    , 0                         , 6  , 1034), // #523
+  INST(Frint64z_v       , FSimdVV            , (0b0001111000101001010000, kHF_N, 0b0000111000100001111110, kHF_N)                    , kRWI_W    , 0                         , 7  , 1043), // #524
+  INST(Frinta_v         , FSimdVV            , (0b0001111000100110010000, kHF_A, 0b0010111000100001100010, kHF_B)                    , kRWI_W    , 0                         , 8  , 1052), // #525
+  INST(Frinti_v         , FSimdVV            , (0b0001111000100111110000, kHF_A, 0b0010111010100001100110, kHF_B)                    , kRWI_W    , 0                         , 9  , 1059), // #526
+  INST(Frintm_v         , FSimdVV            , (0b0001111000100101010000, kHF_A, 0b0000111000100001100110, kHF_B)                    , kRWI_W    , 0                         , 10 , 1066), // #527
+  INST(Frintn_v         , FSimdVV            , (0b0001111000100100010000, kHF_A, 0b0000111000100001100010, kHF_B)                    , kRWI_W    , 0                         , 11 , 1073), // #528
+  INST(Frintp_v         , FSimdVV            , (0b0001111000100100110000, kHF_A, 0b0000111010100001100010, kHF_B)                    , kRWI_W    , 0                         , 12 , 1080), // #529
+  INST(Frintx_v         , FSimdVV            , (0b0001111000100111010000, kHF_A, 0b0010111000100001100110, kHF_B)                    , kRWI_W    , 0                         , 13 , 1087), // #530
+  INST(Frintz_v         , FSimdVV            , (0b0001111000100101110000, kHF_A, 0b0000111010100001100110, kHF_B)                    , kRWI_W    , 0                         , 14 , 1094), // #531
+  INST(Frsqrte_v        , FSimdVV            , (0b0111111010100001110110, kHF_B, 0b0010111010100001110110, kHF_B)                    , kRWI_W    , 0                         , 15 , 1101), // #532
+  INST(Frsqrts_v        , FSimdVVV           , (0b0101111010100000111111, kHF_C, 0b0000111010100000111111, kHF_C)                    , kRWI_W    , 0                         , 11 , 1109), // #533
+  INST(Fsqrt_v          , FSimdVV            , (0b0001111000100001110000, kHF_A, 0b0010111010100001111110, kHF_B)                    , kRWI_W    , 0                         , 16 , 1117), // #534
+  INST(Fsub_v           , FSimdVVV           , (0b0001111000100000001110, kHF_A, 0b0000111010100000110101, kHF_C)                    , kRWI_W    , 0                         , 12 , 1123), // #535
+  INST(Ins_v            , SimdIns            , (_)                                                                                   , kRWI_X    , 0                         , 0  , 1145), // #536
+  INST(Ld1_v            , SimdLdNStN         , (0b0000110101000000000000, 0b0000110001000000001000, 1, 0)                            , kRWI_LDn  , F(Consecutive)            , 0  , 1153), // #537
+  INST(Ld1r_v           , SimdLdNStN         , (0b0000110101000000110000, 0b0000000000000000000000, 1, 1)                            , kRWI_LDn  , F(Consecutive)            , 1  , 1157), // #538
+  INST(Ld2_v            , SimdLdNStN         , (0b0000110101100000000000, 0b0000110001000000100000, 2, 0)                            , kRWI_LDn  , F(Consecutive)            , 2  , 1162), // #539
+  INST(Ld2r_v           , SimdLdNStN         , (0b0000110101100000110000, 0b0000000000000000000000, 2, 1)                            , kRWI_LDn  , F(Consecutive)            , 3  , 1166), // #540
+  INST(Ld3_v            , SimdLdNStN         , (0b0000110101000000001000, 0b0000110001000000010000, 3, 0)                            , kRWI_LDn  , F(Consecutive)            , 4  , 1171), // #541
+  INST(Ld3r_v           , SimdLdNStN         , (0b0000110101000000111000, 0b0000000000000000000000, 3, 1)                            , kRWI_LDn  , F(Consecutive)            , 5  , 1175), // #542
+  INST(Ld4_v            , SimdLdNStN         , (0b0000110101100000001000, 0b0000110001000000000000, 4, 0)                            , kRWI_LDn  , F(Consecutive)            , 6  , 1180), // #543
+  INST(Ld4r_v           , SimdLdNStN         , (0b0000110101100000111000, 0b0000000000000000000000, 4, 1)                            , kRWI_LDn  , F(Consecutive)            , 7  , 1184), // #544
+  INST(Ldnp_v           , SimdLdpStp         , (0b0010110001, 0b0000000000)                                                          , kRWI_WW   , 0                         , 0  , 1537), // #545
+  INST(Ldp_v            , SimdLdpStp         , (0b0010110101, 0b0010110011)                                                          , kRWI_WW   , 0                         , 1  , 1542), // #546
+  INST(Ldr_v            , SimdLdSt           , (0b0011110101, 0b00111100010, 0b00111100011, 0b00011100, Inst::kIdLdur_v)             , kRWI_W    , 0                         , 0  , 1552), // #547
+  INST(Ldur_v           , SimdLdurStur       , (0b0011110001000000000000)                                                            , kRWI_W    , 0                         , 0  , 2142), // #548
+  INST(Mla_v            , ISimdVVVe          , (0b0000111000100000100101, kVO_V_BHS, 0b0010111100000000000000, kVO_V_HS)             , kRWI_X    , F(VH0_15)                 , 0  , 246 ), // #549
+  INST(Mls_v            , ISimdVVVe          , (0b0010111000100000100101, kVO_V_BHS, 0b0010111100000000010000, kVO_V_HS)             , kRWI_X    , F(VH0_15)                 , 1  , 931 ), // #550
+  INST(Mov_v            , SimdMov            , (_)                                                                                   , kRWI_W    , 0                         , 0  , 949 ), // #551
+  INST(Movi_v           , SimdMoviMvni       , (0b0000111100000000000001, 0)                                                         , kRWI_W    , 0                         , 0  , 2221), // #552
+  INST(Mul_v            , ISimdVVVe          , (0b0000111000100000100111, kVO_V_BHS, 0b0000111100000000100000, kVO_V_HS)             , kRWI_W    , F(VH0_15)                 , 2  , 991 ), // #553
+  INST(Mvn_v            , ISimdVV            , (0b0010111000100000010110, kVO_V_B)                                                   , kRWI_W    , 0                         , 4  , 2249), // #554
+  INST(Mvni_v           , SimdMoviMvni       , (0b0000111100000000000001, 1)                                                         , kRWI_W    , 0                         , 1  , 2253), // #555
+  INST(Neg_v            , ISimdVV            , (0b0010111000100000101110, kVO_V_Any)                                                 , kRWI_W    , 0                         , 5  , 540 ), // #556
+  INST(Not_v            , ISimdVV            , (0b0010111000100000010110, kVO_V_B)                                                   , kRWI_W    , 0                         , 6  , 2276), // #557
+  INST(Orn_v            , ISimdVVV           , (0b0000111011100000000111, kVO_V_B)                                                   , kRWI_W    , 0                         , 9  , 2280), // #558
+  INST(Orr_v            , SimdBicOrr         , (0b0000111010100000000111, 0b0000111100000000000001)                                  , kRWI_W    , 0                         , 1  , 2284), // #559
+  INST(Pmul_v           , ISimdVVV           , (0b0010111000100000100111, kVO_V_B)                                                   , kRWI_W    , 0                         , 10 , 2320), // #560
+  INST(Pmull_v          , ISimdVVV           , (0b0000111000100000111000, kVO_V_B8D1)                                                , kRWI_W    , F(Long)                   , 11 , 2325), // #561
+  INST(Pmull2_v         , ISimdVVV           , (0b0100111000100000111000, kVO_V_B16D2)                                               , kRWI_W    , F(Long)                   , 12 , 2331), // #562
+  INST(Raddhn_v         , ISimdVVV           , (0b0010111000100000010000, kVO_V_B8H4S2)                                              , kRWI_W    , F(Narrow)                 , 13 , 2344), // #563
+  INST(Raddhn2_v        , ISimdVVV           , (0b0110111000100000010000, kVO_V_B16H8S4)                                             , kRWI_X    , F(Narrow)                 , 14 , 2351), // #564
+  INST(Rax1_v           , ISimdVVV           , (0b1100111001100000100011, kVO_V_D2)                                                  , kRWI_W    , 0                         , 15 , 2359), // #565
+  INST(Rbit_v           , ISimdVV            , (0b0010111001100000010110, kVO_V_B)                                                   , kRWI_W    , 0                         , 7  , 2364), // #566
+  INST(Rev16_v          , ISimdVV            , (0b0000111000100000000110, kVO_V_B)                                                   , kRWI_W    , 0                         , 8  , 2373), // #567
+  INST(Rev32_v          , ISimdVV            , (0b0010111000100000000010, kVO_V_BH)                                                  , kRWI_W    , 0                         , 9  , 2379), // #568
+  INST(Rev64_v          , ISimdVV            , (0b0000111000100000000010, kVO_V_BHS)                                                 , kRWI_W    , 0                         , 10 , 2385), // #569
+  INST(Rshrn_v          , SimdShift          , (0b0000000000000000000000, 0b0000111100000000100011, 1, kVO_V_B8H4S2)                 , kRWI_W    , F(Narrow)                 , 0  , 2960), // #570
+  INST(Rshrn2_v         , SimdShift          , (0b0000000000000000000000, 0b0100111100000000100011, 1, kVO_V_B16H8S4)                , kRWI_X    , F(Narrow)                 , 1  , 2968), // #571
+  INST(Rsubhn_v         , ISimdVVV           , (0b0010111000100000011000, kVO_V_B8H4S2)                                              , kRWI_W    , F(Narrow)                 , 16 , 2400), // #572
+  INST(Rsubhn2_v        , ISimdVVV           , (0b0110111000100000011000, kVO_V_B16H8S4)                                             , kRWI_X    , F(Narrow)                 , 17 , 2407), // #573
+  INST(Saba_v           , ISimdVVV           , (0b0000111000100000011111, kVO_V_BHS)                                                 , kRWI_X    , 0                         , 18 , 2415), // #574
+  INST(Sabal_v          , ISimdVVV           , (0b0000111000100000010100, kVO_V_B8H4S2)                                              , kRWI_X    , F(Long)                   , 19 , 2420), // #575
+  INST(Sabal2_v         , ISimdVVV           , (0b0100111000100000010100, kVO_V_B16H8S4)                                             , kRWI_X    , F(Long)                   , 20 , 2426), // #576
+  INST(Sabd_v           , ISimdVVV           , (0b0000111000100000011101, kVO_V_BHS)                                                 , kRWI_W    , 0                         , 21 , 2433), // #577
+  INST(Sabdl_v          , ISimdVVV           , (0b0000111000100000011100, kVO_V_B8H4S2)                                              , kRWI_W    , F(Long)                   , 22 , 2438), // #578
+  INST(Sabdl2_v         , ISimdVVV           , (0b0100111000100000011100, kVO_V_B16H8S4)                                             , kRWI_W    , F(Long)                   , 23 , 2444), // #579
+  INST(Sadalp_v         , ISimdVV            , (0b0000111000100000011010, kVO_V_BHS)                                                 , kRWI_X    , F(Long) | F(Pair)         , 11 , 2451), // #580
+  INST(Saddl_v          , ISimdVVV           , (0b0000111000100000000000, kVO_V_B8H4S2)                                              , kRWI_W    , F(Long)                   , 24 , 2458), // #581
+  INST(Saddl2_v         , ISimdVVV           , (0b0100111000100000000000, kVO_V_B16H8S4)                                             , kRWI_W    , F(Long)                   , 25 , 2464), // #582
+  INST(Saddlp_v         , ISimdVV            , (0b0000111000100000001010, kVO_V_BHS)                                                 , kRWI_W    , F(Long) | F(Pair)         , 12 , 2471), // #583
+  INST(Saddlv_v         , ISimdSV            , (0b0000111000110000001110, kVO_V_BH_4S)                                               , kRWI_W    , F(Long)                   , 1  , 2478), // #584
+  INST(Saddw_v          , ISimdWWV           , (0b0000111000100000000100, kVO_V_B8H4S2)                                              , kRWI_W    , 0                         , 0  , 2485), // #585
+  INST(Saddw2_v         , ISimdWWV           , (0b0000111000100000000100, kVO_V_B16H8S4)                                             , kRWI_W    , 0                         , 1  , 2491), // #586
+  INST(Scvtf_v          , SimdFcvtSV         , (0b0000111000100001110110, 0b0000111100000000111001, 0b0001111000100010000000, 0)     , kRWI_W    , 0                         , 10 , 2523), // #587
+  INST(Sdot_v           , SimdDot            , (0b0000111010000000100101, 0b0000111110000000111000, kET_S, kET_B, kET_4B)            , kRWI_X    , 0                         , 1  , 4218), // #588
+  INST(Sha1c_v          , ISimdVVVx          , (0b0101111000000000000000, kOp_Q, kOp_S, kOp_V4S)                                     , kRWI_X    , 0                         , 1  , 2556), // #589
+  INST(Sha1h_v          , ISimdVVx           , (0b0101111000101000000010, kOp_S, kOp_S)                                              , kRWI_W    , 0                         , 8  , 2562), // #590
+  INST(Sha1m_v          , ISimdVVVx          , (0b0101111000000000001000, kOp_Q, kOp_S, kOp_V4S)                                     , kRWI_X    , 0                         , 2  , 2568), // #591
+  INST(Sha1p_v          , ISimdVVVx          , (0b0101111000000000000100, kOp_Q, kOp_S, kOp_V4S)                                     , kRWI_X    , 0                         , 3  , 2574), // #592
+  INST(Sha1su0_v        , ISimdVVVx          , (0b0101111000000000001100, kOp_V4S, kOp_V4S, kOp_V4S)                                 , kRWI_X    , 0                         , 4  , 2580), // #593
+  INST(Sha1su1_v        , ISimdVVx           , (0b0101111000101000000110, kOp_V4S, kOp_V4S)                                          , kRWI_X    , 0                         , 9  , 2588), // #594
+  INST(Sha256h_v        , ISimdVVVx          , (0b0101111000000000010000, kOp_Q, kOp_Q, kOp_V4S)                                     , kRWI_X    , 0                         , 5  , 2596), // #595
+  INST(Sha256h2_v       , ISimdVVVx          , (0b0101111000000000010100, kOp_Q, kOp_Q, kOp_V4S)                                     , kRWI_X    , 0                         , 6  , 2604), // #596
+  INST(Sha256su0_v      , ISimdVVx           , (0b0101111000101000001010, kOp_V4S, kOp_V4S)                                          , kRWI_X    , 0                         , 10 , 2613), // #597
+  INST(Sha256su1_v      , ISimdVVVx          , (0b0101111000000000011000, kOp_V4S, kOp_V4S, kOp_V4S)                                 , kRWI_X    , 0                         , 7  , 2623), // #598
+  INST(Sha512h_v        , ISimdVVVx          , (0b1100111001100000100000, kOp_Q, kOp_Q, kOp_V2D)                                     , kRWI_X    , 0                         , 8  , 2633), // #599
+  INST(Sha512h2_v       , ISimdVVVx          , (0b1100111001100000100001, kOp_Q, kOp_Q, kOp_V2D)                                     , kRWI_X    , 0                         , 9  , 2641), // #600
+  INST(Sha512su0_v      , ISimdVVx           , (0b1100111011000000100000, kOp_V2D, kOp_V2D)                                          , kRWI_X    , 0                         , 11 , 2650), // #601
+  INST(Sha512su1_v      , ISimdVVVx          , (0b1100111001100000100010, kOp_V2D, kOp_V2D, kOp_V2D)                                 , kRWI_X    , 0                         , 10 , 2660), // #602
+  INST(Shadd_v          , ISimdVVV           , (0b0000111000100000000001, kVO_V_BHS)                                                 , kRWI_W    , 0                         , 26 , 2670), // #603
+  INST(Shl_v            , SimdShift          , (0b0000000000000000000000, 0b0000111100000000010101, 0, kVO_V_Any)                    , kRWI_W    , 0                         , 2  , 2954), // #604
+  INST(Shll_v           , SimdShiftES        , (0b0010111000100001001110, kVO_V_B8H4S2)                                              , kRWI_W    , F(Long)                   , 0  , 3108), // #605
+  INST(Shll2_v          , SimdShiftES        , (0b0110111000100001001110, kVO_V_B16H8S4)                                             , kRWI_W    , F(Long)                   , 1  , 3114), // #606
+  INST(Shrn_v           , SimdShift          , (0b0000000000000000000000, 0b0000111100000000100001, 1, kVO_V_B8H4S2)                 , kRWI_W    , F(Narrow)                 , 3  , 2961), // #607
+  INST(Shrn2_v          , SimdShift          , (0b0000000000000000000000, 0b0100111100000000100001, 1, kVO_V_B16H8S4)                , kRWI_X    , F(Narrow)                 , 4  , 2969), // #608
+  INST(Shsub_v          , ISimdVVV           , (0b0000111000100000001001, kVO_V_BHS)                                                 , kRWI_W    , 0                         , 27 , 2676), // #609
+  INST(Sli_v            , SimdShift          , (0b0000000000000000000000, 0b0010111100000000010101, 0, kVO_V_Any)                    , kRWI_X    , 0                         , 5  , 2682), // #610
+  INST(Sm3partw1_v      , ISimdVVVx          , (0b1100111001100000110000, kOp_V4S, kOp_V4S, kOp_V4S)                                 , kRWI_X    , 0                         , 11 , 2686), // #611
+  INST(Sm3partw2_v      , ISimdVVVx          , (0b1100111001100000110001, kOp_V4S, kOp_V4S, kOp_V4S)                                 , kRWI_X    , 0                         , 12 , 2696), // #612
+  INST(Sm3ss1_v         , ISimdVVVVx         , (0b1100111001000000000000, kOp_V4S, kOp_V4S, kOp_V4S, kOp_V4S)                        , kRWI_W    , 0                         , 0  , 2706), // #613
+  INST(Sm3tt1a_v        , SimdSm3tt          , (0b1100111001000000100000)                                                            , kRWI_X    , 0                         , 0  , 2713), // #614
+  INST(Sm3tt1b_v        , SimdSm3tt          , (0b1100111001000000100001)                                                            , kRWI_X    , 0                         , 1  , 2721), // #615
+  INST(Sm3tt2a_v        , SimdSm3tt          , (0b1100111001000000100010)                                                            , kRWI_X    , 0                         , 2  , 2729), // #616
+  INST(Sm3tt2b_v        , SimdSm3tt          , (0b1100111001000000100011)                                                            , kRWI_X    , 0                         , 3  , 2737), // #617
+  INST(Sm4e_v           , ISimdVVx           , (0b1100111011000000100001, kOp_V4S, kOp_V4S)                                          , kRWI_X    , 0                         , 12 , 2745), // #618
+  INST(Sm4ekey_v        , ISimdVVVx          , (0b1100111001100000110010, kOp_V4S, kOp_V4S, kOp_V4S)                                 , kRWI_X    , 0                         , 13 , 2750), // #619
+  INST(Smax_v           , ISimdVVV           , (0b0000111000100000011001, kVO_V_BHS)                                                 , kRWI_W    , 0                         , 28 , 1690), // #620
+  INST(Smaxp_v          , ISimdVVV           , (0b0000111000100000101001, kVO_V_BHS)                                                 , kRWI_W    , 0                         , 29 , 2765), // #621
+  INST(Smaxv_v          , ISimdSV            , (0b0000111000110000101010, kVO_V_BH_4S)                                               , kRWI_W    , 0                         , 2  , 2771), // #622
+  INST(Smin_v           , ISimdVVV           , (0b0000111000100000011011, kVO_V_BHS)                                                 , kRWI_W    , 0                         , 30 , 1794), // #623
+  INST(Sminp_v          , ISimdVVV           , (0b0000111000100000101011, kVO_V_BHS)                                                 , kRWI_W    , 0                         , 31 , 2777), // #624
+  INST(Sminv_v          , ISimdSV            , (0b0000111000110001101010, kVO_V_BH_4S)                                               , kRWI_W    , 0                         , 3  , 2783), // #625
+  INST(Smlal_v          , ISimdVVVe          , (0b0000111000100000100000, kVO_V_B8H4S2, 0b0000111100000000001000, kVO_V_H4S2)        , kRWI_X    , F(Long) | F(VH0_15)       , 3  , 2789), // #626
+  INST(Smlal2_v         , ISimdVVVe          , (0b0100111000100000100000, kVO_V_B16H8S4, 0b0100111100000000001000, kVO_V_H8S4)       , kRWI_X    , F(Long) | F(VH0_15)       , 4  , 2795), // #627
+  INST(Smlsl_v          , ISimdVVVe          , (0b0000111000100000101000, kVO_V_B8H4S2, 0b0000111100000000011000, kVO_V_H4S2)        , kRWI_X    , F(Long) | F(VH0_15)       , 5  , 2802), // #628
+  INST(Smlsl2_v         , ISimdVVVe          , (0b0100111000100000101000, kVO_V_B16H8S4, 0b0100111100000000011000, kVO_V_H8S4)       , kRWI_X    , F(Long) | F(VH0_15)       , 6  , 2808), // #629
+  INST(Smmla_v          , ISimdVVVx          , (0b0100111010000000101001, kOp_V4S, kOp_V16B, kOp_V16B)                               , kRWI_X    , 0                         , 14 , 4247), // #630
+  INST(Smov_v           , SimdSmovUmov       , (0b0000111000000000001011, kVO_V_BHS, 1)                                              , kRWI_W    , 0                         , 0  , 2822), // #631
+  INST(Smull_v          , ISimdVVVe          , (0b0000111000100000110000, kVO_V_B8H4S2, 0b0000111100000000101000, kVO_V_H4S2)        , kRWI_W    , F(Long) | F(VH0_15)       , 7  , 2840), // #632
+  INST(Smull2_v         , ISimdVVVe          , (0b0100111000100000110000, kVO_V_B16H8S4, 0b0100111100000000101000, kVO_V_H8S4)       , kRWI_W    , F(Long) | F(VH0_15)       , 8  , 2846), // #633
+  INST(Sqabs_v          , ISimdVV            , (0b0000111000100000011110, kVO_SV_Any)                                                , kRWI_W    , 0                         , 13 , 2853), // #634
+  INST(Sqadd_v          , ISimdVVV           , (0b0000111000100000000011, kVO_SV_Any)                                                , kRWI_W    , 0                         , 32 , 4254), // #635
+  INST(Sqdmlal_v        , ISimdVVVe          , (0b0000111000100000100100, kVO_SV_BHS, 0b0000111100000000001100, kVO_V_H4S2)          , kRWI_X    , F(Long) | F(VH0_15)       , 9  , 2859), // #636
+  INST(Sqdmlal2_v       , ISimdVVVe          , (0b0100111000100000100100, kVO_V_B16H8S4, 0b0100111100000000001100, kVO_V_H8S4)       , kRWI_X    , F(Long) | F(VH0_15)       , 10 , 2867), // #637
+  INST(Sqdmlsl_v        , ISimdVVVe          , (0b0000111000100000101100, kVO_SV_BHS, 0b0000111100000000011100, kVO_V_H4S2)          , kRWI_X    , F(Long) | F(VH0_15)       , 11 , 2876), // #638
+  INST(Sqdmlsl2_v       , ISimdVVVe          , (0b0100111000100000101100, kVO_V_B16H8S4, 0b0100111100000000011100, kVO_V_H8S4)       , kRWI_X    , F(Long) | F(VH0_15)       , 12 , 2884), // #639
+  INST(Sqdmulh_v        , ISimdVVVe          , (0b0000111000100000101101, kVO_SV_HS, 0b0000111100000000110000, kVO_SV_HS)            , kRWI_W    , F(VH0_15)                 , 13 , 2893), // #640
+  INST(Sqdmull_v        , ISimdVVVe          , (0b0000111000100000110100, kVO_SV_BHS, 0b0000111100000000101100, kVO_V_H4S2)          , kRWI_W    , F(Long) | F(VH0_15)       , 14 , 2901), // #641
+  INST(Sqdmull2_v       , ISimdVVVe          , (0b0100111000100000110100, kVO_V_B16H8S4, 0b0100111100000000101100, kVO_V_H8S4)       , kRWI_W    , F(Long) | F(VH0_15)       , 15 , 2909), // #642
+  INST(Sqneg_v          , ISimdVV            , (0b0010111000100000011110, kVO_SV_Any)                                                , kRWI_W    , 0                         , 14 , 2918), // #643
+  INST(Sqrdmlah_v       , ISimdVVVe          , (0b0010111000000000100001, kVO_SV_HS, 0b0010111100000000110100, kVO_SV_HS)            , kRWI_X    , F(VH0_15)                 , 16 , 2924), // #644
+  INST(Sqrdmlsh_v       , ISimdVVVe          , (0b0010111000000000100011, kVO_SV_HS, 0b0010111100000000111100, kVO_SV_HS)            , kRWI_X    , F(VH0_15)                 , 17 , 2933), // #645
+  INST(Sqrdmulh_v       , ISimdVVVe          , (0b0010111000100000101101, kVO_SV_HS, 0b0000111100000000110100, kVO_SV_HS)            , kRWI_W    , F(VH0_15)                 , 18 , 2942), // #646
+  INST(Sqrshl_v         , SimdShift          , (0b0000111000100000010111, 0b0000000000000000000000, 1, kVO_SV_Any)                   , kRWI_W    , 0                         , 6  , 2951), // #647
+  INST(Sqrshrn_v        , SimdShift          , (0b0000000000000000000000, 0b0000111100000000100111, 1, kVO_SV_B8H4S2)                , kRWI_W    , F(Narrow)                 , 7  , 2958), // #648
+  INST(Sqrshrn2_v       , SimdShift          , (0b0000000000000000000000, 0b0100111100000000100111, 1, kVO_V_B16H8S4)                , kRWI_X    , F(Narrow)                 , 8  , 2966), // #649
+  INST(Sqrshrun_v       , SimdShift          , (0b0000000000000000000000, 0b0010111100000000100011, 1, kVO_SV_B8H4S2)                , kRWI_W    , F(Narrow)                 , 9  , 2975), // #650
+  INST(Sqrshrun2_v      , SimdShift          , (0b0000000000000000000000, 0b0110111100000000100011, 1, kVO_V_B16H8S4)                , kRWI_X    , F(Narrow)                 , 10 , 2984), // #651
+  INST(Sqshl_v          , SimdShift          , (0b0000111000100000010011, 0b0000111100000000011101, 0, kVO_SV_Any)                   , kRWI_W    , 0                         , 11 , 2994), // #652
+  INST(Sqshlu_v         , SimdShift          , (0b0000000000000000000000, 0b0010111100000000011001, 0, kVO_SV_Any)                   , kRWI_W    , 0                         , 12 , 3000), // #653
+  INST(Sqshrn_v         , SimdShift          , (0b0000000000000000000000, 0b0000111100000000100101, 1, kVO_SV_B8H4S2)                , kRWI_W    , F(Narrow)                 , 13 , 3007), // #654
+  INST(Sqshrn2_v        , SimdShift          , (0b0000000000000000000000, 0b0100111100000000100101, 1, kVO_V_B16H8S4)                , kRWI_X    , F(Narrow)                 , 14 , 3014), // #655
+  INST(Sqshrun_v        , SimdShift          , (0b0000000000000000000000, 0b0010111100000000100001, 1, kVO_SV_B8H4S2)                , kRWI_W    , F(Narrow)                 , 15 , 3022), // #656
+  INST(Sqshrun2_v       , SimdShift          , (0b0000000000000000000000, 0b0110111100000000100001, 1, kVO_V_B16H8S4)                , kRWI_X    , F(Narrow)                 , 16 , 3030), // #657
+  INST(Sqsub_v          , ISimdVVV           , (0b0000111000100000001011, kVO_SV_Any)                                                , kRWI_W    , 0                         , 33 , 3039), // #658
+  INST(Sqxtn_v          , ISimdVV            , (0b0000111000100001010010, kVO_SV_B8H4S2)                                             , kRWI_W    , F(Narrow)                 , 15 , 3045), // #659
+  INST(Sqxtn2_v         , ISimdVV            , (0b0100111000100001010010, kVO_V_B16H8S4)                                             , kRWI_X    , F(Narrow)                 , 16 , 3051), // #660
+  INST(Sqxtun_v         , ISimdVV            , (0b0010111000100001001010, kVO_SV_B8H4S2)                                             , kRWI_W    , F(Narrow)                 , 17 , 3058), // #661
+  INST(Sqxtun2_v        , ISimdVV            , (0b0110111000100001001010, kVO_V_B16H8S4)                                             , kRWI_X    , F(Narrow)                 , 18 , 3065), // #662
+  INST(Srhadd_v         , ISimdVVV           , (0b0000111000100000000101, kVO_V_BHS)                                                 , kRWI_W    , 0                         , 34 , 3073), // #663
+  INST(Sri_v            , SimdShift          , (0b0000000000000000000000, 0b0010111100000000010001, 1, kVO_V_Any)                    , kRWI_W    , 0                         , 17 , 3080), // #664
+  INST(Srshl_v          , SimdShift          , (0b0000111000100000010101, 0b0000000000000000000000, 0, kVO_V_Any)                    , kRWI_W    , 0                         , 18 , 3084), // #665
+  INST(Srshr_v          , SimdShift          , (0b0000000000000000000000, 0b0000111100000000001001, 1, kVO_V_Any)                    , kRWI_W    , 0                         , 19 , 3090), // #666
+  INST(Srsra_v          , SimdShift          , (0b0000000000000000000000, 0b0000111100000000001101, 1, kVO_V_Any)                    , kRWI_X    , 0                         , 20 , 3096), // #667
+  INST(Sshl_v           , SimdShift          , (0b0000111000100000010001, 0b0000000000000000000000, 0, kVO_V_Any)                    , kRWI_W    , 0                         , 21 , 3102), // #668
+  INST(Sshll_v          , SimdShift          , (0b0000000000000000000000, 0b0000111100000000101001, 0, kVO_V_B8H4S2)                 , kRWI_W    , F(Long)                   , 22 , 3107), // #669
+  INST(Sshll2_v         , SimdShift          , (0b0000000000000000000000, 0b0100111100000000101001, 0, kVO_V_B16H8S4)                , kRWI_W    , F(Long)                   , 23 , 3113), // #670
+  INST(Sshr_v           , SimdShift          , (0b0000000000000000000000, 0b0000111100000000000001, 1, kVO_V_Any)                    , kRWI_W    , 0                         , 24 , 3120), // #671
+  INST(Ssra_v           , SimdShift          , (0b0000000000000000000000, 0b0000111100000000000101, 1, kVO_V_Any)                    , kRWI_X    , 0                         , 25 , 3125), // #672
+  INST(Ssubl_v          , ISimdVVV           , (0b0000111000100000001000, kVO_V_B8H4S2)                                              , kRWI_W    , F(Long)                   , 35 , 3130), // #673
+  INST(Ssubl2_v         , ISimdVVV           , (0b0100111000100000001000, kVO_V_B16H8S4)                                             , kRWI_W    , F(Long)                   , 36 , 3136), // #674
+  INST(Ssubw_v          , ISimdWWV           , (0b0000111000100000001100, kVO_V_B8H4S2)                                              , kRWI_W    , 0                         , 2  , 3143), // #675
+  INST(Ssubw2_v         , ISimdWWV           , (0b0000111000100000001100, kVO_V_B16H8S4)                                             , kRWI_X    , 0                         , 3  , 3149), // #676
+  INST(St1_v            , SimdLdNStN         , (0b0000110100000000000000, 0b0000110000000000001000, 1, 0)                            , kRWI_STn  , F(Consecutive)            , 8  , 3156), // #677
+  INST(St2_v            , SimdLdNStN         , (0b0000110100100000000000, 0b0000110000000000100000, 2, 0)                            , kRWI_STn  , F(Consecutive)            , 9  , 3160), // #678
+  INST(St3_v            , SimdLdNStN         , (0b0000110100000000001000, 0b0000110000000000010000, 3, 0)                            , kRWI_STn  , F(Consecutive)            , 10 , 3169), // #679
+  INST(St4_v            , SimdLdNStN         , (0b0000110100100000001000, 0b0000110000000000000000, 4, 0)                            , kRWI_STn  , F(Consecutive)            , 11 , 3173), // #680
+  INST(Stnp_v           , SimdLdpStp         , (0b0010110000, 0b0000000000)                                                          , kRWI_RRW  , 0                         , 2  , 3383), // #681
+  INST(Stp_v            , SimdLdpStp         , (0b0010110100, 0b0010110010)                                                          , kRWI_RRW  , 0                         , 3  , 3388), // #682
+  INST(Str_v            , SimdLdSt           , (0b0011110100, 0b00111100000, 0b00111100001, 0b00000000, Inst::kIdStur_v)             , kRWI_RW   , 0                         , 1  , 3392), // #683
+  INST(Stur_v           , SimdLdurStur       , (0b0011110000000000000000)                                                            , kRWI_RW   , 0                         , 1  , 3662), // #684
+  INST(Sub_v            , ISimdVVV           , (0b0010111000100000100001, kVO_V_Any)                                                 , kRWI_W    , 0                         , 37 , 985 ), // #685
+  INST(Subhn_v          , ISimdVVV           , (0b0000111000100000011000, kVO_V_B8H4S2)                                              , kRWI_W    , F(Narrow)                 , 38 , 2401), // #686
+  INST(Subhn2_v         , ISimdVVV           , (0b0000111000100000011000, kVO_V_B16H8S4)                                             , kRWI_X    , F(Narrow)                 , 39 , 2408), // #687
+  INST(Sudot_v          , SimdDot            , (0b0000000000000000000000, 0b0000111100000000111100, kET_S, kET_B, kET_4B)            , kRWI_X    , 0                         , 2  , 3739), // #688
+  INST(Suqadd_v         , ISimdVV            , (0b0000111000100000001110, kVO_SV_Any)                                                , kRWI_X    , 0                         , 19 , 3745), // #689
+  INST(Sxtl_v           , SimdSxtlUxtl       , (0b0000111100000000101001, kVO_V_B8H4S2)                                              , kRWI_W    , F(Long)                   , 0  , 3834), // #690
+  INST(Sxtl2_v          , SimdSxtlUxtl       , (0b0100111100000000101001, kVO_V_B16H8S4)                                             , kRWI_W    , F(Long)                   , 1  , 3839), // #691
+  INST(Tbl_v            , SimdTblTbx         , (0b0000111000000000000000)                                                            , kRWI_W    , 0                         , 0  , 3854), // #692
+  INST(Tbx_v            , SimdTblTbx         , (0b0000111000000000000100)                                                            , kRWI_W    , 0                         , 1  , 3863), // #693
+  INST(Trn1_v           , ISimdVVV           , (0b0000111000000000001010, kVO_V_BHS_D2)                                              , kRWI_W    , 0                         , 40 , 3876), // #694
+  INST(Trn2_v           , ISimdVVV           , (0b0000111000000000011010, kVO_V_BHS_D2)                                              , kRWI_W    , 0                         , 41 , 3881), // #695
+  INST(Uaba_v           , ISimdVVV           , (0b0010111000100000011111, kVO_V_BHS)                                                 , kRWI_X    , 0                         , 42 , 3886), // #696
+  INST(Uabal_v          , ISimdVVV           , (0b0010111000100000010100, kVO_V_B8H4S2)                                              , kRWI_X    , F(Long)                   , 43 , 3891), // #697
+  INST(Uabal2_v         , ISimdVVV           , (0b0110111000100000010100, kVO_V_B16H8S4)                                             , kRWI_X    , F(Long)                   , 44 , 3897), // #698
+  INST(Uabd_v           , ISimdVVV           , (0b0010111000100000011101, kVO_V_BHS)                                                 , kRWI_W    , 0                         , 45 , 3904), // #699
+  INST(Uabdl_v          , ISimdVVV           , (0b0010111000100000011100, kVO_V_B8H4S2)                                              , kRWI_W    , F(Long)                   , 46 , 3909), // #700
+  INST(Uabdl2_v         , ISimdVVV           , (0b0110111000100000011100, kVO_V_B16H8S4)                                             , kRWI_W    , F(Long)                   , 47 , 3915), // #701
+  INST(Uadalp_v         , ISimdVV            , (0b0010111000100000011010, kVO_V_BHS)                                                 , kRWI_X    , F(Long) | F(Pair)         , 20 , 3922), // #702
+  INST(Uaddl_v          , ISimdVVV           , (0b0010111000100000000000, kVO_V_B8H4S2)                                              , kRWI_W    , F(Long)                   , 48 , 3929), // #703
+  INST(Uaddl2_v         , ISimdVVV           , (0b0110111000100000000000, kVO_V_B16H8S4)                                             , kRWI_W    , F(Long)                   , 49 , 3935), // #704
+  INST(Uaddlp_v         , ISimdVV            , (0b0010111000100000001010, kVO_V_BHS)                                                 , kRWI_W    , F(Long) | F(Pair)         , 21 , 3942), // #705
+  INST(Uaddlv_v         , ISimdSV            , (0b0010111000110000001110, kVO_V_BH_4S)                                               , kRWI_W    , F(Long)                   , 4  , 3949), // #706
+  INST(Uaddw_v          , ISimdWWV           , (0b0010111000100000000100, kVO_V_B8H4S2)                                              , kRWI_W    , 0                         , 4  , 3956), // #707
+  INST(Uaddw2_v         , ISimdWWV           , (0b0010111000100000000100, kVO_V_B16H8S4)                                             , kRWI_W    , 0                         , 5  , 3962), // #708
+  INST(Ucvtf_v          , SimdFcvtSV         , (0b0010111000100001110110, 0b0010111100000000111001, 0b0001111000100011000000, 0)     , kRWI_W    , 0                         , 11 , 3985), // #709
+  INST(Udot_v           , SimdDot            , (0b0010111010000000100101, 0b0010111110000000111000, kET_S, kET_B, kET_4B)            , kRWI_X    , 0                         , 3  , 3740), // #710
+  INST(Uhadd_v          , ISimdVVV           , (0b0010111000100000000001, kVO_V_BHS)                                                 , kRWI_W    , 0                         , 50 , 4000), // #711
+  INST(Uhsub_v          , ISimdVVV           , (0b0010111000100000001001, kVO_V_BHS)                                                 , kRWI_W    , 0                         , 51 , 4006), // #712
+  INST(Umax_v           , ISimdVVV           , (0b0010111000100000011001, kVO_V_BHS)                                                 , kRWI_W    , 0                         , 52 , 1936), // #713
+  INST(Umaxp_v          , ISimdVVV           , (0b0010111000100000101001, kVO_V_BHS)                                                 , kRWI_W    , 0                         , 53 , 4019), // #714
+  INST(Umaxv_v          , ISimdSV            , (0b0010111000110000101010, kVO_V_BH_4S)                                               , kRWI_W    , 0                         , 5  , 4025), // #715
+  INST(Umin_v           , ISimdVVV           , (0b0010111000100000011011, kVO_V_BHS)                                                 , kRWI_W    , 0                         , 54 , 2040), // #716
+  INST(Uminp_v          , ISimdVVV           , (0b0010111000100000101011, kVO_V_BHS)                                                 , kRWI_W    , 0                         , 55 , 4031), // #717
+  INST(Uminv_v          , ISimdSV            , (0b0010111000110001101010, kVO_V_BH_4S)                                               , kRWI_W    , 0                         , 6  , 4037), // #718
+  INST(Umlal_v          , ISimdVVVe          , (0b0010111000100000100000, kVO_V_B8H4S2, 0b0010111100000000001000, kVO_V_H4S2)        , kRWI_X    , F(Long) | F(VH0_15)       , 19 , 4043), // #719
+  INST(Umlal2_v         , ISimdVVVe          , (0b0110111000100000100000, kVO_V_B16H8S4, 0b0010111100000000001000, kVO_V_H8S4)       , kRWI_X    , F(Long) | F(VH0_15)       , 20 , 4049), // #720
+  INST(Umlsl_v          , ISimdVVVe          , (0b0010111000100000101000, kVO_V_B8H4S2, 0b0010111100000000011000, kVO_V_H4S2)        , kRWI_X    , F(Long) | F(VH0_15)       , 21 , 4056), // #721
+  INST(Umlsl2_v         , ISimdVVVe          , (0b0110111000100000101000, kVO_V_B16H8S4, 0b0110111100000000011000, kVO_V_H8S4)       , kRWI_X    , F(Long) | F(VH0_15)       , 22 , 4062), // #722
+  INST(Ummla_v          , ISimdVVVx          , (0b0110111010000000101001, kOp_V4S, kOp_V16B, kOp_V16B)                               , kRWI_X    , 0                         , 15 , 4069), // #723
+  INST(Umov_v           , SimdSmovUmov       , (0b0000111000000000001111, kVO_V_Any, 0)                                              , kRWI_W    , 0                         , 1  , 4082), // #724
+  INST(Umull_v          , ISimdVVVe          , (0b0010111000100000110000, kVO_V_B8H4S2, 0b0010111100000000101000, kVO_V_H4S2)        , kRWI_W    , F(Long) | F(VH0_15)       , 23 , 4100), // #725
+  INST(Umull2_v         , ISimdVVVe          , (0b0110111000100000110000, kVO_V_B16H8S4, 0b0110111100000000101000, kVO_V_H8S4)       , kRWI_W    , F(Long) | F(VH0_15)       , 24 , 4106), // #726
+  INST(Uqadd_v          , ISimdVVV           , (0b0010111000100000000011, kVO_SV_Any)                                                , kRWI_W    , 0                         , 56 , 3746), // #727
+  INST(Uqrshl_v         , SimdShift          , (0b0010111000100000010111, 0b0000000000000000000000, 0, kVO_SV_Any)                   , kRWI_W    , 0                         , 26 , 4113), // #728
+  INST(Uqrshrn_v        , SimdShift          , (0b0000000000000000000000, 0b0010111100000000100111, 1, kVO_SV_B8H4S2)                , kRWI_W    , F(Narrow)                 , 27 , 4120), // #729
+  INST(Uqrshrn2_v       , SimdShift          , (0b0000000000000000000000, 0b0110111100000000100111, 1, kVO_V_B16H8S4)                , kRWI_X    , F(Narrow)                 , 28 , 4128), // #730
+  INST(Uqshl_v          , SimdShift          , (0b0010111000100000010011, 0b0010111100000000011101, 0, kVO_SV_Any)                   , kRWI_W    , 0                         , 29 , 4137), // #731
+  INST(Uqshrn_v         , SimdShift          , (0b0000000000000000000000, 0b0010111100000000100101, 1, kVO_SV_B8H4S2)                , kRWI_W    , F(Narrow)                 , 30 , 4143), // #732
+  INST(Uqshrn2_v        , SimdShift          , (0b0000000000000000000000, 0b0110111100000000100101, 1, kVO_V_B16H8S4)                , kRWI_X    , F(Narrow)                 , 31 , 4150), // #733
+  INST(Uqsub_v          , ISimdVVV           , (0b0010111000100000001011, kVO_SV_Any)                                                , kRWI_W    , 0                         , 57 , 4158), // #734
+  INST(Uqxtn_v          , ISimdVV            , (0b0010111000100001010010, kVO_SV_B8H4S2)                                             , kRWI_W    , F(Narrow)                 , 22 , 4164), // #735
+  INST(Uqxtn2_v         , ISimdVV            , (0b0110111000100001010010, kVO_V_B16H8S4)                                             , kRWI_X    , F(Narrow)                 , 23 , 4170), // #736
+  INST(Urecpe_v         , ISimdVV            , (0b0000111010100001110010, kVO_V_S)                                                   , kRWI_W    , 0                         , 24 , 4177), // #737
+  INST(Urhadd_v         , ISimdVVV           , (0b0010111000100000000101, kVO_V_BHS)                                                 , kRWI_W    , 0                         , 58 , 4184), // #738
+  INST(Urshl_v          , SimdShift          , (0b0010111000100000010101, 0b0000000000000000000000, 0, kVO_V_Any)                    , kRWI_W    , 0                         , 32 , 4191), // #739
+  INST(Urshr_v          , SimdShift          , (0b0000000000000000000000, 0b0010111100000000001001, 1, kVO_V_Any)                    , kRWI_W    , 0                         , 33 , 4197), // #740
+  INST(Ursqrte_v        , ISimdVV            , (0b0010111010100001110010, kVO_V_S)                                                   , kRWI_W    , 0                         , 25 , 4203), // #741
+  INST(Ursra_v          , SimdShift          , (0b0000000000000000000000, 0b0010111100000000001101, 1, kVO_V_Any)                    , kRWI_X    , 0                         , 34 , 4211), // #742
+  INST(Usdot_v          , SimdDot            , (0b0000111010000000100111, 0b0000111110000000111100, kET_S, kET_B, kET_4B)            , kRWI_X    , 0                         , 4  , 4217), // #743
+  INST(Ushl_v           , SimdShift          , (0b0010111000100000010001, 0b0000000000000000000000, 0, kVO_V_Any)                    , kRWI_W    , 0                         , 35 , 4223), // #744
+  INST(Ushll_v          , SimdShift          , (0b0000000000000000000000, 0b0010111100000000101001, 0, kVO_V_B8H4S2)                 , kRWI_W    , F(Long)                   , 36 , 4228), // #745
+  INST(Ushll2_v         , SimdShift          , (0b0000000000000000000000, 0b0110111100000000101001, 0, kVO_V_B16H8S4)                , kRWI_W    , F(Long)                   , 37 , 4234), // #746
+  INST(Ushr_v           , SimdShift          , (0b0000000000000000000000, 0b0010111100000000000001, 1, kVO_V_Any)                    , kRWI_W    , 0                         , 38 , 4241), // #747
+  INST(Usmmla_v         , ISimdVVVx          , (0b0100111010000000101011, kOp_V4S, kOp_V16B, kOp_V16B)                               , kRWI_X    , 0                         , 16 , 4246), // #748
+  INST(Usqadd_v         , ISimdVV            , (0b0010111000100000001110, kVO_SV_Any)                                                , kRWI_X    , 0                         , 26 , 4253), // #749
+  INST(Usra_v           , SimdShift          , (0b0000000000000000000000, 0b0010111100000000000101, 1, kVO_V_Any)                    , kRWI_X    , 0                         , 39 , 4260), // #750
+  INST(Usubl_v          , ISimdVVV           , (0b0010111000100000001000, kVO_V_B8H4S2)                                              , kRWI_W    , F(Long)                   , 59 , 4265), // #751
+  INST(Usubl2_v         , ISimdVVV           , (0b0110111000100000001000, kVO_V_B16H8S4)                                             , kRWI_W    , F(Long)                   , 60 , 4271), // #752
+  INST(Usubw_v          , ISimdWWV           , (0b0010111000100000001100, kVO_V_B8H4S2)                                              , kRWI_W    , 0                         , 6  , 4278), // #753
+  INST(Usubw2_v         , ISimdWWV           , (0b0010111000100000001100, kVO_V_B16H8S4)                                             , kRWI_W    , 0                         , 7  , 4284), // #754
+  INST(Uxtl_v           , SimdSxtlUxtl       , (0b0010111100000000101001, kVO_V_B8H4S2)                                              , kRWI_W    , F(Long)                   , 2  , 4301), // #755
+  INST(Uxtl2_v          , SimdSxtlUxtl       , (0b0110111100000000101001, kVO_V_B16H8S4)                                             , kRWI_W    , F(Long)                   , 3  , 4306), // #756
+  INST(Uzp1_v           , ISimdVVV           , (0b0000111000000000000110, kVO_V_BHS_D2)                                              , kRWI_W    , 0                         , 61 , 4312), // #757
+  INST(Uzp2_v           , ISimdVVV           , (0b0000111000000000010110, kVO_V_BHS_D2)                                              , kRWI_W    , 0                         , 62 , 4317), // #758
+  INST(Xar_v            , ISimdVVVI          , (0b1100111001100000100011, kVO_V_D2, 6, 10, 0)                                        , kRWI_W    , 0                         , 1  , 4337), // #759
+  INST(Xtn_v            , ISimdVV            , (0b0000111000100001001010, kVO_V_B8H4S2)                                              , kRWI_W    , F(Narrow)                 , 27 , 3047), // #760
+  INST(Xtn2_v           , ISimdVV            , (0b0100111000100001001010, kVO_V_B16H8S4)                                             , kRWI_X    , F(Narrow)                 , 28 , 3053), // #761
+  INST(Zip1_v           , ISimdVVV           , (0b0000111000000000001110, kVO_V_BHS_D2)                                              , kRWI_W    , 0                         , 63 , 4367), // #762
+  INST(Zip2_v           , ISimdVVV           , (0b0000111000000000011110, kVO_V_BHS_D2)                                              , kRWI_W    , 0                         , 64 , 4372)  // #763
+  // ${InstInfo:End}
+};
+
+#undef F
+#undef INST
+#undef NAME_DATA_INDEX
+
+namespace EncodingData {
+
+// ${EncodingData:Begin}
+// ------------------- Automatically generated, do not edit -------------------
+const BaseAddSub baseAddSub[4] = {
+  { 0b0001011000, 0b0001011001, 0b0010001 }, // add
+  { 0b0101011000, 0b0101011001, 0b0110001 }, // adds
+  { 0b1001011000, 0b1001011001, 0b1010001 }, // sub
+  { 0b1101011000, 0b1101011001, 0b1110001 }  // subs
+};
+
+const BaseAdr baseAdr[2] = {
+  { 0b0001000000000000000000, OffsetType::kAArch64_ADR }, // adr
+  { 0b1001000000000000000000, OffsetType::kAArch64_ADRP }  // adrp
+};
+
+const BaseAtDcIcTlbi baseAtDcIcTlbi[4] = {
+  { 0b00011111110000, 0b00001111000000, true }, // at
+  { 0b00011110000000, 0b00001110000000, true }, // dc
+  { 0b00011110000000, 0b00001110000000, false }, // ic
+  { 0b00011110000000, 0b00010000000000, false }  // tlbi
+};
+
+const BaseAtomicCasp baseAtomicCasp[4] = {
+  { 0b0000100000100000011111, kWX, 30 }, // casp
+  { 0b0000100001100000011111, kWX, 30 }, // caspa
+  { 0b0000100001100000111111, kWX, 30 }, // caspal
+  { 0b0000100000100000111111, kWX, 30 }  // caspl
+};
+
+const BaseAtomicOp baseAtomicOp[123] = {
+  { 0b1000100010100000011111, kWX, 30, 0 }, // cas
+  { 0b1000100011100000011111, kWX, 30, 1 }, // casa
+  { 0b0000100011100000011111, kW , 0 , 1 }, // casab
+  { 0b0100100011100000011111, kW , 0 , 1 }, // casah
+  { 0b1000100011100000111111, kWX, 30, 1 }, // casal
+  { 0b0000100011100000111111, kW , 0 , 1 }, // casalb
+  { 0b0100100011100000111111, kW , 0 , 1 }, // casalh
+  { 0b0000100010100000011111, kW , 0 , 0 }, // casb
+  { 0b0100100010100000011111, kW , 0 , 0 }, // cash
+  { 0b1000100010100000111111, kWX, 30, 0 }, // casl
+  { 0b0000100010100000111111, kW , 0 , 0 }, // caslb
+  { 0b0100100010100000111111, kW , 0 , 0 }, // caslh
+  { 0b1011100000100000000000, kWX, 30, 0 }, // ldadd
+  { 0b1011100010100000000000, kWX, 30, 1 }, // ldadda
+  { 0b0011100010100000000000, kW , 0 , 1 }, // ldaddab
+  { 0b0111100010100000000000, kW , 0 , 1 }, // ldaddah
+  { 0b1011100011100000000000, kWX, 30, 1 }, // ldaddal
+  { 0b0011100011100000000000, kW , 0 , 1 }, // ldaddalb
+  { 0b0111100011100000000000, kW , 0 , 1 }, // ldaddalh
+  { 0b0011100000100000000000, kW , 0 , 0 }, // ldaddb
+  { 0b0111100000100000000000, kW , 0 , 0 }, // ldaddh
+  { 0b1011100001100000000000, kWX, 30, 0 }, // ldaddl
+  { 0b0011100001100000000000, kW , 0 , 0 }, // ldaddlb
+  { 0b0111100001100000000000, kW , 0 , 0 }, // ldaddlh
+  { 0b1011100000100000000100, kWX, 30, 0 }, // ldclr
+  { 0b1011100010100000000100, kWX, 30, 1 }, // ldclra
+  { 0b0011100010100000000100, kW , 0 , 1 }, // ldclrab
+  { 0b0111100010100000000100, kW , 0 , 1 }, // ldclrah
+  { 0b1011100011100000000100, kWX, 30, 1 }, // ldclral
+  { 0b0011100011100000000100, kW , 0 , 1 }, // ldclralb
+  { 0b0111100011100000000100, kW , 0 , 1 }, // ldclralh
+  { 0b0011100000100000000100, kW , 0 , 0 }, // ldclrb
+  { 0b0111100000100000000100, kW , 0 , 0 }, // ldclrh
+  { 0b1011100001100000000100, kWX, 30, 0 }, // ldclrl
+  { 0b0011100001100000000100, kW , 0 , 0 }, // ldclrlb
+  { 0b0111100001100000000100, kW , 0 , 0 }, // ldclrlh
+  { 0b1011100000100000001000, kWX, 30, 0 }, // ldeor
+  { 0b1011100010100000001000, kWX, 30, 1 }, // ldeora
+  { 0b0011100010100000001000, kW , 0 , 1 }, // ldeorab
+  { 0b0111100010100000001000, kW , 0 , 1 }, // ldeorah
+  { 0b1011100011100000001000, kWX, 30, 1 }, // ldeoral
+  { 0b0011100011100000001000, kW , 0 , 1 }, // ldeoralb
+  { 0b0111100011100000001000, kW , 0 , 1 }, // ldeoralh
+  { 0b0011100000100000001000, kW , 0 , 0 }, // ldeorb
+  { 0b0111100000100000001000, kW , 0 , 0 }, // ldeorh
+  { 0b1011100001100000001000, kWX, 30, 0 }, // ldeorl
+  { 0b0011100001100000001000, kW , 0 , 0 }, // ldeorlb
+  { 0b0111100001100000001000, kW , 0 , 0 }, // ldeorlh
+  { 0b1011100000100000001100, kWX, 30, 0 }, // ldset
+  { 0b1011100010100000001100, kWX, 30, 1 }, // ldseta
+  { 0b0011100010100000001100, kW , 0 , 1 }, // ldsetab
+  { 0b0111100010100000001100, kW , 0 , 1 }, // ldsetah
+  { 0b1011100011100000001100, kWX, 30, 1 }, // ldsetal
+  { 0b0011100011100000001100, kW , 0 , 1 }, // ldsetalb
+  { 0b0111100011100000001100, kW , 0 , 1 }, // ldsetalh
+  { 0b0011100000100000001100, kW , 0 , 0 }, // ldsetb
+  { 0b0111100000100000001100, kW , 0 , 0 }, // ldseth
+  { 0b1011100001100000001100, kWX, 30, 0 }, // ldsetl
+  { 0b0011100001100000001100, kW , 0 , 0 }, // ldsetlb
+  { 0b0111100001100000001100, kW , 0 , 0 }, // ldsetlh
+  { 0b1011100000100000010000, kWX, 30, 0 }, // ldsmax
+  { 0b1011100010100000010000, kWX, 30, 1 }, // ldsmaxa
+  { 0b0011100010100000010000, kW , 0 , 1 }, // ldsmaxab
+  { 0b0111100010100000010000, kW , 0 , 1 }, // ldsmaxah
+  { 0b1011100011100000010000, kWX, 30, 1 }, // ldsmaxal
+  { 0b0011100011100000010000, kW , 0 , 1 }, // ldsmaxalb
+  { 0b0111100011100000010000, kW , 0 , 1 }, // ldsmaxalh
+  { 0b0011100000100000010000, kW , 0 , 0 }, // ldsmaxb
+  { 0b0111100000100000010000, kW , 0 , 0 }, // ldsmaxh
+  { 0b1011100001100000010000, kWX, 30, 0 }, // ldsmaxl
+  { 0b0011100001100000010000, kW , 0 , 0 }, // ldsmaxlb
+  { 0b0111100001100000010000, kW , 0 , 0 }, // ldsmaxlh
+  { 0b1011100000100000010100, kWX, 30, 0 }, // ldsmin
+  { 0b1011100010100000010100, kWX, 30, 1 }, // ldsmina
+  { 0b0011100010100000010100, kW , 0 , 1 }, // ldsminab
+  { 0b0111100010100000010100, kW , 0 , 1 }, // ldsminah
+  { 0b1011100011100000010100, kWX, 30, 1 }, // ldsminal
+  { 0b0011100011100000010100, kW , 0 , 1 }, // ldsminalb
+  { 0b0111100011100000010100, kW , 0 , 1 }, // ldsminalh
+  { 0b0011100000100000010100, kW , 0 , 0 }, // ldsminb
+  { 0b0111100000100000010100, kW , 0 , 0 }, // ldsminh
+  { 0b1011100001100000010100, kWX, 30, 0 }, // ldsminl
+  { 0b0011100001100000010100, kW , 0 , 0 }, // ldsminlb
+  { 0b0111100001100000010100, kW , 0 , 0 }, // ldsminlh
+  { 0b1011100000100000011000, kWX, 30, 0 }, // ldumax
+  { 0b1011100010100000011000, kWX, 30, 1 }, // ldumaxa
+  { 0b0011100010100000011000, kW , 0 , 1 }, // ldumaxab
+  { 0b0111100010100000011000, kW , 0 , 1 }, // ldumaxah
+  { 0b1011100011100000011000, kWX, 30, 1 }, // ldumaxal
+  { 0b0011100011100000011000, kW , 0 , 1 }, // ldumaxalb
+  { 0b0111100011100000011000, kW , 0 , 1 }, // ldumaxalh
+  { 0b0011100000100000011000, kW , 0 , 0 }, // ldumaxb
+  { 0b0111100000100000011000, kW , 0 , 0 }, // ldumaxh
+  { 0b1011100001100000011000, kWX, 30, 0 }, // ldumaxl
+  { 0b0011100001100000011000, kW , 0 , 0 }, // ldumaxlb
+  { 0b0111100001100000011000, kW , 0 , 0 }, // ldumaxlh
+  { 0b1011100000100000011100, kWX, 30, 0 }, // ldumin
+  { 0b1011100010100000011100, kWX, 30, 1 }, // ldumina
+  { 0b0011100010100000011100, kW , 0 , 1 }, // lduminab
+  { 0b0111100010100000011100, kW , 0 , 1 }, // lduminah
+  { 0b1011100011100000011100, kWX, 30, 1 }, // lduminal
+  { 0b0011100011100000011100, kW , 0 , 1 }, // lduminalb
+  { 0b0111100011100000011100, kW , 0 , 1 }, // lduminalh
+  { 0b0011100000100000011100, kW , 0 , 0 }, // lduminb
+  { 0b0111100000100000011100, kW , 0 , 0 }, // lduminh
+  { 0b1011100001100000011100, kWX, 30, 0 }, // lduminl
+  { 0b0011100001100000011100, kW , 0 , 0 }, // lduminlb
+  { 0b0111100001100000011100, kW , 0 , 0 }, // lduminlh
+  { 0b1000100000000000111111, kWX, 30, 1 }, // stlxr
+  { 0b0000100000000000111111, kW , 0 , 1 }, // stlxrb
+  { 0b0100100000000000111111, kW , 0 , 1 }, // stlxrh
+  { 0b1011100000100000100000, kWX, 30, 1 }, // swp
+  { 0b1011100010100000100000, kWX, 30, 1 }, // swpa
+  { 0b0011100010100000100000, kW , 0 , 1 }, // swpab
+  { 0b0111100010100000100000, kW , 0 , 1 }, // swpah
+  { 0b1011100011100000100000, kWX, 30, 1 }, // swpal
+  { 0b0011100011100000100000, kW , 0 , 1 }, // swpalb
+  { 0b0111100011100000100000, kW , 0 , 1 }, // swpalh
+  { 0b0011100000100000100000, kW , 0 , 1 }, // swpb
+  { 0b0111100000100000100000, kW , 0 , 1 }, // swph
+  { 0b1011100001100000100000, kWX, 30, 1 }, // swpl
+  { 0b0011100001100000100000, kW , 0 , 1 }, // swplb
+  { 0b0111100001100000100000, kW , 0 , 1 }  // swplh
+};
+
+const BaseAtomicSt baseAtomicSt[48] = { // Encoding rows for the st<op>{l}{b,h} forms named per row; kWX rows carry 30 and kW rows 0 in the last field (presumably the bit toggled for X registers - confirm in BaseAtomicSt).
+  { 0b1011100000100000000000, kWX, 30 }, // stadd
+  { 0b1011100001100000000000, kWX, 30 }, // staddl
+  { 0b0011100000100000000000, kW , 0  }, // staddb
+  { 0b0011100001100000000000, kW , 0  }, // staddlb
+  { 0b0111100000100000000000, kW , 0  }, // staddh
+  { 0b0111100001100000000000, kW , 0  }, // staddlh
+  { 0b1011100000100000000100, kWX, 30 }, // stclr
+  { 0b1011100001100000000100, kWX, 30 }, // stclrl
+  { 0b0011100000100000000100, kW , 0  }, // stclrb
+  { 0b0011100001100000000100, kW , 0  }, // stclrlb
+  { 0b0111100000100000000100, kW , 0  }, // stclrh
+  { 0b0111100001100000000100, kW , 0  }, // stclrlh
+  { 0b1011100000100000001000, kWX, 30 }, // steor
+  { 0b1011100001100000001000, kWX, 30 }, // steorl
+  { 0b0011100000100000001000, kW , 0  }, // steorb
+  { 0b0011100001100000001000, kW , 0  }, // steorlb
+  { 0b0111100000100000001000, kW , 0  }, // steorh
+  { 0b0111100001100000001000, kW , 0  }, // steorlh
+  { 0b1011100000100000001100, kWX, 30 }, // stset
+  { 0b1011100001100000001100, kWX, 30 }, // stsetl
+  { 0b0011100000100000001100, kW , 0  }, // stsetb
+  { 0b0011100001100000001100, kW , 0  }, // stsetlb
+  { 0b0111100000100000001100, kW , 0  }, // stseth
+  { 0b0111100001100000001100, kW , 0  }, // stsetlh
+  { 0b1011100000100000010000, kWX, 30 }, // stsmax
+  { 0b1011100001100000010000, kWX, 30 }, // stsmaxl
+  { 0b0011100000100000010000, kW , 0  }, // stsmaxb
+  { 0b0011100001100000010000, kW , 0  }, // stsmaxlb
+  { 0b0111100000100000010000, kW , 0  }, // stsmaxh
+  { 0b0111100001100000010000, kW , 0  }, // stsmaxlh
+  { 0b1011100000100000010100, kWX, 30 }, // stsmin
+  { 0b1011100001100000010100, kWX, 30 }, // stsminl
+  { 0b0011100000100000010100, kW , 0  }, // stsminb
+  { 0b0011100001100000010100, kW , 0  }, // stsminlb
+  { 0b0111100000100000010100, kW , 0  }, // stsminh
+  { 0b0111100001100000010100, kW , 0  }, // stsminlh
+  { 0b1011100000100000011000, kWX, 30 }, // stumax
+  { 0b1011100001100000011000, kWX, 30 }, // stumaxl
+  { 0b0011100000100000011000, kW , 0  }, // stumaxb
+  { 0b0011100001100000011000, kW , 0  }, // stumaxlb
+  { 0b0111100000100000011000, kW , 0  }, // stumaxh
+  { 0b0111100001100000011000, kW , 0  }, // stumaxlh
+  { 0b1011100000100000011100, kWX, 30 }, // stumin
+  { 0b1011100001100000011100, kWX, 30 }, // stuminl
+  { 0b0011100000100000011100, kW , 0  }, // stuminb
+  { 0b0011100001100000011100, kW , 0  }, // stuminlb
+  { 0b0111100000100000011100, kW , 0  }, // stuminh
+  { 0b0111100001100000011100, kW , 0  }  // stuminlh
+};
+
+const BaseBfc baseBfc[1] = { // Single 32-bit opcode literal for bfc.
+  { 0b00110011000000000000001111100000 }  // bfc
+};
+
+const BaseBfi baseBfi[3] = { // 32-bit opcode literals for bfi/sbfiz/ubfiz; each equals the bfm/sbfm/ubfm literal in baseBfm.
+  { 0b00110011000000000000000000000000 }, // bfi
+  { 0b00010011000000000000000000000000 }, // sbfiz
+  { 0b01010011000000000000000000000000 }  // ubfiz
+};
+
+const BaseBfm baseBfm[3] = { // 32-bit opcode literals for bfm/sbfm/ubfm; the rows differ only in bits 30:29 of the literal.
+  { 0b00110011000000000000000000000000 }, // bfm
+  { 0b00010011000000000000000000000000 }, // sbfm
+  { 0b01010011000000000000000000000000 }  // ubfm
+};
+
+const BaseBfx baseBfx[3] = { // 32-bit opcode literals for bfxil/sbfx/ubfx; each equals the bfm/sbfm/ubfm literal in baseBfm.
+  { 0b00110011000000000000000000000000 }, // bfxil
+  { 0b00010011000000000000000000000000 }, // sbfx
+  { 0b01010011000000000000000000000000 }  // ubfx
+};
+
+const BaseBranchCmp baseBranchCmp[2] = { // 32-bit opcode literals for cbnz/cbz; the two rows differ only in bit 24.
+  { 0b00110101000000000000000000000000 }, // cbnz
+  { 0b00110100000000000000000000000000 }  // cbz
+};
+
+const BaseBranchReg baseBranchReg[3] = { // 32-bit opcode literals for blr/br/ret; the rows differ only in bits 22:21.
+  { 0b11010110001111110000000000000000 }, // blr
+  { 0b11010110000111110000000000000000 }, // br
+  { 0b11010110010111110000000000000000 }  // ret
+};
+
+const BaseBranchRel baseBranchRel[2] = { // 32-bit opcode literals for b/bl; bl is the b literal with bit 31 set.
+  { 0b00010100000000000000000000000000 }, // b
+  { 0b10010100000000000000000000000000 }  // bl
+};
+
+const BaseBranchTst baseBranchTst[2] = { // 32-bit opcode literals for tbnz/tbz; the two rows differ only in bit 24.
+  { 0b00110111000000000000000000000000 }, // tbnz
+  { 0b00110110000000000000000000000000 }  // tbz
+};
+
+const BaseCCmp baseCCmp[2] = { // 32-bit opcode literals for ccmn/ccmp; ccmp is the ccmn literal with bit 30 set.
+  { 0b00111010010000000000000000000000 }, // ccmn
+  { 0b01111010010000000000000000000000 }  // ccmp
+};
+
+const BaseCInc baseCInc[3] = { // 32-bit opcode literals for cinc/cinv/cneg; each equals the csinc/csinv/csneg literal in baseCSel.
+  { 0b00011010100000000000010000000000 }, // cinc
+  { 0b01011010100000000000000000000000 }, // cinv
+  { 0b01011010100000000000010000000000 }  // cneg
+};
+
+const BaseCSel baseCSel[4] = { // 32-bit opcode literals for csel/csinc/csinv/csneg; the rows differ only in bits 30 and 10.
+  { 0b00011010100000000000000000000000 }, // csel
+  { 0b00011010100000000000010000000000 }, // csinc
+  { 0b01011010100000000000000000000000 }, // csinv
+  { 0b01011010100000000000010000000000 }  // csneg
+};
+
+const BaseCSet baseCSet[2] = { // 32-bit opcode literals for cset/csetm; both have bits 20:16 and 9:5 pre-set to ones.
+  { 0b00011010100111110000011111100000 }, // cset
+  { 0b01011010100111110000001111100000 }  // csetm
+};
+
+const BaseCmpCmn baseCmpCmn[2] = { // Three opcode literals per row (10, 10 and 7 binary digits); presumably shifted-register, extended-register and immediate forms - confirm in BaseCmpCmn.
+  { 0b0101011000, 0b0101011001, 0b0110001 }, // cmn
+  { 0b1101011000, 0b1101011001, 0b1110001 }  // cmp
+};
+
+const BaseExtend baseExtend[5] = { // Rows for sxtb/sxth/sxtw/uxtb/uxth: 22-digit opcode literal, allowed register class, and a last field that is 0 on the sxt* rows and 1 on the uxt* rows (meaning declared in BaseExtend - confirm).
+  { 0b0001001100000000000111, kWX, 0 }, // sxtb
+  { 0b0001001100000000001111, kWX, 0 }, // sxth
+  { 0b1001001101000000011111, kX , 0 }, // sxtw
+  { 0b0101001100000000000111, kW, 1 }, // uxtb
+  { 0b0101001100000000001111, kW, 1 }  // uxth
+};
+
+const BaseExtract baseExtract[1] = { // Single 32-bit opcode literal for extr.
+  { 0b00010011100000000000000000000000 }  // extr
+};
+
+const BaseLdSt baseLdSt[9] = { // Rows for ldr*/str*: three opcode literals, a fourth literal that is non-zero only for ldr/ldrsw, allowed register class, bit offset paired with kWX rows, a small shift value, and the unscaled (ldur*/stur*) instruction id. Field names live in BaseLdSt (not shown here).
+  { 0b1011100101, 0b10111000010, 0b10111000011, 0b00011000, kWX, 30, 2, Inst::kIdLdur }, // ldr
+  { 0b0011100101, 0b00111000010, 0b00111000011, 0         , kW , 0 , 0, Inst::kIdLdurb }, // ldrb
+  { 0b0111100101, 0b01111000010, 0b01111000011, 0         , kW , 0 , 1, Inst::kIdLdurh }, // ldrh
+  { 0b0011100111, 0b00111000100, 0b00111000101, 0         , kWX, 22, 0, Inst::kIdLdursb }, // ldrsb
+  { 0b0111100110, 0b01111000100, 0b01111000101, 0         , kWX, 22, 1, Inst::kIdLdursh }, // ldrsh
+  { 0b1011100110, 0b10111000100, 0b10111000101, 0b10011000, kX , 0 , 2, Inst::kIdLdursw }, // ldrsw
+  { 0b1011100100, 0b10111000000, 0b10111000001, 0         , kWX, 30, 2, Inst::kIdStur }, // str
+  { 0b0011100100, 0b00111000000, 0b00111000001, 0         , kW , 30, 0, Inst::kIdSturb }, // strb
+  { 0b0111100100, 0b01111000000, 0b01111000001, 0         , kW , 30, 1, Inst::kIdSturh }  // strh (W-only like ldrh/sturh: toggling bit 30 for an X register would turn the halfword size field into the byte one)
+};
+
+const BaseLdpStp baseLdpStp[6] = { // Rows for the pair load/store forms named per row: two 10-digit opcode literals (second is 0 for ldnp/stnp), allowed register class, bit offset (31 on kWX rows), and a small shift value. Field names live in BaseLdpStp (not shown here).
+  { 0b0010100001, 0           , kWX, 31, 2 }, // ldnp
+  { 0b0010100101, 0b0010100011, kWX, 31, 2 }, // ldp
+  { 0b0110100101, 0b0110100011, kX , 0 , 2 }, // ldpsw
+  { 0b0110100100, 0b0110100010, kX, 0, 4 }, // stgp
+  { 0b0010100000, 0           , kWX, 31, 2 }, // stnp
+  { 0b0010100100, 0b0010100010, kWX, 31, 2 }  // stp
+};
+
+const BaseLdxp baseLdxp[2] = { // Rows for ldaxp/ldxp: 22-digit opcode literal, allowed register class, bit offset (30) paired with kWX; the two literals differ only in one bit.
+  { 0b1000100001111111100000, kWX, 30 }, // ldaxp
+  { 0b1000100001111111000000, kWX, 30 }  // ldxp
+};
+
+const BaseLogical baseLogical[8] = { // Rows for the logical ops named per row: a 10-digit and an 8-digit opcode literal plus a last field that is 1 for bic/bics/eon/orn and 0 for and/ands/eor/orr (presumably a negate flag - confirm in BaseLogical). Each inverted op shares its 8-digit literal with its plain counterpart.
+  { 0b0001010000, 0b00100100, 0 }, // and
+  { 0b1101010000, 0b11100100, 0 }, // ands
+  { 0b0001010001, 0b00100100, 1 }, // bic
+  { 0b1101010001, 0b11100100, 1 }, // bics
+  { 0b1001010001, 0b10100100, 1 }, // eon
+  { 0b1001010000, 0b10100100, 0 }, // eor
+  { 0b0101010001, 0b01100100, 1 }, // orn
+  { 0b0101010000, 0b01100100, 0 }  // orr
+};
+
+const BaseMovKNZ baseMovKNZ[3] = { // 32-bit opcode literals for movk/movn/movz; the rows differ only in bits 30:29.
+  { 0b01110010100000000000000000000000 }, // movk
+  { 0b00010010100000000000000000000000 }, // movn
+  { 0b01010010100000000000000000000000 }  // movz
+};
+
+const BaseMvnNeg baseMvnNeg[3] = { // 32-bit opcode literals for mvn/neg/negs; all three have bits 9:5 pre-set to ones.
+  { 0b00101010001000000000001111100000 }, // mvn
+  { 0b01001011000000000000001111100000 }, // neg
+  { 0b01101011000000000000001111100000 }  // negs
+};
+
+const BaseOp baseOp[23] = { // One 32-bit literal per instruction named in the row comment; no other per-row data is stored.
+  { 0b11010101000000110010000110011111 }, // autia1716
+  { 0b11010101000000110010001110111111 }, // autiasp
+  { 0b11010101000000110010001110011111 }, // autiaz
+  { 0b11010101000000110010000111011111 }, // autib1716
+  { 0b11010101000000110010001111111111 }, // autibsp
+  { 0b11010101000000110010001111011111 }, // autibz
+  { 0b11010101000000000100000001011111 }, // axflag
+  { 0b11010101000000000100000000011111 }, // cfinv
+  { 0b11010101000000110010001010011111 }, // csdb
+  { 0b11010101000000110010000011011111 }, // dgh
+  { 0b11010110101111110000001111100000 }, // drps
+  { 0b11010101000000110010001000011111 }, // esb
+  { 0b11010110100111110000001111100000 }, // eret
+  { 0b11010101000000110010000000011111 }, // nop
+  { 0b11010101000000110011010010011111 }, // pssbb
+  { 0b11010101000000110010000010011111 }, // sev
+  { 0b11010101000000110010000010111111 }, // sevl
+  { 0b11010101000000110011000010011111 }, // ssbb
+  { 0b11010101000000110010000001011111 }, // wfe
+  { 0b11010101000000110010000001111111 }, // wfi
+  { 0b11010101000000000100000000111111 }, // xaflag
+  { 0b11010101000000110010000011111111 }, // xpaclri
+  { 0b11010101000000110010000000111111 }  // yield
+};
+
+const BaseOpImm baseOpImm[14] = { // 32-bit opcode literal followed by two small integers per row (16/5, 4/8, 7/5 or 16/0); presumably immediate bit-width and bit offset - confirm in BaseOpImm.
+  { 0b11010100001000000000000000000000, 16, 5 }, // brk
+  { 0b11010101000000110011000001011111, 4, 8 }, // clrex
+  { 0b11010100101000000000000000000001, 16, 5 }, // dcps1
+  { 0b11010100101000000000000000000010, 16, 5 }, // dcps2
+  { 0b11010100101000000000000000000011, 16, 5 }, // dcps3
+  { 0b11010101000000110011000010111111, 4, 8 }, // dmb
+  { 0b11010101000000110011000010011111, 4, 8 }, // dsb
+  { 0b11010101000000110010000000011111, 7, 5 }, // hint
+  { 0b11010100010000000000000000000000, 16, 5 }, // hlt
+  { 0b11010100000000000000000000000010, 16, 5 }, // hvc
+  { 0b11010101000000110011000011011111, 4, 8 }, // isb
+  { 0b11010100000000000000000000000011, 16, 5 }, // smc
+  { 0b11010100000000000000000000000001, 16, 5 }, // svc
+  { 0b00000000000000000000000000000000, 16, 0 }  // udf
+};
+
+const BaseR baseR[10] = { // 32-bit opcode literal, register class, kZR marker and a last integer that is 0 on the kX rows and 5 on setf8/setf16 (presumably the register field's bit offset - confirm in BaseR).
+  { 0b11011010110000010011101111100000, kX, kZR, 0 }, // autdza
+  { 0b11011010110000010011111111100000, kX, kZR, 0 }, // autdzb
+  { 0b11011010110000010011001111100000, kX, kZR, 0 }, // autiza
+  { 0b11011010110000010011011111100000, kX, kZR, 0 }, // autizb
+  { 0b11011010110000010010101111100000, kX, kZR, 0 }, // pacdza
+  { 0b11011010110000010010111111100000, kX, kZR, 0 }, // pacdzb
+  { 0b00111010000000000000100000001101, kW, kZR, 5 }, // setf8
+  { 0b00111010000000000100100000001101, kW, kZR, 5 }, // setf16
+  { 0b11011010110000010100011111100000, kX, kZR, 0 }, // xpacd
+  { 0b11011010110000010100001111100000, kX, kZR, 0 }  // xpaci
+};
+
+const BaseRM_NoImm baseRM_NoImm[21] = { // Rows for the load/store forms named per row: 22-digit opcode literal, allowed register class, kZR marker, and a last integer that is 30 on every kWX row and 0 otherwise (presumably the bit toggled for X registers - confirm in BaseRM_NoImm).
+  { 0b1000100011011111111111, kWX, kZR, 30 }, // ldar
+  { 0b0000100011011111111111, kW , kZR, 0  }, // ldarb
+  { 0b0100100011011111111111, kW , kZR, 0  }, // ldarh
+  { 0b1000100001011111111111, kWX, kZR, 30 }, // ldaxr
+  { 0b0000100001011111111111, kW , kZR, 0  }, // ldaxrb
+  { 0b0100100001011111111111, kW , kZR, 0  }, // ldaxrh
+  { 0b1101100111100000000000, kX , kZR, 0  }, // ldgm
+  { 0b1000100011011111011111, kWX, kZR, 30 }, // ldlar
+  { 0b0000100011011111011111, kW , kZR, 0  }, // ldlarb
+  { 0b0100100011011111011111, kW , kZR, 0  }, // ldlarh
+  { 0b1000100001011111011111, kWX, kZR, 30 }, // ldxr
+  { 0b0000100001011111011111, kW , kZR, 0  }, // ldxrb
+  { 0b0100100001011111011111, kW , kZR, 0  }, // ldxrh
+  { 0b1101100110100000000000, kX , kZR, 0  }, // stgm
+  { 0b1000100010011111011111, kWX, kZR, 30 }, // stllr
+  { 0b0000100010011111011111, kW , kZR, 0  }, // stllrb
+  { 0b0100100010011111011111, kW , kZR, 0  }, // stllrh
+  { 0b1000100010011111111111, kWX, kZR, 30 }, // stlr
+  { 0b0000100010011111111111, kW , kZR, 0  }, // stlrb
+  { 0b0100100010011111111111, kW , kZR, 0  }, // stlrh
+  { 0b1101100100100000000000, kX , kZR, 0 }  // stzgm
+};
+
+const BaseRM_SImm10 baseRM_SImm10[2] = { // Rows for ldraa/ldrab: 22-digit opcode literal, kX register class, kZR marker, then 0 and 3 (presumably X-bit offset and immediate shift - confirm in BaseRM_SImm10).
+  { 0b1111100000100000000001, kX , kZR, 0, 3 }, // ldraa
+  { 0b1111100010100000000001, kX , kZR, 0, 3 }  // ldrab
+};
+
+const BaseRM_SImm9 baseRM_SImm9[23] = { // Rows for the load/store forms named per row: two 22-digit opcode literals (second is non-zero only on the st*g rows), allowed register class, kZR/kSP marker, bit offset paired with kWX rows (30 or 22), and a shift value (4 on the tag rows, 0 elsewhere). Field names live in BaseRM_SImm9 (not shown here).
+  { 0b1101100101100000000000, 0b0000000000000000000000, kX , kZR, 0, 4 }, // ldg
+  { 0b1011100001000000000010, 0b0000000000000000000000, kWX, kZR, 30, 0 }, // ldtr
+  { 0b0011100001000000000010, 0b0000000000000000000000, kW , kZR, 0 , 0 }, // ldtrb
+  { 0b0111100001000000000010, 0b0000000000000000000000, kW , kZR, 0 , 0 }, // ldtrh
+  { 0b0011100011000000000010, 0b0000000000000000000000, kWX, kZR, 22, 0 }, // ldtrsb
+  { 0b0111100011000000000010, 0b0000000000000000000000, kWX, kZR, 22, 0 }, // ldtrsh
+  { 0b1011100010000000000010, 0b0000000000000000000000, kX , kZR, 0 , 0 }, // ldtrsw
+  { 0b1011100001000000000000, 0b0000000000000000000000, kWX, kZR, 30, 0 }, // ldur
+  { 0b0011100001000000000000, 0b0000000000000000000000, kW , kZR, 0 , 0 }, // ldurb
+  { 0b0111100001000000000000, 0b0000000000000000000000, kW , kZR, 0 , 0 }, // ldurh
+  { 0b0011100011000000000000, 0b0000000000000000000000, kWX, kZR, 22, 0 }, // ldursb
+  { 0b0111100011000000000000, 0b0000000000000000000000, kWX, kZR, 22, 0 }, // ldursh
+  { 0b1011100010000000000000, 0b0000000000000000000000, kX , kZR, 0 , 0 }, // ldursw (X-only like ldtrsw/ldrsw; with kWX and offset 0 an X register would toggle bit 0 of the encoding)
+  { 0b1101100110100000000010, 0b1101100110100000000001, kX, kSP, 0, 4 }, // st2g
+  { 0b1101100100100000000010, 0b1101100100100000000001, kX, kSP, 0, 4 }, // stg
+  { 0b1011100000000000000010, 0b0000000000000000000000, kWX, kZR, 30, 0 }, // sttr
+  { 0b0011100000000000000010, 0b0000000000000000000000, kW , kZR, 0 , 0 }, // sttrb
+  { 0b0111100000000000000010, 0b0000000000000000000000, kW , kZR, 0 , 0 }, // sttrh
+  { 0b1011100000000000000000, 0b0000000000000000000000, kWX, kZR, 30, 0 }, // stur
+  { 0b0011100000000000000000, 0b0000000000000000000000, kW , kZR, 0 , 0 }, // sturb
+  { 0b0111100000000000000000, 0b0000000000000000000000, kW , kZR, 0 , 0 }, // sturh
+  { 0b1101100111100000000010, 0b1101100111100000000001, kX , kSP, 0, 4 }, // stz2g
+  { 0b1101100101100000000010, 0b1101100101100000000001, kX , kSP, 0, 4 }  // stzg
+};
+
+const BaseRR baseRR[15] = { // Two-register rows: 32-bit opcode literal, then (class, kZR/kSP marker, bit offset) for each of the two operands, then a trailing bool whose meaning is declared in BaseRR (not shown here).
+  { 0b11011010110000010001100000000000, kX, kZR, 0, kX, kSP, 5, true }, // autda
+  { 0b11011010110000010001110000000000, kX, kZR, 0, kX, kSP, 5, true }, // autdb
+  { 0b11011010110000010001000000000000, kX, kZR, 0, kX, kSP, 5, true }, // autia
+  { 0b11011010110000010001010000000000, kX, kZR, 0, kX, kSP, 5, true }, // autib
+  { 0b01011010110000000001010000000000, kWX, kZR, 0, kWX, kZR, 5, true }, // cls
+  { 0b01011010110000000001000000000000, kWX, kZR, 0, kWX, kZR, 5, true }, // clz
+  { 0b10111010110000000000000000011111, kX, kSP, 5, kX, kSP, 16, true }, // cmpp
+  { 0b01011010000000000000001111100000, kWX, kZR, 0, kWX, kZR, 16, true }, // ngc
+  { 0b01111010000000000000001111100000, kWX, kZR, 0, kWX, kZR, 16, true }, // ngcs
+  { 0b11011010110000010000100000000000, kX, kZR, 0, kX, kSP, 5, true }, // pacda
+  { 0b11011010110000010000110000000000, kX, kZR, 0, kX, kSP, 5, true }, // pacdb
+  { 0b01011010110000000000000000000000, kWX, kZR, 0, kWX, kZR, 5, true }, // rbit
+  { 0b01011010110000000000010000000000, kWX, kZR, 0, kWX, kZR, 5, true }, // rev16
+  { 0b11011010110000000000100000000000, kWX, kZR, 0, kWX, kZR, 5, true }, // rev32 - NOTE(review): literal already has bit 31 set yet the class is kWX; every other row with bit 31 set uses kX - confirm W registers should be accepted
+  { 0b11011010110000000000110000000000, kWX, kZR, 0, kWX, kZR, 5, true }  // rev64 - NOTE(review): same kWX-with-bit-31-set pattern as rev32 - confirm
+};
+
+const BaseRRII baseRRII[2] = { // Rows for addg/subg: 22-digit opcode literal, two (class, kSP marker) pairs, then six integers (6, 4, 16, 4, 0, 10) - presumably immediate sizes/shifts and register offsets; confirm in BaseRRII. The two literals differ only in one bit.
+  { 0b1001000110000000000000, kX, kSP, kX, kSP, 6, 4, 16, 4, 0, 10 }, // addg
+  { 0b1101000110000000000000, kX, kSP, kX, kSP, 6, 4, 16, 4, 0, 10 }  // subg
+};
+
+const BaseRRR baseRRR[26] = { // Three-register rows: 22-digit opcode literal, then a (class, kZR/kSP marker) pair for each of the three operands, then a trailing bool whose meaning is declared in BaseRRR (not shown here).
+  { 0b0001101000000000000000, kWX, kZR, kWX, kZR, kWX, kZR, true }, // adc
+  { 0b0011101000000000000000, kWX, kZR, kWX, kZR, kWX, kZR, true }, // adcs
+  { 0b0001101011000000010000, kW, kZR, kW, kZR, kW, kZR, false }, // crc32b
+  { 0b0001101011000000010100, kW, kZR, kW, kZR, kW, kZR, false }, // crc32cb
+  { 0b0001101011000000010101, kW, kZR, kW, kZR, kW, kZR, false }, // crc32ch
+  { 0b0001101011000000010110, kW, kZR, kW, kZR, kW, kZR, false }, // crc32cw
+  { 0b1001101011000000010111, kW, kZR, kW, kZR, kX, kZR, false }, // crc32cx
+  { 0b0001101011000000010001, kW, kZR, kW, kZR, kW, kZR, false }, // crc32h
+  { 0b0001101011000000010010, kW, kZR, kW, kZR, kW, kZR, false }, // crc32w
+  { 0b1001101011000000010011, kW, kZR, kW, kZR, kX, kZR, false }, // crc32x
+  { 0b1001101011000000000101, kX , kZR, kX , kSP, kX , kZR, true }, // gmi
+  { 0b0001101100000000111111, kWX, kZR, kWX, kZR, kWX, kZR, true }, // mneg
+  { 0b0001101100000000011111, kWX, kZR, kWX, kZR, kWX, kZR, true }, // mul
+  { 0b1001101011000000001100, kX, kZR, kX, kZR, kX, kSP, false }, // pacga
+  { 0b0101101000000000000000, kWX, kZR, kWX, kZR, kWX, kZR, true }, // sbc
+  { 0b0111101000000000000000, kWX, kZR, kWX, kZR, kWX, kZR, true }, // sbcs
+  { 0b0001101011000000000011, kWX, kZR, kWX, kZR, kWX, kZR, true }, // sdiv
+  { 0b1001101100100000111111, kX , kZR, kW , kZR, kW , kZR, false }, // smnegl
+  { 0b1001101101000000011111, kX , kZR, kX , kZR, kX , kZR, true }, // smulh
+  { 0b1001101100100000011111, kX , kZR, kW , kZR, kW , kZR, false }, // smull
+  { 0b1001101011000000000000, kX, kZR, kX, kSP, kX, kSP, false }, // subp
+  { 0b1011101011000000000000, kX, kZR, kX, kSP, kX, kSP, false }, // subps
+  { 0b0001101011000000000010, kWX, kZR, kWX, kZR, kWX, kZR, true }, // udiv
+  { 0b1001101110100000111111, kX , kZR, kW , kZR, kW , kZR, false }, // umnegl
+  { 0b1001101110100000011111, kX , kZR, kW , kZR, kW , kZR, false }, // umull
+  { 0b1001101111000000011111, kX , kZR, kX , kZR, kX , kZR, false }  // umulh - NOTE(review): trailing flag is false here but true on the otherwise identical smulh row - confirm which is intended
+};
+
+const BaseRRRR baseRRRR[6] = { // Four-register rows: 22-digit opcode literal, a (class, kZR) pair per operand, and a trailing bool that is true on the all-kWX rows (madd/msub) and false on the mixed kX/kW rows; its meaning is declared in BaseRRRR (not shown here).
+  { 0b0001101100000000000000, kWX, kZR, kWX, kZR, kWX, kZR, kWX, kZR, true }, // madd
+  { 0b0001101100000000100000, kWX, kZR, kWX, kZR, kWX, kZR, kWX, kZR, true }, // msub
+  { 0b1001101100100000000000, kX , kZR, kW , kZR, kW , kZR, kX , kZR, false }, // smaddl
+  { 0b1001101100100000100000, kX , kZR, kW , kZR, kW , kZR, kX , kZR, false }, // smsubl
+  { 0b1001101110100000000000, kX , kZR, kW , kZR, kW , kZR, kX , kZR, false }, // umaddl
+  { 0b1001101110100000100000, kX , kZR, kW , kZR, kW , kZR, kX , kZR, false }  // umsubl
+};
+
+const BaseShift baseShift[8] = { // Rows for the shift ops: first literal is shared by each op and its *v twin; second literal is non-zero only on asr/lsl/lsr/ror (presumably the immediate form - confirm in BaseShift); last field is 1 only on ror/rorv.
+  { 0b0001101011000000001010, 0b0001001100000000011111, 0 }, // asr
+  { 0b0001101011000000001010, 0b0000000000000000000000, 0 }, // asrv
+  { 0b0001101011000000001000, 0b0101001100000000000000, 0 }, // lsl
+  { 0b0001101011000000001000, 0b0000000000000000000000, 0 }, // lslv
+  { 0b0001101011000000001001, 0b0101001100000000011111, 0 }, // lsr
+  { 0b0001101011000000001001, 0b0000000000000000000000, 0 }, // lsrv
+  { 0b0001101011000000001011, 0b0001001110000000000000, 1 }, // ror
+  { 0b0001101011000000001011, 0b0000000000000000000000, 1 }  // rorv
+};
+
+const BaseStx baseStx[3] = { // Rows for stxr/stxrb/stxrh: 22-digit opcode literal, allowed register class, and a last integer that is 30 on the kWX row and 0 on the kW rows (presumably the bit toggled for X registers - confirm in BaseStx).
+  { 0b1000100000000000011111, kWX, 30 }, // stxr
+  { 0b0000100000000000011111, kW , 0  }, // stxrb
+  { 0b0100100000000000011111, kW , 0  }  // stxrh
+};
+
+const BaseStxp baseStxp[2] = { // Rows for stlxp/stxp: 22-digit opcode literal, kWX register class and integer 30; the two literals differ only in one bit.
+  { 0b1000100000100000100000, kWX, 30 }, // stlxp
+  { 0b1000100000100000000000, kWX, 30 }  // stxp
+};
+
+const BaseTst baseTst[1] = { // Single row for tst: a 10-digit and a 9-digit opcode literal; the first equals the first literal of the ands row in baseLogical. The second is one digit longer than the ands counterpart - presumably a different shift is applied; confirm in BaseTst.
+  { 0b1101010000, 0b111001000 }  // tst
+};
+
+const FSimdPair fSimdPair[5] = { // Two 22-digit opcode literals per pairwise FP instruction; presumably the scalar and the vector form respectively - confirm in FSimdPair.
+  { 0b0111111000110000110110, 0b0010111000100000110101 }, // faddp_v
+  { 0b0111111000110000110010, 0b0010111000100000110001 }, // fmaxnmp_v
+  { 0b0111111000110000111110, 0b0010111000100000111101 }, // fmaxp_v
+  { 0b0111111010110000110010, 0b0010111010100000110001 }, // fminnmp_v
+  { 0b0111111010110000111110, 0b0010111010100000111101 }  // fminp_v
+};
+
+const FSimdSV fSimdSV[4] = { // One 22-digit opcode literal per across-lanes FP instruction named in the row comment; the fmin* rows are the fmax* literals with one extra bit set.
+  { 0b0010111000110000110010 }, // fmaxnmv_v
+  { 0b0010111000110000111110 }, // fmaxv_v
+  { 0b0010111010110000110010 }, // fminnmv_v
+  { 0b0010111010110000111110 }  // fminv_v
+};
+
+const FSimdVV fSimdVV[17] = { // Two (opcode literal, kHF_* tag) pairs per row; presumably scalar form then vector form, with the kHF_* tag selecting half-precision handling - confirm in FSimdVV. frecpx_v has an all-zero second literal tagged kHF_N.
+  { 0b0001111000100000110000, kHF_A, 0b0000111010100000111110, kHF_B }, // fabs_v
+  { 0b0001111000100001010000, kHF_A, 0b0010111010100000111110, kHF_B }, // fneg_v
+  { 0b0101111010100001110110, kHF_B, 0b0000111010100001110110, kHF_B }, // frecpe_v
+  { 0b0101111010100001111110, kHF_B, 0b0000000000000000000000, kHF_N }, // frecpx_v
+  { 0b0001111000101000110000, kHF_N, 0b0010111000100001111010, kHF_N }, // frint32x_v
+  { 0b0001111000101000010000, kHF_N, 0b0000111000100001111010, kHF_N }, // frint32z_v
+  { 0b0001111000101001110000, kHF_N, 0b0010111000100001111110, kHF_N }, // frint64x_v
+  { 0b0001111000101001010000, kHF_N, 0b0000111000100001111110, kHF_N }, // frint64z_v
+  { 0b0001111000100110010000, kHF_A, 0b0010111000100001100010, kHF_B }, // frinta_v
+  { 0b0001111000100111110000, kHF_A, 0b0010111010100001100110, kHF_B }, // frinti_v
+  { 0b0001111000100101010000, kHF_A, 0b0000111000100001100110, kHF_B }, // frintm_v
+  { 0b0001111000100100010000, kHF_A, 0b0000111000100001100010, kHF_B }, // frintn_v
+  { 0b0001111000100100110000, kHF_A, 0b0000111010100001100010, kHF_B }, // frintp_v
+  { 0b0001111000100111010000, kHF_A, 0b0010111000100001100110, kHF_B }, // frintx_v
+  { 0b0001111000100101110000, kHF_A, 0b0000111010100001100110, kHF_B }, // frintz_v
+  { 0b0111111010100001110110, kHF_B, 0b0010111010100001110110, kHF_B }, // frsqrte_v
+  { 0b0001111000100001110000, kHF_A, 0b0010111010100001111110, kHF_B }  // fsqrt_v
+};
+
+const FSimdVVV fSimdVVV[13] = { // Two (opcode literal, kHF_* tag) pairs per row; presumably scalar form then vector form - confirm in FSimdVVV. fnmul_v has an all-zero second literal tagged kHF_N.
+  { 0b0111111010100000110101, kHF_C, 0b0010111010100000110101, kHF_C }, // fabd_v
+  { 0b0111111000100000111011, kHF_C, 0b0010111000100000111011, kHF_C }, // facge_v
+  { 0b0111111010100000111011, kHF_C, 0b0010111010100000111011, kHF_C }, // facgt_v
+  { 0b0001111000100000001010, kHF_A, 0b0000111000100000110101, kHF_C }, // fadd_v
+  { 0b0001111000100000000110, kHF_A, 0b0010111000100000111111, kHF_C }, // fdiv_v
+  { 0b0001111000100000010010, kHF_A, 0b0000111000100000111101, kHF_C }, // fmax_v
+  { 0b0001111000100000011010, kHF_A, 0b0000111000100000110001, kHF_C }, // fmaxnm_v
+  { 0b0001111000100000010110, kHF_A, 0b0000111010100000111101, kHF_C }, // fmin_v
+  { 0b0001111000100000011110, kHF_A, 0b0000111010100000110001, kHF_C }, // fminnm_v
+  { 0b0001111000100000100010, kHF_A, 0b0000000000000000000000, kHF_N }, // fnmul_v
+  { 0b0101111000100000111111, kHF_C, 0b0000111000100000111111, kHF_C }, // frecps_v
+  { 0b0101111010100000111111, kHF_C, 0b0000111010100000111111, kHF_C }, // frsqrts_v
+  { 0b0001111000100000001110, kHF_A, 0b0000111010100000110101, kHF_C }  // fsub_v
+};
+
+const FSimdVVVV fSimdVVVV[4] = { // Two (opcode literal, kHF_* tag) pairs per fused multiply-add row; the second pair is all-zero/kHF_N on every row (presumably no vector form - confirm in FSimdVVVV).
+  { 0b0001111100000000000000, kHF_A, 0b0000000000000000000000, kHF_N }, // fmadd_v
+  { 0b0001111100000000100000, kHF_A, 0b0000000000000000000000, kHF_N }, // fmsub_v
+  { 0b0001111100100000000000, kHF_A, 0b0000000000000000000000, kHF_N }, // fnmadd_v
+  { 0b0001111100100000100000, kHF_A, 0b0000000000000000000000, kHF_N }  // fnmsub_v
+};
+
+const FSimdVVVe fSimdVVVe[4] = { // Per row: an (opcode literal, kHF_* tag) pair followed by two further opcode literals; presumably scalar, vector and by-element forms in that order - confirm in FSimdVVVe. fmla_v/fmls_v have an all-zero first literal tagged kHF_N.
+  { 0b0000000000000000000000, kHF_N, 0b0000111000100000110011, 0b0000111110000000000100 }, // fmla_v
+  { 0b0000000000000000000000, kHF_N, 0b0000111010100000110011, 0b0000111110000000010100 }, // fmls_v
+  { 0b0001111000100000000010, kHF_A, 0b0010111000100000110111, 0b0000111110000000100100 }, // fmul_v
+  { 0b0101111000100000110111, kHF_C, 0b0000111000100000110111, 0b0010111110000000100100 }  // fmulx_v
+};
+
+const ISimdPair iSimdPair[1] = { // Single row for addp_v: two 22-digit opcode literals (presumably scalar then vector form - confirm in ISimdPair) and the kVO_V_Any operand-type tag.
+  { 0b0101111000110001101110, 0b0000111000100000101111, kVO_V_Any }  // addp_v
+};
+
+const ISimdSV iSimdSV[7] = { // One 22-digit opcode literal plus the kVO_V_BH_4S operand-type tag per across-lanes integer instruction; each u* row is its s* counterpart with one extra bit set.
+  { 0b0000111000110001101110, kVO_V_BH_4S }, // addv_v
+  { 0b0000111000110000001110, kVO_V_BH_4S }, // saddlv_v
+  { 0b0000111000110000101010, kVO_V_BH_4S }, // smaxv_v
+  { 0b0000111000110001101010, kVO_V_BH_4S }, // sminv_v
+  { 0b0010111000110000001110, kVO_V_BH_4S }, // uaddlv_v
+  { 0b0010111000110000101010, kVO_V_BH_4S }, // umaxv_v
+  { 0b0010111000110001101010, kVO_V_BH_4S }  // uminv_v
+};
+
+const ISimdVV iSimdVV[29] = { // One 22-digit opcode literal plus a kVO_* operand-type tag per two-operand integer SIMD instruction. mvn_v and not_v intentionally share one literal; the *2 rows are their base rows with bit 20 of the literal set.
+  { 0b0000111000100000101110, kVO_V_Any }, // abs_v
+  { 0b0000111000100000010010, kVO_V_BHS }, // cls_v
+  { 0b0010111000100000010010, kVO_V_BHS }, // clz_v
+  { 0b0000111000100000010110, kVO_V_B }, // cnt_v
+  { 0b0010111000100000010110, kVO_V_B }, // mvn_v
+  { 0b0010111000100000101110, kVO_V_Any }, // neg_v
+  { 0b0010111000100000010110, kVO_V_B }, // not_v
+  { 0b0010111001100000010110, kVO_V_B }, // rbit_v
+  { 0b0000111000100000000110, kVO_V_B }, // rev16_v
+  { 0b0010111000100000000010, kVO_V_BH }, // rev32_v
+  { 0b0000111000100000000010, kVO_V_BHS }, // rev64_v
+  { 0b0000111000100000011010, kVO_V_BHS }, // sadalp_v
+  { 0b0000111000100000001010, kVO_V_BHS }, // saddlp_v
+  { 0b0000111000100000011110, kVO_SV_Any }, // sqabs_v
+  { 0b0010111000100000011110, kVO_SV_Any }, // sqneg_v
+  { 0b0000111000100001010010, kVO_SV_B8H4S2 }, // sqxtn_v
+  { 0b0100111000100001010010, kVO_V_B16H8S4 }, // sqxtn2_v
+  { 0b0010111000100001001010, kVO_SV_B8H4S2 }, // sqxtun_v
+  { 0b0110111000100001001010, kVO_V_B16H8S4 }, // sqxtun2_v
+  { 0b0000111000100000001110, kVO_SV_Any }, // suqadd_v
+  { 0b0010111000100000011010, kVO_V_BHS }, // uadalp_v
+  { 0b0010111000100000001010, kVO_V_BHS }, // uaddlp_v
+  { 0b0010111000100001010010, kVO_SV_B8H4S2 }, // uqxtn_v
+  { 0b0110111000100001010010, kVO_V_B16H8S4 }, // uqxtn2_v
+  { 0b0000111010100001110010, kVO_V_S }, // urecpe_v
+  { 0b0010111010100001110010, kVO_V_S }, // ursqrte_v
+  { 0b0010111000100000001110, kVO_SV_Any }, // usqadd_v
+  { 0b0000111000100001001010, kVO_V_B8H4S2 }, // xtn_v
+  { 0b0100111000100001001010, kVO_V_B16H8S4 }  // xtn2_v
+};
+
+const ISimdVVV iSimdVVV[65] = { // One 22-digit opcode literal plus a kVO_* operand-type tag per three-operand integer SIMD instruction. Every *2 (upper-half) row is its base row with bit 20 of the literal (instruction bit 30, Q) set.
+  { 0b0000111000100000100001, kVO_V_Any }, // add_v
+  { 0b0000111000100000010000, kVO_V_B8H4S2 }, // addhn_v
+  { 0b0100111000100000010000, kVO_V_B16H8S4 }, // addhn2_v
+  { 0b0000111000100000000111, kVO_V_B }, // and_v
+  { 0b0010111011100000000111, kVO_V_B }, // bif_v
+  { 0b0010111010100000000111, kVO_V_B }, // bit_v
+  { 0b0010111001100000000111, kVO_V_B }, // bsl_v
+  { 0b0000111000100000100011, kVO_V_Any }, // cmtst_v
+  { 0b0010111000100000000111, kVO_V_B }, // eor_v
+  { 0b0000111011100000000111, kVO_V_B }, // orn_v
+  { 0b0010111000100000100111, kVO_V_B }, // pmul_v
+  { 0b0000111000100000111000, kVO_V_B8D1 }, // pmull_v
+  { 0b0100111000100000111000, kVO_V_B16D2 }, // pmull2_v
+  { 0b0010111000100000010000, kVO_V_B8H4S2 }, // raddhn_v
+  { 0b0110111000100000010000, kVO_V_B16H8S4 }, // raddhn2_v
+  { 0b1100111001100000100011, kVO_V_D2 }, // rax1_v
+  { 0b0010111000100000011000, kVO_V_B8H4S2 }, // rsubhn_v
+  { 0b0110111000100000011000, kVO_V_B16H8S4 }, // rsubhn2_v
+  { 0b0000111000100000011111, kVO_V_BHS }, // saba_v
+  { 0b0000111000100000010100, kVO_V_B8H4S2 }, // sabal_v
+  { 0b0100111000100000010100, kVO_V_B16H8S4 }, // sabal2_v
+  { 0b0000111000100000011101, kVO_V_BHS }, // sabd_v
+  { 0b0000111000100000011100, kVO_V_B8H4S2 }, // sabdl_v
+  { 0b0100111000100000011100, kVO_V_B16H8S4 }, // sabdl2_v
+  { 0b0000111000100000000000, kVO_V_B8H4S2 }, // saddl_v
+  { 0b0100111000100000000000, kVO_V_B16H8S4 }, // saddl2_v
+  { 0b0000111000100000000001, kVO_V_BHS }, // shadd_v
+  { 0b0000111000100000001001, kVO_V_BHS }, // shsub_v
+  { 0b0000111000100000011001, kVO_V_BHS }, // smax_v
+  { 0b0000111000100000101001, kVO_V_BHS }, // smaxp_v
+  { 0b0000111000100000011011, kVO_V_BHS }, // smin_v
+  { 0b0000111000100000101011, kVO_V_BHS }, // sminp_v
+  { 0b0000111000100000000011, kVO_SV_Any }, // sqadd_v
+  { 0b0000111000100000001011, kVO_SV_Any }, // sqsub_v
+  { 0b0000111000100000000101, kVO_V_BHS }, // srhadd_v
+  { 0b0000111000100000001000, kVO_V_B8H4S2 }, // ssubl_v
+  { 0b0100111000100000001000, kVO_V_B16H8S4 }, // ssubl2_v
+  { 0b0010111000100000100001, kVO_V_Any }, // sub_v
+  { 0b0000111000100000011000, kVO_V_B8H4S2 }, // subhn_v
+  { 0b0100111000100000011000, kVO_V_B16H8S4 }, // subhn2_v (Q bit set, matching addhn2/raddhn2/rsubhn2; previously identical to subhn_v)
+  { 0b0000111000000000001010, kVO_V_BHS_D2 }, // trn1_v
+  { 0b0000111000000000011010, kVO_V_BHS_D2 }, // trn2_v
+  { 0b0010111000100000011111, kVO_V_BHS }, // uaba_v
+  { 0b0010111000100000010100, kVO_V_B8H4S2 }, // uabal_v
+  { 0b0110111000100000010100, kVO_V_B16H8S4 }, // uabal2_v
+  { 0b0010111000100000011101, kVO_V_BHS }, // uabd_v
+  { 0b0010111000100000011100, kVO_V_B8H4S2 }, // uabdl_v
+  { 0b0110111000100000011100, kVO_V_B16H8S4 }, // uabdl2_v
+  { 0b0010111000100000000000, kVO_V_B8H4S2 }, // uaddl_v
+  { 0b0110111000100000000000, kVO_V_B16H8S4 }, // uaddl2_v
+  { 0b0010111000100000000001, kVO_V_BHS }, // uhadd_v
+  { 0b0010111000100000001001, kVO_V_BHS }, // uhsub_v
+  { 0b0010111000100000011001, kVO_V_BHS }, // umax_v
+  { 0b0010111000100000101001, kVO_V_BHS }, // umaxp_v
+  { 0b0010111000100000011011, kVO_V_BHS }, // umin_v
+  { 0b0010111000100000101011, kVO_V_BHS }, // uminp_v
+  { 0b0010111000100000000011, kVO_SV_Any }, // uqadd_v
+  { 0b0010111000100000001011, kVO_SV_Any }, // uqsub_v
+  { 0b0010111000100000000101, kVO_V_BHS }, // urhadd_v
+  { 0b0010111000100000001000, kVO_V_B8H4S2 }, // usubl_v
+  { 0b0110111000100000001000, kVO_V_B16H8S4 }, // usubl2_v
+  { 0b0000111000000000000110, kVO_V_BHS_D2 }, // uzp1_v
+  { 0b0000111000000000010110, kVO_V_BHS_D2 }, // uzp2_v
+  { 0b0000111000000000001110, kVO_V_BHS_D2 }, // zip1_v
+  { 0b0000111000000000011110, kVO_V_BHS_D2 }  // zip2_v
+};
+
+const ISimdVVVI iSimdVVVI[2] = { // Per row: 22-digit opcode literal, kVO_* operand-type tag, then three integers - presumably immediate bit-width, immediate bit offset and a flag; confirm in ISimdVVVI. The literal must be zero in the bits the immediate occupies.
+  { 0b0010111000000000000000, kVO_V_B, 4, 11, 1 }, // ext_v
+  { 0b1100111010000000000000, kVO_V_D2, 6, 10, 0 }  // xar_v (XAR base 0xCE800000 >> 10; previously a copy of the rax1_v literal, whose low set bits overlap the 6-bit immediate at offset 10)
+};
+
+const ISimdVVVV iSimdVVVV[2] = { // 22-digit opcode literal plus the kVO_V_B16 operand-type tag for bcax_v/eor3_v; the two literals differ only in one bit.
+  { 0b1100111000100000000000, kVO_V_B16 }, // bcax_v
+  { 0b1100111000000000000000, kVO_V_B16 }  // eor3_v
+};
+
+const ISimdVVVVx iSimdVVVVx[1] = { // Single row for sm3ss1_v: 22-digit opcode literal followed by one explicit kOp_* operand type per operand (all four kOp_V4S).
+  { 0b1100111001000000000000, kOp_V4S, kOp_V4S, kOp_V4S, kOp_V4S }  // sm3ss1_v
+};
+
+const ISimdVVVe iSimdVVVe[25] = { // Two (opcode literal, kVO_* tag) pairs per row; presumably the vector form followed by the by-element form - confirm in ISimdVVVe. On every *2 (upper-half) row both literals carry bit 20 (instruction bit 30, Q).
+  { 0b0000111000100000100101, kVO_V_BHS, 0b0010111100000000000000, kVO_V_HS }, // mla_v
+  { 0b0010111000100000100101, kVO_V_BHS, 0b0010111100000000010000, kVO_V_HS }, // mls_v
+  { 0b0000111000100000100111, kVO_V_BHS, 0b0000111100000000100000, kVO_V_HS }, // mul_v
+  { 0b0000111000100000100000, kVO_V_B8H4S2, 0b0000111100000000001000, kVO_V_H4S2 }, // smlal_v
+  { 0b0100111000100000100000, kVO_V_B16H8S4, 0b0100111100000000001000, kVO_V_H8S4 }, // smlal2_v
+  { 0b0000111000100000101000, kVO_V_B8H4S2, 0b0000111100000000011000, kVO_V_H4S2 }, // smlsl_v
+  { 0b0100111000100000101000, kVO_V_B16H8S4, 0b0100111100000000011000, kVO_V_H8S4 }, // smlsl2_v
+  { 0b0000111000100000110000, kVO_V_B8H4S2, 0b0000111100000000101000, kVO_V_H4S2 }, // smull_v
+  { 0b0100111000100000110000, kVO_V_B16H8S4, 0b0100111100000000101000, kVO_V_H8S4 }, // smull2_v
+  { 0b0000111000100000100100, kVO_SV_BHS, 0b0000111100000000001100, kVO_V_H4S2 }, // sqdmlal_v
+  { 0b0100111000100000100100, kVO_V_B16H8S4, 0b0100111100000000001100, kVO_V_H8S4 }, // sqdmlal2_v
+  { 0b0000111000100000101100, kVO_SV_BHS, 0b0000111100000000011100, kVO_V_H4S2 }, // sqdmlsl_v
+  { 0b0100111000100000101100, kVO_V_B16H8S4, 0b0100111100000000011100, kVO_V_H8S4 }, // sqdmlsl2_v
+  { 0b0000111000100000101101, kVO_SV_HS, 0b0000111100000000110000, kVO_SV_HS }, // sqdmulh_v
+  { 0b0000111000100000110100, kVO_SV_BHS, 0b0000111100000000101100, kVO_V_H4S2 }, // sqdmull_v
+  { 0b0100111000100000110100, kVO_V_B16H8S4, 0b0100111100000000101100, kVO_V_H8S4 }, // sqdmull2_v
+  { 0b0010111000000000100001, kVO_SV_HS, 0b0010111100000000110100, kVO_SV_HS }, // sqrdmlah_v
+  { 0b0010111000000000100011, kVO_SV_HS, 0b0010111100000000111100, kVO_SV_HS }, // sqrdmlsh_v
+  { 0b0010111000100000101101, kVO_SV_HS, 0b0000111100000000110100, kVO_SV_HS }, // sqrdmulh_v
+  { 0b0010111000100000100000, kVO_V_B8H4S2, 0b0010111100000000001000, kVO_V_H4S2 }, // umlal_v
+  { 0b0110111000100000100000, kVO_V_B16H8S4, 0b0110111100000000001000, kVO_V_H8S4 }, // umlal2_v (second literal now carries the Q bit like smlal2/umlsl2/umull2; previously identical to umlal_v's)
+  { 0b0010111000100000101000, kVO_V_B8H4S2, 0b0010111100000000011000, kVO_V_H4S2 }, // umlsl_v
+  { 0b0110111000100000101000, kVO_V_B16H8S4, 0b0110111100000000011000, kVO_V_H8S4 }, // umlsl2_v
+  { 0b0010111000100000110000, kVO_V_B8H4S2, 0b0010111100000000101000, kVO_V_H4S2 }, // umull_v
+  { 0b0110111000100000110000, kVO_V_B16H8S4, 0b0110111100000000101000, kVO_V_H8S4 }  // umull2_v
+};
+
+const ISimdVVVx iSimdVVVx[17] = { // 22-digit opcode literal followed by one explicit kOp_* operand type for each of the three operands, in operand order, for the instruction named in the row comment.
+  { 0b0110111001000000111011, kOp_V4S, kOp_V8H, kOp_V8H }, // bfmmla_v
+  { 0b0101111000000000000000, kOp_Q, kOp_S, kOp_V4S }, // sha1c_v
+  { 0b0101111000000000001000, kOp_Q, kOp_S, kOp_V4S }, // sha1m_v
+  { 0b0101111000000000000100, kOp_Q, kOp_S, kOp_V4S }, // sha1p_v
+  { 0b0101111000000000001100, kOp_V4S, kOp_V4S, kOp_V4S }, // sha1su0_v
+  { 0b0101111000000000010000, kOp_Q, kOp_Q, kOp_V4S }, // sha256h_v
+  { 0b0101111000000000010100, kOp_Q, kOp_Q, kOp_V4S }, // sha256h2_v
+  { 0b0101111000000000011000, kOp_V4S, kOp_V4S, kOp_V4S }, // sha256su1_v
+  { 0b1100111001100000100000, kOp_Q, kOp_Q, kOp_V2D }, // sha512h_v
+  { 0b1100111001100000100001, kOp_Q, kOp_Q, kOp_V2D }, // sha512h2_v
+  { 0b1100111001100000100010, kOp_V2D, kOp_V2D, kOp_V2D }, // sha512su1_v
+  { 0b1100111001100000110000, kOp_V4S, kOp_V4S, kOp_V4S }, // sm3partw1_v
+  { 0b1100111001100000110001, kOp_V4S, kOp_V4S, kOp_V4S }, // sm3partw2_v
+  { 0b1100111001100000110010, kOp_V4S, kOp_V4S, kOp_V4S }, // sm4ekey_v
+  { 0b0100111010000000101001, kOp_V4S, kOp_V16B, kOp_V16B }, // smmla_v
+  { 0b0110111010000000101001, kOp_V4S, kOp_V16B, kOp_V16B }, // ummla_v
+  { 0b0100111010000000101011, kOp_V4S, kOp_V16B, kOp_V16B }  // usmmla_v
+};
+
+const ISimdVVx iSimdVVx[13] = {
+  { 0b0100111000101000010110, kOp_V16B, kOp_V16B }, // aesd_v
+  { 0b0100111000101000010010, kOp_V16B, kOp_V16B }, // aese_v
+  { 0b0100111000101000011110, kOp_V16B, kOp_V16B }, // aesimc_v
+  { 0b0100111000101000011010, kOp_V16B, kOp_V16B }, // aesmc_v
+  { 0b0001111001100011010000, kOp_H, kOp_S }, // bfcvt_v
+  { 0b0000111010100001011010, kOp_V4H, kOp_V4S }, // bfcvtn_v
+  { 0b0100111010100001011010, kOp_V8H, kOp_V4S }, // bfcvtn2_v
+  { 0b0001111001111110000000, kOp_GpW, kOp_D }, // fjcvtzs_v
+  { 0b0101111000101000000010, kOp_S, kOp_S }, // sha1h_v
+  { 0b0101111000101000000110, kOp_V4S, kOp_V4S }, // sha1su1_v
+  { 0b0101111000101000001010, kOp_V4S, kOp_V4S }, // sha256su0_v
+  { 0b1100111011000000100000, kOp_V2D, kOp_V2D }, // sha512su0_v
+  { 0b1100111011000000100001, kOp_V4S, kOp_V4S }  // sm4e_v
+};
+
+const ISimdWWV iSimdWWV[8] = {
+  { 0b0000111000100000000100, kVO_V_B8H4S2 }, // saddw_v
+  { 0b0000111000100000000100, kVO_V_B16H8S4 }, // saddw2_v
+  { 0b0000111000100000001100, kVO_V_B8H4S2 }, // ssubw_v
+  { 0b0000111000100000001100, kVO_V_B16H8S4 }, // ssubw2_v
+  { 0b0010111000100000000100, kVO_V_B8H4S2 }, // uaddw_v
+  { 0b0010111000100000000100, kVO_V_B16H8S4 }, // uaddw2_v
+  { 0b0010111000100000001100, kVO_V_B8H4S2 }, // usubw_v
+  { 0b0010111000100000001100, kVO_V_B16H8S4 }  // usubw2_v
+};
+
+const SimdBicOrr simdBicOrr[2] = {
+  { 0b0000111001100000000111, 0b0010111100000000000001 }, // bic_v
+  { 0b0000111010100000000111, 0b0000111100000000000001 }  // orr_v
+};
+
+const SimdCmp simdCmp[7] = {
+  { 0b0010111000100000100011, 0b0000111000100000100110, kVO_V_Any }, // cmeq_v
+  { 0b0000111000100000001111, 0b0010111000100000100010, kVO_V_Any }, // cmge_v
+  { 0b0000111000100000001101, 0b0000111000100000100010, kVO_V_Any }, // cmgt_v
+  { 0b0010111000100000001101, 0b0000000000000000000000, kVO_V_Any }, // cmhi_v
+  { 0b0010111000100000001111, 0b0000000000000000000000, kVO_V_Any }, // cmhs_v
+  { 0b0000000000000000000000, 0b0010111000100000100110, kVO_V_Any }, // cmle_v
+  { 0b0000000000000000000000, 0b0000111000100000101010, kVO_V_Any }  // cmlt_v
+};
+
+const SimdDot simdDot[5] = {
+  { 0b0010111001000000111111, 0b0000111101000000111100, kET_S, kET_H, kET_2H }, // bfdot_v
+  { 0b0000111010000000100101, 0b0000111110000000111000, kET_S, kET_B, kET_4B }, // sdot_v
+  { 0b0000000000000000000000, 0b0000111100000000111100, kET_S, kET_B, kET_4B }, // sudot_v
+  { 0b0010111010000000100101, 0b0010111110000000111000, kET_S, kET_B, kET_4B }, // udot_v
+  { 0b0000111010000000100111, 0b0000111110000000111100, kET_S, kET_B, kET_4B }  // usdot_v
+};
+
+const SimdFcadd simdFcadd[1] = {
+  { 0b0010111000000000111001 }  // fcadd_v
+};
+
+const SimdFccmpFccmpe simdFccmpFccmpe[2] = {
+  { 0b00011110001000000000010000000000 }, // fccmp_v
+  { 0b00011110001000000000010000010000 }  // fccmpe_v
+};
+
+const SimdFcm simdFcm[5] = {
+  { 0b0000111000100000111001, kHF_C, 0b0000111010100000110110 }, // fcmeq_v
+  { 0b0010111000100000111001, kHF_C, 0b0010111010100000110010 }, // fcmge_v
+  { 0b0010111010100000111001, kHF_C, 0b0000111010100000110010 }, // fcmgt_v
+  { 0b0000000000000000000000, kHF_C, 0b0010111010100000110110 }, // fcmle_v
+  { 0b0000000000000000000000, kHF_C, 0b0000111010100000111010 }  // fcmlt_v
+};
+
+const SimdFcmla simdFcmla[1] = {
+  { 0b0010111000000000110001, 0b0010111100000000000100 }  // fcmla_v
+};
+
+const SimdFcmpFcmpe simdFcmpFcmpe[2] = {
+  { 0b00011110001000000010000000000000 }, // fcmp_v
+  { 0b00011110001000000010000000010000 }  // fcmpe_v
+};
+
+const SimdFcvtLN simdFcvtLN[6] = {
+  { 0b0000111000100001011110, 0, 0 }, // fcvtl_v
+  { 0b0100111000100001011110, 0, 0 }, // fcvtl2_v
+  { 0b0000111000100001011010, 0, 0 }, // fcvtn_v
+  { 0b0100111000100001011010, 0, 0 }, // fcvtn2_v
+  { 0b0010111000100001011010, 1, 1 }, // fcvtxn_v
+  { 0b0110111000100001011010, 1, 0 }  // fcvtxn2_v
+};
+
+const SimdFcvtSV simdFcvtSV[12] = {
+  { 0b0000111000100001110010, 0b0000000000000000000000, 0b0001111000100100000000, 1 }, // fcvtas_v
+  { 0b0010111000100001110010, 0b0000000000000000000000, 0b0001111000100101000000, 1 }, // fcvtau_v
+  { 0b0000111000100001101110, 0b0000000000000000000000, 0b0001111000110000000000, 1 }, // fcvtms_v
+  { 0b0010111000100001101110, 0b0000000000000000000000, 0b0001111000110001000000, 1 }, // fcvtmu_v
+  { 0b0000111000100001101010, 0b0000000000000000000000, 0b0001111000100000000000, 1 }, // fcvtns_v
+  { 0b0010111000100001101010, 0b0000000000000000000000, 0b0001111000100001000000, 1 }, // fcvtnu_v
+  { 0b0000111010100001101010, 0b0000000000000000000000, 0b0001111000101000000000, 1 }, // fcvtps_v
+  { 0b0010111010100001101010, 0b0000000000000000000000, 0b0001111000101001000000, 1 }, // fcvtpu_v
+  { 0b0000111010100001101110, 0b0000111100000000111111, 0b0001111000111000000000, 1 }, // fcvtzs_v
+  { 0b0010111010100001101110, 0b0010111100000000111111, 0b0001111000111001000000, 1 }, // fcvtzu_v
+  { 0b0000111000100001110110, 0b0000111100000000111001, 0b0001111000100010000000, 0 }, // scvtf_v
+  { 0b0010111000100001110110, 0b0010111100000000111001, 0b0001111000100011000000, 0 }  // ucvtf_v
+};
+
+const SimdFmlal simdFmlal[6] = {
+  { 0b0010111011000000111111, 0b0000111111000000111100, 0, kET_S, kET_H, kET_H }, // bfmlalb_v
+  { 0b0110111011000000111111, 0b0100111111000000111100, 0, kET_S, kET_H, kET_H }, // bfmlalt_v
+  { 0b0000111000100000111011, 0b0000111110000000000000, 1, kET_S, kET_H, kET_H }, // fmlal_v
+  { 0b0010111000100000110011, 0b0010111110000000100000, 1, kET_S, kET_H, kET_H }, // fmlal2_v
+  { 0b0000111010100000111011, 0b0000111110000000010000, 1, kET_S, kET_H, kET_H }, // fmlsl_v
+  { 0b0010111010100000110011, 0b0010111110000000110000, 1, kET_S, kET_H, kET_H }  // fmlsl2_v
+};
+
+const SimdLdNStN simdLdNStN[12] = {
+  { 0b0000110101000000000000, 0b0000110001000000001000, 1, 0 }, // ld1_v
+  { 0b0000110101000000110000, 0b0000000000000000000000, 1, 1 }, // ld1r_v
+  { 0b0000110101100000000000, 0b0000110001000000100000, 2, 0 }, // ld2_v
+  { 0b0000110101100000110000, 0b0000000000000000000000, 2, 1 }, // ld2r_v
+  { 0b0000110101000000001000, 0b0000110001000000010000, 3, 0 }, // ld3_v
+  { 0b0000110101000000111000, 0b0000000000000000000000, 3, 1 }, // ld3r_v
+  { 0b0000110101100000001000, 0b0000110001000000000000, 4, 0 }, // ld4_v
+  { 0b0000110101100000111000, 0b0000000000000000000000, 4, 1 }, // ld4r_v
+  { 0b0000110100000000000000, 0b0000110000000000001000, 1, 0 }, // st1_v
+  { 0b0000110100100000000000, 0b0000110000000000100000, 2, 0 }, // st2_v
+  { 0b0000110100000000001000, 0b0000110000000000010000, 3, 0 }, // st3_v
+  { 0b0000110100100000001000, 0b0000110000000000000000, 4, 0 }  // st4_v
+};
+
+const SimdLdSt simdLdSt[2] = {
+  { 0b0011110101, 0b00111100010, 0b00111100011, 0b00011100, Inst::kIdLdur_v }, // ldr_v
+  { 0b0011110100, 0b00111100000, 0b00111100001, 0b00000000, Inst::kIdStur_v }  // str_v
+};
+
+const SimdLdpStp simdLdpStp[4] = {
+  { 0b0010110001, 0b0000000000 }, // ldnp_v
+  { 0b0010110101, 0b0010110011 }, // ldp_v
+  { 0b0010110000, 0b0000000000 }, // stnp_v
+  { 0b0010110100, 0b0010110010 }  // stp_v
+};
+
+const SimdLdurStur simdLdurStur[2] = {
+  { 0b0011110001000000000000 }, // ldur_v
+  { 0b0011110000000000000000 }  // stur_v
+};
+
+const SimdMoviMvni simdMoviMvni[2] = {
+  { 0b0000111100000000000001, 0 }, // movi_v
+  { 0b0000111100000000000001, 1 }  // mvni_v
+};
+
+const SimdShift simdShift[40] = {
+  { 0b0000000000000000000000, 0b0000111100000000100011, 1, kVO_V_B8H4S2 }, // rshrn_v
+  { 0b0000000000000000000000, 0b0100111100000000100011, 1, kVO_V_B16H8S4 }, // rshrn2_v
+  { 0b0000000000000000000000, 0b0000111100000000010101, 0, kVO_V_Any }, // shl_v
+  { 0b0000000000000000000000, 0b0000111100000000100001, 1, kVO_V_B8H4S2 }, // shrn_v
+  { 0b0000000000000000000000, 0b0100111100000000100001, 1, kVO_V_B16H8S4 }, // shrn2_v
+  { 0b0000000000000000000000, 0b0010111100000000010101, 0, kVO_V_Any }, // sli_v
+  { 0b0000111000100000010111, 0b0000000000000000000000, 1, kVO_SV_Any }, // sqrshl_v
+  { 0b0000000000000000000000, 0b0000111100000000100111, 1, kVO_SV_B8H4S2 }, // sqrshrn_v
+  { 0b0000000000000000000000, 0b0100111100000000100111, 1, kVO_V_B16H8S4 }, // sqrshrn2_v
+  { 0b0000000000000000000000, 0b0010111100000000100011, 1, kVO_SV_B8H4S2 }, // sqrshrun_v
+  { 0b0000000000000000000000, 0b0110111100000000100011, 1, kVO_V_B16H8S4 }, // sqrshrun2_v
+  { 0b0000111000100000010011, 0b0000111100000000011101, 0, kVO_SV_Any }, // sqshl_v
+  { 0b0000000000000000000000, 0b0010111100000000011001, 0, kVO_SV_Any }, // sqshlu_v
+  { 0b0000000000000000000000, 0b0000111100000000100101, 1, kVO_SV_B8H4S2 }, // sqshrn_v
+  { 0b0000000000000000000000, 0b0100111100000000100101, 1, kVO_V_B16H8S4 }, // sqshrn2_v
+  { 0b0000000000000000000000, 0b0010111100000000100001, 1, kVO_SV_B8H4S2 }, // sqshrun_v
+  { 0b0000000000000000000000, 0b0110111100000000100001, 1, kVO_V_B16H8S4 }, // sqshrun2_v
+  { 0b0000000000000000000000, 0b0010111100000000010001, 1, kVO_V_Any }, // sri_v
+  { 0b0000111000100000010101, 0b0000000000000000000000, 0, kVO_V_Any }, // srshl_v
+  { 0b0000000000000000000000, 0b0000111100000000001001, 1, kVO_V_Any }, // srshr_v
+  { 0b0000000000000000000000, 0b0000111100000000001101, 1, kVO_V_Any }, // srsra_v
+  { 0b0000111000100000010001, 0b0000000000000000000000, 0, kVO_V_Any }, // sshl_v
+  { 0b0000000000000000000000, 0b0000111100000000101001, 0, kVO_V_B8H4S2 }, // sshll_v
+  { 0b0000000000000000000000, 0b0100111100000000101001, 0, kVO_V_B16H8S4 }, // sshll2_v
+  { 0b0000000000000000000000, 0b0000111100000000000001, 1, kVO_V_Any }, // sshr_v
+  { 0b0000000000000000000000, 0b0000111100000000000101, 1, kVO_V_Any }, // ssra_v
+  { 0b0010111000100000010111, 0b0000000000000000000000, 0, kVO_SV_Any }, // uqrshl_v
+  { 0b0000000000000000000000, 0b0010111100000000100111, 1, kVO_SV_B8H4S2 }, // uqrshrn_v
+  { 0b0000000000000000000000, 0b0110111100000000100111, 1, kVO_V_B16H8S4 }, // uqrshrn2_v
+  { 0b0010111000100000010011, 0b0010111100000000011101, 0, kVO_SV_Any }, // uqshl_v
+  { 0b0000000000000000000000, 0b0010111100000000100101, 1, kVO_SV_B8H4S2 }, // uqshrn_v
+  { 0b0000000000000000000000, 0b0110111100000000100101, 1, kVO_V_B16H8S4 }, // uqshrn2_v
+  { 0b0010111000100000010101, 0b0000000000000000000000, 0, kVO_V_Any }, // urshl_v
+  { 0b0000000000000000000000, 0b0010111100000000001001, 1, kVO_V_Any }, // urshr_v
+  { 0b0000000000000000000000, 0b0010111100000000001101, 1, kVO_V_Any }, // ursra_v
+  { 0b0010111000100000010001, 0b0000000000000000000000, 0, kVO_V_Any }, // ushl_v
+  { 0b0000000000000000000000, 0b0010111100000000101001, 0, kVO_V_B8H4S2 }, // ushll_v
+  { 0b0000000000000000000000, 0b0110111100000000101001, 0, kVO_V_B16H8S4 }, // ushll2_v
+  { 0b0000000000000000000000, 0b0010111100000000000001, 1, kVO_V_Any }, // ushr_v
+  { 0b0000000000000000000000, 0b0010111100000000000101, 1, kVO_V_Any }  // usra_v
+};
+
+const SimdShiftES simdShiftES[2] = {
+  { 0b0010111000100001001110, kVO_V_B8H4S2 }, // shll_v
+  { 0b0110111000100001001110, kVO_V_B16H8S4 }  // shll2_v
+};
+
+const SimdSm3tt simdSm3tt[4] = {
+  { 0b1100111001000000100000 }, // sm3tt1a_v
+  { 0b1100111001000000100001 }, // sm3tt1b_v
+  { 0b1100111001000000100010 }, // sm3tt2a_v
+  { 0b1100111001000000100011 }  // sm3tt2b_v
+};
+
+const SimdSmovUmov simdSmovUmov[2] = {
+  { 0b0000111000000000001011, kVO_V_BHS, 1 }, // smov_v
+  { 0b0000111000000000001111, kVO_V_Any, 0 }  // umov_v
+};
+
+const SimdSxtlUxtl simdSxtlUxtl[4] = {
+  { 0b0000111100000000101001, kVO_V_B8H4S2 }, // sxtl_v
+  { 0b0100111100000000101001, kVO_V_B16H8S4 }, // sxtl2_v
+  { 0b0010111100000000101001, kVO_V_B8H4S2 }, // uxtl_v
+  { 0b0110111100000000101001, kVO_V_B16H8S4 }  // uxtl2_v
+};
+
+const SimdTblTbx simdTblTbx[2] = {
+  { 0b0000111000000000000000 }, // tbl_v
+  { 0b0000111000000000000100 }  // tbx_v
+};
+// ----------------------------------------------------------------------------
+// ${EncodingData:End}
+
+} // {EncodingData}
+} // {InstDB}
+
+/*
+// ${CommonData:Begin}
+// ------------------- Automatically generated, do not edit -------------------
+const InstDB::CommonInfo InstDB::commonData[] = {
+  { 0}  // #0 [ref=440x]
+};
+// ----------------------------------------------------------------------------
+// ${CommonData:End}
+*/
+
+// ArmUtil - Id <-> Name
+// =====================
+
+#ifndef ASMJIT_DISABLE_TEXT
+// ${NameData:Begin}
+// ------------------- Automatically generated, do not edit -------------------
+const char InstDB::_nameData[] =
+  "\0" "adc\0" "adcs\0" "addg\0" "adds\0" "addv\0" "adr\0" "adrp\0" "aesd\0" "aese\0" "aesimc\0" "aesmc\0" "and\0"
+  "ands\0" "asr\0" "asrv\0" "at\0" "autda\0" "autdb\0" "autdza\0" "autdzb\0" "autia\0" "autia1716\0" "autiasp\0"
+  "autiaz\0" "autib\0" "autib1716\0" "autibsp\0" "autibz\0" "autiza\0" "autizb\0" "axflag\0" "bcax\0" "bfc\0" "bfcvt\0"
+  "bfcvtn\0" "bfcvtn2\0" "bfdot\0" "bfi\0" "bfmlalb\0" "bfmlalt\0" "bfmmla\0" "bfxil\0" "bic\0" "bics\0" "bif\0"
+  "blr\0" "br\0" "brk\0" "bsl\0" "cas\0" "casa\0" "casab\0" "casah\0" "casal\0" "casalb\0" "casalh\0" "casb\0" "cash\0"
+  "casl\0" "caslb\0" "caslh\0" "casp\0" "caspa\0" "caspal\0" "caspl\0" "cbnz\0" "cbz\0" "ccmn\0" "cfinv\0" "cinc\0"
+  "cinv\0" "clrex\0" "cls\0" "clz\0" "cmhi\0" "cmhs\0" "cmpp\0" "cmtst\0" "cneg\0" "cnt\0" "crc32b\0" "crc32cb\0"
+  "crc32ch\0" "crc32cw\0" "crc32cx\0" "crc32h\0" "crc32w\0" "crc32x\0" "csdb\0" "cset\0" "csetm\0" "csinc\0" "csinv\0"
+  "csneg\0" "dcps1\0" "dcps2\0" "dcps3\0" "dgh\0" "dmb\0" "drps\0" "dsb\0" "dup\0" "eon\0" "eor3\0" "eret\0" "esb\0"
+  "ext\0" "extr\0" "fabd\0" "fabs\0" "facge\0" "facgt\0" "fadd\0" "faddp\0" "fcadd\0" "fccmp\0" "fccmpe\0" "fcmeq\0"
+  "fcmge\0" "fcmgt\0" "fcmla\0" "fcmle\0" "fcmlt\0" "fcmp\0" "fcmpe\0" "fcsel\0" "fcvtas\0" "fcvtau\0" "fcvtl\0"
+  "fcvtl2\0" "fcvtms\0" "fcvtmu\0" "fcvtns\0" "fcvtnu\0" "fcvtps\0" "fcvtpu\0" "fcvtxn\0" "fcvtxn2\0" "fcvtzs\0"
+  "fcvtzu\0" "fdiv\0" "fjcvtzs\0" "fmadd\0" "fmax\0" "fmaxnm\0" "fmaxnmp\0" "fmaxnmv\0" "fmaxp\0" "fmaxv\0" "fmin\0"
+  "fminnm\0" "fminnmp\0" "fminnmv\0" "fminp\0" "fminv\0" "fmla\0" "fmlal\0" "fmlal2\0" "fmls\0" "fmlsl\0" "fmlsl2\0"
+  "fmov\0" "fmsub\0" "fmul\0" "fmulx\0" "fneg\0" "fnmadd\0" "fnmsub\0" "fnmul\0" "frecpe\0" "frecps\0" "frecpx\0"
+  "frint32x\0" "frint32z\0" "frint64x\0" "frint64z\0" "frinta\0" "frinti\0" "frintm\0" "frintn\0" "frintp\0" "frintx\0"
+  "frintz\0" "frsqrte\0" "frsqrts\0" "fsqrt\0" "fsub\0" "gmi\0" "hint\0" "hlt\0" "hvc\0" "ins\0" "isb\0" "ld1\0"
+  "ld1r\0" "ld2\0" "ld2r\0" "ld3\0" "ld3r\0" "ld4\0" "ld4r\0" "ldadd\0" "ldadda\0" "ldaddab\0" "ldaddah\0" "ldaddal\0"
+  "ldaddalb\0" "ldaddalh\0" "ldaddb\0" "ldaddh\0" "ldaddl\0" "ldaddlb\0" "ldaddlh\0" "ldar\0" "ldarb\0" "ldarh\0"
+  "ldaxp\0" "ldaxr\0" "ldaxrb\0" "ldaxrh\0" "ldclr\0" "ldclra\0" "ldclrab\0" "ldclrah\0" "ldclral\0" "ldclralb\0"
+  "ldclralh\0" "ldclrb\0" "ldclrh\0" "ldclrl\0" "ldclrlb\0" "ldclrlh\0" "ldeor\0" "ldeora\0" "ldeorab\0" "ldeorah\0"
+  "ldeoral\0" "ldeoralb\0" "ldeoralh\0" "ldeorb\0" "ldeorh\0" "ldeorl\0" "ldeorlb\0" "ldeorlh\0" "ldg\0" "ldgm\0"
+  "ldlar\0" "ldlarb\0" "ldlarh\0" "ldnp\0" "ldp\0" "ldpsw\0" "ldr\0" "ldraa\0" "ldrab\0" "ldrb\0" "ldrh\0" "ldrsb\0"
+  "ldrsh\0" "ldrsw\0" "ldset\0" "ldseta\0" "ldsetab\0" "ldsetah\0" "ldsetal\0" "ldsetalb\0" "ldsetalh\0" "ldsetb\0"
+  "ldseth\0" "ldsetl\0" "ldsetlb\0" "ldsetlh\0" "ldsmax\0" "ldsmaxa\0" "ldsmaxab\0" "ldsmaxah\0" "ldsmaxal\0"
+  "ldsmaxalb\0" "ldsmaxalh\0" "ldsmaxb\0" "ldsmaxh\0" "ldsmaxl\0" "ldsmaxlb\0" "ldsmaxlh\0" "ldsmin\0" "ldsmina\0"
+  "ldsminab\0" "ldsminah\0" "ldsminal\0" "ldsminalb\0" "ldsminalh\0" "ldsminb\0" "ldsminh\0" "ldsminl\0" "ldsminlb\0"
+  "ldsminlh\0" "ldtr\0" "ldtrb\0" "ldtrh\0" "ldtrsb\0" "ldtrsh\0" "ldtrsw\0" "ldumax\0" "ldumaxa\0" "ldumaxab\0"
+  "ldumaxah\0" "ldumaxal\0" "ldumaxalb\0" "ldumaxalh\0" "ldumaxb\0" "ldumaxh\0" "ldumaxl\0" "ldumaxlb\0" "ldumaxlh\0"
+  "ldumin\0" "ldumina\0" "lduminab\0" "lduminah\0" "lduminal\0" "lduminalb\0" "lduminalh\0" "lduminb\0" "lduminh\0"
+  "lduminl\0" "lduminlb\0" "lduminlh\0" "ldur\0" "ldurb\0" "ldurh\0" "ldursb\0" "ldursh\0" "ldursw\0" "ldxp\0" "ldxr\0"
+  "ldxrb\0" "ldxrh\0" "lslv\0" "lsr\0" "lsrv\0" "mneg\0" "movi\0" "movk\0" "movn\0" "movz\0" "mrs\0" "msr\0" "mvn\0"
+  "mvni\0" "negs\0" "ngc\0" "ngcs\0" "nop\0" "not\0" "orn\0" "orr\0" "pacda\0" "pacdb\0" "pacdza\0" "pacdzb\0"
+  "pacga\0" "pmul\0" "pmull\0" "pmull2\0" "pssbb\0" "raddhn\0" "raddhn2\0" "rax1\0" "rbit\0" "rev\0" "rev16\0"
+  "rev32\0" "rev64\0" "ror\0" "rorv\0" "rsubhn\0" "rsubhn2\0" "saba\0" "sabal\0" "sabal2\0" "sabd\0" "sabdl\0"
+  "sabdl2\0" "sadalp\0" "saddl\0" "saddl2\0" "saddlp\0" "saddlv\0" "saddw\0" "saddw2\0" "sbc\0" "sbcs\0" "sbfiz\0"
+  "sbfm\0" "sbfx\0" "scvtf\0" "sdiv\0" "setf16\0" "setf8\0" "sev\0" "sevl\0" "sha1c\0" "sha1h\0" "sha1m\0" "sha1p\0"
+  "sha1su0\0" "sha1su1\0" "sha256h\0" "sha256h2\0" "sha256su0\0" "sha256su1\0" "sha512h\0" "sha512h2\0" "sha512su0\0"
+  "sha512su1\0" "shadd\0" "shsub\0" "sli\0" "sm3partw1\0" "sm3partw2\0" "sm3ss1\0" "sm3tt1a\0" "sm3tt1b\0" "sm3tt2a\0"
+  "sm3tt2b\0" "sm4e\0" "sm4ekey\0" "smaddl\0" "smaxp\0" "smaxv\0" "sminp\0" "sminv\0" "smlal\0" "smlal2\0" "smlsl\0"
+  "smlsl2\0" "smnegl\0" "smov\0" "smsubl\0" "smulh\0" "smull\0" "smull2\0" "sqabs\0" "sqdmlal\0" "sqdmlal2\0"
+  "sqdmlsl\0" "sqdmlsl2\0" "sqdmulh\0" "sqdmull\0" "sqdmull2\0" "sqneg\0" "sqrdmlah\0" "sqrdmlsh\0" "sqrdmulh\0"
+  "sqrshl\0" "sqrshrn\0" "sqrshrn2\0" "sqrshrun\0" "sqrshrun2\0" "sqshl\0" "sqshlu\0" "sqshrn\0" "sqshrn2\0"
+  "sqshrun\0" "sqshrun2\0" "sqsub\0" "sqxtn\0" "sqxtn2\0" "sqxtun\0" "sqxtun2\0" "srhadd\0" "sri\0" "srshl\0" "srshr\0"
+  "srsra\0" "sshl\0" "sshll\0" "sshll2\0" "sshr\0" "ssra\0" "ssubl\0" "ssubl2\0" "ssubw\0" "ssubw2\0" "st1\0" "st2\0"
+  "st2g\0" "st3\0" "st4\0" "stadd\0" "staddb\0" "staddh\0" "staddl\0" "staddlb\0" "staddlh\0" "stclr\0" "stclrb\0"
+  "stclrh\0" "stclrl\0" "stclrlb\0" "stclrlh\0" "steor\0" "steorb\0" "steorh\0" "steorl\0" "steorlb\0" "steorlh\0"
+  "stg\0" "stgm\0" "stgp\0" "stllr\0" "stllrb\0" "stllrh\0" "stlr\0" "stlrb\0" "stlrh\0" "stlxp\0" "stlxr\0" "stlxrb\0"
+  "stlxrh\0" "stnp\0" "stp\0" "str\0" "strb\0" "strh\0" "stset\0" "stsetb\0" "stseth\0" "stsetl\0" "stsetlb\0"
+  "stsetlh\0" "stsmax\0" "stsmaxb\0" "stsmaxh\0" "stsmaxl\0" "stsmaxlb\0" "stsmaxlh\0" "stsmin\0" "stsminb\0"
+  "stsminh\0" "stsminl\0" "stsminlb\0" "stsminlh\0" "sttr\0" "sttrb\0" "sttrh\0" "stumax\0" "stumaxb\0" "stumaxh\0"
+  "stumaxl\0" "stumaxlb\0" "stumaxlh\0" "stumin\0" "stuminb\0" "stuminh\0" "stuminl\0" "stuminlb\0" "stuminlh\0"
+  "stur\0" "sturb\0" "sturh\0" "stxp\0" "stxr\0" "stxrb\0" "stxrh\0" "stz2g\0" "stzg\0" "stzgm\0" "subg\0" "subp\0"
+  "subps\0" "subs\0" "sudot\0" "suqadd\0" "svc\0" "swp\0" "swpa\0" "swpab\0" "swpah\0" "swpal\0" "swpalb\0" "swpalh\0"
+  "swpb\0" "swph\0" "swpl\0" "swplb\0" "swplh\0" "sxtb\0" "sxth\0" "sxtl\0" "sxtl2\0" "sxtw\0" "sys\0" "tbl\0" "tbnz\0"
+  "tbx\0" "tbz\0" "tlbi\0" "trn1\0" "trn2\0" "uaba\0" "uabal\0" "uabal2\0" "uabd\0" "uabdl\0" "uabdl2\0" "uadalp\0"
+  "uaddl\0" "uaddl2\0" "uaddlp\0" "uaddlv\0" "uaddw\0" "uaddw2\0" "ubfiz\0" "ubfm\0" "ubfx\0" "ucvtf\0" "udf\0"
+  "udiv\0" "uhadd\0" "uhsub\0" "umaddl\0" "umaxp\0" "umaxv\0" "uminp\0" "uminv\0" "umlal\0" "umlal2\0" "umlsl\0"
+  "umlsl2\0" "ummla\0" "umnegl\0" "umov\0" "umsubl\0" "umulh\0" "umull\0" "umull2\0" "uqrshl\0" "uqrshrn\0"
+  "uqrshrn2\0" "uqshl\0" "uqshrn\0" "uqshrn2\0" "uqsub\0" "uqxtn\0" "uqxtn2\0" "urecpe\0" "urhadd\0" "urshl\0"
+  "urshr\0" "ursqrte\0" "ursra\0" "usdot\0" "ushl\0" "ushll\0" "ushll2\0" "ushr\0" "usmmla\0" "usqadd\0" "usra\0"
+  "usubl\0" "usubl2\0" "usubw\0" "usubw2\0" "uxtb\0" "uxth\0" "uxtl\0" "uxtl2\0" "uzp1\0" "uzp2\0" "wfe\0" "wfi\0"
+  "xaflag\0" "xar\0" "xpacd\0" "xpaci\0" "xpaclri\0" "yield\0" "zip1\0" "zip2";
+
+const InstDB::InstNameIndex InstDB::instNameIndex[26] = {
+  { Inst::kIdAdc          , Inst::kIdAnd_v         + 1 },
+  { Inst::kIdB            , Inst::kIdBsl_v         + 1 },
+  { Inst::kIdCas          , Inst::kIdCnt_v         + 1 },
+  { Inst::kIdDc           , Inst::kIdDup_v         + 1 },
+  { Inst::kIdEon          , Inst::kIdExt_v         + 1 },
+  { Inst::kIdFabd_v       , Inst::kIdFsub_v        + 1 },
+  { Inst::kIdGmi          , Inst::kIdGmi           + 1 },
+  { Inst::kIdHint         , Inst::kIdHvc           + 1 },
+  { Inst::kIdIc           , Inst::kIdIns_v         + 1 },
+  { Inst::kIdNone         , Inst::kIdNone          + 1 },
+  { Inst::kIdNone         , Inst::kIdNone          + 1 },
+  { Inst::kIdLdadd        , Inst::kIdLdur_v        + 1 },
+  { Inst::kIdMadd         , Inst::kIdMvni_v        + 1 },
+  { Inst::kIdNeg          , Inst::kIdNot_v         + 1 },
+  { Inst::kIdOrn          , Inst::kIdOrr_v         + 1 },
+  { Inst::kIdPacda        , Inst::kIdPmull2_v      + 1 },
+  { Inst::kIdNone         , Inst::kIdNone          + 1 },
+  { Inst::kIdRbit         , Inst::kIdRsubhn2_v     + 1 },
+  { Inst::kIdSbc          , Inst::kIdSxtl2_v       + 1 },
+  { Inst::kIdTlbi         , Inst::kIdTrn2_v        + 1 },
+  { Inst::kIdUbfiz        , Inst::kIdUzp2_v        + 1 },
+  { Inst::kIdNone         , Inst::kIdNone          + 1 },
+  { Inst::kIdWfe          , Inst::kIdWfi           + 1 },
+  { Inst::kIdXaflag       , Inst::kIdXtn2_v        + 1 },
+  { Inst::kIdYield        , Inst::kIdYield         + 1 },
+  { Inst::kIdZip1_v       , Inst::kIdZip2_v        + 1 }
+};
+// ----------------------------------------------------------------------------
+// ${NameData:End}
+#endif
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // !ASMJIT_NO_AARCH64
diff --git a/lib/lepton/asmjit/arm/a64instdb.h b/lib/lepton/asmjit/arm/a64instdb.h
new file mode 100644
index 0000000000..0575d1a2fa
--- /dev/null
+++ b/lib/lepton/asmjit/arm/a64instdb.h
@@ -0,0 +1,74 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_ARM_A64INSTDB_H_INCLUDED
+#define ASMJIT_ARM_A64INSTDB_H_INCLUDED
+
+#include "../arm/a64globals.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(a64)
+
+//! \addtogroup asmjit_a64
+//! \{
+
+//! Instruction database (AArch64).
+namespace InstDB {
+
+//! Instruction flags (each value is a distinct single bit, so flags can be combined into a mask).
+enum InstFlags : uint32_t {
+  //! The instruction provides conditional execution.
+  kInstFlagCond = 0x00000001u,
+  //! SIMD instruction that processes elements in pairs.
+  kInstFlagPair = 0x00000002u,
+  //! SIMD instruction that does widening (Long).
+  kInstFlagLong = 0x00000004u,
+  //! SIMD instruction that does narrowing (Narrow).
+  kInstFlagNarrow = 0x00000008u,
+  //! SIMD element access of half-words can only be used with v0..15.
+  kInstFlagVH0_15 = 0x00000010u,
+
+  //! Instruction may use consecutive registers if the number of operands is greater than 2.
+  kInstFlagConsecutive = 0x00000080u
+};
+
+//! Instruction information (AArch64).
+struct InstInfo {
+  //! Instruction encoding type.
+  uint32_t _encoding : 8;
+  //! Index to data specific to each encoding type.
+  uint32_t _encodingDataIndex : 8;
+  uint32_t _reserved : 2; //!< Filler bits: the four bit-fields add up to exactly 32 bits (8 + 8 + 2 + 14).
+  //! Index to \ref _nameData.
+  uint32_t _nameDataIndex : 14;
+
+  uint16_t _rwInfoIndex; //!< Returned by \ref rwInfoIndex() (NOTE(review): presumably an index into a read/write info table - confirm).
+  uint16_t _flags; //!< Bit mask returned by \ref flags() and tested by \ref hasFlag() (presumably \ref InstFlags values - confirm).
+
+  //! \name Accessors
+  //! \{
+
+  inline uint32_t rwInfoIndex() const noexcept { return _rwInfoIndex; }
+  inline uint32_t flags() const noexcept { return _flags; }
+
+  inline bool hasFlag(uint32_t flag) const { return (_flags & flag) != 0; } //!< True if any bit of `flag` is set in `_flags`.
+
+  //! \}
+};
+
+ASMJIT_VARAPI const InstInfo _instInfoTable[]; //!< Table of InstInfo records; \ref infoById() indexes it with the masked instruction id.
+
+static inline const InstInfo& infoById(InstId instId) noexcept { // Returns the _instInfoTable entry for `instId`.
+  instId &= uint32_t(InstIdParts::kRealId); // Keep only the kRealId bits (presumably strips extra parts packed into the id - confirm).
+  ASMJIT_ASSERT(Inst::isDefinedId(instId)); // The masked id must be a defined instruction id.
+  return _instInfoTable[instId];
+}
+
+} // {InstDB}
+
+//! \}
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // ASMJIT_ARM_A64INSTDB_H_INCLUDED
diff --git a/lib/lepton/asmjit/arm/a64instdb_p.h b/lib/lepton/asmjit/arm/a64instdb_p.h
new file mode 100644
index 0000000000..eb4f3f8376
--- /dev/null
+++ b/lib/lepton/asmjit/arm/a64instdb_p.h
@@ -0,0 +1,876 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_ARM_A64INSTDB_H_P_INCLUDED
+#define ASMJIT_ARM_A64INSTDB_H_P_INCLUDED
+
+#include "../core/codeholder.h"
+#include "../arm/a64instdb.h"
+#include "../arm/a64operand.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(a64)
+
+//! \cond INTERNAL
+//! \addtogroup asmjit_a64
+//! \{
+
+namespace InstDB {
+
+// a64::InstDB - Constants Used by Instructions
+// ============================================
+
+// GP register types supported by base instructions (bit mask: kW and kX are separate bits).
+static constexpr uint32_t kW = 0x1;
+static constexpr uint32_t kX = 0x2;
+static constexpr uint32_t kWX = 0x3; // Equals kW | kX.
+
+// GP high register IDs supported by the instruction.
+static constexpr uint32_t kZR = Gp::kIdZr; // Alias of Gp::kIdZr.
+static constexpr uint32_t kSP = Gp::kIdSp; // Alias of Gp::kIdSp.
+
+// a64::InstDB - RWInfo
+// ====================
+
+enum RWInfoType : uint32_t { // NOTE(review): names look like per-operand access patterns (R = read, W = write, X = read/write) - confirm.
+  kRWI_R,
+  kRWI_RW,
+  kRWI_RX,
+  kRWI_RRW,
+  kRWI_RWX,
+  kRWI_W,
+  kRWI_WRW,
+  kRWI_WRX,
+  kRWI_WRRW,
+  kRWI_WRRX,
+  kRWI_WW,
+  kRWI_X,
+  kRWI_XRX,
+  kRWI_XXRRX,
+
+  kRWI_LDn, // First value of the trailing LDn/STn group (see kRWI_SpecialStart).
+  kRWI_STn,
+
+  kRWI_SpecialStart = kRWI_LDn // Alias, not a new value: every enumerator >= this one is kRWI_LDn or kRWI_STn.
+};
+
+// a64::InstDB - ElementType
+// =========================
+
+enum ElementType : uint8_t { // Short aliases for the Vec::kElementType* constants, stored in 8 bits.
+  kET_None = Vec::kElementTypeNone,
+  kET_B    = Vec::kElementTypeB,
+  kET_H    = Vec::kElementTypeH,
+  kET_S    = Vec::kElementTypeS,
+  kET_D    = Vec::kElementTypeD,
+  kET_2H   = Vec::kElementTypeH2, // Note the swapped spelling: kET_2H aliases kElementTypeH2.
+  kET_4B   = Vec::kElementTypeB4  // Likewise kET_4B aliases kElementTypeB4.
+};
+
+// a64::InstDB - GpType
+// ====================
+
+enum GpType : uint8_t { // NOTE(review): presumably W = 32-bit GP, X = 64-bit GP, X_SP = 64-bit GP where SP is allowed - confirm.
+  kGp_W,
+  kGp_X,
+  kGp_X_SP
+};
+
+// a64::InstDB - OPSig
+// ===================
+
+enum kOpSignature : uint32_t { // Operand signatures built from the register classes' kSignature constants.
+  kOp_GpW = GpW::kSignature,
+  kOp_GpX = GpX::kSignature,
+
+  kOp_B = VecB::kSignature,
+  kOp_H = VecH::kSignature,
+  kOp_S = VecS::kSignature,
+  kOp_D = VecD::kSignature,
+  kOp_Q = VecV::kSignature, // Named Q here, but taken from VecV (there is no VecQ in this list).
+
+  kOp_V8B = VecD::kSignature | Vec::kSignatureElementB, // VecD signature combined with an element-type flag.
+  kOp_V4H = VecD::kSignature | Vec::kSignatureElementH,
+  kOp_V2S = VecD::kSignature | Vec::kSignatureElementS,
+
+  kOp_V16B = VecV::kSignature | Vec::kSignatureElementB, // VecV signature combined with an element-type flag.
+  kOp_V8H = VecV::kSignature | Vec::kSignatureElementH,
+  kOp_V4S = VecV::kSignature | Vec::kSignatureElementS,
+  kOp_V2D = VecV::kSignature | Vec::kSignatureElementD
+};
+
+// a64::InstDB - HFConv
+// ====================
+
+enum kHFConv : uint32_t {
+  //! FP16 version of the instruction is not available.
+  kHF_N,
+
+  //! Doesn't do any change to the opcode.
+  kHF_0,
+
+  kHF_A, // NOTE(review): kHF_A..kHF_D are not described in this header; presumably distinct FP16 opcode adjustments - confirm.
+  kHF_B,
+  kHF_C,
+  kHF_D,
+
+  kHF_Count // Number of values above; not a conversion kind itself.
+};
+
+// a64::InstDB - VOType
+// ====================
+
+//! Vector operand type combinations used by FP&SIMD instructions.
+enum VOType : uint32_t { // NOTE(review): suffixes presumably list the accepted element sizes/counts (e.g. B8H4S2) - confirm.
+  kVO_V_B,
+  kVO_V_BH,
+  kVO_V_BH_4S,
+  kVO_V_BHS,
+  kVO_V_BHS_D2,
+  kVO_V_HS,
+  kVO_V_S,
+
+  kVO_V_B8H4,
+  kVO_V_B8H4S2,
+  kVO_V_B8D1,
+  kVO_V_H4S2,
+
+  kVO_V_B16,
+  kVO_V_B16H8,
+  kVO_V_B16H8S4,
+  kVO_V_B16D2,
+  kVO_V_H8S4,
+  kVO_V_S4,
+  kVO_V_D2,
+
+  kVO_SV_BHS, // NOTE(review): the SV prefix presumably means scalar forms are accepted as well as vector ones - confirm.
+  kVO_SV_B8H4S2,
+  kVO_SV_HS,
+  kVO_V_Any,
+  kVO_SV_Any,
+
+  kVO_Count // Number of values above; not an operand type itself.
+};
+
+// a64::InstDB - EncodingId
+// ========================
+
+// ${EncodingId:Begin}
+// ------------------- Automatically generated, do not edit -------------------
+enum EncodingId : uint32_t {
+  kEncodingNone = 0,
+  kEncodingBaseAddSub,
+  kEncodingBaseAdr,
+  kEncodingBaseAtDcIcTlbi,
+  kEncodingBaseAtomicCasp,
+  kEncodingBaseAtomicOp,
+  kEncodingBaseAtomicSt,
+  kEncodingBaseBfc,
+  kEncodingBaseBfi,
+  kEncodingBaseBfm,
+  kEncodingBaseBfx,
+  kEncodingBaseBranchCmp,
+  kEncodingBaseBranchReg,
+  kEncodingBaseBranchRel,
+  kEncodingBaseBranchTst,
+  kEncodingBaseCCmp,
+  kEncodingBaseCInc,
+  kEncodingBaseCSel,
+  kEncodingBaseCSet,
+  kEncodingBaseCmpCmn,
+  kEncodingBaseExtend,
+  kEncodingBaseExtract,
+  kEncodingBaseLdSt,
+  kEncodingBaseLdpStp,
+  kEncodingBaseLdxp,
+  kEncodingBaseLogical,
+  kEncodingBaseMov,
+  kEncodingBaseMovKNZ,
+  kEncodingBaseMrs,
+  kEncodingBaseMsr,
+  kEncodingBaseMvnNeg,
+  kEncodingBaseOp,
+  kEncodingBaseOpImm,
+  kEncodingBaseR,
+  kEncodingBaseRM_NoImm,
+  kEncodingBaseRM_SImm10,
+  kEncodingBaseRM_SImm9,
+  kEncodingBaseRR,
+  kEncodingBaseRRII,
+  kEncodingBaseRRR,
+  kEncodingBaseRRRR,
+  kEncodingBaseRev,
+  kEncodingBaseShift,
+  kEncodingBaseStx,
+  kEncodingBaseStxp,
+  kEncodingBaseSys,
+  kEncodingBaseTst,
+  kEncodingFSimdPair,
+  kEncodingFSimdSV,
+  kEncodingFSimdVV,
+  kEncodingFSimdVVV,
+  kEncodingFSimdVVVV,
+  kEncodingFSimdVVVe,
+  kEncodingISimdPair,
+  kEncodingISimdSV,
+  kEncodingISimdVV,
+  kEncodingISimdVVV,
+  kEncodingISimdVVVI,
+  kEncodingISimdVVVV,
+  kEncodingISimdVVVVx,
+  kEncodingISimdVVVe,
+  kEncodingISimdVVVx,
+  kEncodingISimdVVx,
+  kEncodingISimdWWV,
+  kEncodingSimdBicOrr,
+  kEncodingSimdCmp,
+  kEncodingSimdDot,
+  kEncodingSimdDup,
+  kEncodingSimdFcadd,
+  kEncodingSimdFccmpFccmpe,
+  kEncodingSimdFcm,
+  kEncodingSimdFcmla,
+  kEncodingSimdFcmpFcmpe,
+  kEncodingSimdFcsel,
+  kEncodingSimdFcvt,
+  kEncodingSimdFcvtLN,
+  kEncodingSimdFcvtSV,
+  kEncodingSimdFmlal,
+  kEncodingSimdFmov,
+  kEncodingSimdIns,
+  kEncodingSimdLdNStN,
+  kEncodingSimdLdSt,
+  kEncodingSimdLdpStp,
+  kEncodingSimdLdurStur,
+  kEncodingSimdMov,
+  kEncodingSimdMoviMvni,
+  kEncodingSimdShift,
+  kEncodingSimdShiftES,
+  kEncodingSimdSm3tt,
+  kEncodingSimdSmovUmov,
+  kEncodingSimdSxtlUxtl,
+  kEncodingSimdTblTbx
+};
+// ----------------------------------------------------------------------------
+// ${EncodingId:End}
+
+// a64::InstDB::EncodingData
+// =========================
+
+namespace EncodingData {
+
+#define M_OPCODE(field, bits) \
+  uint32_t _##field : bits; \
+  inline constexpr uint32_t field() const noexcept { return uint32_t(_##field) << (32 - bits); } // Declares bit-field _<field> plus accessor <field>() returning it shifted left by (32 - bits).
+
+struct BaseOp { // Encoding data that consists of one opcode only.
+  uint32_t opcode; // Plain 32-bit value (not an M_OPCODE bit-field, so no shift is applied on read).
+};
+
+struct BaseOpImm { // Opcode plus the description of one immediate field.
+  uint32_t opcode; // Plain 32-bit value.
+  uint16_t immBits; // NOTE(review): presumably the width of the immediate in bits - confirm.
+  uint16_t immOffset; // NOTE(review): presumably the bit position of the immediate within the opcode - confirm.
+};
+
+struct BaseR { // Opcode plus three 8-bit attributes of a single register operand.
+  uint32_t opcode; // Plain 32-bit value.
+  uint32_t rType : 8; // NOTE(review): presumably a kW/kX/kWX register type mask - confirm.
+  uint32_t rHiId : 8; // NOTE(review): presumably the allowed high register id (kZR or kSP) - confirm.
+  uint32_t rShift : 8; // NOTE(review): presumably the bit position of the register field - confirm.
+};
+
+struct BaseRR { // Opcode plus type/hiId/shift attributes for two register operands (a and b).
+  uint32_t opcode; // Plain 32-bit value.
+  uint32_t aType : 2; // NOTE(review): presumably a kW/kX/kWX mask (2 bits fit the value 0x3) - confirm.
+  uint32_t aHiId : 6; // NOTE(review): presumably kZR or kSP - confirm.
+  uint32_t aShift : 5; // NOTE(review): presumably the bit position of operand a's register field - confirm.
+  uint32_t bType : 2;
+  uint32_t bHiId : 6;
+  uint32_t bShift : 5;
+  uint32_t uniform : 1; // NOTE(review): presumably requires both operands to have the same register type - confirm.
+};
+
+struct BaseRRR { // 22-bit opcode plus type/hiId attributes for three register operands (a, b, c).
+  M_OPCODE(opcode, 22) // Stored in 22 bits; opcode() returns it shifted left by 10.
+  uint32_t aType : 2; // NOTE(review): xType presumably is a kW/kX/kWX mask and xHiId kZR or kSP - confirm.
+  uint32_t aHiId : 6;
+  uint32_t bType : 2;
+  uint32_t bHiId : 6;
+  uint32_t cType : 2;
+  uint32_t cHiId : 6;
+  uint32_t uniform : 1; // NOTE(review): presumably requires all operands to share one register type - confirm.
+};
+
+struct BaseRRRR { // 22-bit opcode plus type/hiId attributes for four register operands (a, b, c, d).
+  M_OPCODE(opcode, 22) // Stored in 22 bits; opcode() returns it shifted left by 10.
+  uint32_t aType : 2; // NOTE(review): xType presumably is a kW/kX/kWX mask and xHiId kZR or kSP - confirm.
+  uint32_t aHiId : 6;
+  uint32_t bType : 2;
+  uint32_t bHiId : 6;
+  uint32_t cType : 2;
+  uint32_t cHiId : 6;
+  uint32_t dType : 2;
+  uint32_t dHiId : 6;
+  uint32_t uniform : 1; // NOTE(review): presumably requires all operands to share one register type - confirm.
+};
+
+struct BaseRRII { // 22-bit opcode, two register operands (a, b) and two immediate descriptions (aImm*, bImm*).
+  M_OPCODE(opcode, 22) // Stored in 22 bits; opcode() returns it shifted left by 10.
+  uint32_t aType : 2; // NOTE(review): xType presumably is a kW/kX/kWX mask and xHiId kZR or kSP - confirm.
+  uint32_t aHiId : 6;
+  uint32_t bType : 2;
+  uint32_t bHiId : 6;
+  uint32_t aImmSize : 6; // NOTE(review): presumably the immediate's width in bits - confirm.
+  uint32_t aImmDiscardLsb : 5; // NOTE(review): presumably the number of low bits dropped before encoding - confirm.
+  uint32_t aImmOffset : 5; // NOTE(review): presumably the immediate's bit position in the opcode - confirm.
+  uint32_t bImmSize : 6;
+  uint32_t bImmDiscardLsb : 5;
+  uint32_t bImmOffset : 5;
+};
+
+struct BaseAtDcIcTlbi {
+  uint32_t immVerifyMask : 14;
+  uint32_t immVerifyData : 14;
+  uint32_t mandatoryReg : 1;
+};
+
+struct BaseAdcSbc {
+  uint32_t opcode;
+};
+
+struct BaseAddSub {
+  uint32_t shiftedOp  : 10; // sf|.......|Sh|.|Rm|  Imm:6 |Rn|Rd|
+  uint32_t extendedOp : 10; // sf|.......|..|.|Rm|Opt|Imm3|Rn|Rd|
+  uint32_t immediateOp: 10; // sf|.......|Sh|    Imm:12   |Rn|Rd|
+};
+
+struct BaseAdr {
+  M_OPCODE(opcode, 22)
+  OffsetType offsetType : 8;
+};
+
+struct BaseBfm {
+  uint32_t opcode;         // sf|........|N|ImmR:6|ImmS:6|Rn|Rd|
+};
+
+struct BaseCmpCmn {
+  uint32_t shiftedOp  : 10; // sf|.......|Sh|.|Rm|  Imm:6 |Rn|11111|
+  uint32_t extendedOp : 10; // sf|.......|..|.|Rm|Opt|Imm3|Rn|11111|
+  uint32_t immediateOp: 10; // sf|.......|Sh|    Imm:12   |Rn|11111|
+};
+
+struct BaseExtend {
+  M_OPCODE(opcode, 22)      // sf|........|N|......|......|Rn|Rd|
+  uint32_t rType : 2;
+  uint32_t u : 1;
+};
+
+struct BaseLogical {
+  uint32_t shiftedOp  : 10; // sf|.......|Sh|.|Rm|  Imm:6 |Rn|Rd|
+  uint32_t immediateOp: 10; // sf|........|N|ImmR:6|ImmS:6|Rn|Rd|
+  uint32_t negateImm  : 1 ; // True if this is an operation that must negate IMM.
+};
+
+struct BaseMvnNeg {
+  uint32_t opcode;
+};
+
+struct BaseShift { // Two 22-bit opcodes (register and immediate forms, going by the names) plus a 2-bit `ror` field.
+  M_OPCODE(registerOp, 22)  // registerOp() returns the stored 22 bits shifted left by 10.
+  M_OPCODE(immediateOp, 22) // immediateOp() returns the stored 22 bits shifted left by 10.
+  uint32_t ror : 2;
+};
+
+struct BaseTst {
+  uint32_t shiftedOp  : 10; // sf|.......|Sh|.|Rm|  Imm:6 |Rn|11111|
+  uint32_t immediateOp: 10; // sf|........|N|ImmR:6|ImmS:6|Rn|11111|
+};
+
+struct BaseRM_NoImm {
+  M_OPCODE(opcode, 22)
+  uint32_t rType : 2;
+  uint32_t rHiId : 6;
+  uint32_t xOffset : 5;
+};
+
+struct BaseRM_SImm9 {
+  M_OPCODE(offsetOp, 22)
+  M_OPCODE(prePostOp, 22)
+  uint32_t rType : 2;
+  uint32_t rHiId : 6;
+  uint32_t xOffset : 5;
+  uint32_t immShift : 4;
+};
+
+struct BaseRM_SImm10 {
+  M_OPCODE(opcode, 22)
+  uint32_t rType : 2;
+  uint32_t rHiId : 6;
+  uint32_t xOffset : 5;
+  uint32_t immShift : 4;
+};
+
+struct BaseLdSt { // Raw bit-fields (no accessors); declared widths total 64 bits: 10+11+11 and 8+2+5+3+14.
+  uint32_t uOffsetOp  : 10;
+  uint32_t prePostOp  : 11;
+  uint32_t registerOp : 11;
+  uint32_t literalOp  : 8;
+  uint32_t rType      : 2;
+  uint32_t xOffset    : 5;
+  uint32_t uOffsetShift : 3;
+  uint32_t uAltInstId : 14; // NOTE(review): presumably the id of an alternative instruction — confirm where it is consumed.
+};
+
+struct BaseLdpStp {
+  uint32_t offsetOp : 10;
+  uint32_t prePostOp : 10;
+  uint32_t rType : 2;
+  uint32_t xOffset : 5;
+  uint32_t offsetShift : 3;
+};
+
+struct BaseStx {
+  M_OPCODE(opcode, 22)
+  uint32_t rType : 2;
+  uint32_t xOffset : 5;
+};
+
+struct BaseLdxp {
+  M_OPCODE(opcode, 22)
+  uint32_t rType : 2;
+  uint32_t xOffset : 5;
+};
+
+struct BaseStxp {
+  M_OPCODE(opcode, 22)
+  uint32_t rType : 2;
+  uint32_t xOffset : 5;
+};
+
+struct BaseAtomicOp {
+  M_OPCODE(opcode, 22)
+  uint32_t rType : 2;
+  uint32_t xOffset : 5;
+  uint32_t zr : 1;
+};
+
+struct BaseAtomicSt {
+  M_OPCODE(opcode, 22)
+  uint32_t rType : 2;
+  uint32_t xOffset : 5;
+};
+
+struct BaseAtomicCasp {
+  M_OPCODE(opcode, 22)
+  uint32_t rType : 2;
+  uint32_t xOffset : 5;
+};
+
+using BaseBranchReg = BaseOp; // Every alias in this list shares BaseOp's single-opcode layout.
+using BaseBranchRel = BaseOp;
+using BaseBranchCmp = BaseOp;
+using BaseBranchTst = BaseOp;
+using BaseExtract = BaseOp;
+using BaseBfc = BaseOp;
+using BaseBfi = BaseOp;
+using BaseBfx = BaseOp;
+using BaseCCmp = BaseOp;
+using BaseCInc = BaseOp;
+using BaseCSet = BaseOp;
+using BaseCSel = BaseOp;
+using BaseMovKNZ = BaseOp;
+using BaseMull = BaseOp;
+
+struct FSimdGeneric { // A 28-bit opcode plus a 4-bit "Hf" field, stored once for the scalar form and once for the vector form.
+  uint32_t _scalarOp : 28;
+  uint32_t _scalarHf : 4; // NOTE(review): "Hf" presumably selects half-float handling (cf. kHF_C in FSimdVVVe) — confirm.
+  uint32_t _vectorOp : 28;
+  uint32_t _vectorHf : 4;
+
+  constexpr uint32_t scalarOp() const noexcept { return uint32_t(_scalarOp) << 10; } // Stored opcode shifted left by 10.
+  constexpr uint32_t vectorOp() const noexcept { return uint32_t(_vectorOp) << 10; } // Stored opcode shifted left by 10.
+  constexpr uint32_t scalarHf() const noexcept { return uint32_t(_scalarHf); } // Returned as stored.
+  constexpr uint32_t vectorHf() const noexcept { return uint32_t(_vectorHf); } // Returned as stored.
+};
+
+using FSimdVV = FSimdGeneric; // The three aliases in this list reuse the FSimdGeneric layout unchanged.
+using FSimdVVV = FSimdGeneric;
+using FSimdVVVV = FSimdGeneric;
+
+struct FSimdSV {
+  uint32_t opcode;
+};
+
+struct FSimdVVVe { // Scalar opcode (28 bits + 4-bit Hf) plus full 32-bit vector and by-element opcode words.
+  uint32_t _scalarOp : 28;
+  uint32_t _scalarHf : 4;
+  uint32_t _vectorOp;
+  uint32_t _elementOp;
+
+  constexpr uint32_t scalarOp() const noexcept { return uint32_t(_scalarOp) << 10; } // Stored opcode shifted left by 10.
+  constexpr uint32_t scalarHf() const noexcept { return uint32_t(_scalarHf); }
+  constexpr uint32_t vectorOp() const noexcept { return uint32_t(_vectorOp) << 10; } // Stored opcode shifted left by 10.
+  constexpr uint32_t vectorHf() const noexcept { return kHF_C; } // Not stored: the vector form always reports kHF_C.
+  constexpr uint32_t elementScalarOp() const noexcept { return (uint32_t(_elementOp) << 10) | (0x5u << 28); } // Element opcode with 0x5 OR-ed into the top nibble (bits 28 and 30).
+  constexpr uint32_t elementVectorOp() const noexcept { return (uint32_t(_elementOp) << 10); } // Element opcode shifted left by 10, nothing OR-ed in.
+};
+
+struct SimdFcadd { // Single opcode word.
+  uint32_t _opcode;
+
+  constexpr uint32_t opcode() const noexcept { return _opcode << 10; } // Stored value shifted left by 10.
+};
+
+struct SimdFcmla {
+  uint32_t _regularOp;
+  uint32_t _elementOp;
+
+  constexpr uint32_t regularOp() const noexcept { return uint32_t(_regularOp) << 10; }
+  constexpr uint32_t elementOp() const noexcept { return (uint32_t(_elementOp) << 10); }
+};
+
+struct SimdFccmpFccmpe { // Single opcode word.
+  uint32_t _opcode;
+  constexpr uint32_t opcode() const noexcept { return _opcode; } // Returned as stored — no `<< 10` here, unlike SimdFcadd::opcode().
+};
+
+struct SimdFcm { // Opcode data with two optional forms ("register" and "zero", going by the field names).
+  uint32_t _registerOp : 28;
+  uint32_t _registerHf : 4;
+
+  uint32_t _zeroOp : 28;
+
+  constexpr bool hasRegisterOp() const noexcept { return _registerOp != 0; } // A stored 0 means "no register form".
+  constexpr bool hasZeroOp() const noexcept { return _zeroOp != 0; } // A stored 0 means "no zero form".
+
+  constexpr uint32_t registerScalarOp() const noexcept { return (uint32_t(_registerOp) << 10) | (0x5u << 28); } // Vector opcode with 0x5 OR-ed into the top nibble.
+  constexpr uint32_t registerVectorOp() const noexcept { return uint32_t(_registerOp) << 10; }
+  constexpr uint32_t registerScalarHf() const noexcept { return uint32_t(_registerHf); } // Scalar and vector forms share the one stored Hf value.
+  constexpr uint32_t registerVectorHf() const noexcept { return uint32_t(_registerHf); }
+
+  constexpr uint32_t zeroScalarOp() const noexcept { return (uint32_t(_zeroOp) << 10) | (0x5u << 28); } // Vector opcode with 0x5 OR-ed into the top nibble.
+  constexpr uint32_t zeroVectorOp() const noexcept { return (uint32_t(_zeroOp) << 10); }
+};
+
+struct SimdFcmpFcmpe {
+  uint32_t _opcode;
+  constexpr uint32_t opcode() const noexcept { return _opcode; }
+};
+
+struct SimdFcvtLN { // 22-bit opcode plus two 1-bit flags.
+  uint32_t _opcode : 22;
+  uint32_t _isCvtxn : 1;
+  uint32_t _hasScalar : 1;
+
+  constexpr uint32_t scalarOp() const noexcept { return (uint32_t(_opcode) << 10) | (0x5u << 28); } // Same opcode as vectorOp() with 0x5 OR-ed into the top nibble.
+  constexpr uint32_t vectorOp() const noexcept { return (uint32_t(_opcode) << 10); }
+
+  constexpr uint32_t isCvtxn() const noexcept { return _isCvtxn; } // Flags are returned as 0/1 in a uint32_t, not as bool.
+  constexpr uint32_t hasScalar() const noexcept { return _hasScalar; }
+};
+
+struct SimdFcvtSV { // Three opcode words (vector-int, vector-fp, general) plus a 1-bit direction flag.
+  uint32_t _vectorIntOp;
+  uint32_t _vectorFpOp;
+  uint32_t _generalOp : 31;
+  uint32_t _isFloatToInt : 1;
+
+  constexpr uint32_t scalarIntOp() const noexcept { return (uint32_t(_vectorIntOp) << 10) | (0x5u << 28); } // Derived from the vector opcode: 0x5 OR-ed into the top nibble.
+  constexpr uint32_t vectorIntOp() const noexcept { return uint32_t(_vectorIntOp) << 10; }
+  constexpr uint32_t scalarFpOp() const noexcept { return (uint32_t(_vectorFpOp) << 10) | (0x5u << 28); } // Derived from the vector opcode: 0x5 OR-ed into the top nibble.
+  constexpr uint32_t vectorFpOp() const noexcept { return uint32_t(_vectorFpOp) << 10; }
+  constexpr uint32_t generalOp() const noexcept { return (uint32_t(_generalOp) << 10); }
+
+  constexpr uint32_t isFloatToInt() const noexcept { return _isFloatToInt; }
+  constexpr uint32_t isFixedPoint() const noexcept { return _vectorFpOp != 0; } // 1 whenever a vector-fp opcode is stored, else 0.
+};
+
+struct SimdFmlal { // Vector and by-element opcode words plus four byte-sized attributes.
+  uint32_t _vectorOp;
+  uint32_t _elementOp;
+  uint8_t _optionalQ;
+  uint8_t tA; // tA/tB/tElement: SimdDot documents its identically named members as element types — presumably the same here.
+  uint8_t tB;
+  uint8_t tElement;
+
+  constexpr uint32_t vectorOp() const noexcept { return uint32_t(_vectorOp) << 10; } // Stored opcode shifted left by 10.
+  constexpr uint32_t elementOp() const noexcept { return uint32_t(_elementOp) << 10; } // Stored opcode shifted left by 10.
+  constexpr uint32_t optionalQ() const noexcept { return _optionalQ; } // Stored byte widened to uint32_t.
+};
+
+struct FSimdPair {
+  uint32_t _scalarOp;
+  uint32_t _vectorOp;
+
+  constexpr uint32_t scalarOp() const noexcept { return uint32_t(_scalarOp) << 10; }
+  constexpr uint32_t vectorOp() const noexcept { return uint32_t(_vectorOp) << 10; }
+};
+
+struct ISimdVV {
+  M_OPCODE(opcode, 22)
+  uint32_t vecOpType : 6;
+};
+
+struct ISimdVVx {
+  M_OPCODE(opcode, 22)
+  uint32_t op0Signature;
+  uint32_t op1Signature;
+};
+
+struct ISimdSV {
+  M_OPCODE(opcode, 22)
+  uint32_t vecOpType : 6;
+};
+
+struct ISimdVVV {
+  M_OPCODE(opcode, 22)
+  uint32_t vecOpType : 6;
+};
+
+struct ISimdVVVx {
+  M_OPCODE(opcode, 22)
+  uint32_t op0Signature;
+  uint32_t op1Signature;
+  uint32_t op2Signature;
+};
+
+struct ISimdWWV {
+  M_OPCODE(opcode, 22)
+  uint32_t vecOpType : 6;
+};
+
+struct ISimdVVVe {
+  uint32_t regularOp : 26; // 22 bits used.
+  uint32_t regularVecType : 6;
+  uint32_t elementOp : 26; // 22 bits used.
+  uint32_t elementVecType : 6;
+};
+
+struct ISimdVVVI {
+  M_OPCODE(opcode, 22)
+  uint32_t vecOpType : 6;
+  uint32_t immSize : 4;
+  uint32_t immShift : 4;
+  uint32_t imm64HasOneBitLess : 1;
+};
+
+struct ISimdVVVV {
+  uint32_t opcode : 22;
+  uint32_t vecOpType : 6;
+};
+
+struct ISimdVVVVx {
+  uint32_t opcode;
+  uint32_t op0Signature;
+  uint32_t op1Signature;
+  uint32_t op2Signature;
+  uint32_t op3Signature;
+};
+
+struct SimdBicOrr {
+  uint32_t registerOp;   // 22 bits used.
+  uint32_t immediateOp;  // 22 bits used.
+};
+
+struct SimdCmp {
+  uint32_t regOp;
+  uint32_t zeroOp : 22;
+  uint32_t vecOpType : 6;
+};
+
+struct SimdDot {
+  uint32_t vectorOp;     // 22 bits used.
+  uint32_t elementOp;    // 22 bits used.
+  uint8_t tA;            // Element-type of the first operand.
+  uint8_t tB;            // Element-type of the second and third operands.
+  uint8_t tElement;      // Element-type of the element index[] operand.
+};
+
+struct SimdMoviMvni {
+  uint32_t opcode : 31;
+  uint32_t inverted : 1;
+};
+
+struct SimdLdSt {
+  uint32_t uOffsetOp  : 10;
+  uint32_t prePostOp  : 11;
+  uint32_t registerOp : 11;
+  uint32_t literalOp  : 8;
+  uint32_t uAltInstId : 16;
+};
+
+struct SimdLdNStN {
+  uint32_t singleOp;
+  uint32_t multipleOp : 22;
+  uint32_t n : 3;
+  uint32_t replicate : 1;
+};
+
+struct SimdLdpStp {
+  uint32_t offsetOp : 10;
+  uint32_t prePostOp : 10;
+};
+
+struct SimdLdurStur {
+  uint32_t opcode;
+};
+
+struct ISimdPair {
+  uint32_t opcode2;      // 22 bits used.
+  uint32_t opcode3 : 26; // 22 bits used.
+  uint32_t opType3 : 6;
+};
+
+struct SimdShift {
+  uint32_t registerOp;       // 22 bits used.
+  uint32_t immediateOp : 22; // 22 bits used.
+  uint32_t invertedImm : 1;
+  uint32_t vecOpType : 6;
+};
+
+struct SimdShiftES {
+  uint32_t opcode : 22;
+  uint32_t vecOpType : 6;
+};
+
+struct SimdSm3tt {
+  uint32_t opcode;
+};
+
+struct SimdSmovUmov {
+  uint32_t opcode : 22;
+  uint32_t vecOpType : 6;
+  uint32_t isSigned : 1;
+};
+
+struct SimdSxtlUxtl {
+  uint32_t opcode : 22;
+  uint32_t vecOpType : 6;
+};
+
+struct SimdTblTbx {
+  uint32_t opcode;
+};
+
+#undef M_OPCODE
+
+// ${EncodingDataForward:Begin}
+// ------------------- Automatically generated, do not edit -------------------
+extern const BaseAddSub baseAddSub[4];
+extern const BaseAdr baseAdr[2];
+extern const BaseAtDcIcTlbi baseAtDcIcTlbi[4];
+extern const BaseAtomicCasp baseAtomicCasp[4];
+extern const BaseAtomicOp baseAtomicOp[123];
+extern const BaseAtomicSt baseAtomicSt[48];
+extern const BaseBfc baseBfc[1];
+extern const BaseBfi baseBfi[3];
+extern const BaseBfm baseBfm[3];
+extern const BaseBfx baseBfx[3];
+extern const BaseBranchCmp baseBranchCmp[2];
+extern const BaseBranchReg baseBranchReg[3];
+extern const BaseBranchRel baseBranchRel[2];
+extern const BaseBranchTst baseBranchTst[2];
+extern const BaseCCmp baseCCmp[2];
+extern const BaseCInc baseCInc[3];
+extern const BaseCSel baseCSel[4];
+extern const BaseCSet baseCSet[2];
+extern const BaseCmpCmn baseCmpCmn[2];
+extern const BaseExtend baseExtend[5];
+extern const BaseExtract baseExtract[1];
+extern const BaseLdSt baseLdSt[9];
+extern const BaseLdpStp baseLdpStp[6];
+extern const BaseLdxp baseLdxp[2];
+extern const BaseLogical baseLogical[8];
+extern const BaseMovKNZ baseMovKNZ[3];
+extern const BaseMvnNeg baseMvnNeg[3];
+extern const BaseOp baseOp[23];
+extern const BaseOpImm baseOpImm[14];
+extern const BaseR baseR[10];
+extern const BaseRM_NoImm baseRM_NoImm[21];
+extern const BaseRM_SImm10 baseRM_SImm10[2];
+extern const BaseRM_SImm9 baseRM_SImm9[23];
+extern const BaseRR baseRR[15];
+extern const BaseRRII baseRRII[2];
+extern const BaseRRR baseRRR[26];
+extern const BaseRRRR baseRRRR[6];
+extern const BaseShift baseShift[8];
+extern const BaseStx baseStx[3];
+extern const BaseStxp baseStxp[2];
+extern const BaseTst baseTst[1];
+extern const FSimdPair fSimdPair[5];
+extern const FSimdSV fSimdSV[4];
+extern const FSimdVV fSimdVV[17];
+extern const FSimdVVV fSimdVVV[13];
+extern const FSimdVVVV fSimdVVVV[4];
+extern const FSimdVVVe fSimdVVVe[4];
+extern const ISimdPair iSimdPair[1];
+extern const ISimdSV iSimdSV[7];
+extern const ISimdVV iSimdVV[29];
+extern const ISimdVVV iSimdVVV[65];
+extern const ISimdVVVI iSimdVVVI[2];
+extern const ISimdVVVV iSimdVVVV[2];
+extern const ISimdVVVVx iSimdVVVVx[1];
+extern const ISimdVVVe iSimdVVVe[25];
+extern const ISimdVVVx iSimdVVVx[17];
+extern const ISimdVVx iSimdVVx[13];
+extern const ISimdWWV iSimdWWV[8];
+extern const SimdBicOrr simdBicOrr[2];
+extern const SimdCmp simdCmp[7];
+extern const SimdDot simdDot[5];
+extern const SimdFcadd simdFcadd[1];
+extern const SimdFccmpFccmpe simdFccmpFccmpe[2];
+extern const SimdFcm simdFcm[5];
+extern const SimdFcmla simdFcmla[1];
+extern const SimdFcmpFcmpe simdFcmpFcmpe[2];
+extern const SimdFcvtLN simdFcvtLN[6];
+extern const SimdFcvtSV simdFcvtSV[12];
+extern const SimdFmlal simdFmlal[6];
+extern const SimdLdNStN simdLdNStN[12];
+extern const SimdLdSt simdLdSt[2];
+extern const SimdLdpStp simdLdpStp[4];
+extern const SimdLdurStur simdLdurStur[2];
+extern const SimdMoviMvni simdMoviMvni[2];
+extern const SimdShift simdShift[40];
+extern const SimdShiftES simdShiftES[2];
+extern const SimdSm3tt simdSm3tt[4];
+extern const SimdSmovUmov simdSmovUmov[2];
+extern const SimdSxtlUxtl simdSxtlUxtl[4];
+extern const SimdTblTbx simdTblTbx[2];
+// ----------------------------------------------------------------------------
+// ${EncodingDataForward:End}
+
+} // {EncodingData}
+
+// a64::InstDB - InstNameIndex
+// ===========================
+
+// ${NameLimits:Begin}
+// ------------------- Automatically generated, do not edit -------------------
+enum : uint32_t { kMaxNameSize = 9 };
+// ----------------------------------------------------------------------------
+// ${NameLimits:End}
+
+struct InstNameIndex { // Pair of 16-bit bounds; NOTE(review): presumably an instruction-id range per first letter (instNameIndex has 26 entries) — confirm.
+  uint16_t start;
+  uint16_t end; // NOTE(review): whether `end` is inclusive cannot be told from this declaration.
+};
+
+// a64::InstDB - Tables
+// ====================
+
+#ifndef ASMJIT_NO_TEXT
+extern const char _nameData[];
+extern const InstNameIndex instNameIndex[26];
+#endif // !ASMJIT_NO_TEXT
+
+} // {InstDB}
+
+//! \}
+//! \endcond
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // ASMJIT_A64_ARMINSTDB_H_P_INCLUDED
+
diff --git a/lib/lepton/asmjit/arm/a64operand.cpp b/lib/lepton/asmjit/arm/a64operand.cpp
new file mode 100644
index 0000000000..40a4a7952b
--- /dev/null
+++ b/lib/lepton/asmjit/arm/a64operand.cpp
@@ -0,0 +1,85 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#if !defined(ASMJIT_NO_AARCH64)
+
+#include "../core/misc_p.h"
+#include "../arm/a64operand.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(a64)
+
+// a64::Operand - Tests
+// ====================
+
+#if defined(ASMJIT_TEST)
+UNIT(a64_operand) { // Unit test (compiled only when ASMJIT_TEST is defined) for the predefined a64 register operands.
+  INFO("Checking if a64::reg(...) matches built-in IDs");
+  EXPECT(w(5) == w5);
+  EXPECT(x(5) == x5);
+
+  INFO("Checking Gp register properties");
+  EXPECT(Gp().isReg() == true); // Even a default-constructed Gp is expected to report isReg().
+  EXPECT(w0.isReg() == true);
+  EXPECT(x0.isReg() == true);
+  EXPECT(w0.id() == 0);
+  EXPECT(x0.id() == 0);
+  EXPECT(wzr.id() == Gp::kIdZr);
+  EXPECT(xzr.id() == Gp::kIdZr);
+  EXPECT(wsp.id() == Gp::kIdSp);
+  EXPECT(sp.id() == Gp::kIdSp);
+  EXPECT(w0.size() == 4); // Sizes are checked in bytes: 4 for w registers, 8 for x registers.
+  EXPECT(x0.size() == 8);
+  EXPECT(w0.type() == RegType::kARM_GpW);
+  EXPECT(x0.type() == RegType::kARM_GpX);
+  EXPECT(w0.group() == RegGroup::kGp);
+  EXPECT(x0.group() == RegGroup::kGp);
+
+  INFO("Checking Vec register properties");
+  EXPECT(v0.type() == RegType::kARM_VecV);
+  EXPECT(d0.type() == RegType::kARM_VecD);
+  EXPECT(s0.type() == RegType::kARM_VecS);
+  EXPECT(h0.type() == RegType::kARM_VecH);
+  EXPECT(b0.type() == RegType::kARM_VecB);
+
+  EXPECT(v0.group() == RegGroup::kVec); // Every vector view (v/d/s/h/b) must report the same register group.
+  EXPECT(d0.group() == RegGroup::kVec);
+  EXPECT(s0.group() == RegGroup::kVec);
+  EXPECT(h0.group() == RegGroup::kVec);
+  EXPECT(b0.group() == RegGroup::kVec);
+
+  INFO("Checking Vec register element[] access");
+  Vec vd_1 = v15.d(1); // The checks below expect element access to keep v15's type/group/id and add element type + index.
+  EXPECT(vd_1.type() == RegType::kARM_VecV);
+  EXPECT(vd_1.group() == RegGroup::kVec);
+  EXPECT(vd_1.id() == 15);
+  EXPECT(vd_1.isVecD2());
+  EXPECT(vd_1.elementType() == Vec::kElementTypeD);
+  EXPECT(vd_1.hasElementIndex());
+  EXPECT(vd_1.elementIndex() == 1);
+
+  Vec vs_3 = v15.s(3);
+  EXPECT(vs_3.type() == RegType::kARM_VecV);
+  EXPECT(vs_3.group() == RegGroup::kVec);
+  EXPECT(vs_3.id() == 15);
+  EXPECT(vs_3.isVecS4());
+  EXPECT(vs_3.elementType() == Vec::kElementTypeS);
+  EXPECT(vs_3.hasElementIndex());
+  EXPECT(vs_3.elementIndex() == 3);
+
+  Vec vb_4 = v15.b4(3); // The "_4" in the name refers to the b4 element type; the element index used is 3.
+  EXPECT(vb_4.type() == RegType::kARM_VecV);
+  EXPECT(vb_4.group() == RegGroup::kVec);
+  EXPECT(vb_4.id() == 15);
+  EXPECT(vb_4.isVecB4x4());
+  EXPECT(vb_4.elementType() == Vec::kElementTypeB4);
+  EXPECT(vb_4.hasElementIndex());
+  EXPECT(vb_4.elementIndex() == 3);
+}
+#endif
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // !ASMJIT_NO_AARCH64
diff --git a/lib/lepton/asmjit/arm/a64operand.h b/lib/lepton/asmjit/arm/a64operand.h
new file mode 100644
index 0000000000..c2d3c179a8
--- /dev/null
+++ b/lib/lepton/asmjit/arm/a64operand.h
@@ -0,0 +1,312 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_ARM_A64OPERAND_H_INCLUDED
+#define ASMJIT_ARM_A64OPERAND_H_INCLUDED
+
+#include "../arm/armoperand.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(a64)
+
+//! \addtogroup asmjit_a64
+//! \{
+
+using arm::Reg;
+using arm::Mem;
+using arm::Gp;
+using arm::GpW;
+using arm::GpX;
+
+using arm::Vec;
+using arm::VecB;
+using arm::VecH;
+using arm::VecS;
+using arm::VecD;
+using arm::VecV;
+
+#ifndef _DOXYGEN
+namespace regs {
+#endif
+
+using namespace ::asmjit::arm::regs;
+
+static constexpr GpW w0 = GpW(0);
+static constexpr GpW w1 = GpW(1);
+static constexpr GpW w2 = GpW(2);
+static constexpr GpW w3 = GpW(3);
+static constexpr GpW w4 = GpW(4);
+static constexpr GpW w5 = GpW(5);
+static constexpr GpW w6 = GpW(6);
+static constexpr GpW w7 = GpW(7);
+static constexpr GpW w8 = GpW(8);
+static constexpr GpW w9 = GpW(9);
+static constexpr GpW w10 = GpW(10);
+static constexpr GpW w11 = GpW(11);
+static constexpr GpW w12 = GpW(12);
+static constexpr GpW w13 = GpW(13);
+static constexpr GpW w14 = GpW(14);
+static constexpr GpW w15 = GpW(15);
+static constexpr GpW w16 = GpW(16);
+static constexpr GpW w17 = GpW(17);
+static constexpr GpW w18 = GpW(18);
+static constexpr GpW w19 = GpW(19);
+static constexpr GpW w20 = GpW(20);
+static constexpr GpW w21 = GpW(21);
+static constexpr GpW w22 = GpW(22);
+static constexpr GpW w23 = GpW(23);
+static constexpr GpW w24 = GpW(24);
+static constexpr GpW w25 = GpW(25);
+static constexpr GpW w26 = GpW(26);
+static constexpr GpW w27 = GpW(27);
+static constexpr GpW w28 = GpW(28);
+static constexpr GpW w29 = GpW(29);
+static constexpr GpW w30 = GpW(30);
+static constexpr GpW wzr = GpW(Gp::kIdZr); // Built from the symbolic id Gp::kIdZr rather than a literal number.
+static constexpr GpW wsp = GpW(Gp::kIdSp); // Built from the symbolic id Gp::kIdSp rather than a literal number.
+
+static constexpr GpX x0 = GpX(0);
+static constexpr GpX x1 = GpX(1);
+static constexpr GpX x2 = GpX(2);
+static constexpr GpX x3 = GpX(3);
+static constexpr GpX x4 = GpX(4);
+static constexpr GpX x5 = GpX(5);
+static constexpr GpX x6 = GpX(6);
+static constexpr GpX x7 = GpX(7);
+static constexpr GpX x8 = GpX(8);
+static constexpr GpX x9 = GpX(9);
+static constexpr GpX x10 = GpX(10);
+static constexpr GpX x11 = GpX(11);
+static constexpr GpX x12 = GpX(12);
+static constexpr GpX x13 = GpX(13);
+static constexpr GpX x14 = GpX(14);
+static constexpr GpX x15 = GpX(15);
+static constexpr GpX x16 = GpX(16);
+static constexpr GpX x17 = GpX(17);
+static constexpr GpX x18 = GpX(18);
+static constexpr GpX x19 = GpX(19);
+static constexpr GpX x20 = GpX(20);
+static constexpr GpX x21 = GpX(21);
+static constexpr GpX x22 = GpX(22);
+static constexpr GpX x23 = GpX(23);
+static constexpr GpX x24 = GpX(24);
+static constexpr GpX x25 = GpX(25);
+static constexpr GpX x26 = GpX(26);
+static constexpr GpX x27 = GpX(27);
+static constexpr GpX x28 = GpX(28);
+static constexpr GpX x29 = GpX(29);
+static constexpr GpX x30 = GpX(30);
+static constexpr GpX xzr = GpX(Gp::kIdZr);
+static constexpr GpX sp = GpX(Gp::kIdSp);
+
+static constexpr VecB b0 = VecB(0);
+static constexpr VecB b1 = VecB(1);
+static constexpr VecB b2 = VecB(2);
+static constexpr VecB b3 = VecB(3);
+static constexpr VecB b4 = VecB(4);
+static constexpr VecB b5 = VecB(5);
+static constexpr VecB b6 = VecB(6);
+static constexpr VecB b7 = VecB(7);
+static constexpr VecB b8 = VecB(8);
+static constexpr VecB b9 = VecB(9);
+static constexpr VecB b10 = VecB(10);
+static constexpr VecB b11 = VecB(11);
+static constexpr VecB b12 = VecB(12);
+static constexpr VecB b13 = VecB(13);
+static constexpr VecB b14 = VecB(14);
+static constexpr VecB b15 = VecB(15);
+static constexpr VecB b16 = VecB(16);
+static constexpr VecB b17 = VecB(17);
+static constexpr VecB b18 = VecB(18);
+static constexpr VecB b19 = VecB(19);
+static constexpr VecB b20 = VecB(20);
+static constexpr VecB b21 = VecB(21);
+static constexpr VecB b22 = VecB(22);
+static constexpr VecB b23 = VecB(23);
+static constexpr VecB b24 = VecB(24);
+static constexpr VecB b25 = VecB(25);
+static constexpr VecB b26 = VecB(26);
+static constexpr VecB b27 = VecB(27);
+static constexpr VecB b28 = VecB(28);
+static constexpr VecB b29 = VecB(29);
+static constexpr VecB b30 = VecB(30);
+static constexpr VecB b31 = VecB(31);
+
+static constexpr VecH h0 = VecH(0);
+static constexpr VecH h1 = VecH(1);
+static constexpr VecH h2 = VecH(2);
+static constexpr VecH h3 = VecH(3);
+static constexpr VecH h4 = VecH(4);
+static constexpr VecH h5 = VecH(5);
+static constexpr VecH h6 = VecH(6);
+static constexpr VecH h7 = VecH(7);
+static constexpr VecH h8 = VecH(8);
+static constexpr VecH h9 = VecH(9);
+static constexpr VecH h10 = VecH(10);
+static constexpr VecH h11 = VecH(11);
+static constexpr VecH h12 = VecH(12);
+static constexpr VecH h13 = VecH(13);
+static constexpr VecH h14 = VecH(14);
+static constexpr VecH h15 = VecH(15);
+static constexpr VecH h16 = VecH(16);
+static constexpr VecH h17 = VecH(17);
+static constexpr VecH h18 = VecH(18);
+static constexpr VecH h19 = VecH(19);
+static constexpr VecH h20 = VecH(20);
+static constexpr VecH h21 = VecH(21);
+static constexpr VecH h22 = VecH(22);
+static constexpr VecH h23 = VecH(23);
+static constexpr VecH h24 = VecH(24);
+static constexpr VecH h25 = VecH(25);
+static constexpr VecH h26 = VecH(26);
+static constexpr VecH h27 = VecH(27);
+static constexpr VecH h28 = VecH(28);
+static constexpr VecH h29 = VecH(29);
+static constexpr VecH h30 = VecH(30);
+static constexpr VecH h31 = VecH(31);
+
+static constexpr VecS s0 = VecS(0);
+static constexpr VecS s1 = VecS(1);
+static constexpr VecS s2 = VecS(2);
+static constexpr VecS s3 = VecS(3);
+static constexpr VecS s4 = VecS(4);
+static constexpr VecS s5 = VecS(5);
+static constexpr VecS s6 = VecS(6);
+static constexpr VecS s7 = VecS(7);
+static constexpr VecS s8 = VecS(8);
+static constexpr VecS s9 = VecS(9);
+static constexpr VecS s10 = VecS(10);
+static constexpr VecS s11 = VecS(11);
+static constexpr VecS s12 = VecS(12);
+static constexpr VecS s13 = VecS(13);
+static constexpr VecS s14 = VecS(14);
+static constexpr VecS s15 = VecS(15);
+static constexpr VecS s16 = VecS(16);
+static constexpr VecS s17 = VecS(17);
+static constexpr VecS s18 = VecS(18);
+static constexpr VecS s19 = VecS(19);
+static constexpr VecS s20 = VecS(20);
+static constexpr VecS s21 = VecS(21);
+static constexpr VecS s22 = VecS(22);
+static constexpr VecS s23 = VecS(23);
+static constexpr VecS s24 = VecS(24);
+static constexpr VecS s25 = VecS(25);
+static constexpr VecS s26 = VecS(26);
+static constexpr VecS s27 = VecS(27);
+static constexpr VecS s28 = VecS(28);
+static constexpr VecS s29 = VecS(29);
+static constexpr VecS s30 = VecS(30);
+static constexpr VecS s31 = VecS(31);
+
+static constexpr VecD d0 = VecD(0);
+static constexpr VecD d1 = VecD(1);
+static constexpr VecD d2 = VecD(2);
+static constexpr VecD d3 = VecD(3);
+static constexpr VecD d4 = VecD(4);
+static constexpr VecD d5 = VecD(5);
+static constexpr VecD d6 = VecD(6);
+static constexpr VecD d7 = VecD(7);
+static constexpr VecD d8 = VecD(8);
+static constexpr VecD d9 = VecD(9);
+static constexpr VecD d10 = VecD(10);
+static constexpr VecD d11 = VecD(11);
+static constexpr VecD d12 = VecD(12);
+static constexpr VecD d13 = VecD(13);
+static constexpr VecD d14 = VecD(14);
+static constexpr VecD d15 = VecD(15);
+static constexpr VecD d16 = VecD(16);
+static constexpr VecD d17 = VecD(17);
+static constexpr VecD d18 = VecD(18);
+static constexpr VecD d19 = VecD(19);
+static constexpr VecD d20 = VecD(20);
+static constexpr VecD d21 = VecD(21);
+static constexpr VecD d22 = VecD(22);
+static constexpr VecD d23 = VecD(23);
+static constexpr VecD d24 = VecD(24);
+static constexpr VecD d25 = VecD(25);
+static constexpr VecD d26 = VecD(26);
+static constexpr VecD d27 = VecD(27);
+static constexpr VecD d28 = VecD(28);
+static constexpr VecD d29 = VecD(29);
+static constexpr VecD d30 = VecD(30);
+static constexpr VecD d31 = VecD(31);
+
+static constexpr VecV q0 = VecV(0); // q0..q31 are VecV operands with the same ids as the v0..v31 constants in this namespace.
+static constexpr VecV q1 = VecV(1);
+static constexpr VecV q2 = VecV(2);
+static constexpr VecV q3 = VecV(3);
+static constexpr VecV q4 = VecV(4);
+static constexpr VecV q5 = VecV(5);
+static constexpr VecV q6 = VecV(6);
+static constexpr VecV q7 = VecV(7);
+static constexpr VecV q8 = VecV(8);
+static constexpr VecV q9 = VecV(9);
+static constexpr VecV q10 = VecV(10);
+static constexpr VecV q11 = VecV(11);
+static constexpr VecV q12 = VecV(12);
+static constexpr VecV q13 = VecV(13);
+static constexpr VecV q14 = VecV(14);
+static constexpr VecV q15 = VecV(15);
+static constexpr VecV q16 = VecV(16);
+static constexpr VecV q17 = VecV(17);
+static constexpr VecV q18 = VecV(18);
+static constexpr VecV q19 = VecV(19);
+static constexpr VecV q20 = VecV(20);
+static constexpr VecV q21 = VecV(21);
+static constexpr VecV q22 = VecV(22);
+static constexpr VecV q23 = VecV(23);
+static constexpr VecV q24 = VecV(24);
+static constexpr VecV q25 = VecV(25);
+static constexpr VecV q26 = VecV(26);
+static constexpr VecV q27 = VecV(27);
+static constexpr VecV q28 = VecV(28);
+static constexpr VecV q29 = VecV(29);
+static constexpr VecV q30 = VecV(30);
+static constexpr VecV q31 = VecV(31);
+
+static constexpr VecV v0 = VecV(0);
+static constexpr VecV v1 = VecV(1);
+static constexpr VecV v2 = VecV(2);
+static constexpr VecV v3 = VecV(3);
+static constexpr VecV v4 = VecV(4);
+static constexpr VecV v5 = VecV(5);
+static constexpr VecV v6 = VecV(6);
+static constexpr VecV v7 = VecV(7);
+static constexpr VecV v8 = VecV(8);
+static constexpr VecV v9 = VecV(9);
+static constexpr VecV v10 = VecV(10);
+static constexpr VecV v11 = VecV(11);
+static constexpr VecV v12 = VecV(12);
+static constexpr VecV v13 = VecV(13);
+static constexpr VecV v14 = VecV(14);
+static constexpr VecV v15 = VecV(15);
+static constexpr VecV v16 = VecV(16);
+static constexpr VecV v17 = VecV(17);
+static constexpr VecV v18 = VecV(18);
+static constexpr VecV v19 = VecV(19);
+static constexpr VecV v20 = VecV(20);
+static constexpr VecV v21 = VecV(21);
+static constexpr VecV v22 = VecV(22);
+static constexpr VecV v23 = VecV(23);
+static constexpr VecV v24 = VecV(24);
+static constexpr VecV v25 = VecV(25);
+static constexpr VecV v26 = VecV(26);
+static constexpr VecV v27 = VecV(27);
+static constexpr VecV v28 = VecV(28);
+static constexpr VecV v29 = VecV(29);
+static constexpr VecV v30 = VecV(30);
+static constexpr VecV v31 = VecV(31);
+
+#ifndef _DOXYGEN
+} // {regs}
+
+// Make `a64::regs` accessible through `a64` namespace as well.
+using namespace regs;
+#endif
+
+//! \}
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // ASMJIT_ARM_A64OPERAND_H_INCLUDED
diff --git a/lib/lepton/asmjit/arm/a64rapass.cpp b/lib/lepton/asmjit/arm/a64rapass.cpp
new file mode 100644
index 0000000000..aaec1c90f9
--- /dev/null
+++ b/lib/lepton/asmjit/arm/a64rapass.cpp
@@ -0,0 +1,852 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#if !defined(ASMJIT_NO_AARCH64) && !defined(ASMJIT_NO_COMPILER)
+
+#include "../core/cpuinfo.h"
+#include "../core/support.h"
+#include "../core/type.h"
+#include "../arm/a64assembler.h"
+#include "../arm/a64compiler.h"
+#include "../arm/a64emithelper_p.h"
+#include "../arm/a64instapi_p.h"
+#include "../arm/a64instdb_p.h"
+#include "../arm/a64rapass_p.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(a64)
+
+// a64::ARMRAPass - Helpers
+// ========================
+
+// TODO: [ARM] These should be shared with all backends.
+ASMJIT_MAYBE_UNUSED
+static inline uint64_t raImmMaskFromSize(uint32_t size) noexcept { // Returns a bit-mask that covers `size` bytes of an immediate.
+  ASMJIT_ASSERT(size > 0 && size < 256);
+  static const uint64_t masks[] = { // Indexed by `ctz(size)`; entries for sizes above 8 bytes are zero.
+    0x00000000000000FFu, //   1
+    0x000000000000FFFFu, //   2
+    0x00000000FFFFFFFFu, //   4
+    0xFFFFFFFFFFFFFFFFu, //   8
+    0x0000000000000000u, //  16
+    0x0000000000000000u, //  32
+    0x0000000000000000u, //  64
+    0x0000000000000000u, // 128
+    0x0000000000000000u  // 256
+  };
+  return masks[Support::ctz(size)]; // The lowest set bit of `size` selects the table entry.
+}
+
+static const RegMask raConsecutiveLeadCountToRegMaskFilter[5] = { // Indexed by `consecutiveLeadCount()`; applied to the lead register's allocable mask.
+  0xFFFFFFFFu, // [0] No consecutive.
+  0x00000000u, // [1] Invalid, never used.
+  0x7FFFFFFFu, // [2] 2 consecutive registers (bit 31 cleared).
+  0x3FFFFFFFu, // [3] 3 consecutive registers (bits 30..31 cleared).
+  0x1FFFFFFFu  // [4] 4 consecutive registers (bits 29..31 cleared).
+};
+
+static inline RATiedFlags raUseOutFlagsFromRWFlags(OpRWFlags rwFlags) noexcept { // Translates the Read/Write bits of `rwFlags` to tied-register flags.
+  static constexpr RATiedFlags map[] = { // Indexed by `rwFlags & kRW`.
+    RATiedFlags::kNone,
+    RATiedFlags::kRead  | RATiedFlags::kUse, // kRead
+    RATiedFlags::kWrite | RATiedFlags::kOut, // kWrite
+    RATiedFlags::kRW    | RATiedFlags::kUse, // kRW
+  };
+
+  return map[uint32_t(rwFlags & OpRWFlags::kRW)]; // All bits other than kRW are ignored.
+}
+
+static inline RATiedFlags raRegRwFlags(OpRWFlags flags) noexcept { // Tied flags of a register operand.
+  return raUseOutFlagsFromRWFlags(flags); // Register operands use the kRW bits directly.
+}
+
+static inline RATiedFlags raMemBaseRwFlags(OpRWFlags flags) noexcept { // Tied flags of a memory operand's base register.
+  constexpr uint32_t shift = Support::ConstCTZ<uint32_t(OpRWFlags::kMemBaseRW)>::value; // Bit position of the kMemBaseRW field.
+  return raUseOutFlagsFromRWFlags(OpRWFlags(uint32_t(flags) >> shift) & OpRWFlags::kRW); // Shift the field down so it lines up with kRW.
+}
+
+static inline RATiedFlags raMemIndexRwFlags(OpRWFlags flags) noexcept { // Tied flags of a memory operand's index register.
+  constexpr uint32_t shift = Support::ConstCTZ<uint32_t(OpRWFlags::kMemIndexRW)>::value; // Bit position of the kMemIndexRW field.
+  return raUseOutFlagsFromRWFlags(OpRWFlags(uint32_t(flags) >> shift) & OpRWFlags::kRW); // Shift the field down so it lines up with kRW.
+}
+// a64::RACFGBuilder
+// =================
+
+class RACFGBuilder : public RACFGBuilderT<RACFGBuilder> { // AArch64 CFG builder, derived from `RACFGBuilderT` via CRTP.
+public:
+  Arch _arch; // Architecture reported by the pass's compiler, captured at construction.
+
+  inline RACFGBuilder(ARMRAPass* pass) noexcept
+    : RACFGBuilderT<RACFGBuilder>(pass),
+      _arch(pass->cc()->arch()) {}
+
+  inline Compiler* cc() const noexcept { return static_cast<Compiler*>(_cc); } // Returns `_cc` cast to `a64::Compiler`.
+
+  Error onInst(InstNode* inst, InstControlFlow& controlType, RAInstBuilder& ib) noexcept; // Collects register constraints of a single instruction.
+
+  Error onBeforeInvoke(InvokeNode* invokeNode) noexcept; // Validates and prepares call arguments before the call node.
+  Error onInvoke(InvokeNode* invokeNode, RAInstBuilder& ib) noexcept; // Ties call arguments/returns and sets clobbered registers.
+
+  Error moveImmToRegArg(InvokeNode* invokeNode, const FuncValue& arg, const Imm& imm_, BaseReg* out) noexcept; // Moves an immediate into a new virtual register.
+  Error moveImmToStackArg(InvokeNode* invokeNode, const FuncValue& arg, const Imm& imm_) noexcept; // Moves an immediate into a stack argument.
+  Error moveRegToStackArg(InvokeNode* invokeNode, const FuncValue& arg, const BaseReg& reg) noexcept; // Stores a register into a stack argument.
+
+  Error onBeforeRet(FuncRetNode* funcRet) noexcept; // Does nothing on this backend.
+  Error onRet(FuncRetNode* funcRet, RAInstBuilder& ib) noexcept; // Ties returned operands to their return registers.
+};
+
+// a64::RACFGBuilder - OnInst
+// ==========================
+
+// TODO: [ARM] This is just a workaround...
+static InstControlFlow getControlFlowType(InstId instId) noexcept {
+  // Classifies an instruction id (real id plus ARM condition code) for CFG construction.
+  const bool isUnconditional = BaseInst::extractARMCondCode(instId) == CondCode::kAL;
+  switch (BaseInst::extractRealId(instId)) {
+    case Inst::kIdB:
+    case Inst::kIdBr:
+      // `b` / `br` is a jump only when the condition code is AL, a branch otherwise.
+      return isUnconditional ? InstControlFlow::kJump : InstControlFlow::kBranch;
+    case Inst::kIdCbz:
+    case Inst::kIdCbnz:
+    case Inst::kIdTbz:
+    case Inst::kIdTbnz:
+      return InstControlFlow::kBranch;
+    case Inst::kIdBl:
+    case Inst::kIdBlr:
+      return InstControlFlow::kCall;
+    case Inst::kIdRet:
+      return InstControlFlow::kReturn;
+    default:
+      return InstControlFlow::kRegular;
+  }
+}
+
+Error RACFGBuilder::onInst(InstNode* inst, InstControlFlow& controlType, RAInstBuilder& ib) noexcept {
+  InstRWInfo rwInfo;
+
+  if (Inst::isDefinedId(inst->realId())) {
+    InstId instId = inst->id();
+    uint32_t opCount = inst->opCount();
+    const Operand* opArray = inst->operands();
+    ASMJIT_PROPAGATE(InstInternal::queryRWInfo(_arch, inst->baseInst(), opArray, opCount, &rwInfo));
+
+    const InstDB::InstInfo& instInfo = InstDB::infoById(instId);
+    uint32_t singleRegOps = 0;
+
+    ib.addInstRWFlags(rwInfo.instFlags());
+
+    if (opCount) {
+      uint32_t consecutiveOffset = 0xFFFFFFFFu;
+      uint32_t consecutiveParent = Globals::kInvalidId;
+
+      for (uint32_t i = 0; i < opCount; i++) {
+        const Operand& op = opArray[i];
+        const OpRWInfo& opRwInfo = rwInfo.operand(i);
+
+        if (op.isReg()) {
+          // Register Operand
+          // ----------------
+          const Reg& reg = op.as<Reg>();
+
+          RATiedFlags flags = raRegRwFlags(opRwInfo.opFlags());
+          uint32_t vIndex = Operand::virtIdToIndex(reg.id());
+
+          if (vIndex < Operand::kVirtIdCount) {
+            RAWorkReg* workReg;
+            ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(vIndex, &workReg));
+
+            // Use RW instead of Write in case that not the whole register is overwritten. This is important for
+            // liveness as we cannot kill a register that will be used.
+            if ((flags & RATiedFlags::kRW) == RATiedFlags::kWrite) {
+              if (workReg->regByteMask() & ~(opRwInfo.writeByteMask() | opRwInfo.extendByteMask())) {
+                // Not write-only operation.
+                flags = (flags & ~RATiedFlags::kOut) | (RATiedFlags::kRead | RATiedFlags::kUse);
+              }
+            }
+
+            RegGroup group = workReg->group();
+
+            RegMask useRegs = _pass->_availableRegs[group];
+            RegMask outRegs = useRegs;
+
+            uint32_t useId = BaseReg::kIdBad;
+            uint32_t outId = BaseReg::kIdBad;
+
+            uint32_t useRewriteMask = 0;
+            uint32_t outRewriteMask = 0;
+
+            if (opRwInfo.consecutiveLeadCount()) {
+              // There must be a single consecutive register lead, otherwise the RW data is invalid.
+              if (consecutiveOffset != 0xFFFFFFFFu)
+                return DebugUtils::errored(kErrorInvalidState);
+
+              // A consecutive lead register cannot be used as a consecutive +1/+2/+3 register, the registers must be distinct.
+              if (RATiedReg::consecutiveDataFromFlags(flags) != 0)
+                return DebugUtils::errored(kErrorNotConsecutiveRegs);
+
+              flags |= RATiedFlags::kLeadConsecutive | RATiedReg::consecutiveDataToFlags(opRwInfo.consecutiveLeadCount() - 1);
+              consecutiveOffset = 0;
+
+              RegMask filter = raConsecutiveLeadCountToRegMaskFilter[opRwInfo.consecutiveLeadCount()];
+              if (Support::test(flags, RATiedFlags::kUse)) {
+                flags |= RATiedFlags::kUseConsecutive;
+                useRegs &= filter;
+              }
+              else {
+                flags |= RATiedFlags::kOutConsecutive;
+                outRegs &= filter;
+              }
+            }
+
+            if (Support::test(flags, RATiedFlags::kUse)) {
+              useRewriteMask = Support::bitMask(inst->getRewriteIndex(&reg._baseId));
+              if (opRwInfo.hasOpFlag(OpRWFlags::kRegPhysId)) {
+                useId = opRwInfo.physId();
+                flags |= RATiedFlags::kUseFixed;
+              }
+              else if (opRwInfo.hasOpFlag(OpRWFlags::kConsecutive)) {
+                if (consecutiveOffset == 0xFFFFFFFFu)
+                  return DebugUtils::errored(kErrorInvalidState);
+                flags |= RATiedFlags::kUseConsecutive | RATiedReg::consecutiveDataToFlags(++consecutiveOffset);
+              }
+            }
+            else {
+              outRewriteMask = Support::bitMask(inst->getRewriteIndex(&reg._baseId));
+              if (opRwInfo.hasOpFlag(OpRWFlags::kRegPhysId)) {
+                outId = opRwInfo.physId();
+                flags |= RATiedFlags::kOutFixed;
+              }
+              else if (opRwInfo.hasOpFlag(OpRWFlags::kConsecutive)) {
+                if (consecutiveOffset == 0xFFFFFFFFu)
+                  return DebugUtils::errored(kErrorInvalidState);
+                flags |= RATiedFlags::kOutConsecutive | RATiedReg::consecutiveDataToFlags(++consecutiveOffset);
+              }
+            }
+
+            // Special cases regarding element access.
+            if (reg.as<Vec>().hasElementIndex()) {
+              // Only the first 0..15 registers can be used if the register uses
+              // element accessor that accesses half-words (h[0..7] elements).
+              if (instInfo.hasFlag(InstDB::kInstFlagVH0_15) && reg.as<Vec>().elementType() == Vec::kElementTypeH) {
+                if (Support::test(flags, RATiedFlags::kUse))
+                  useId &= 0x0000FFFFu;
+                else
+                  outId &= 0x0000FFFFu;
+              }
+            }
+
+            ASMJIT_PROPAGATE(ib.add(workReg, flags, useRegs, useId, useRewriteMask, outRegs, outId, outRewriteMask, opRwInfo.rmSize(), consecutiveParent));
+            if (singleRegOps == i)
+              singleRegOps++;
+
+            if (Support::test(flags, RATiedFlags::kLeadConsecutive | RATiedFlags::kUseConsecutive | RATiedFlags::kOutConsecutive))
+              consecutiveParent = workReg->workId();
+          }
+        }
+        else if (op.isMem()) {
+          // Memory Operand
+          // --------------
+          const Mem& mem = op.as<Mem>();
+
+          if (mem.isRegHome()) {
+            RAWorkReg* workReg;
+            ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(Operand::virtIdToIndex(mem.baseId()), &workReg));
+            _pass->getOrCreateStackSlot(workReg);
+          }
+          else if (mem.hasBaseReg()) {
+            uint32_t vIndex = Operand::virtIdToIndex(mem.baseId());
+            if (vIndex < Operand::kVirtIdCount) {
+              RAWorkReg* workReg;
+              ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(vIndex, &workReg));
+
+              RATiedFlags flags = raMemBaseRwFlags(opRwInfo.opFlags());
+              RegGroup group = workReg->group();
+              RegMask allocable = _pass->_availableRegs[group];
+
+              // Base registers have never fixed id on ARM.
+              const uint32_t useId = BaseReg::kIdBad;
+              const uint32_t outId = BaseReg::kIdBad;
+
+              uint32_t useRewriteMask = 0;
+              uint32_t outRewriteMask = 0;
+
+              if (Support::test(flags, RATiedFlags::kUse))
+                useRewriteMask = Support::bitMask(inst->getRewriteIndex(&mem._baseId));
+              else
+                outRewriteMask = Support::bitMask(inst->getRewriteIndex(&mem._baseId));
+
+              ASMJIT_PROPAGATE(ib.add(workReg, flags, allocable, useId, useRewriteMask, allocable, outId, outRewriteMask));
+            }
+          }
+
+          if (mem.hasIndexReg()) {
+            uint32_t vIndex = Operand::virtIdToIndex(mem.indexId());
+            if (vIndex < Operand::kVirtIdCount) {
+              RAWorkReg* workReg;
+              ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(vIndex, &workReg));
+
+              RATiedFlags flags = raMemIndexRwFlags(opRwInfo.opFlags());
+              RegGroup group = workReg->group();
+              RegMask allocable = _pass->_availableRegs[group];
+
+              // Index registers have never fixed id on ARM.
+              const uint32_t useId = BaseReg::kIdBad;
+              const uint32_t outId = BaseReg::kIdBad;
+
+              uint32_t useRewriteMask = 0;
+              uint32_t outRewriteMask = 0;
+
+              if (Support::test(flags, RATiedFlags::kUse))
+                useRewriteMask = Support::bitMask(inst->getRewriteIndex(&mem._data[Operand::kDataMemIndexId]));
+              else
+                outRewriteMask = Support::bitMask(inst->getRewriteIndex(&mem._data[Operand::kDataMemIndexId]));
+
+              ASMJIT_PROPAGATE(ib.add(workReg, RATiedFlags::kUse | RATiedFlags::kRead, allocable, useId, useRewriteMask, allocable, outId, outRewriteMask));
+            }
+          }
+        }
+      }
+    }
+
+    controlType = getControlFlowType(instId);
+  }
+
+  return kErrorOk;
+}
+
+// a64::RACFGBuilder - OnInvoke
+// ============================
+
+Error RACFGBuilder::onBeforeInvoke(InvokeNode* invokeNode) noexcept { // Validates call arguments/returns and emits the moves required before the call node.
+  const FuncDetail& fd = invokeNode->detail();
+  uint32_t argCount = invokeNode->argCount();
+
+  cc()->_setCursor(invokeNode->prev()); // Code emitted below is inserted right before the invoke node.
+
+  for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) {
+    const FuncValuePack& argPack = fd.argPack(argIndex);
+    for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
+      if (!argPack[valueIndex])
+        break; // The first unassigned value terminates the pack.
+
+      const FuncValue& arg = argPack[valueIndex];
+      const Operand& op = invokeNode->arg(argIndex, valueIndex);
+
+      if (op.isNone())
+        continue;
+
+      if (op.isReg()) {
+        const Reg& reg = op.as<Reg>();
+        RAWorkReg* workReg;
+        ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(Operand::virtIdToIndex(reg.id()), &workReg));
+
+        if (arg.isReg()) {
+          RegGroup regGroup = workReg->group();
+          RegGroup argGroup = Reg::groupOf(arg.regType());
+
+          if (regGroup != argGroup) {
+            // TODO: [ARM] Conversion is not supported.
+            return DebugUtils::errored(kErrorInvalidAssignment);
+          }
+        }
+        else {
+          ASMJIT_PROPAGATE(moveRegToStackArg(invokeNode, arg, reg)); // The argument is not passed in a register, store it to its stack slot.
+        }
+      }
+      else if (op.isImm()) {
+        if (arg.isReg()) {
+          BaseReg reg;
+          ASMJIT_PROPAGATE(moveImmToRegArg(invokeNode, arg, op.as<Imm>(), &reg));
+          invokeNode->_args[argIndex][valueIndex] = reg; // Replace the immediate operand by the register that now holds it.
+        }
+        else {
+          ASMJIT_PROPAGATE(moveImmToStackArg(invokeNode, arg, op.as<Imm>()));
+        }
+      }
+    }
+  }
+
+  cc()->_setCursor(invokeNode); // Restore the cursor to the invoke node itself.
+
+  if (fd.hasRet()) {
+    for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
+      const FuncValue& ret = fd.ret(valueIndex);
+      if (!ret)
+        break;
+
+      const Operand& op = invokeNode->ret(valueIndex);
+      if (op.isReg()) {
+        const Reg& reg = op.as<Reg>();
+        RAWorkReg* workReg;
+        ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(Operand::virtIdToIndex(reg.id()), &workReg));
+
+        if (ret.isReg()) {
+          RegGroup regGroup = workReg->group();
+          RegGroup retGroup = Reg::groupOf(ret.regType());
+
+          if (regGroup != retGroup) {
+            // TODO: [ARM] Conversion is not supported.
+            return DebugUtils::errored(kErrorInvalidAssignment);
+          }
+        }
+      }
+    }
+  }
+
+  // This block has function call(s).
+  _curBlock->addFlags(RABlockFlags::kHasFuncCalls);
+  _pass->func()->frame().addAttributes(FuncAttributes::kHasFuncCalls);
+  _pass->func()->frame().updateCallStackSize(fd.argStackSize()); // Passes the callee's argument stack size to the frame.
+
+  return kErrorOk;
+}
+
+Error RACFGBuilder::onInvoke(InvokeNode* invokeNode, RAInstBuilder& ib) noexcept { // Ties argument/return registers of the call and records the registers it clobbers.
+  uint32_t argCount = invokeNode->argCount();
+  const FuncDetail& fd = invokeNode->detail();
+
+  for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) {
+    const FuncValuePack& argPack = fd.argPack(argIndex);
+    for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
+      if (!argPack[valueIndex])
+        continue; // NOTE(review): `onBeforeInvoke()` uses `break` at this point; the remaining values are still scanned here.
+
+      const FuncValue& arg = argPack[valueIndex];
+      const Operand& op = invokeNode->arg(argIndex, valueIndex);
+
+      if (op.isNone())
+        continue;
+
+      if (op.isReg()) {
+        const Reg& reg = op.as<Reg>();
+        RAWorkReg* workReg;
+        ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(Operand::virtIdToIndex(reg.id()), &workReg));
+
+        if (arg.isIndirect()) { // Indirect arguments are only accepted in general purpose registers.
+          RegGroup regGroup = workReg->group();
+          if (regGroup != RegGroup::kGp)
+            return DebugUtils::errored(kErrorInvalidState);
+          ASMJIT_PROPAGATE(ib.addCallArg(workReg, arg.regId()));
+        }
+        else if (arg.isReg()) {
+          RegGroup regGroup = workReg->group();
+          RegGroup argGroup = Reg::groupOf(arg.regType());
+
+          if (regGroup == argGroup) { // A mismatching group is silently skipped here; `onBeforeInvoke()` reports it as an error.
+            ASMJIT_PROPAGATE(ib.addCallArg(workReg, arg.regId()));
+          }
+        }
+      }
+    }
+  }
+
+  for (uint32_t retIndex = 0; retIndex < Globals::kMaxValuePack; retIndex++) {
+    const FuncValue& ret = fd.ret(retIndex);
+    if (!ret)
+      break;
+
+    const Operand& op = invokeNode->ret(retIndex);
+    if (op.isReg()) {
+      const Reg& reg = op.as<Reg>();
+      RAWorkReg* workReg;
+      ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(Operand::virtIdToIndex(reg.id()), &workReg));
+
+      if (ret.isReg()) {
+        RegGroup regGroup = workReg->group();
+        RegGroup retGroup = Reg::groupOf(ret.regType());
+
+        if (regGroup == retGroup) {
+          ASMJIT_PROPAGATE(ib.addCallRet(workReg, ret.regId()));
+        }
+      }
+      else {
+        return DebugUtils::errored(kErrorInvalidAssignment); // A register operand cannot receive a return value that is not in a register.
+      }
+    }
+  }
+
+  // Setup clobbered registers.
+  ib._clobbered[0] = Support::lsbMask<RegMask>(_pass->_physRegCount[RegGroup(0)]) & ~fd.preservedRegs(RegGroup(0)); // All physical registers of the group minus those the callee preserves.
+  ib._clobbered[1] = Support::lsbMask<RegMask>(_pass->_physRegCount[RegGroup(1)]) & ~fd.preservedRegs(RegGroup(1));
+  ib._clobbered[2] = Support::lsbMask<RegMask>(_pass->_physRegCount[RegGroup(2)]) & ~fd.preservedRegs(RegGroup(2));
+  ib._clobbered[3] = Support::lsbMask<RegMask>(_pass->_physRegCount[RegGroup(3)]) & ~fd.preservedRegs(RegGroup(3));
+
+  return kErrorOk;
+}
+
+// a64::RACFGBuilder - MoveImmToRegArg
+// ===================================
+
+Error RACFGBuilder::moveImmToRegArg(InvokeNode* invokeNode, const FuncValue& arg, const Imm& imm_, BaseReg* out) noexcept { // Creates a new virtual register in `out` and emits `mov out, imm`.
+  DebugUtils::unused(invokeNode);
+  ASMJIT_ASSERT(arg.isReg());
+
+  Imm imm(imm_); // Local copy, extended below according to the argument type.
+  TypeId typeId = TypeId::kVoid;
+
+  switch (arg.typeId()) { // Every supported integer type is materialized as a 64-bit value.
+    case TypeId::kInt8  : typeId = TypeId::kUInt64; imm.signExtend8Bits(); break;
+    case TypeId::kUInt8 : typeId = TypeId::kUInt64; imm.zeroExtend8Bits(); break;
+    case TypeId::kInt16 : typeId = TypeId::kUInt64; imm.signExtend16Bits(); break;
+    case TypeId::kUInt16: typeId = TypeId::kUInt64; imm.zeroExtend16Bits(); break;
+    case TypeId::kInt32 : typeId = TypeId::kUInt64; imm.signExtend32Bits(); break;
+    case TypeId::kUInt32: typeId = TypeId::kUInt64; imm.zeroExtend32Bits(); break;
+    case TypeId::kInt64 : typeId = TypeId::kUInt64; break;
+    case TypeId::kUInt64: typeId = TypeId::kUInt64; break;
+
+    default: // Any other argument type cannot take an immediate.
+      return DebugUtils::errored(kErrorInvalidAssignment);
+  }
+
+  ASMJIT_PROPAGATE(cc()->_newReg(out, typeId, nullptr));
+  cc()->virtRegById(out->id())->setWeight(BaseRAPass::kCallArgWeight);
+  return cc()->mov(out->as<Gp>(), imm);
+}
+
+// a64::RACFGBuilder - MoveImmToStackArg
+// =====================================
+
+Error RACFGBuilder::moveImmToStackArg(InvokeNode* invokeNode, const FuncValue& arg, const Imm& imm_) noexcept {
+  // Materializes the immediate in a temporary register first and then stores
+  // that register into the stack slot assigned to the argument.
+  BaseReg tmpReg;
+  ASMJIT_PROPAGATE(moveImmToRegArg(invokeNode, arg, imm_, &tmpReg));
+
+  return moveRegToStackArg(invokeNode, arg, tmpReg);
+}
+
+// a64::RACFGBuilder - MoveRegToStackArg
+// =====================================
+
+Error RACFGBuilder::moveRegToStackArg(InvokeNode* invokeNode, const FuncValue& arg, const BaseReg& reg) noexcept {
+  // Stores `reg` at `arg.stackOffset()` relative to the pass's stack pointer register.
+  DebugUtils::unused(invokeNode);
+  const Mem argSlot = ptr(_pass->_sp.as<Gp>(), arg.stackOffset());
+
+  if (reg.isGp())
+    return cc()->str(reg.as<Gp>(), argSlot);
+  else if (reg.isVec())
+    return cc()->str(reg.as<Vec>(), argSlot);
+  // Neither a general purpose nor a vector register, so no store is emitted.
+  return DebugUtils::errored(kErrorInvalidState);
+}
+
+// a64::RACFGBuilder - OnRet
+// =========================
+
+Error RACFGBuilder::onBeforeRet(FuncRetNode* funcRet) noexcept { // Nothing is emitted before a return node on this backend.
+  DebugUtils::unused(funcRet);
+  return kErrorOk; // Always succeeds.
+}
+
+Error RACFGBuilder::onRet(FuncRetNode* funcRet, RAInstBuilder& ib) noexcept {
+  // Ties every virtual register operand of the return node to the physical
+  // register that the function's signature assigns to that return value.
+  const FuncDetail& funcDetail = _pass->func()->detail();
+  const Operand* retOps = funcRet->operands();
+  const uint32_t retOpCount = funcRet->opCount();
+
+  for (uint32_t retIndex = 0; retIndex < retOpCount; retIndex++) {
+    const Operand& op = retOps[retIndex];
+    if (op.isNone())
+      continue;
+
+    // Both the declared return value and the operand must be registers.
+    const FuncValue& ret = funcDetail.ret(retIndex);
+    if (ASMJIT_UNLIKELY(!ret.isReg()))
+      return DebugUtils::errored(kErrorInvalidAssignment);
+
+    if (!op.isReg())
+      return DebugUtils::errored(kErrorInvalidAssignment);
+
+    const Reg& reg = op.as<Reg>();
+    const uint32_t vIndex = Operand::virtIdToIndex(reg.id());
+
+    // Operands that are not virtual registers are skipped.
+    if (vIndex >= Operand::kVirtIdCount)
+      continue;
+
+    RAWorkReg* workReg;
+    ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(vIndex, &workReg));
+    ASMJIT_PROPAGATE(ib.add(workReg, RATiedFlags::kUse | RATiedFlags::kRead, _pass->_availableRegs[workReg->group()], ret.regId(), 0, 0, BaseReg::kIdBad, 0));
+  }
+
+  return kErrorOk;
+}
+
+// a64::ARMRAPass - Construction & Destruction
+// ===========================================
+
+ARMRAPass::ARMRAPass() noexcept // Points the base pass's emit helper interface at the embedded `_emitHelper`.
+  : BaseRAPass() { _iEmitHelper = &_emitHelper; }
+ARMRAPass::~ARMRAPass() noexcept {} // Nothing to release explicitly.
+
+// a64::ARMRAPass - OnInit / OnDone
+// ================================
+
+void ARMRAPass::onInit() noexcept { // Sets up register counts, available register masks, scratch indexes and SP/FP for AArch64.
+  Arch arch = cc()->arch();
+
+  _emitHelper._emitter = _cb;
+
+  _archTraits = &ArchTraits::byArch(arch);
+  _physRegCount.set(RegGroup::kGp, 32);
+  _physRegCount.set(RegGroup::kVec, 32);
+  _physRegCount.set(RegGroup::kExtraVirt2, 0); // The two extra groups have no registers on this backend.
+  _physRegCount.set(RegGroup::kExtraVirt3, 0);
+  _buildPhysIndex();
+
+  _availableRegCount = _physRegCount;
+  _availableRegs[RegGroup::kGp] = Support::lsbMask<uint32_t>(_physRegCount.get(RegGroup::kGp));
+  _availableRegs[RegGroup::kVec] = Support::lsbMask<uint32_t>(_physRegCount.get(RegGroup::kVec));
+  _availableRegs[RegGroup::kExtraVirt2] = Support::lsbMask<uint32_t>(_physRegCount.get(RegGroup::kExtraVirt2)); // Was assigned to kExtraVirt3 by mistake.
+  _availableRegs[RegGroup::kExtraVirt3] = Support::lsbMask<uint32_t>(_physRegCount.get(RegGroup::kExtraVirt3));
+
+  _scratchRegIndexes[0] = uint8_t(27);
+  _scratchRegIndexes[1] = uint8_t(28);
+
+  // The architecture specific setup makes implicitly all registers available. So
+  // make unavailable all registers that are special and cannot be used in general.
+  bool hasFP = _func->frame().hasPreservedFP();
+
+  if (hasFP) // The frame pointer register is reserved only when the frame preserves FP.
+    makeUnavailable(RegGroup::kGp, Gp::kIdFp);
+
+  makeUnavailable(RegGroup::kGp, Gp::kIdSp);
+  makeUnavailable(RegGroup::kGp, Gp::kIdOs); // OS-specific use, usually TLS.
+
+  _sp = sp;
+  _fp = x29;
+}
+
+void ARMRAPass::onDone() noexcept {} // No backend-specific cleanup is performed.
+
+// a64::ARMRAPass - BuildCFG
+// =========================
+
+Error ARMRAPass::buildCFG() noexcept { // Builds the control flow graph with the AArch64 `RACFGBuilder`.
+  return RACFGBuilder(this).run(); // The builder is a temporary that lives only for this call.
+}
+
+// a64::ARMRAPass - Rewrite
+// ========================
+
+ASMJIT_FAVOR_SPEED Error ARMRAPass::_rewrite(BaseNode* first, BaseNode* stop) noexcept { // Rewrites nodes in [first, stop): virtual -> physical ids, stack slots, and `adr` with a memory operand.
+  uint32_t virtCount = cc()->_vRegArray.size();
+
+  BaseNode* node = first;
+  while (node != stop) {
+    BaseNode* next = node->next(); // Fetched up-front because `node` may be removed below.
+    if (node->isInst()) {
+      InstNode* inst = node->as<InstNode>();
+      RAInst* raInst = node->passData<RAInst>();
+
+      Operand* operands = inst->operands();
+      uint32_t opCount = inst->opCount();
+
+      uint32_t i;
+
+      // Rewrite virtual registers into physical registers.
+      if (raInst) {
+        // If the instruction contains pass data (raInst) then it was a subject
+        // for register allocation and must be rewritten to use physical regs.
+        RATiedReg* tiedRegs = raInst->tiedRegs();
+        uint32_t tiedCount = raInst->tiedCount();
+
+        for (i = 0; i < tiedCount; i++) {
+          RATiedReg* tiedReg = &tiedRegs[i];
+
+          Support::BitWordIterator<uint32_t> useIt(tiedReg->useRewriteMask()); // Each set bit is a rewrite index inside the instruction.
+          uint32_t useId = tiedReg->useId();
+          while (useIt.hasNext())
+            inst->rewriteIdAtIndex(useIt.next(), useId);
+
+          Support::BitWordIterator<uint32_t> outIt(tiedReg->outRewriteMask());
+          uint32_t outId = tiedReg->outId();
+          while (outIt.hasNext())
+            inst->rewriteIdAtIndex(outIt.next(), outId);
+        }
+
+        // This data is allocated by Zone passed to `runOnFunction()`, which
+        // will be reset after the RA pass finishes. So reset this data to
+        // prevent having a dead pointer after the RA pass is complete.
+        node->resetPassData();
+
+        if (ASMJIT_UNLIKELY(node->type() != NodeType::kInst)) {
+          // FuncRet terminates the flow, it must either be removed if the exit
+          // label is next to it (optimization) or patched to an architecture
+          // dependent jump instruction that jumps to the function's exit before
+          // the epilog.
+          if (node->type() == NodeType::kFuncRet) {
+            RABlock* block = raInst->block();
+            if (!isNextTo(node, _func->exitNode())) {
+              cc()->_setCursor(node->prev());
+              ASMJIT_PROPAGATE(emitJump(_func->exitNode()->label()));
+            }
+
+            BaseNode* prev = node->prev(); // Read after the optional jump was emitted, so it may be that jump.
+            cc()->removeNode(node);
+            block->setLast(prev);
+          }
+        }
+      }
+
+      // Rewrite stack slot addresses.
+      for (i = 0; i < opCount; i++) {
+        Operand& op = operands[i];
+        if (op.isMem()) {
+          BaseMem& mem = op.as<BaseMem>();
+          if (mem.isRegHome()) {
+            uint32_t virtIndex = Operand::virtIdToIndex(mem.baseId());
+            if (ASMJIT_UNLIKELY(virtIndex >= virtCount))
+              return DebugUtils::errored(kErrorInvalidVirtId);
+
+            VirtReg* virtReg = cc()->virtRegByIndex(virtIndex);
+            RAWorkReg* workReg = virtReg->workReg();
+            ASMJIT_ASSERT(workReg != nullptr);
+
+            RAStackSlot* slot = workReg->stackSlot();
+            int32_t offset = slot->offset();
+
+            mem._setBase(_sp.type(), slot->baseRegId()); // The home becomes an address relative to the slot's base register.
+            mem.clearRegHome();
+            mem.addOffsetLo32(offset);
+          }
+        }
+      }
+
+      // Rewrite `loadAddressOf()` construct.
+      if (inst->realId() == Inst::kIdAdr && inst->opCount() == 2 && inst->op(1).isMem()) {
+        BaseMem mem = inst->op(1).as<BaseMem>();
+        int64_t offset = mem.offset();
+
+        if (!mem.hasBaseOrIndex()) { // Absolute address only: load the offset as an immediate.
+          inst->setId(Inst::kIdMov);
+          inst->setOp(1, Imm(offset));
+        }
+        else {
+          if (mem.hasIndex())
+            return DebugUtils::errored(kErrorInvalidAddressIndex);
+
+          GpX dst(inst->op(0).as<Gp>().id());
+          GpX base(mem.baseId());
+
+          InstId arithInstId = offset < 0 ? Inst::kIdSub : Inst::kIdAdd; // Negative offsets are subtracted as their absolute value.
+          uint64_t absOffset = offset < 0 ? Support::neg(uint64_t(offset)) : uint64_t(offset);
+
+          inst->setId(arithInstId);
+          inst->setOpCount(3);
+          inst->setOp(1, base);
+          inst->setOp(2, Imm(absOffset));
+
+          // Use two operations if the offset cannot be encoded with ADD/SUB.
+          if (absOffset > 0xFFFu && (absOffset & ~uint64_t(0xFFF000u)) != 0) {
+            if (absOffset <= 0xFFFFFFu) { // Split into a low 12-bit part and a high 12-bit part.
+              cc()->_setCursor(inst->prev());
+              ASMJIT_PROPAGATE(cc()->emit(arithInstId, dst, base, Imm(absOffset & 0xFFFu)));
+
+              inst->setOp(1, dst);
+              inst->setOp(2, Imm(absOffset & 0xFFF000u));
+            }
+            else { // Too large for two immediates: move the offset into `dst` first.
+              cc()->_setCursor(inst->prev());
+              ASMJIT_PROPAGATE(cc()->emit(Inst::kIdMov, inst->op(0), Imm(absOffset)));
+
+              inst->setOp(1, base); // NOTE(review): `dst` is written before `base` is read here - confirm they can never be the same register.
+              inst->setOp(2, dst);
+            }
+          }
+        }
+      }
+    }
+
+    node = next;
+  }
+
+  return kErrorOk;
+}
+
+// a64::ARMRAPass - Prolog & Epilog
+// ================================
+
+Error ARMRAPass::updateStackFrame() noexcept {
+  auto& frame = _func->frame();
+  if (frame.hasFuncCalls()) // A function that contains calls gets LR marked as dirty.
+    frame.addDirtyRegs(RegGroup::kGp, Support::bitMask(Gp::kIdLr));
+  return BaseRAPass::updateStackFrame();
+}
+
+// a64::ARMRAPass - OnEmit
+// =======================
+
+Error ARMRAPass::emitMove(uint32_t workId, uint32_t dstPhysId, uint32_t srcPhysId) noexcept {
+  // Emits a move of work register `workId` from physical register `srcPhysId` to `dstPhysId`.
+  RAWorkReg* workReg = workRegById(workId);
+  BaseReg target(workReg->signature(), dstPhysId);
+  BaseReg source(workReg->signature(), srcPhysId);
+  const char* annotation = nullptr;
+
+#ifndef ASMJIT_NO_LOGGING
+  if (hasDiagnosticOption(DiagnosticOptions::kRAAnnotate)) {
+    _tmpString.assignFormat("<MOVE> %s", workReg->name());
+    annotation = _tmpString.data();
+  }
+#endif
+
+  return _emitHelper.emitRegMove(target, source, workReg->typeId(), annotation);
+}
+
+Error ARMRAPass::emitSwap(uint32_t aWorkId, uint32_t aPhysId, uint32_t bWorkId, uint32_t bPhysId) noexcept { // Register swap is not implemented by this backend.
+  DebugUtils::unused(aWorkId, aPhysId, bWorkId, bPhysId);
+  return DebugUtils::errored(kErrorInvalidState); // Always fails.
+}
+
+Error ARMRAPass::emitLoad(uint32_t workId, uint32_t dstPhysId) noexcept {
+  // Emits a load of work register `workId` from its memory home into physical register `dstPhysId`.
+  RAWorkReg* workReg = workRegById(workId);
+  BaseReg target(workReg->signature(), dstPhysId);
+  BaseMem home(workRegAsMem(workReg));
+  const char* annotation = nullptr;
+
+#ifndef ASMJIT_NO_LOGGING
+  if (hasDiagnosticOption(DiagnosticOptions::kRAAnnotate)) {
+    _tmpString.assignFormat("<LOAD> %s", workReg->name());
+    annotation = _tmpString.data();
+  }
+#endif
+
+  return _emitHelper.emitRegMove(target, home, workReg->typeId(), annotation);
+}
+
+Error ARMRAPass::emitSave(uint32_t workId, uint32_t srcPhysId) noexcept {
+  // Emits a store of physical register `srcPhysId` into the memory home of work register `workId`.
+  RAWorkReg* workReg = workRegById(workId);
+  BaseMem home(workRegAsMem(workReg));
+  BaseReg source(workReg->signature(), srcPhysId);
+  const char* annotation = nullptr;
+
+#ifndef ASMJIT_NO_LOGGING
+  if (hasDiagnosticOption(DiagnosticOptions::kRAAnnotate)) {
+    _tmpString.assignFormat("<SAVE> %s", workReg->name());
+    annotation = _tmpString.data();
+  }
+#endif
+
+  return _emitHelper.emitRegMove(home, source, workReg->typeId(), annotation);
+}
+
+Error ARMRAPass::emitJump(const Label& label) noexcept { // Emits a `b` instruction to `label`.
+  return cc()->b(label); // Returns the compiler's result unchanged.
+}
+
+Error ARMRAPass::emitPreCall(InvokeNode* invokeNode) noexcept { // Nothing is emitted before a call on this backend.
+  DebugUtils::unused(invokeNode);
+  return kErrorOk; // Always succeeds.
+}
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // !ASMJIT_NO_AARCH64 && !ASMJIT_NO_COMPILER
diff --git a/lib/lepton/asmjit/arm/a64rapass_p.h b/lib/lepton/asmjit/arm/a64rapass_p.h
new file mode 100644
index 0000000000..e1a90ab8df
--- /dev/null
+++ b/lib/lepton/asmjit/arm/a64rapass_p.h
@@ -0,0 +1,105 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_ARM_A64RAPASS_P_H_INCLUDED
+#define ASMJIT_ARM_A64RAPASS_P_H_INCLUDED
+
+#include "../core/api-config.h"
+#ifndef ASMJIT_NO_COMPILER
+
+#include "../core/compiler.h"
+#include "../core/rabuilders_p.h"
+#include "../core/rapass_p.h"
+#include "../arm/a64assembler.h"
+#include "../arm/a64compiler.h"
+#include "../arm/a64emithelper_p.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(a64)
+
+//! \cond INTERNAL
+//! \addtogroup asmjit_a64
+//! \{
+
+//! ARM register allocation pass.
+//!
+//! Takes care of generating function prologs and epilogs, and also performs
+//! register allocation.
+class ARMRAPass : public BaseRAPass {
+public:
+  ASMJIT_NONCOPYABLE(ARMRAPass)
+  typedef BaseRAPass Base;
+
+  EmitHelper _emitHelper;
+
+  //! \name Construction & Destruction
+  //! \{
+
+  ARMRAPass() noexcept;
+  virtual ~ARMRAPass() noexcept;
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns the compiler casted to `a64::Compiler`.
+  inline Compiler* cc() const noexcept { return static_cast<Compiler*>(_cb); }
+
+  //! Returns emit helper.
+  inline EmitHelper* emitHelper() noexcept { return &_emitHelper; }
+
+  //! \}
+
+  //! \name Events
+  //! \{
+
+  void onInit() noexcept override;
+  void onDone() noexcept override;
+
+  //! \}
+
+  //! \name CFG
+  //! \{
+
+  Error buildCFG() noexcept override;
+
+  //! \}
+
+  //! \name Rewrite
+  //! \{
+
+  Error _rewrite(BaseNode* first, BaseNode* stop) noexcept override;
+
+  //! \}
+
+  //! \name Prolog & Epilog
+  //! \{
+
+  Error updateStackFrame() noexcept override;
+
+  //! \}
+
+  //! \name Emit Helpers
+  //! \{
+
+  Error emitMove(uint32_t workId, uint32_t dstPhysId, uint32_t srcPhysId) noexcept override;
+  Error emitSwap(uint32_t aWorkId, uint32_t aPhysId, uint32_t bWorkId, uint32_t bPhysId) noexcept override;
+
+  Error emitLoad(uint32_t workId, uint32_t dstPhysId) noexcept override;
+  Error emitSave(uint32_t workId, uint32_t srcPhysId) noexcept override;
+
+  Error emitJump(const Label& label) noexcept override;
+  Error emitPreCall(InvokeNode* invokeNode) noexcept override;
+
+  //! \}
+};
+
+//! \}
+//! \endcond
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // !ASMJIT_NO_COMPILER
+#endif // ASMJIT_ARM_A64RAPASS_P_H_INCLUDED
diff --git a/lib/lepton/asmjit/arm/a64utils.h b/lib/lepton/asmjit/arm/a64utils.h
new file mode 100644
index 0000000000..4a88ca5172
--- /dev/null
+++ b/lib/lepton/asmjit/arm/a64utils.h
@@ -0,0 +1,179 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_ARM_A64UTILS_H_INCLUDED
+#define ASMJIT_ARM_A64UTILS_H_INCLUDED
+
+#include "../arm/a64globals.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(a64)
+
+//! \addtogroup asmjit_a64
+//! \{
+
+//! Public utilities and helpers for targeting AArch64 architecture.
+namespace Utils {
+
+//! Decomposed fields of a logical immediate value (AArch64).
+struct LogicalImm {
+  uint32_t n; // N field; encodeLogicalImm() sets it to 1 only when the element width is 64.
+  uint32_t s; // ImmS field (see the encoding table of encodeLogicalImm()).
+  uint32_t r; // ImmR field (see the encoding table of encodeLogicalImm()).
+};
+
+//! Encodes the given `imm` value of the given `width` to a logical immediate value represented as N, S, and R fields
+//! and writes these fields to `out`.
+//!
+//! Encoding Table:
+//!
+//! ```
+//! +---+--------+--------+------+
+//! | N |  ImmS  |  ImmR  | Size |
+//! +---+--------+--------+------+
+//! | 1 | ssssss | rrrrrr |  64  |
+//! | 0 | 0sssss | .rrrrr |  32  |
+//! | 0 | 10ssss | ..rrrr |  16  |
+//! | 0 | 110sss | ...rrr |  8   |
+//! | 0 | 1110ss | ....rr |  4   |
+//! | 0 | 11110s | .....r |  2   |
+//! +---+--------+--------+------+
+//! ```
+ASMJIT_MAYBE_UNUSED
+static bool encodeLogicalImm(uint64_t imm, uint32_t width, a64::Utils::LogicalImm* out) noexcept {
+  // Determine the element width, which must be 2, 4, 8, 16, 32, or 64 bits.
+  do {
+    width /= 2;
+    uint64_t mask = (uint64_t(1) << width) - 1u;
+    if ((imm & mask) != ((imm >> width) & mask)) {
+      width *= 2; // The two halves differ, so undo the halving and stop.
+      break;
+    }
+  } while (width > 2);
+
+  // Patterns of all zeros and all ones are not encodable.
+  uint64_t lsbMask = Support::lsbMask<uint64_t>(width);
+  imm &= lsbMask; // From here on only a single element is inspected.
+
+  if (imm == 0 || imm == lsbMask)
+    return false;
+
+  // Inspect the pattern and get the most important bit indexes.
+  //
+  //         oIndex <-+      +-> zIndex
+  //                  |      |
+  // |..zeros..|oCount|zCount|..ones..|
+  // |000000000|111111|000000|11111111|
+
+  uint32_t zIndex = Support::ctz(~imm); // Number of trailing one bits.
+  uint64_t zImm = imm ^ ((uint64_t(1) << zIndex) - 1); // `imm` with the trailing ones cleared.
+  uint32_t zCount = (zImm ? Support::ctz(zImm) : width) - zIndex; // Length of the zero run starting at zIndex.
+
+  uint32_t oIndex = zIndex + zCount;
+  uint64_t oImm = ~(zImm ^ Support::lsbMask<uint64_t>(oIndex));
+  uint32_t oCount = (oImm ? Support::ctz(oImm) : width) - (oIndex); // Length of the one run starting at oIndex.
+
+  // Verify whether the bit-pattern is encodable.
+  uint64_t mustBeZero = oImm ^ ~Support::lsbMask<uint64_t>(oIndex + oCount);
+  if (mustBeZero != 0 || (zIndex > 0 && width - (oIndex + oCount) != 0))
+    return false;
+
+  out->n = width == 64; // N is set only for a 64-bit element.
+  out->s = (oCount + zIndex - 1) | (Support::neg(width * 2) & 0x3F);
+  out->r = width - oIndex;
+  return true;
+}
+
+//! Returns true if the given `imm` value is encodable as a logical immediate. The `width` argument describes the
+//! width of the operation, and must be either 32 or 64. This function can be used to test whether an immediate
+//! value can be used with AND, ANDS, BIC, BICS, EON, EOR, ORN, and ORR instructions.
+ASMJIT_MAYBE_UNUSED
+static inline bool isLogicalImm(uint64_t imm, uint32_t width) noexcept {
+  LogicalImm dummy; // Receives the encoded fields, which are then discarded.
+  return encodeLogicalImm(imm, width, &dummy);
+}
+
+//! Returns true if the given `imm` value is a byte mask. Byte mask has each byte part of the value set to either
+//! 0x00 or 0xFF. Some ARM instructions accept immediates that form a byte-mask and this function can be used to
+//! verify that the immediate is encodable before using the value.
+template<typename T>
+static inline bool isByteMaskImm8(const T& imm) noexcept {
+  constexpr T kMask = T(0x0101010101010101 & Support::allOnes<T>()); // 0x01 in every byte, limited to the bits of T.
+  return imm == (imm & kMask) * T(255); // Every byte must equal its own lowest bit multiplied by 0xFF.
+}
+
+//! \cond
+//! A generic implementation that checks whether a floating point value can be converted to ARM Imm8.
+template<typename T, uint32_t kNumBBits, uint32_t kNumCDEFGHBits, uint32_t kNumZeroBits>
+static inline bool isFPImm8Generic(T val) noexcept {
+  constexpr uint32_t kAllBsMask = Support::lsbMask<uint32_t>(kNumBBits);
+  constexpr uint32_t kB0Pattern = Support::bitMask(kNumBBits - 1); // Presumably only bit (kNumBBits - 1) set - confirm bitMask().
+  constexpr uint32_t kB1Pattern = kAllBsMask ^ kB0Pattern; // Complement of kB0Pattern within the B bits.
+
+  T immZ = val & Support::lsbMask<T>(kNumZeroBits); // Low kNumZeroBits bits of the value.
+  uint32_t immB = uint32_t(val >> (kNumZeroBits + kNumCDEFGHBits)) & kAllBsMask; // B bits, located above the zero and cdefgh bits.
+
+  // ImmZ must be all zeros and ImmB must either be B0 or B1 pattern.
+  return immZ == 0 && (immB == kB0Pattern || immB == kB1Pattern);
+}
+//! \endcond
+
+//! Returns true if the given half precision floating point `val` can be encoded as ARM IMM8 value, which represents
+//! a limited set of floating point immediate values, which can be used with FMOV instruction.
+//!
+//! The floating point must have bits distributed in the following way:
+//!
+//! ```
+//! [aBbbcdef|gh000000]
+//! ```
+static inline bool isFP16Imm8(uint32_t val) noexcept { return isFPImm8Generic<uint32_t, 3, 6, 6>(val); }
+
+//! Returns true if the given single precision floating point `val` can be encoded as ARM IMM8 value, which represents
+//! a limited set of floating point immediate values, which can be used with FMOV instruction.
+//!
+//! The floating point must have bits distributed in the following way:
+//!
+//! ```
+//! [aBbbbbbc|defgh000|00000000|00000000]
+//! ```
+static inline bool isFP32Imm8(uint32_t val) noexcept { return isFPImm8Generic<uint32_t, 6, 6, 19>(val); }
+//! \overload
+static inline bool isFP32Imm8(float val) noexcept { return isFP32Imm8(Support::bitCast<uint32_t>(val)); }
+
+//! Returns true if the given double precision floating point `val` can be encoded as ARM IMM8 value, which represents
+//! a limited set of floating point immediate values, which can be used with FMOV instruction.
+//!
+//! The floating point must have bits distributed in the following way:
+//!
+//! ```
+//! [aBbbbbbb|bbcdefgh|00000000|00000000|00000000|00000000|00000000|00000000]
+//! ```
+static inline bool isFP64Imm8(uint64_t val) noexcept { return isFPImm8Generic<uint64_t, 9, 6, 48>(val); }
+//! \overload
+static inline bool isFP64Imm8(double val) noexcept { return isFP64Imm8(Support::bitCast<uint64_t>(val)); }
+
+//! \cond
+template<typename T, uint32_t kNumBBits, uint32_t kNumCDEFGHBits, uint32_t kNumZeroBits>
+static inline uint32_t encodeFPToImm8Generic(T val) noexcept {
+  uint32_t bits = uint32_t(val >> kNumZeroBits); // Drop the low kNumZeroBits bits.
+  return ((bits >> (kNumBBits + kNumCDEFGHBits - 7)) & 0x80u) | (bits & 0x7F); // Top bit (`a`) goes to bit 7; low 7 bits are kept.
+}
+//! \endcond
+
+//! Encodes a double precision floating point value into IMM8 format.
+//!
+//! \note This function expects that `isFP64Imm8(val) == true` so it doesn't perform any checks of the value and just
+//! rearranges some bits into Imm8 order.
+static inline uint32_t encodeFP64ToImm8(uint64_t val) noexcept { return encodeFPToImm8Generic<uint64_t, 9, 6, 48>(val); }
+//! \overload
+static inline uint32_t encodeFP64ToImm8(double val) noexcept { return encodeFP64ToImm8(Support::bitCast<uint64_t>(val)); }
+
+} // {Utils}
+
+//! \}
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // ASMJIT_ARM_A64UTILS_H_INCLUDED
+
diff --git a/lib/lepton/asmjit/arm/armformatter.cpp b/lib/lepton/asmjit/arm/armformatter.cpp
new file mode 100644
index 0000000000..0432043106
--- /dev/null
+++ b/lib/lepton/asmjit/arm/armformatter.cpp
@@ -0,0 +1,143 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#ifndef ASMJIT_NO_LOGGING
+
+#include "../core/misc_p.h"
+#include "../core/support.h"
+#include "../arm/armformatter_p.h"
+#include "../arm/armoperand.h"
+#include "../arm/a64instapi_p.h"
+#include "../arm/a64instdb_p.h"
+
+#ifndef ASMJIT_NO_COMPILER
+  #include "../core/compiler.h"
+#endif
+
+ASMJIT_BEGIN_SUB_NAMESPACE(arm)
+
+// arm::FormatterInternal - Format Feature
+// =======================================
+
+Error FormatterInternal::formatFeature(String& sb, uint32_t featureId) noexcept {
+  // @EnumStringBegin{"enum": "CpuFeatures::ARM", "output": "sFeature", "strip": "k"}@
+  static const char sFeatureString[] =
+    "None\0"
+    "THUMB\0"
+    "THUMBv2\0"
+    "ARMv6\0"
+    "ARMv7\0"
+    "ARMv8a\0"
+    "ARMv8_1a\0"
+    "ARMv8_2a\0"
+    "ARMv8_3a\0"
+    "ARMv8_4a\0"
+    "ARMv8_5a\0"
+    "ARMv8_6a\0"
+    "ARMv8_7a\0"
+    "VFPv2\0"
+    "VFPv3\0"
+    "VFPv4\0"
+    "VFP_D32\0"
+    "AES\0"
+    "ALTNZCV\0"
+    "ASIMD\0"
+    "BF16\0"
+    "BTI\0"
+    "CPUID\0"
+    "CRC32\0"
+    "DGH\0"
+    "DIT\0"
+    "DOTPROD\0"
+    "EDSP\0"
+    "FCMA\0"
+    "FJCVTZS\0"
+    "FLAGM\0"
+    "FP16CONV\0"
+    "FP16FML\0"
+    "FP16FULL\0"
+    "FRINT\0"
+    "I8MM\0"
+    "IDIVA\0"
+    "IDIVT\0"
+    "LSE\0"
+    "MTE\0"
+    "RCPC_IMMO\0"
+    "RDM\0"
+    "PMU\0"
+    "PMULL\0"
+    "RNG\0"
+    "SB\0"
+    "SHA1\0"
+    "SHA2\0"
+    "SHA3\0"
+    "SHA512\0"
+    "SM3\0"
+    "SM4\0"
+    "SSBS\0"
+    "SVE\0"
+    "SVE_BF16\0"
+    "SVE_F32MM\0"
+    "SVE_F64MM\0"
+    "SVE_I8MM\0"
+    "SVE_PMULL\0"
+    "SVE2\0"
+    "SVE2_AES\0"
+    "SVE2_BITPERM\0"
+    "SVE2_SHA3\0"
+    "SVE2_SM4\0"
+    "TME\0"
+    "<Unknown>\0";
+
+  static const uint16_t sFeatureIndex[] = {
+    0, 5, 11, 19, 25, 31, 38, 47, 56, 65, 74, 83, 92, 101, 107, 113, 119, 127,
+    131, 139, 145, 150, 154, 160, 166, 170, 174, 182, 187, 192, 200, 206, 215,
+    223, 232, 238, 243, 249, 255, 259, 263, 273, 277, 281, 287, 291, 294, 299,
+    304, 309, 316, 320, 324, 329, 333, 342, 352, 362, 371, 381, 386, 395, 408,
+    418, 427, 431
+  };
+  // @EnumStringEnd@
+  // Ids above kMaxValue are clamped to kMaxValue + 1; presumably that slot is "<Unknown>" - confirm against the enum.
+  return sb.append(sFeatureString + sFeatureIndex[Support::min<uint32_t>(featureId, uint32_t(CpuFeatures::ARM::kMaxValue) + 1)]);
+}
+
+// arm::FormatterInternal - Format Constants
+// =========================================
+
+ASMJIT_FAVOR_SIZE Error FormatterInternal::formatCondCode(String& sb, CondCode cc) noexcept {
+  static const char condCodeData[] = // 16 two-letter names, each NUL-terminated (3 bytes), then the fallback text.
+    "al\0" "na\0"
+    "eq\0" "ne\0"
+    "cs\0" "cc\0" "mi\0" "pl\0" "vs\0" "vc\0"
+    "hi\0" "ls\0" "ge\0" "lt\0" "gt\0" "le\0"
+    "<Unknown>";
+  return sb.append(condCodeData + Support::min<uint32_t>(uint32_t(cc), 16u) * 3); // Values >= 16 select "<Unknown>".
+}
+
+ASMJIT_FAVOR_SIZE Error FormatterInternal::formatShiftOp(String& sb, ShiftOp shiftOp) noexcept {
+  const char* str = "<Unknown>"; // Kept for any value not handled by the switch below.
+  switch (shiftOp) {
+    case ShiftOp::kLSL: str = "lsl"; break;
+    case ShiftOp::kLSR: str = "lsr"; break;
+    case ShiftOp::kASR: str = "asr"; break;
+    case ShiftOp::kROR: str = "ror"; break;
+    case ShiftOp::kRRX: str = "rrx"; break;
+    case ShiftOp::kMSL: str = "msl"; break;
+    case ShiftOp::kUXTB: str = "uxtb"; break;
+    case ShiftOp::kUXTH: str = "uxth"; break;
+    case ShiftOp::kUXTW: str = "uxtw"; break;
+    case ShiftOp::kUXTX: str = "uxtx"; break;
+    case ShiftOp::kSXTB: str = "sxtb"; break;
+    case ShiftOp::kSXTH: str = "sxth"; break;
+    case ShiftOp::kSXTW: str = "sxtw"; break;
+    case ShiftOp::kSXTX: str = "sxtx"; break;
+  }
+  return sb.append(str); // Appends the lowercase mnemonic (or the fallback) to `sb`.
+}
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // !ASMJIT_NO_LOGGING
diff --git a/lib/lepton/asmjit/arm/armformatter_p.h b/lib/lepton/asmjit/arm/armformatter_p.h
new file mode 100644
index 0000000000..582173054a
--- /dev/null
+++ b/lib/lepton/asmjit/arm/armformatter_p.h
@@ -0,0 +1,44 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_ARM_ARMFORMATTER_P_H_INCLUDED
+#define ASMJIT_ARM_ARMFORMATTER_P_H_INCLUDED
+
+#include "../core/api-config.h"
+#ifndef ASMJIT_NO_LOGGING
+
+#include "../core/formatter.h"
+#include "../core/string.h"
+#include "../arm/armglobals.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(arm)
+
+//! \cond INTERNAL
+//! \addtogroup asmjit_arm
+//! \{
+
+namespace FormatterInternal {
+// Each function below appends the textual name of the given value to `sb` and returns the resulting Error.
+Error ASMJIT_CDECL formatFeature(
+  String& sb,
+  uint32_t featureId) noexcept;
+
+Error ASMJIT_CDECL formatCondCode(
+  String& sb,
+  CondCode cc) noexcept;
+
+Error ASMJIT_CDECL formatShiftOp(
+  String& sb,
+  ShiftOp shiftOp) noexcept;
+
+} // {FormatterInternal}
+
+//! \}
+//! \endcond
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // !ASMJIT_NO_LOGGING
+#endif // ASMJIT_ARM_ARMFORMATTER_P_H_INCLUDED
diff --git a/lib/lepton/asmjit/arm/armglobals.h b/lib/lepton/asmjit/arm/armglobals.h
new file mode 100644
index 0000000000..506646f806
--- /dev/null
+++ b/lib/lepton/asmjit/arm/armglobals.h
@@ -0,0 +1,21 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_ARM_ARMGLOBALS_H_INCLUDED
+#define ASMJIT_ARM_ARMGLOBALS_H_INCLUDED
+
+#include "../core/archcommons.h"
+#include "../core/inst.h"
+
+//! \namespace asmjit::arm
+//! \ingroup asmjit_arm
+//!
+//! API shared between AArch32 & AArch64 backends.
+
+ASMJIT_BEGIN_SUB_NAMESPACE(arm)
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // ASMJIT_ARM_ARMGLOBALS_H_INCLUDED
diff --git a/lib/lepton/asmjit/arm/armoperand.h b/lib/lepton/asmjit/arm/armoperand.h
new file mode 100644
index 0000000000..ede829d9c1
--- /dev/null
+++ b/lib/lepton/asmjit/arm/armoperand.h
@@ -0,0 +1,621 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_ARM_ARMOPERAND_H_INCLUDED
+#define ASMJIT_ARM_ARMOPERAND_H_INCLUDED
+
+#include "../core/archtraits.h"
+#include "../core/operand.h"
+#include "../core/type.h"
+#include "../arm/armglobals.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(arm)
+
+//! \addtogroup asmjit_arm
+//! \{
+
+class Reg;
+class Mem;
+
+class Gp;
+class GpW;
+class GpX;
+
+class Vec;
+class VecB;
+class VecH;
+class VecS;
+class VecD;
+class VecV;
+
+//! Register traits (ARM/AArch64).
+//!
+//! Register traits contains information about a particular register type. It's used by asmjit to setup register
+//! information on-the-fly and to populate tables that contain register information (this way it's possible to
+//! change register types and groups without having to reorder these tables).
+template<RegType kRegType>
+struct RegTraits : public BaseRegTraits {};
+
+//! \cond
+// <--------------------+-----+-------------------------+------------------------+---+---+------------------+
+//                      | Reg |        Reg-Type         |        Reg-Group       |Sz |Cnt|      TypeId      |
+// <--------------------+-----+-------------------------+------------------------+---+---+------------------+
+ASMJIT_DEFINE_REG_TRAITS(GpW  , RegType::kARM_GpW       , RegGroup::kGp          , 4 , 32, TypeId::kInt32   );
+ASMJIT_DEFINE_REG_TRAITS(GpX  , RegType::kARM_GpX       , RegGroup::kGp          , 8 , 32, TypeId::kInt64   );
+ASMJIT_DEFINE_REG_TRAITS(VecB , RegType::kARM_VecB      , RegGroup::kVec         , 1 , 32, TypeId::kVoid    );
+ASMJIT_DEFINE_REG_TRAITS(VecH , RegType::kARM_VecH      , RegGroup::kVec         , 2 , 32, TypeId::kVoid    );
+ASMJIT_DEFINE_REG_TRAITS(VecS , RegType::kARM_VecS      , RegGroup::kVec         , 4 , 32, TypeId::kInt32x1 );
+ASMJIT_DEFINE_REG_TRAITS(VecD , RegType::kARM_VecD      , RegGroup::kVec         , 8 , 32, TypeId::kInt32x2 );
+ASMJIT_DEFINE_REG_TRAITS(VecV , RegType::kARM_VecV      , RegGroup::kVec         , 16, 32, TypeId::kInt32x4 );
+//! \endcond
+
+//! Register (ARM).
+class Reg : public BaseReg {
+public:
+  ASMJIT_DEFINE_ABSTRACT_REG(Reg, BaseReg)
+
+  //! Gets whether the register is a `R|W` register (32-bit).
+  inline constexpr bool isGpW() const noexcept { return baseSignature() == RegTraits<RegType::kARM_GpW>::kSignature; }
+  //! Gets whether the register is an `X` register (64-bit).
+  inline constexpr bool isGpX() const noexcept { return baseSignature() == RegTraits<RegType::kARM_GpX>::kSignature; }
+  //! Gets whether the register is a VEC-B register (8-bit).
+  inline constexpr bool isVecB() const noexcept { return baseSignature() == RegTraits<RegType::kARM_VecB>::kSignature; }
+  //! Gets whether the register is a VEC-H register (16-bit).
+  inline constexpr bool isVecH() const noexcept { return baseSignature() == RegTraits<RegType::kARM_VecH>::kSignature; }
+  //! Gets whether the register is a VEC-S register (32-bit).
+  inline constexpr bool isVecS() const noexcept { return baseSignature() == RegTraits<RegType::kARM_VecS>::kSignature; }
+  //! Gets whether the register is a VEC-D register (64-bit).
+  inline constexpr bool isVecD() const noexcept { return baseSignature() == RegTraits<RegType::kARM_VecD>::kSignature; }
+  //! Gets whether the register is a VEC-Q register (128-bit).
+  inline constexpr bool isVecQ() const noexcept { return baseSignature() == RegTraits<RegType::kARM_VecV>::kSignature; }
+
+  //! Gets whether the register is either VEC-D (64-bit) or VEC-Q (128-bit).
+  inline constexpr bool isVecDOrQ() const noexcept { return uint32_t(type()) - uint32_t(RegType::kARM_VecD) <= 1u; }
+
+  //! Gets whether the register is a VEC-V register (128-bit).
+  inline constexpr bool isVecV() const noexcept { return baseSignature() == RegTraits<RegType::kARM_VecV>::kSignature; }
+
+  template<RegType kRegType>
+  inline void setRegT(uint32_t id) noexcept {
+    setSignature(RegTraits<kRegType>::kSignature);
+    setId(id);
+  }
+
+  inline void setTypeAndId(RegType type, uint32_t id) noexcept {
+    setSignature(signatureOf(type));
+    setId(id);
+  }
+
+  static inline RegGroup groupOf(RegType type) noexcept { return ArchTraits::byArch(Arch::kAArch64).regTypeToGroup(type); }
+  static inline TypeId typeIdOf(RegType type) noexcept { return ArchTraits::byArch(Arch::kAArch64).regTypeToTypeId(type); }
+  static inline OperandSignature signatureOf(RegType type) noexcept { return ArchTraits::byArch(Arch::kAArch64).regTypeToSignature(type); }
+
+  template<RegType kRegType>
+  static inline RegGroup groupOfT() noexcept { return RegTraits<kRegType>::kGroup; }
+
+  template<RegType kRegType>
+  static inline TypeId typeIdOfT() noexcept { return RegTraits<kRegType>::kTypeId; }
+
+  template<RegType kRegType>
+  static inline OperandSignature signatureOfT() noexcept { return RegTraits<kRegType>::kSignature; }
+
+  static inline bool isGpW(const Operand_& op) noexcept { return op.as<Reg>().isGpW(); }
+  static inline bool isGpX(const Operand_& op) noexcept { return op.as<Reg>().isGpX(); }
+  static inline bool isVecB(const Operand_& op) noexcept { return op.as<Reg>().isVecB(); }
+  static inline bool isVecH(const Operand_& op) noexcept { return op.as<Reg>().isVecH(); }
+  static inline bool isVecS(const Operand_& op) noexcept { return op.as<Reg>().isVecS(); }
+  static inline bool isVecD(const Operand_& op) noexcept { return op.as<Reg>().isVecD(); }
+  static inline bool isVecQ(const Operand_& op) noexcept { return op.as<Reg>().isVecQ(); }
+  static inline bool isVecV(const Operand_& op) noexcept { return op.as<Reg>().isVecV(); }
+
+  static inline bool isGpW(const Operand_& op, uint32_t id) noexcept { return isGpW(op) & (op.id() == id); }
+  static inline bool isGpX(const Operand_& op, uint32_t id) noexcept { return isGpX(op) & (op.id() == id); }
+  static inline bool isVecB(const Operand_& op, uint32_t id) noexcept { return isVecB(op) & (op.id() == id); }
+  static inline bool isVecH(const Operand_& op, uint32_t id) noexcept { return isVecH(op) & (op.id() == id); }
+  static inline bool isVecS(const Operand_& op, uint32_t id) noexcept { return isVecS(op) & (op.id() == id); }
+  static inline bool isVecD(const Operand_& op, uint32_t id) noexcept { return isVecD(op) & (op.id() == id); }
+  static inline bool isVecQ(const Operand_& op, uint32_t id) noexcept { return isVecQ(op) & (op.id() == id); }
+  static inline bool isVecV(const Operand_& op, uint32_t id) noexcept { return isVecV(op) & (op.id() == id); }
+};
+
+//! General purpose register (ARM).
+class Gp : public Reg {
+public:
+  ASMJIT_DEFINE_ABSTRACT_REG(Gp, Reg)
+
+  //! Special register id.
+  enum Id : uint32_t {
+    //! Register that depends on OS, could be used as TLS offset.
+    kIdOs = 18,
+    //! Frame pointer.
+    kIdFp = 29,
+    //! Link register.
+    kIdLr = 30,
+    //! Stack register id.
+    kIdSp = 31,
+    //! Zero register id.
+    //!
+    //! Although zero register has the same id as stack register it has a special treatment, because we need to be
+    //! able to distinguish between these two at API level. Some instructions were designed to be used with SP and
+    //! some other with ZR - so we need a way to distinguish these two to make sure we emit the right thing.
+    //!
+    //! The number 63 is not random, when you perform `id & 31` you would always get 31 for both SP and ZR inputs,
+    //! which is the identifier used by AArch64 ISA to encode either SP or ZR depending on the instruction.
+    kIdZr = 63
+  };
+
+  inline constexpr bool isZR() const noexcept { return id() == kIdZr; } // True only for id 63 (kIdZr).
+  inline constexpr bool isSP() const noexcept { return id() == kIdSp; } // True only for id 31 (kIdSp).
+
+  //! Cast this register to a 32-bit R|W.
+  inline GpW w() const noexcept;
+  //! Cast this register to a 64-bit X.
+  inline GpX x() const noexcept;
+};
+
+//! Vector register (ARM).
+class Vec : public Reg {
+public:
+  ASMJIT_DEFINE_ABSTRACT_REG(Vec, Reg)
+
+  //! Additional signature bits used by arm::Vec.
+  enum AdditionalBits : uint32_t {
+    // Register element type (3 bits).
+    // |........|........|.XXX....|........|
+    kSignatureRegElementTypeShift = 12,
+    kSignatureRegElementTypeMask = 0x07 << kSignatureRegElementTypeShift,
+
+    // Register has element index (1 bit).
+    // |........|........|X.......|........|
+    kSignatureRegElementFlagShift = 15,
+    kSignatureRegElementFlagMask = 0x01 << kSignatureRegElementFlagShift,
+
+    // Register element index (4 bits).
+    // |........|....XXXX|........|........|
+    kSignatureRegElementIndexShift = 16,
+    kSignatureRegElementIndexMask = 0x0F << kSignatureRegElementIndexShift
+  };
+
+  //! Element type.
+  enum ElementType : uint32_t {
+    //! No element type specified.
+    kElementTypeNone = 0,
+    //! Byte elements (B8 or B16).
+    kElementTypeB,
+    //! Halfword elements (H4 or H8).
+    kElementTypeH,
+    //! Singleword elements (S2 or S4).
+    kElementTypeS,
+    //! Doubleword elements (D2).
+    kElementTypeD,
+    //! Byte elements grouped by 4 bytes (B4).
+    //!
+    //! \note This element-type is only used by few instructions.
+    kElementTypeB4,
+    //! Halfword elements grouped by 2 halfwords (H2).
+    //!
+    //! \note This element-type is only used by few instructions.
+    kElementTypeH2,
+
+    //! Count of element types.
+    kElementTypeCount
+  };
+
+  //! \cond
+  //! Shortcuts.
+  enum SignatureReg : uint32_t {
+    kSignatureElementB  = kElementTypeB  << kSignatureRegElementTypeShift,
+    kSignatureElementH  = kElementTypeH  << kSignatureRegElementTypeShift,
+    kSignatureElementS  = kElementTypeS  << kSignatureRegElementTypeShift,
+    kSignatureElementD  = kElementTypeD  << kSignatureRegElementTypeShift,
+    kSignatureElementB4 = kElementTypeB4 << kSignatureRegElementTypeShift,
+    kSignatureElementH2 = kElementTypeH2 << kSignatureRegElementTypeShift
+  };
+  //! \endcond
+
+  //! Returns whether the register has associated an element type.
+  inline constexpr bool hasElementType() const noexcept { return _signature.hasField<kSignatureRegElementTypeMask>(); }
+  //! Returns whether the register has element index (it's an element index access).
+  inline constexpr bool hasElementIndex() const noexcept { return _signature.hasField<kSignatureRegElementFlagMask>(); }
+  //! Returns whether the register has element type or element index (or both).
+  inline constexpr bool hasElementTypeOrIndex() const noexcept { return _signature.hasField<kSignatureRegElementTypeMask | kSignatureRegElementFlagMask>(); }
+
+  //! Returns element type of the register.
+  inline constexpr uint32_t elementType() const noexcept { return _signature.getField<kSignatureRegElementTypeMask>(); }
+  //! Sets element type of the register to `elementType`.
+  inline void setElementType(uint32_t elementType) noexcept { _signature.setField<kSignatureRegElementTypeMask>(elementType); }
+  //! Resets element type to none.
+  inline void resetElementType() noexcept { _signature.setField<kSignatureRegElementTypeMask>(0); }
+
+  //! Returns element index of the register.
+  inline constexpr uint32_t elementIndex() const noexcept { return _signature.getField<kSignatureRegElementIndexMask>(); }
+  //! Sets element index of the register to `elementIndex`.
+  inline void setElementIndex(uint32_t elementIndex) noexcept {
+    _signature |= kSignatureRegElementFlagMask;
+    _signature.setField<kSignatureRegElementIndexMask>(elementIndex);
+  }
+  //! Resets element index of the register.
+  inline void resetElementIndex() noexcept {
+    _signature &= ~(kSignatureRegElementFlagMask | kSignatureRegElementIndexMask);
+  }
+
+  inline constexpr bool isVecB8() const noexcept { return _signature.subset(kBaseSignatureMask | kSignatureRegElementTypeMask) == (RegTraits<RegType::kARM_VecD>::kSignature | kSignatureElementB); }
+  inline constexpr bool isVecH4() const noexcept { return _signature.subset(kBaseSignatureMask | kSignatureRegElementTypeMask) == (RegTraits<RegType::kARM_VecD>::kSignature | kSignatureElementH); }
+  inline constexpr bool isVecS2() const noexcept { return _signature.subset(kBaseSignatureMask | kSignatureRegElementTypeMask) == (RegTraits<RegType::kARM_VecD>::kSignature | kSignatureElementS); }
+  inline constexpr bool isVecD1() const noexcept { return _signature.subset(kBaseSignatureMask | kSignatureRegElementTypeMask) == (RegTraits<RegType::kARM_VecD>::kSignature); }
+
+  inline constexpr bool isVecB16() const noexcept { return _signature.subset(kBaseSignatureMask | kSignatureRegElementTypeMask) == (RegTraits<RegType::kARM_VecV>::kSignature | kSignatureElementB); }
+  inline constexpr bool isVecH8() const noexcept { return _signature.subset(kBaseSignatureMask | kSignatureRegElementTypeMask) == (RegTraits<RegType::kARM_VecV>::kSignature | kSignatureElementH); }
+  inline constexpr bool isVecS4() const noexcept { return _signature.subset(kBaseSignatureMask | kSignatureRegElementTypeMask) == (RegTraits<RegType::kARM_VecV>::kSignature | kSignatureElementS); }
+  inline constexpr bool isVecD2() const noexcept { return _signature.subset(kBaseSignatureMask | kSignatureRegElementTypeMask) == (RegTraits<RegType::kARM_VecV>::kSignature | kSignatureElementD); }
+  inline constexpr bool isVecB4x4() const noexcept { return _signature.subset(kBaseSignatureMask | kSignatureRegElementTypeMask) == (RegTraits<RegType::kARM_VecV>::kSignature | kSignatureElementB4); }
+  inline constexpr bool isVecH2x4() const noexcept { return _signature.subset(kBaseSignatureMask | kSignatureRegElementTypeMask) == (RegTraits<RegType::kARM_VecV>::kSignature | kSignatureElementH2); }
+
+  //! Creates a cloned register with element access.
+  inline Vec at(uint32_t elementIndex) const noexcept {
+    return Vec((signature() & ~kSignatureRegElementIndexMask) | (elementIndex << kSignatureRegElementIndexShift) | kSignatureRegElementFlagMask, id());
+  }
+
+  //! Cast this register to an 8-bit B register (scalar).
+  inline VecB b() const noexcept;
+  //! Cast this register to a 16-bit H register (scalar).
+  inline VecH h() const noexcept;
+  //! Cast this register to a 32-bit S register (scalar).
+  inline VecS s() const noexcept;
+  //! Cast this register to a 64-bit D register (scalar).
+  inline VecD d() const noexcept;
+  //! Cast this register to a 128-bit Q register (scalar).
+  inline VecV q() const noexcept;
+  //! Cast this register to a 128-bit V register.
+  inline VecV v() const noexcept;
+
+  //! Cast this register to a 128-bit V.B[elementIndex] register.
+  inline VecV b(uint32_t elementIndex) const noexcept;
+  //! Cast this register to a 128-bit V.H[elementIndex] register.
+  inline VecV h(uint32_t elementIndex) const noexcept;
+  //! Cast this register to a 128-bit V.S[elementIndex] register.
+  inline VecV s(uint32_t elementIndex) const noexcept;
+  //! Cast this register to a 128-bit V.D[elementIndex] register.
+  inline VecV d(uint32_t elementIndex) const noexcept;
+  //! Cast this register to a 128-bit V.H2[elementIndex] register.
+  inline VecV h2(uint32_t elementIndex) const noexcept;
+  //! Cast this register to a 128-bit V.B4[elementIndex] register.
+  inline VecV b4(uint32_t elementIndex) const noexcept;
+
+  //! Cast this register to V.8B.
+  inline VecD b8() const noexcept;
+  //! Cast this register to V.16B.
+  inline VecV b16() const noexcept;
+  //! Cast this register to V.2H.
+  inline VecS h2() const noexcept;
+  //! Cast this register to V.4H.
+  inline VecD h4() const noexcept;
+  //! Cast this register to V.8H.
+  inline VecV h8() const noexcept;
+  //! Cast this register to V.2S.
+  inline VecD s2() const noexcept;
+  //! Cast this register to V.4S.
+  inline VecV s4() const noexcept;
+  //! Cast this register to V.2D.
+  inline VecV d2() const noexcept;
+
+  static inline constexpr OperandSignature _makeElementAccessSignature(uint32_t elementType, uint32_t elementIndex) noexcept {
+    return OperandSignature{
+      uint32_t(RegTraits<RegType::kARM_VecV>::kSignature)      |
+      uint32_t(kSignatureRegElementFlagMask)                   |
+      uint32_t(elementType << kSignatureRegElementTypeShift)   |
+      uint32_t(elementIndex << kSignatureRegElementIndexShift)};
+  }
+};
+
+//! 32-bit GPW (AArch64) and/or GPR (ARM/AArch32) register.
+class GpW : public Gp { ASMJIT_DEFINE_FINAL_REG(GpW, Gp, RegTraits<RegType::kARM_GpW>) };
+//! 64-bit GPX (AArch64) register.
+class GpX : public Gp { ASMJIT_DEFINE_FINAL_REG(GpX, Gp, RegTraits<RegType::kARM_GpX>) };
+
+//! 8-bit view (B) of VFP/SIMD register.
+class VecB : public Vec { ASMJIT_DEFINE_FINAL_REG(VecB, Vec, RegTraits<RegType::kARM_VecB>) };
+//! 16-bit view (H) of VFP/SIMD register.
+class VecH : public Vec { ASMJIT_DEFINE_FINAL_REG(VecH, Vec, RegTraits<RegType::kARM_VecH>) };
+//! 32-bit view (S) of VFP/SIMD register.
+class VecS : public Vec { ASMJIT_DEFINE_FINAL_REG(VecS, Vec, RegTraits<RegType::kARM_VecS>) };
+//! 64-bit view (D) of VFP/SIMD register.
+class VecD : public Vec { ASMJIT_DEFINE_FINAL_REG(VecD, Vec, RegTraits<RegType::kARM_VecD>) };
+//! 128-bit vector register (Q or V).
+class VecV : public Vec { ASMJIT_DEFINE_FINAL_REG(VecV, Vec, RegTraits<RegType::kARM_VecV>) };
+
+inline GpW Gp::w() const noexcept { return GpW(id()); }
+inline GpX Gp::x() const noexcept { return GpX(id()); }
+
+inline VecB Vec::b() const noexcept { return VecB(id()); }
+inline VecH Vec::h() const noexcept { return VecH(id()); }
+inline VecS Vec::s() const noexcept { return VecS(id()); }
+inline VecD Vec::d() const noexcept { return VecD(id()); }
+inline VecV Vec::q() const noexcept { return VecV(id()); } // Same operand as v(): Q and V both map to VecV.
+inline VecV Vec::v() const noexcept { return VecV(id()); }
+
+inline VecV Vec::b(uint32_t elementIndex) const noexcept { return VecV(_makeElementAccessSignature(kElementTypeB, elementIndex), id()); }
+inline VecV Vec::h(uint32_t elementIndex) const noexcept { return VecV(_makeElementAccessSignature(kElementTypeH, elementIndex), id()); }
+inline VecV Vec::s(uint32_t elementIndex) const noexcept { return VecV(_makeElementAccessSignature(kElementTypeS, elementIndex), id()); }
+inline VecV Vec::d(uint32_t elementIndex) const noexcept { return VecV(_makeElementAccessSignature(kElementTypeD, elementIndex), id()); }
+inline VecV Vec::h2(uint32_t elementIndex) const noexcept { return VecV(_makeElementAccessSignature(kElementTypeH2, elementIndex), id()); }
+inline VecV Vec::b4(uint32_t elementIndex) const noexcept { return VecV(_makeElementAccessSignature(kElementTypeB4, elementIndex), id()); }
+
+inline VecD Vec::b8() const noexcept { return VecD(OperandSignature{VecD::kSignature | kSignatureElementB}, id()); }
+inline VecS Vec::h2() const noexcept { return VecS(OperandSignature{VecS::kSignature | kSignatureElementH}, id()); } // 32-bit S view with H elements.
+inline VecD Vec::h4() const noexcept { return VecD(OperandSignature{VecD::kSignature | kSignatureElementH}, id()); }
+inline VecD Vec::s2() const noexcept { return VecD(OperandSignature{VecD::kSignature | kSignatureElementS}, id()); }
+inline VecV Vec::b16() const noexcept { return VecV(OperandSignature{VecV::kSignature | kSignatureElementB}, id()); }
+inline VecV Vec::h8() const noexcept { return VecV(OperandSignature{VecV::kSignature | kSignatureElementH}, id()); }
+inline VecV Vec::s4() const noexcept { return VecV(OperandSignature{VecV::kSignature | kSignatureElementS}, id()); }
+inline VecV Vec::d2() const noexcept { return VecV(OperandSignature{VecV::kSignature | kSignatureElementD}, id()); }
+
+#ifndef _DOXYGEN
+namespace regs {
+#endif
+
+//! Creates a 32-bit W register operand (ARM/AArch64).
+static inline constexpr GpW w(uint32_t id) noexcept { return GpW(id); }
+//! Creates a 64-bit X register operand (AArch64).
+static inline constexpr GpX x(uint32_t id) noexcept { return GpX(id); }
+//! Creates a 32-bit S register operand (ARM/AArch64).
+static inline constexpr VecS s(uint32_t id) noexcept { return VecS(id); }
+//! Creates a 64-bit D register operand (ARM/AArch64).
+static inline constexpr VecD d(uint32_t id) noexcept { return VecD(id); }
+//! Creates a 128-bit V register operand (ARM/AArch64).
+static inline constexpr VecV v(uint32_t id) noexcept { return VecV(id); }
+
+#ifndef _DOXYGEN
+} // {regs}
+
+// Make `arm::regs` accessible through `arm` namespace as well.
+using namespace regs;
+#endif
+
+//! Memory operand (ARM).
+class Mem : public BaseMem {
+public:
+  //! \cond INTERNAL
+  //! Additional bits of operand's signature used by `arm::Mem`.
+  enum AdditionalBits : uint32_t {
+    // Index shift value (5 bits).
+    // |........|.....XXX|XX......|........|
+    kSignatureMemShiftValueShift = 14,
+    kSignatureMemShiftValueMask = 0x1Fu << kSignatureMemShiftValueShift,
+
+    // Predicate - shift operation type or offset mode (4 bits).
+    // |........|XXXX....|........|........|
+    kSignatureMemPredicateShift = 20,
+    kSignatureMemPredicateMask = 0x0Fu << kSignatureMemPredicateShift
+  };
+  //! \endcond
+
+  //! Memory offset mode.
+  //!
+  //! Additional constants that can be used with the `predicate`.
+  enum OffsetMode : uint32_t {
+    //! Pre-index "[BASE, #Offset {, <shift>}]!" with write-back.
+    kOffsetPreIndex = 0xE,
+    //! Post-index "[BASE], #Offset {, <shift>}" with write-back.
+    kOffsetPostIndex = 0xF
+  };
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Construct a default `Mem` operand, that points to [0].
+  inline constexpr Mem() noexcept
+    : BaseMem() {}
+
+  inline constexpr Mem(const Mem& other) noexcept
+    : BaseMem(other) {}
+
+  inline explicit Mem(Globals::NoInit_) noexcept
+    : BaseMem(Globals::NoInit) {}
+
+  inline constexpr Mem(const Signature& signature, uint32_t baseId, uint32_t indexId, int32_t offset) noexcept
+    : BaseMem(signature, baseId, indexId, offset) {}
+
+  inline constexpr explicit Mem(const Label& base, int32_t off = 0, Signature signature = Signature{0}) noexcept
+    : BaseMem(Signature::fromOpType(OperandType::kMem) |
+              Signature::fromMemBaseType(RegType::kLabelTag) |
+              signature, base.id(), 0, off) {}
+
+  inline constexpr explicit Mem(const BaseReg& base, int32_t off = 0, Signature signature = Signature{0}) noexcept
+    : BaseMem(Signature::fromOpType(OperandType::kMem) |
+              Signature::fromMemBaseType(base.type()) |
+              signature, base.id(), 0, off) {}
+
+  inline constexpr Mem(const BaseReg& base, const BaseReg& index, Signature signature = Signature{0}) noexcept
+    : BaseMem(Signature::fromOpType(OperandType::kMem) |
+              Signature::fromMemBaseType(base.type()) |
+              Signature::fromMemIndexType(index.type()) |
+              signature, base.id(), index.id(), 0) {}
+
+  inline constexpr Mem(const BaseReg& base, const BaseReg& index, const Shift& shift, Signature signature = Signature{0}) noexcept
+    : BaseMem(Signature::fromOpType(OperandType::kMem) |
+              Signature::fromMemBaseType(base.type()) |
+              Signature::fromMemIndexType(index.type()) |
+              Signature::fromValue<kSignatureMemPredicateMask>(uint32_t(shift.op())) |
+              Signature::fromValue<kSignatureMemShiftValueMask>(shift.value()) |
+              signature, base.id(), index.id(), 0) {}
+
+  inline constexpr Mem(uint64_t base, Signature signature = Signature{0}) noexcept
+    : BaseMem(Signature::fromOpType(OperandType::kMem) |
+              signature, uint32_t(base >> 32), 0, int32_t(uint32_t(base & 0xFFFFFFFFu))) {}
+
+  //! \}
+
+  //! \name Overloaded Operators
+  //! \{
+
+  inline Mem& operator=(const Mem& other) noexcept = default;
+
+  //! \}
+
+  //! \name Clone
+  //! \{
+
+  //! Clones the memory operand.
+  inline constexpr Mem clone() const noexcept { return Mem(*this); }
+
+  //! Gets new memory operand adjusted by `off`.
+  inline Mem cloneAdjusted(int64_t off) const noexcept {
+    Mem result(*this);
+    result.addOffset(off);
+    return result;
+  }
+
+  //! Clones the memory operand and makes it pre-index.
+  inline Mem pre() const noexcept {
+    Mem result(*this);
+    result.setPredicate(kOffsetPreIndex);
+    return result;
+  }
+
+  //! Clones the memory operand, applies a given offset `off` and makes it pre-index.
+  inline Mem pre(int64_t off) const noexcept {
+    Mem result(*this);
+    result.setPredicate(kOffsetPreIndex);
+    result.addOffset(off);
+    return result;
+  }
+
+  //! Clones the memory operand and makes it post-index.
+  inline Mem post() const noexcept {
+    Mem result(*this);
+    result.setPredicate(kOffsetPostIndex);
+    return result;
+  }
+
+  //! Clones the memory operand, applies a given offset `off` and makes it post-index.
+  inline Mem post(int64_t off) const noexcept {
+    Mem result(*this);
+    result.setPredicate(kOffsetPostIndex);
+    result.addOffset(off);
+    return result;
+  }
+
+  //! \}
+
+  //! \name Base & Index
+  //! \{
+
+  //! Converts memory `baseType` and `baseId` to `arm::Reg` instance.
+  //!
+  //! The memory must have a valid base register otherwise the result will be wrong.
+  inline Reg baseReg() const noexcept { return Reg::fromTypeAndId(baseType(), baseId()); }
+
+  //! Converts memory `indexType` and `indexId` to `arm::Reg` instance.
+  //!
+  //! The memory must have a valid index register otherwise the result will be wrong.
+  inline Reg indexReg() const noexcept { return Reg::fromTypeAndId(indexType(), indexId()); }
+
+  using BaseMem::setIndex;
+
+  inline void setIndex(const BaseReg& index, uint32_t shift) noexcept {
+    setIndex(index);
+    setShift(shift);
+  }
+
+  //! \}
+
+  //! \name ARM Specific Features
+  //! \{
+
+  //! Gets whether the memory operand has shift (aka scale) constant.
+  inline constexpr bool hasShift() const noexcept { return _signature.hasField<kSignatureMemShiftValueMask>(); }
+  //! Gets the memory operand's shift (aka scale) constant.
+  inline constexpr uint32_t shift() const noexcept { return _signature.getField<kSignatureMemShiftValueMask>(); }
+  //! Sets the memory operand's shift (aka scale) constant.
+  inline void setShift(uint32_t shift) noexcept { _signature.setField<kSignatureMemShiftValueMask>(shift); }
+  //! Resets the memory operand's shift (aka scale) constant to zero.
+  inline void resetShift() noexcept { _signature.setField<kSignatureMemShiftValueMask>(0); }
+
+  //! Gets memory predicate (shift mode or offset mode), see \ref ShiftOp and \ref OffsetMode.
+  inline constexpr uint32_t predicate() const noexcept { return _signature.getField<kSignatureMemPredicateMask>(); }
+  //! Sets memory predicate to `predicate`, see \ref ShiftOp and \ref OffsetMode.
+  inline void setPredicate(uint32_t predicate) noexcept { _signature.setField<kSignatureMemPredicateMask>(predicate); }
+  //! Resets the predicate to zero, which selects LSL shift mode (default) and a fixed offset.
+  inline void resetPredicate() noexcept { _signature.setField<kSignatureMemPredicateMask>(0); }
+
+  inline constexpr bool isFixedOffset() const noexcept { return predicate() < kOffsetPreIndex; }
+  inline constexpr bool isPreOrPost() const noexcept { return predicate() >= kOffsetPreIndex; }
+  inline constexpr bool isPreIndex() const noexcept { return predicate() == kOffsetPreIndex; }
+  inline constexpr bool isPostIndex() const noexcept { return predicate() == kOffsetPostIndex; }
+
+  inline void resetToFixedOffset() noexcept { resetPredicate(); }
+  inline void makePreIndex() noexcept { setPredicate(kOffsetPreIndex); }
+  inline void makePostIndex() noexcept { setPredicate(kOffsetPostIndex); }
+
+  //! \}
+};
+
+//! Creates `[base.reg, offset]` memory operand (fixed offset mode).
+static inline constexpr Mem ptr(const Gp& base, int32_t offset = 0) noexcept {
+  return Mem(base, offset);
+}
+
+//! Creates `[base.reg, offset]!` memory operand (pre-index mode).
+static inline constexpr Mem ptr_pre(const Gp& base, int32_t offset = 0) noexcept {
+  return Mem(base, offset, OperandSignature::fromValue<Mem::kSignatureMemPredicateMask>(Mem::kOffsetPreIndex));
+}
+
+//! Creates `[base.reg], offset` memory operand (post-index mode).
+static inline constexpr Mem ptr_post(const Gp& base, int32_t offset = 0) noexcept {
+  return Mem(base, offset, OperandSignature::fromValue<Mem::kSignatureMemPredicateMask>(Mem::kOffsetPostIndex));
+}
+
+//! Creates `[base.reg, index.reg]` memory operand (register index without shift).
+static inline constexpr Mem ptr(const Gp& base, const Gp& index) noexcept {
+  return Mem(base, index);
+}
+
+//! Creates `[base.reg], index.reg` memory operand (post-index mode with register index).
+static inline constexpr Mem ptr_post(const Gp& base, const Gp& index) noexcept {
+  return Mem(base, index, OperandSignature::fromValue<Mem::kSignatureMemPredicateMask>(Mem::kOffsetPostIndex));
+}
+
+//! Creates `[base.reg, index.reg, SHIFT_OP #shift]` memory operand (shifted register index).
+static inline constexpr Mem ptr(const Gp& base, const Gp& index, const Shift& shift) noexcept {
+  return Mem(base, index, shift);
+}
+
+//! Creates `[label + offset]` memory operand, where `base` is a \ref Label.
+static inline constexpr Mem ptr(const Label& base, int32_t offset = 0) noexcept {
+  return Mem(base, offset);
+}
+
+// TODO: [ARM] PC + offset address.
+#if 0
+//! Creates `[PC + offset]` (relative) memory operand.
+static inline constexpr Mem ptr(const PC& pc, int32_t offset = 0) noexcept {
+  return Mem(pc, offset);
+}
+#endif
+
+//! Creates `[base]` absolute memory operand.
+//!
+//! \note The concept of absolute memory operands doesn't exist on ARM, the ISA only provides PC relative addressing.
+//! Absolute memory operands can only be used if it's known that the PC relative offset is encodable and that it
+//! would be within the limits. Absolute addresses are also often output by disassemblers, so AsmJit supports them so
+//! it can assemble such output back.
+static inline constexpr Mem ptr(uint64_t base) noexcept { return Mem(base); }
+
+//! \}
+
+ASMJIT_END_SUB_NAMESPACE
+
+//! \cond INTERNAL
+ASMJIT_BEGIN_NAMESPACE
+ASMJIT_DEFINE_TYPE_ID(arm::GpW, TypeId::kInt32);
+ASMJIT_DEFINE_TYPE_ID(arm::GpX, TypeId::kInt64);
+ASMJIT_DEFINE_TYPE_ID(arm::VecS, TypeId::kFloat32x1);
+ASMJIT_DEFINE_TYPE_ID(arm::VecD, TypeId::kFloat64x1);
+ASMJIT_DEFINE_TYPE_ID(arm::VecV, TypeId::kInt32x4);
+ASMJIT_END_NAMESPACE
+//! \endcond
+
+#endif // ASMJIT_ARM_ARMOPERAND_H_INCLUDED
diff --git a/lib/lepton/asmjit/asmjit-scope-begin.h b/lib/lepton/asmjit/asmjit-scope-begin.h
new file mode 100644
index 0000000000..93397b584a
--- /dev/null
+++ b/lib/lepton/asmjit/asmjit-scope-begin.h
@@ -0,0 +1,17 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifdef _WIN32
+  #pragma push_macro("min")
+  #pragma push_macro("max")
+
+  #ifdef min
+    #undef min
+  #endif
+
+  #ifdef max
+    #undef max
+  #endif
+#endif
diff --git a/lib/lepton/asmjit/asmjit-scope-end.h b/lib/lepton/asmjit/asmjit-scope-end.h
new file mode 100644
index 0000000000..702cef49f1
--- /dev/null
+++ b/lib/lepton/asmjit/asmjit-scope-end.h
@@ -0,0 +1,9 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifdef _WIN32
+  #pragma pop_macro("min")
+  #pragma pop_macro("max")
+#endif
diff --git a/lib/lepton/asmjit/asmjit.h b/lib/lepton/asmjit/asmjit.h
new file mode 100644
index 0000000000..1cd0651ffe
--- /dev/null
+++ b/lib/lepton/asmjit/asmjit.h
@@ -0,0 +1,33 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// SPDX-License-Identifier: Zlib
+// Official GitHub Repository: https://github.com/asmjit/asmjit
+//
+// Copyright (c) 2008-2021 The AsmJit Authors
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+
+#ifndef ASMJIT_ASMJIT_H_INCLUDED
+#define ASMJIT_ASMJIT_H_INCLUDED
+
+#include "./core.h"
+
+#ifndef ASMJIT_NO_X86
+  #include "./x86.h"
+#endif
+
+#endif // ASMJIT_ASMJIT_H_INCLUDED
diff --git a/lib/lepton/asmjit/core.h b/lib/lepton/asmjit/core.h
new file mode 100644
index 0000000000..4406ed89f3
--- /dev/null
+++ b/lib/lepton/asmjit/core.h
@@ -0,0 +1,1861 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_H_INCLUDED
+#define ASMJIT_CORE_H_INCLUDED
+
+//! Root namespace used by AsmJit.
+namespace asmjit {
+
+//! \mainpage API Reference
+//!
+//! AsmJit C++ API reference documentation generated by Doxygen.
+//!
+//! AsmJit library uses one global namespace called \ref asmjit, which provides the whole functionality. Core
+//! functionality is within \ref asmjit namespace and architecture specific functionality is always in its own
+//! namespace. For example \ref asmjit::x86 provides both 32-bit and 64-bit X86 code generation.
+//!
+//! \section main_groups Documentation Groups
+//!
+//! AsmJit documentation is structured into groups. Groups can be followed in order to learn AsmJit, but knowledge
+//! from multiple groups is required to use AsmJit properly:
+//!
+//! $$DOCS_GROUP_OVERVIEW$$
+//!
+//! \note It's important to understand that in order to learn AsmJit all groups are important. Some groups can be
+//! omitted if a particular tool is out of interest - for example \ref asmjit_assembler users don't need to know
+//! about \ref asmjit_builder, but not the other way around. \ref asmjit_builder users should know about \ref
+//! asmjit_assembler as it also uses operands, labels, and other concepts. Similarly \ref asmjit_compiler users
+//! should know how both \ref asmjit_assembler and \ref asmjit_builder tools work.
+//!
+//! \section where_to_start Where To Start
+//!
+//! AsmJit \ref asmjit_core provides the following two classes that are essential from the code generation perspective:
+//!
+//!   - \ref CodeHolder provides functionality to temporarily hold the generated code. It stores all the necessary
+//!     information about the code - code buffers, sections, labels, symbols, and information about relocations.
+//!
+//!   - \ref BaseEmitter provides interface used by emitter implementations. The interface provides basic building
+//!     blocks that are then implemented by \ref BaseAssembler, \ref BaseBuilder, and \ref BaseCompiler.
+//!
+//! Code emitters:
+//!
+//!  - \ref asmjit_assembler - provides direct machine code generation.
+//!
+//!  - \ref asmjit_builder - provides intermediate code generation that can be processed before it's serialized to
+//!    \ref BaseAssembler.
+//!
+//!  - \ref asmjit_compiler - provides high-level code generation with built-in register allocation.
+//!
+//!  - \ref FuncNode - provides insight into how function looks from the Compiler perspective and how it's stored in
+//!     a node-list.
+//!
+//! \section main_recommendations Recommendations
+//!
+//! The following steps are recommended for all AsmJit users:
+//!
+//!   - Make sure that you use \ref Logger, see \ref asmjit_logging.
+//!
+//!   - Make sure that you use \ref ErrorHandler, see \ref asmjit_error_handling.
+//!
+//!   - Instruction validation in your debug builds can reveal problems too. AsmJit provides validation at instruction
+//!     level that can be enabled via \ref BaseEmitter::addDiagnosticOptions(). See \ref DiagnosticOptions for more
+//!     details.
+//!
+//!   - If you are a Compiler user, use diagnostic options and read carefully if anything suspicious pops out.
+//!     Diagnostic options can be enabled via \ref BaseEmitter::addDiagnosticOptions(). If unsure which ones to use,
+//!     enable annotations and all debug options: `DiagnosticOptions::kRAAnnotate | DiagnosticOptions::kRADebugAll`.
+//!
+//!   - Make sure you put a breakpoint into \ref DebugUtils::errored() function if you have a problem with AsmJit
+//!     returning errors during instruction encoding or register allocation. Having an active breakpoint there can
+//!     help to reveal the origin of the error, to inspect variables and other conditions that caused it.
+//!
+//! The reason for using \ref Logger and \ref ErrorHandler is that they provide very useful information about what's
+//! happening inside emitters. In many cases the information provided by these two is crucial to quickly identify and
+//! fix issues that happen during development (for example wrong instruction, address, or register used). In addition,
+//! output from \ref Logger is always necessary when filing bug reports. In other words, using logging and proper error
+//! handling can save a lot of time during the development and can also save users from submitting issues.
+//!
+//! \section main_other Other Pages
+//!
+//!   - <a href="annotated.html">Class List</a> - List of classes sorted alphabetically
+//!   - <a href="namespaceasmjit.html">AsmJit Namespace</a> - List of symbols provided by `asmjit` namespace
+
+
+//! \defgroup asmjit_build Build Instructions
+//! \brief Build instructions, supported environments, and feature selection.
+//!
+//! ### Overview
+//!
+//! AsmJit is designed to be easily embeddable in any project. However, it depends on some compile-time definitions that
+//! can be used to enable or disable features to decrease the resulting binary size. A typical way of building AsmJit
+//! is to use [cmake](https://www.cmake.org), but it's also possible to just include AsmJit source code in your project
+//! and to just build it. The easiest way to include AsmJit in your project is to just include **src** directory in
+//! your project and to define \ref ASMJIT_STATIC. AsmJit can be just updated from time to time without any changes to
+//! this integration process. Do not embed AsmJit's `test` files in such case as these are used exclusively for testing.
+//!
+//! ### Supported C++ Compilers
+//!
+//!   - Requirements:
+//!
+//!     - AsmJit won't build without C++11 enabled. If you use older GCC or Clang you would have to enable at least
+//!       C++11 standard through compiler flags.
+//!
+//!   - Tested:
+//!
+//!     - **Clang** - Tested by GitHub Actions - Clang 3.9+ (with C++11 enabled) is officially supported (older Clang
+//!       versions having C++11 support are probably fine, but are not regularly tested).
+//!
+//!     - **GNU** - Tested by GitHub Actions - GCC 4.8+ (with C++11 enabled) is officially supported.
+//!
+//!     - **MINGW** - Should work, but it's not tested in our CI environment.
+//!
+//!     - **MSVC** - Tested by GitHub Actions - VS2017+ is officially supported, VS2015 is reported to work.
+//!
+//!   - Untested:
+//!
+//!     - **Intel** - No maintainers and no CI environment to regularly test this compiler.
+//!
+//!     - **Other** C++ compilers would require basic support in
+//!       [core/api-config.h](https://github.com/asmjit/asmjit/tree/master/src/asmjit/core/api-config.h).
+//!
+//! ### Supported Operating Systems and Platforms
+//!
+//!   - Tested:
+//!
+//!     - **Linux** - Tested by GitHub Actions (any distribution is generally supported).
+//!
+//!     - **Mac OS** - Tested by GitHub Actions (any version is supported).
+//!
+//!     - **Windows** - Tested by GitHub Actions - (Windows 7+ is officially supported).
+//!
+//!     - **Emscripten** - Works if compiled with \ref ASMJIT_NO_JIT. AsmJit cannot generate WASM code, but can be
+//!       used to generate X86/X64 code within a browser, for example.
+//!
+//!   - Untested:
+//!
+//!     - **BSDs** - No maintainers, no CI environment to regularly test BSDs, but they should work out of box.
+//!
+//!     - **Haiku** - Not regularly tested, but reported to work.
+//!
+//!     - **Other** operating systems would require some testing and support in the following files:
+//!       - [core/api-config.h](https://github.com/asmjit/asmjit/tree/master/src/asmjit/core/api-config.h)
+//!       - [core/osutils.cpp](https://github.com/asmjit/asmjit/tree/master/src/asmjit/core/osutils.cpp)
+//!       - [core/virtmem.cpp](https://github.com/asmjit/asmjit/tree/master/src/asmjit/core/virtmem.cpp)
+//!
+//! ### Supported Backends / Architectures
+//!
+//!   - **X86** and **X86_64** - Both 32-bit and 64-bit backends tested on CI.
+//!   - **AArch64** - AArch64 backend is currently only partially tested (there is no native AArch64 runner to test
+//!     AsmJit Builder/Compiler)
+//!
+//! ### Static Builds and Embedding
+//!
+//! These definitions can be used to enable static library build. Embed is used when AsmJit's source code is embedded
+//! directly in another project, implies static build as well.
+//!
+//!   - \ref ASMJIT_EMBED - Asmjit is embedded, implies \ref ASMJIT_STATIC.
+//!   - \ref ASMJIT_STATIC - Enable static-library build.
+//!
+//! \note Projects that use AsmJit statically must define \ref ASMJIT_STATIC in all compilation units that use AsmJit,
+//! otherwise AsmJit would use dynamic library imports in \ref ASMJIT_API decorator. The recommendation is to define
+//! this macro across the whole project that uses AsmJit this way.
+//!
+//! ### Build Configuration
+//!
+//! These definitions control whether asserts are active or not. By default AsmJit would autodetect build configuration
+//! from existing pre-processor definitions, but this behavior can be overridden, for example to enable debug asserts
+//! in release configuration.
+//!
+//!   - \ref ASMJIT_BUILD_DEBUG - Overrides build configuration to debug, asserts will be enabled in this case.
+//!   - \ref ASMJIT_BUILD_RELEASE - Overrides build configuration to release, asserts will be disabled in this case.
+//!
+//! \note There is usually no need to override the build configuration. AsmJit detects the build configuration by
+//! checking whether `NDEBUG` is defined and automatically defines \ref ASMJIT_BUILD_RELEASE if configuration overrides
+//! were not used. We only recommend using build configuration overrides in special situations, like using AsmJit in
+//! release configuration with asserts enabled for whatever reason.
+//!
+//! ### AsmJit Backends
+//!
+//! AsmJit currently supports only X86/X64 backend, but the plan is to add more backends in the future. By default
+//! AsmJit builds only the host backend, which is autodetected at compile-time, but this can be overridden.
+//!
+//!   - \ref ASMJIT_NO_X86 - Disable X86/X64 backends.
+//!   - \ref ASMJIT_NO_FOREIGN - Disables the support for foreign architectures.
+//!
+//! ### Features Selection
+//!
+//! AsmJit builds all supported features by default, which includes all emitters, logging, instruction validation and
+//! introspection, and JIT memory allocation. Features can be disabled at compile time by using `ASMJIT_NO_...`
+//! definitions.
+//!
+//!   - \ref ASMJIT_NO_DEPRECATED - Disables deprecated API at compile time so it won't be available and the
+//!     compilation will fail if there is attempt to use such API. This includes deprecated classes, namespaces,
+//!     enumerations, and functions.
+//!
+//!   - \ref ASMJIT_NO_BUILDER - Disables \ref asmjit_builder functionality completely. This implies \ref
+//!     ASMJIT_NO_COMPILER as \ref asmjit_compiler cannot be used without \ref asmjit_builder.
+//!
+//!   - \ref ASMJIT_NO_COMPILER - Disables \ref asmjit_compiler functionality completely.
+//!
+//!   - \ref ASMJIT_NO_JIT - Disables JIT memory management and \ref JitRuntime.
+//!
+//!   - \ref ASMJIT_NO_LOGGING - Disables \ref Logger and \ref Formatter.
+//!
+//!   - \ref ASMJIT_NO_TEXT - Disables everything that contains string representation of AsmJit constants, should
+//!     be used together with \ref ASMJIT_NO_LOGGING as logging doesn't make sense without the ability to query
+//!     instruction names, register names, etc...
+//!
+//!   - \ref ASMJIT_NO_VALIDATION - Disables validation API.
+//!
+//!   - \ref ASMJIT_NO_INTROSPECTION - Disables instruction introspection API, must be used together with \ref
+//!     ASMJIT_NO_COMPILER as \ref asmjit_compiler requires introspection for its liveness analysis and register
+//!     allocation.
+//!
+//! \note It's not recommended to disable features if you plan to build AsmJit as a shared library that will be
+//! used by multiple projects that have no control over how AsmJit was built (for example AsmJit in a Linux
+//! distribution). The possibility to disable certain features exists mainly for customized AsmJit builds.
+
+
+//! \defgroup asmjit_breaking_changes Breaking Changes
+//! \brief Documentation of breaking changes
+//!
+//! ### Overview
+//!
+//! AsmJit is a live project that is being actively developed. Deprecating the existing API in favor of a new
+//! one is preferred, but it's not always possible if the changes are significant. AsmJit authors prefer to do
+//! accumulated breaking changes at once instead of breaking the API often. This page documents deprecated and
+//! removed APIs and should serve as a how-to guide for people that want to port existing code to work with the
+//! newest AsmJit.
+//!
+//! ### Tips
+//!
+//! Useful tips before you start:
+//!
+//!   - Visit our [Public Gitter Channel](https://gitter.im/asmjit/asmjit) if you need quick help.
+//!
+//!   - Build AsmJit with `ASMJIT_NO_DEPRECATED` macro defined to make sure that you are not using deprecated
+//!     functionality at all. Deprecated functions are decorated with `ASMJIT_DEPRECATED()` macro, but sometimes
+//!     it's not possible to decorate everything like classes, which are used by deprecated functions as well,
+//!     because some compilers would warn about that. If your project compiles fine with `ASMJIT_NO_DEPRECATED`
+//!     it's not using anything, which was deprecated.
+//!
+//! ### Changes committed at 2021-12-13
+//!
+//! Core changes:
+//!
+//!   - Removed old deprecated API.
+//!
+//!   - Many enumerations were changed to enum class, and many public APIs were changed to use such enums instead
+//!     of uint32_t. This change makes some APIs backward incompatible - there are no deprecations this time.
+//!
+//!   - Extracted operand signature manipulation to `OperandSignature`.
+//!   - Setting function arguments through `Compiler::setArg()` was deprecated, use FuncNode::setArg() instead.
+//!   - Moved `{arch}::Features::k` to `CpuFeatures::{arch}::k`.
+//!   - Moved `BaseEmitter::kEncodingOption` to `EncodingOptions::k`.
+//!   - Moved `BaseEmitter::kFlag` to `EmitterFlags::k`.
+//!   - Moved `BaseEmitter::kType` to `EmitterType::k`.
+//!   - Moved `BaseEmitter::kValidationOption` to `DiagnosticOptions::kValidate`.
+//!   - Moved `BaseFeatures` to `CpuFeatures`.
+//!   - Moved `BaseInst::kControl` to `InstControlFlow::k`.
+//!   - Moved `BaseInst::kOption` and `x86::Inst::kOption` to `InstOptions::k`.
+//!   - Moved `BaseNode::kNode` to `NodeType::k`.
+//!   - Moved `BaseReg::kGroup` and `x86::Reg::kGroup` to `RegGroup::k`.
+//!   - Moved `BaseReg::kType` and `x86::Reg::kType` to `RegType::k`.
+//!   - Moved `CallConv::kFlag` to `CallConvFlags::k`.
+//!   - Moved `CallConv::kId` to `CallConvId::k`.
+//!   - Moved `CallConv::kStrategy` to `CallConvStrategy::k`.
+//!   - Moved `CodeBuffer::kFlag` to `CodeBufferFlags`.
+//!   - Moved `ConstPool::kScope` to `ConstPoolScope::k`.
+//!   - Moved `Environment::kArch` to `Arch::k`.
+//!   - Moved `Environment::kSubArch` to `SubArch::k`.
+//!   - Moved `Environment::kFormat` to `ObjectFormat::k`.
+//!   - Moved `Environment::kPlatform` to `Platform::k`.
+//!   - Moved `Environment::kAbi` to `PlatformABI::k`.
+//!   - Moved `Environment::kVendor` to `Vendor::k`.
+//!   - Moved `FormatOptions::kFlag` to `FormatFlags::k` and `DiagnosticOptions::k` (Compiler diagnostics flags).
+//!   - Moved `FormatOptions::kIndentation` to `FormatIndentationGroup::k`.
+//!   - Moved `FuncFrame::kAttr` to `FuncAttributes::k`.
+//!   - Moved `Globals::kReset` to `ResetPolicy::k`.
+//!   - Moved `InstDB::kAvx512Flag` to `InstDB::Avx512Flags::k`.
+//!   - Moved `InstDB::kFlag` to `InstDB::InstFlags::k`.
+//!   - Moved `InstDB::kMemFlag` to `InstDB::OpFlags::kMem`.
+//!   - Moved `InstDB::kMode` to `InstDB::Mode::k`.
+//!   - Moved `InstDB::kOpFlag` to `InstDB::OpFlags::k{OpType}...`.
+//!   - Moved `JitAllocator::kOption` to `JitAllocatorOptions::k`.
+//!   - Moved `Label::kType` to `LabelType::k`.
+//!   - Moved `Operand::kOpType` to `OperandType::k`.
+//!   - Moved `OpRWInfo::kFlag` to `OpRWFlags::k`.
+//!   - Moved `Type::kId` to `TypeId::k`.
+//!   - Moved `VirtMem::k` to `VirtMem::MemoryFlags::k`.
+//!
+//! ### Changes committed at 2020-05-30
+//!
+//! AsmJit has been cleaned up significantly, many todo items have been fixed and many functions and classes have
+//! been redesigned, some in an incompatible way.
+//!
+//! Core changes:
+//!
+//!   - `Imm` operand has now only `Imm::value()` and `Imm::valueAs()` functions that return its value content,
+//!     and `Imm::setValue()` function that sets the content. Functions like `setI8()`, `setU8()` were deprecated.
+//!
+//!     Old functions were deprecated, but code using them should still compile.
+//!
+//!   - `ArchInfo` has been replaced with `Environment`. Environment provides more details about the architecture,
+//!     but drops some properties that were used by arch info - `gpSize()` and `gpCount()`. `gpSize()` can be replaced
+//!     with `registerSize()` getter, which returns a native register size of the architecture the environment uses.
+//!     However, `gpCount()` was removed - at the moment `ArchTraits` can be used to access such properties.
+//!
+//!     Some other functions were renamed, like `ArchInfo::isX86Family()` is now `Environment::isFamilyX86()`, etc.
+//!     The reason for changing the order was support for more properties and all the accessors now start with the
+//!     type of the property, like `Environment::isPlatformWindows()`.
+//!
+//!     This change causes many other classes to provide `environment()` getter instead of `archInfo()` getter.
+//!     In addition, AsmJit now uses `arch()` to get an architecture instead of `archId()`. `ArchInfo::kIdXXX` was
+//!     renamed to `Environment::kArchXXX`.
+//!
+//!     Some functions were deprecated, some removed...
+//!
+//!   - `CodeInfo` has been removed in favor of `Environment`. If you used `CodeInfo` to set architecture and base
+//!     address, this is now possible with `Environment` and setting base address explicitly by `CodeHolder::init()`
+//!     - the first argument is `Environment`, and the second argument is base address, which defaults to
+//!     `Globals::kNoBaseAddress`.
+//!
+//!     CodeInfo class was deprecated, but the code using it should still compile with warnings.
+//!
+//!   - `CallConv` has been updated to offer a more unified way of representing calling conventions - many calling
+//!     conventions were abstracted to follow standard naming like `CallConvId::kCDecl` or `CallConvId::kStdCall`.
+//!
+//!     This change means that other APIs like `FuncDetail::init()` now require both, calling convention and target
+//!     `Environment`.
+//!
+//!   - `Logging` namespace has been renamed to `Formatter`, which now provides general functionality for formatting
+//!     in AsmJit.
+//!
+//!     Logging namespace should still work, but its use is deprecated. Unfortunately this will be without deprecation
+//!     warnings, so make sure you don't use it.
+//!
+//!   - `Data64`, `Data128`, and `Data256` structs were deprecated and should no longer be used. There is no replacement,
+//!     AsmJit users should simply create their own structures if they need them or use the new repeated embed API in
+//!     emitters, see `BaseEmitter::embedDataArray()`.
+//!
+//! Emitter changes:
+//!
+//!   - `BaseEmitter::emit()` function signature has been changed to accept 3 operands by reference and the remaining
+//!     3 operands as a contiguous array. This change is purely cosmetic and shouldn't affect users as emit() has many
+//!     overloads that dispatch to the right function.
+//!
+//!   - `x86::Emitter` (Assembler, Builder, Compiler) deprecates embed utilities like `dint8()`, `duint8()`, `duint16()`,
+//!     `dxmm()`, etc... in favor of a new and more powerful `BaseEmitter::embedDataArray()`. This function also allows
+//!     emitting repeated values and/or patterns, which is used by helpers `BaseEmitter::embedUInt8()`, and others...
+//!
+//!   - Validation is now available through `BaseEmitter::DiagnosticOptions`, which can be enabled/disabled through
+//!     `BaseEmitter::addDiagnosticOptions()` and `BaseEmitter::clearDiagnosticOptions()`, respectively. Validation
+//!     options now separate between encoding and Builder/Compiler so it's possible to choose the granularity required.
+//!
+//! Builder changes:
+//!
+//!   - Internal functions for creating nodes were redesigned. They now accept a pointer to the node created as
+//!     a first parameter. These changes should not affect AsmJit users as these functions were used internally.
+//!
+//! Compiler changes:
+//!
+//!   - `FuncCallNode` has been renamed to `InvokeNode`. Additionally, function calls should now use
+//!     `x86::Compiler::invoke()` instead of `call()`. The reason behind this is to remove the confusion between a
+//!     `call` instruction and AsmJit's `call()` intrinsic, which is now `invoke()`.
+//!
+//!   - Creating new nodes also changed. Now the preferred way of invoking a function is to call
+//!     `x86::Compiler::invoke()` where the first argument is `InvokeNode**`. The function now returns an error and
+//!     would call `ErrorHandler` in case of a failure. Error handling was unspecified in the past - the function was
+//!     marked noexcept, but called error handler, which could throw.
+//!
+//!     The reason behind this change is to make the API consistent with other changes and to also make it possible
+//!     to inspect the possible error. In the previous API it returned a new node or `nullptr` in case of error,
+//!     which the user couldn't inspect unless there was an attached `ErrorHandler`.
+//!
+//! Samples:
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//!
+//! using namespace asmjit;
+//!
+//! // The basic setup of JitRuntime and CodeHolder changed, use environment()
+//! // instead of codeInfo().
+//! void basicSetup() {
+//!   JitRuntime rt;
+//!   CodeHolder code(rt.environment());
+//! }
+//!
+//! // Calling a function (Compiler) changed - use invoke() instead of call().
+//! void functionInvocation(x86::Compiler& cc) {
+//!   InvokeNode* invokeNode;
+//!   cc.invoke(&invokeNode, targetOperand, FuncSignatureT<...>(...));
+//! }
+//! ```
+
+
+//! \defgroup asmjit_core Core
+//! \brief Globals, code storage, and emitter interface.
+//!
+//! ### Overview
+//!
+//! AsmJit library uses \ref CodeHolder to hold code during code generation and emitters inheriting from \ref
+//! BaseEmitter to emit code. CodeHolder uses containers to manage its data:
+//!
+//!   - \ref Section - stores information about a code or data section.
+//!   - \ref CodeBuffer - stores actual code or data, part of \ref Section.
+//!   - \ref LabelEntry - stores information about a label - its name, offset, section where it belongs to, and
+//!     other bits.
+//!   - \ref LabelLink - stores information about a yet unbound label, which was already used by the assembler.
+//!   - \ref RelocEntry - stores information about a relocation.
+//!   - \ref AddressTableEntry - stores information about an address, which was used in a jump or call. Such
+//!     address may need relocation.
+//!
+//! To generate code you would need to instantiate at least the following classes:
+//!
+//!   - \ref CodeHolder - to hold code during code generation.
+//!   - \ref BaseEmitter - to emit code into \ref CodeHolder.
+//!   - \ref Target (optional) - most likely \ref JitRuntime to keep the generated code in executable memory. \ref
+//!     Target can be customized by inheriting from it.
+//!
+//! There are also other core classes that are important:
+//!
+//!   - \ref Environment - describes where the code will run. Environment brings the concept of target triples or
+//!     tuples into AsmJit, which means that users can specify target architecture, platform, and ABI.
+//!   - \ref TypeId - encapsulates lightweight type functionality that can be used to describe primitive and vector
+//!     types. Types are used by higher level utilities, for example by \ref asmjit_function and \ref asmjit_compiler.
+//!   - \ref CpuInfo - encapsulates CPU information - stores both CPU information and CPU features described by \ref
+//!     CpuFeatures.
+//!
+//! AsmJit also provides global constants:
+//!
+//!   - \ref Globals - namespace that provides global constants.
+//!   - \ref ByteOrder - byte-order constants and functionality.
+//!
+//! \note CodeHolder examples use \ref x86::Assembler as abstract interfaces cannot be used to generate code.
+//!
+//! ### CodeHolder & Emitters
+//!
+//! The example below shows how the mentioned classes interact to generate X86 code:
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//! #include <stdio.h>
+//!
+//! using namespace asmjit;
+//!
+//! // Signature of the generated function.
+//! typedef int (*Func)(void);
+//!
+//! int main() {
+//!   JitRuntime rt;                    // Runtime specialized for JIT code execution.
+//!
+//!   CodeHolder code;                  // Holds code and relocation information.
+//!   code.init(rt.environment());      // Initialize code to match the JIT environment.
+//!
+//!   x86::Assembler a(&code);          // Create and attach x86::Assembler to code.
+//!   a.mov(x86::eax, 1);               // Move one to eax register.
+//!   a.ret();                          // Return from function.
+//!   // ===== x86::Assembler is no longer needed from here and can be destroyed =====
+//!
+//!   Func fn;                          // Holds address to the generated function.
+//!   Error err = rt.add(&fn, &code);   // Add the generated code to the runtime.
+//!   if (err) return 1;                // Handle a possible error returned by AsmJit.
+//!   // ===== CodeHolder is no longer needed from here and can be destroyed =====
+//!
+//!   int result = fn();                // Execute the generated code.
+//!   printf("%d\n", result);           // Print the resulting "1".
+//!
+//!   // All classes use RAII, all resources will be released before `main()` returns,
+//!   // the generated function can be, however, released explicitly if you intend to
+//!   // reuse or keep the runtime alive, which you should in a production-ready code.
+//!   rt.release(fn);
+//!
+//!   return 0;
+//! }
+//! ```
+//!
+//! The example above used \ref x86::Assembler as an emitter. AsmJit provides the following emitters that offer various
+//! levels of abstraction:
+//!
+//!   - \ref asmjit_assembler - Low-level emitter that emits directly to \ref CodeBuffer.
+//!   - \ref asmjit_builder - Low-level emitter that emits to a \ref BaseNode list.
+//!   - \ref asmjit_compiler - High-level emitter that provides register allocation.
+//!
+//! ### Targets and JitRuntime
+//!
+//! AsmJit's \ref Target is an interface that provides basic target abstraction. At the moment AsmJit provides only
+//! one implementation called \ref JitRuntime, which as the name suggests provides JIT code target and execution
+//! runtime. \ref JitRuntime provides all the necessary stuff to implement a simple JIT compiler with basic memory
+//! management. It only provides \ref JitRuntime::add() and \ref JitRuntime::release() functions that are used to
+//! either add code to the runtime or release it. \ref JitRuntime doesn't make any decisions on when the code should be
+//! released, the decision is up to the developer.
+//!
+//! See more at \ref asmjit_virtual_memory group.
+//!
+//! ### More About Environment
+//!
+//! In the previous example the \ref Environment is retrieved from \ref JitRuntime. It's logical as \ref JitRuntime
+//! always returns an \ref Environment that is compatible with the host. For example if your application runs on X86_64
+//! CPU the \ref Environment returned will use \ref Arch::kX64 architecture in contrast to \ref Arch::kX86, which will
+//! be used in 32-bit mode on an X86 target.
+//!
+//! AsmJit allows setting up the \ref Environment manually and selecting a different architecture and ABI when necessary.
+//! So let's do something else this time, let's always generate a 32-bit code and print its binary representation. To
+//! do that, we can create our own \ref Environment and initialize it to \ref Arch::kX86.
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//! #include <stdio.h>
+//!
+//! using namespace asmjit;
+//!
+//! int main(int argc, char* argv[]) {
+//!   using namespace asmjit::x86;
+//!
+//!   // Create a custom environment initialized to 32-bit X86 architecture.
+//!   Environment env;
+//!   env.setArch(Arch::kX86);
+//!
+//!   CodeHolder code;                  // Create a CodeHolder.
+//!   code.init(env);                   // Initialize CodeHolder with custom environment.
+//!
+//!   // Generate a 32-bit function that sums 4 floats and looks like:
+//!   //   void func(float* dst, const float* a, const float* b)
+//!   x86::Assembler a(&code);          // Create and attach x86::Assembler to `code`.
+//!
+//!   a.mov(eax, dword_ptr(esp, 4));    // Load the destination pointer.
+//!   a.mov(ecx, dword_ptr(esp, 8));    // Load the first source pointer.
+//!   a.mov(edx, dword_ptr(esp, 12));   // Load the second source pointer.
+//!
+//!   a.movups(xmm0, ptr(ecx));         // Load 4 floats from [ecx] to XMM0.
+//!   a.movups(xmm1, ptr(edx));         // Load 4 floats from [edx] to XMM1.
+//!   a.addps(xmm0, xmm1);              // Add 4 floats in XMM1 to XMM0.
+//!   a.movups(ptr(eax), xmm0);         // Store the result to [eax].
+//!   a.ret();                          // Return from function.
+//!
+//!   // We have no Runtime this time, it's on us what we do with the code.
+//!   // CodeHolder stores code in Section, which provides some basic properties
+//!   // and CodeBuffer structure. We are interested in section's CodeBuffer.
+//!   //
+//!   // NOTE: The first section is always '.text', it can be retrieved by
+//!   // code.sectionById(0) or simply by code.textSection().
+//!   CodeBuffer& buffer = code.textSection()->buffer();
+//!
+//!   // Print the machine-code generated or do something else with it...
+//!   //   8B4424048B4C24088B54240C0F10010F100A0F58C10F1100C3
+//!   for (size_t i = 0; i < buffer.length; i++)
+//!     printf("%02X", buffer.data[i]);
+//!
+//!   return 0;
+//! }
+//! ```
+//!
+//! ### Explicit Code Relocation
+//!
+//! In addition to \ref Environment, \ref CodeHolder can be configured to specify a base-address (or a virtual base
+//! address in a linker terminology), which could be static (useful when you know the location where the target's
+//! machine code will be) or dynamic. AsmJit assumes dynamic base-address by default and relocates the code held by
+//! \ref CodeHolder to a user provided address on-demand. To be able to relocate to a user provided address it needs
+//! to store some information about relocations, which is represented by \ref RelocEntry. Relocation entries are only
+//! required if you call external functions from the generated code that cannot be encoded by using a 32-bit
+//! displacement (64-bit displacements are not provided by any supported architecture).
+//!
+//! There is also a concept called \ref LabelLink - label link is a lightweight data structure that doesn't have any
+//! identifier and is stored in \ref LabelEntry as a single-linked list. Label link represents either an unbound yet
+//! used label or a cross-section link (only relevant to code that uses multiple sections). Since crossing sections is
+//! something that cannot be resolved immediately these links persist until offsets of these sections are assigned and
+//! until \ref CodeHolder::resolveUnresolvedLinks() is called. It's an error if you end up with code that has
+//! unresolved label links after flattening. You can verify it by calling \ref CodeHolder::hasUnresolvedLinks(), which
+//! inspects the value returned by \ref CodeHolder::unresolvedLinkCount().
+//!
+//! AsmJit can flatten code that uses multiple sections by assigning each section an incrementing offset that respects
+//! its alignment. Use \ref CodeHolder::flatten() to do that. After the sections are flattened their offsets and
+//! virtual sizes are adjusted to respect each section's buffer size and alignment. The \ref
+//! CodeHolder::resolveUnresolvedLinks() function must be called before relocating the code held by \ref CodeHolder.
+//! You can also flatten your code manually by iterating over all sections and calculating their offsets (relative to
+//! base) by your own algorithm. In that case \ref CodeHolder::flatten() should not be called, however,
+//! \ref CodeHolder::resolveUnresolvedLinks() should be.
+//!
+//! The example below shows how to use a built-in virtual memory allocator \ref JitAllocator instead of using \ref
+//! JitRuntime (just in case you want to use your own memory management) and how to relocate the generated code
+//! into your own memory block - you can use your own virtual memory allocator if you prefer that, but that's OS
+//! specific and not covered by the documentation.
+//!
+//! The following code is similar to the previous one, but implements a function working in both 32-bit and 64-bit
+//! environments:
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//! #include <stdio.h>
+//!
+//! using namespace asmjit;
+//!
+//! typedef void (*SumIntsFunc)(int* dst, const int* a, const int* b);
+//!
+//! int main() {
+//!   // Create a custom environment that matches the current host environment.
+//!   Environment env = Environment::host();
+//!
+//!   CodeHolder code;                  // Create a CodeHolder.
+//!   code.init(env);                   // Initialize CodeHolder with environment.
+//!
+//!   x86::Assembler a(&code);          // Create and attach x86::Assembler to `code`.
+//!
+//!   // Signature: 'void func(int* dst, const int* a, const int* b)'.
+//!   x86::Gp dst;
+//!   x86::Gp src_a;
+//!   x86::Gp src_b;
+//!
+//!   // Handle the difference between 32-bit and 64-bit calling conventions
+//!   // (arguments passed through stack vs. arguments passed by registers).
+//!   if (env.is32Bit()) {
+//!     dst   = x86::eax;
+//!     src_a = x86::ecx;
+//!     src_b = x86::edx;
+//!     a.mov(dst  , x86::dword_ptr(x86::esp, 4));
+//!     a.mov(src_a, x86::dword_ptr(x86::esp, 8));
+//!     a.mov(src_b, x86::dword_ptr(x86::esp, 12));
+//!   }
+//!   else {
+//!     if (env.isPlatformWindows()) {
+//!       dst   = x86::rcx;             // First argument  (destination pointer).
+//!       src_a = x86::rdx;             // Second argument (source 'a' pointer).
+//!       src_b = x86::r8;              // Third argument  (source 'b' pointer).
+//!     }
+//!     else {
+//!       dst   = x86::rdi;             // First argument  (destination pointer).
+//!       src_a = x86::rsi;             // Second argument (source 'a' pointer).
+//!       src_b = x86::rdx;             // Third argument  (source 'b' pointer).
+//!     }
+//!   }
+//!
+//!   a.movdqu(x86::xmm0, x86::ptr(src_a)); // Load 4 ints from [src_a] to XMM0.
+//!   a.movdqu(x86::xmm1, x86::ptr(src_b)); // Load 4 ints from [src_b] to XMM1.
+//!   a.paddd(x86::xmm0, x86::xmm1);        // Add 4 ints in XMM1 to XMM0.
+//!   a.movdqu(x86::ptr(dst), x86::xmm0);   // Store the result to [dst].
+//!   a.ret();                              // Return from function.
+//!
+//!   // Even when we didn't use multiple sections AsmJit could insert one section
+//!   // called '.addrtab' (address table section), which would be filled by data
+//!   // required by relocations (absolute jumps and calls). You can omit this code
+//!   // if you are 100% sure your code doesn't contain multiple sections and
+//!   // such relocations. You can use `CodeHolder::hasAddressTable()` to verify
+//!   // whether the address table section does exist.
+//!   code.flatten();
+//!   code.resolveUnresolvedLinks();
+//!
+//!   // After the code was generated it can be relocated manually to any memory
+//!   // location, however, we need to know its size before we perform memory
+//!   // allocation. `CodeHolder::codeSize()` returns the worst estimated code
+//!   // size in case that relocations are not possible without trampolines (in
+//!   // that case some extra code at the end of the current code buffer is
+//!   // generated during relocation).
+//!   size_t estimatedSize = code.codeSize();
+//!
+//!   // Instead of rolling up our own memory allocator we can use the one AsmJit
+//!   // provides. It's decoupled so you don't need to use `JitRuntime` for that.
+//!   JitAllocator allocator;
+//!
+//!   // Allocate an executable virtual memory and handle a possible failure.
+//!   void* p = allocator.alloc(estimatedSize);
+//!   if (!p)
+//!     return 0;
+//!
+//!   // Now relocate the code to the address provided by the memory allocator.
+//!   // Please note that this DOESN'T COPY anything to `p`. This function will
+//!   // store the address in CodeHolder and use relocation entries to patch the
+//!   // existing code in all sections to respect the base address provided.
+//!   code.relocateToBase((uint64_t)p);
+//!
+//!   // This is purely optional. There are cases in which the relocation can omit
+//!   // unneeded data, which would shrink the size of address table. If that
+//!   // happened the codeSize returned after relocateToBase() would be smaller
+//!   // than the original `estimatedSize`.
+//!   size_t codeSize = code.codeSize();
+//!
+//!   // This will copy code from all sections to `p`. Iterating over all sections
+//!   // and calling `memcpy()` would work as well, however, this function supports
+//!   // additional options that can be used to also zero pad sections' virtual
+//!   // size, etc.
+//!   //
+//!   // With some additional features, copyFlattenedData() does roughly this:
+//!   //   for (Section* section : code.sections())
+//!   //     memcpy((uint8_t*)p + section->offset(),
+//!   //            section->data(),
+//!   //            section->bufferSize());
+//!   code.copyFlattenedData(p, codeSize, CopySectionFlags::kPadSectionBuffer);
+//!
+//!   // Execute the generated function.
+//!   int inA[4] = { 4, 3, 2, 1 };
+//!   int inB[4] = { 1, 5, 2, 8 };
+//!   int out[4];
+//!
+//!   // This code uses AsmJit's ptr_as_func<> to cast between void* and SumIntsFunc.
+//!   ptr_as_func<SumIntsFunc>(p)(out, inA, inB);
+//!
+//!   // Prints {5 8 4 9}
+//!   printf("{%d %d %d %d}\n", out[0], out[1], out[2], out[3]);
+//!
+//!   // Release 'p' if it's no longer needed. It will be destroyed with 'allocator'
+//!   // instance anyway, but it's a good practice to release it explicitly
+//!   // when you know that the function will not be needed anymore.
+//!   allocator.release(p);
+//!
+//!   return 0;
+//! }
+//! ```
+//!
+//! If you know the base-address in advance (before the code generation) it can be passed as a second argument to
+//! \ref CodeHolder::init(). In that case the Assembler will know the absolute position of each instruction and
+//! would be able to use it during instruction encoding to prevent relocations where possible. The following example
+//! shows how to configure the base address:
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//! #include <stdio.h>
+//!
+//! using namespace asmjit;
+//!
+//! void initializeCodeHolder(CodeHolder& code) {
+//!   Environment env = Environment::host();
+//!   uint64_t baseAddress = uint64_t(0x1234);
+//!
+//!   // initialize CodeHolder with environment and custom base address.
+//!   code.init(env, baseAddress);
+//! }
+//! ```
+//!
+//! ### Label Offsets and Links
+//!
+//! When a label that is not yet bound is used by the Assembler, it creates a \ref LabelLink, which is then added to
+//! a \ref LabelEntry. These links are also created if a label is used in a different section than in which it was
+//! bound. Let's examine some functions that can be used to check whether there are any unresolved links.
+//!
+//! ```
+//! #include <asmjit/core.h>
+//! #include <stdio.h>
+//!
+//! using namespace asmjit;
+//!
+//! void labelLinksExample(CodeHolder& code, const Label& label) {
+//!   // Tests whether the `label` is bound.
+//!   bool isBound = code.isLabelBound(label);
+//!   printf("Label %u is %s\n", label.id(), isBound ? "bound" : "not bound");
+//!
+//!   // Returns true if the code contains either referenced, but unbound
+//!   // labels, or cross-section label links that are not resolved yet.
+//!   bool hasUnresolved = code.hasUnresolvedLinks();  // Boolean answer.
+//!   size_t nUnresolved = code.unresolvedLinkCount(); // Count of unresolved links.
+//!
+//!   printf("Number of unresolved links: %zu\n", nUnresolved);
+//! }
+//! ```
+//!
+//! There is no function that would return the number of unbound labels as this is completely unimportant from
+//! CodeHolder's perspective. If a label is not used then it doesn't matter whether it's bound or not, only actually
+//! used labels matter. After a Label is bound it's possible to query its offset relative to the start of the
+//! section where it was bound:
+//!
+//! ```
+//! #include <asmjit/core.h>
+//! #include <stdio.h>
+//!
+//! using namespace asmjit;
+//!
+//! void labelOffsetExample(CodeHolder& code, const Label& label) {
+//!   // Label offset is known after it's bound. The offset provided is relative
+//!   // to the start of the section, see below for alternative. If the given
+//!   // label is not bound the offset returned will be zero. It's recommended
+//!   // to always check whether the label is bound before using its offset.
+//!   uint64_t sectionOffset = code.labelOffset(label);
+//!   printf("Label offset relative to section: %llu\n", (unsigned long long)sectionOffset);
+//!
+//!   // If you use multiple sections and want the offset relative to the base.
+//!   // NOTE: This function expects that the section has already an offset and
+//!   // the label-link was resolved (if this is not true you will still get an
+//!   // offset relative to the start of the section).
+//!   uint64_t baseOffset = code.labelOffsetFromBase(label);
+//!   printf("Label offset relative to base: %llu\n", (unsigned long long)baseOffset);
+//! }
+//! ```
+//!
+//! ### Sections
+//!
+//! AsmJit allows creating multiple sections within the same \ref CodeHolder. A test-case
+//! [asmjit_test_x86_sections.cpp](https://github.com/asmjit/asmjit/blob/master/test/asmjit_test_x86_sections.cpp)
+//! can be used as a reference point although the following example should also provide a useful insight:
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//! #include <stdio.h>
+//!
+//! using namespace asmjit;
+//!
+//! void sectionsExample(CodeHolder& code) {
+//!   // Text section is always provided as the first section.
+//!   Section* text = code.textSection(); // or code.sectionById(0);
+//!
+//!   // To create another section use CodeHolder::newSection().
+//!   Section* data;
+//!   Error err = code.newSection(&data,
+//!     ".data",                // Section name
+//!     SIZE_MAX,               // Name length if the name is not null terminated (or SIZE_MAX).
+//!     SectionFlags::kNone,    // Section flags, see SectionFlags.
+//!     8,                      // Section alignment, must be power of 2.
+//!     0);                     // Section order value (optional, default 0).
+//!
+//!   // When you switch sections in Assembler, Builder, or Compiler the cursor
+//!   // will always move to the end of that section. When you create an Assembler
+//!   // the cursor would be placed at the end of the first (.text) section, which
+//!   // is initially empty.
+//!   x86::Assembler a(&code);
+//!   Label L_Data = a.newLabel();
+//!
+//!   a.mov(x86::eax, x86::ebx); // Emits in .text section.
+//!
+//!   a.section(data);           // Switches to the end of .data section.
+//!   a.bind(L_Data);            // Binds label in this .data section
+//!   a.db(0x01);                // Emits byte in .data section.
+//!
+//!   a.section(text);           // Switches to the end of .text section.
+//!   a.add(x86::ebx, x86::eax); // Emits in .text section.
+//!
+//!   // References a label in .text section, which was bound in .data section.
+//!   // This would create a LabelLink even when the L_Data is already bound,
+//!   // because the reference crosses sections. See below...
+//!   a.lea(x86::rsi, x86::ptr(L_Data));
+//! }
+//! ```
+//!
+//! The last line in the example above shows that a LabelLink would be created even for bound labels that cross
+//! sections. In this case a referenced label was bound in another section, which means that the link couldn't be
+//! resolved at that moment. If your code uses sections, but you wish AsmJit to flatten these sections (you don't
+//! plan to flatten them manually) then there is an API for that.
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//! #include <stdio.h>
+//!
+//! using namespace asmjit;
+//!
+//! // ... (continuing the previous example) ...
+//! void sectionsExampleContinued(CodeHolder& code) {
+//!   // Suppose we have some code that contains multiple sections and
+//!   // we would like to flatten it by using AsmJit's built-in API:
+//!   Error err = code.flatten();
+//!   if (err) {
+//!     // There are many reasons it can fail, so always handle a possible error.
+//!     printf("Failed to flatten the code: %s\n", DebugUtils::errorAsString(err));
+//!     exit(1);
+//!   }
+//!
+//!   // After flattening all sections would contain assigned offsets
+//!   // relative to base. Offsets are 64-bit unsigned integers so we
+//!   // cast them to `size_t` for simplicity. On 32-bit targets it's
+//!   // guaranteed that the offset cannot be greater than `2^32 - 1`.
+//!   printf("Data section offset %zu", size_t(data->offset()));
+//!
+//!   // The flattening doesn't resolve unresolved label links, this
+//!   // has to be done manually as flattening can be done separately.
+//!   err = code.resolveUnresolvedLinks();
+//!   if (err) {
+//!     // This is the kind of error that should always be handled...
+//!     printf("Failed to resolve label links: %s\n", DebugUtils::errorAsString(err));
+//!     exit(1);
+//!   }
+//!
+//!   if (code.hasUnresolvedLinks()) {
+//!     // This would mean either unbound label or some other issue.
+//!     printf("The code has %zu unbound labels\n", code.unresolvedLinkCount());
+//!     exit(1);
+//!   }
+//! }
+//! ```
+
+
+//! \defgroup asmjit_assembler Assembler
+//! \brief Assembler interface and operands.
+//!
+//! ### Overview
+//!
+//! AsmJit's Assembler is used to emit machine code directly into a \ref CodeBuffer. In general, code generation
+//! with assembler requires the knowledge of the following:
+//!
+//!   - \ref BaseAssembler and architecture-specific assemblers:
+//!     - \ref x86::Assembler - Assembler specific to X86 architecture
+//!   - \ref Operand and its variations:
+//!     - \ref BaseReg - Base class for a register operand, inherited by:
+//!        - \ref x86::Reg - Register operand specific to X86 architecture.
+//!     - \ref BaseMem - Base class for a memory operand, inherited by:
+//!        - \ref x86::Mem - Memory operand specific to X86 architecture.
+//!     - \ref Imm - Immediate (value) operand.
+//!     - \ref Label - Label operand.
+//!
+//! \note Assembler examples use \ref x86::Assembler as abstract interfaces cannot be used to generate code.
+//!
+//! ### Operand Basics
+//!
+//! Let's start with operands. \ref Operand is a data structure that defines a data layout of any operand. It can be
+//! inherited, but any class inheriting it cannot add any members to it, only the existing layout can be reused.
+//! AsmJit allows to construct operands dynamically, to store them, and to query a complete information about them
+//! at run-time. Operands are small (always 16 bytes per \ref Operand) and can be copied and passed by value. Please
+//! never allocate individual operands dynamically by using a `new` keyword - it would work, but then you would have
+//! to be responsible for deleting such operands. In AsmJit operands are always part of some other data structures
+//! like \ref InstNode, which is part of \ref asmjit_builder tool.
+//!
+//! Operands contain only identifiers, but not pointers to any code-generation data. For example \ref Label operand
+//! only provides label identifier, but not a pointer to \ref LabelEntry structure. In AsmJit such IDs are used to
+//! link stuff together without having to deal with pointers.
+//!
+//! AsmJit's operands all inherit from a base class called \ref Operand. Operands have the following properties that
+//! are commonly accessible by getters and setters:
+//!
+//!   - \ref Operand - Base operand, which only provides accessors that are common to all operand types.
+//!   - \ref BaseReg - Describes either physical or virtual register. Physical registers have id that matches the
+//!     target's machine id directly whereas virtual registers must be allocated into physical registers by a register
+//!     allocator pass. Register operand provides:
+//!     - Register Type (\ref RegType) - Unique id that describes each possible register provided by the target
+//!       architecture - for example X86 backend provides general purpose registers (GPB-LO, GPB-HI, GPW, GPD, and GPQ)
+//!       and all types of other registers like K, MM, BND, XMM, YMM, ZMM, and TMM.
+//!     - Register Group (\ref RegGroup) - Groups multiple register types under a single group - for example all
+//!       general-purpose registers (of all sizes) on X86 are part of \ref RegGroup::kGp and all SIMD registers
+//!       (XMM, YMM, ZMM) are part of \ref RegGroup::kVec.
+//!     - Register Size - Contains the size of the register in bytes. If the size depends on the mode (32-bit vs
+//!       64-bit) then generally the higher size is used (for example RIP register has size 8 by default).
+//!     - Register Id - Contains physical or virtual id of the register.
+//!   - \ref BaseMem - Used to reference a memory location. Memory operand provides:
+//!     - Base Register - A base register type and id (physical or virtual).
+//!     - Index Register - An index register type and id (physical or virtual).
+//!     - Offset - Displacement or absolute address to be referenced (32-bit if base register is used and 64-bit if
+//!       base register is not used).
+//!     - Flags that can describe various architecture dependent information (like scale and segment-override on X86).
+//!   - \ref Imm - Immediate values are usually part of instructions (encoded within the instruction itself) or data.
+//!   - \ref Label - used to reference a location in code or data. Labels must be created by the \ref BaseEmitter or
+//!     by \ref CodeHolder. Each label has its unique id per \ref CodeHolder instance.
+//!
+//! ### Operand Manipulation
+//!
+//! AsmJit allows to construct operands dynamically, to store them, and to query a complete information about them at
+//! run-time. Operands are small (always 16 bytes per `Operand`) and should be always copied (by value) if you intend
+//! to store them (don't create operands by using `new` keyword, it's not recommended). Operands are safe to be passed
+//! to `memcpy()` and `memset()`, which becomes handy when working with arrays of operands. If you set all members of
+//! an \ref Operand to zero the operand would become NONE operand, which is the same as a default constructed Operand.
+//!
+//! The example below illustrates how operands can be used and modified even without using any other code generation
+//! classes. The example uses X86 architecture-specific operands.
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//!
+//! using namespace asmjit;
+//!
+//! // Registers can be copied, it's a common practice.
+//! x86::Gp dstRegByValue() { return x86::ecx; }
+//!
+//! void usingOperandsExample(x86::Assembler& a) {
+//!   // Gets `ecx` register returned by a function.
+//!   x86::Gp dst = dstRegByValue();
+//!   // Gets `rax` register directly from the provided `x86` namespace.
+//!   x86::Gp src = x86::rax;
+//!   // Constructs `r10` dynamically.
+//!   x86::Gp idx = x86::gpq(10);
+//!   // Constructs [src + idx] memory address - referencing [rax + r10].
+//!   x86::Mem m = x86::ptr(src, idx);
+//!
+//!   // Examine `m`: Returns `RegType::kX86_Gpq`.
+//!   m.indexType();
+//!   // Examine `m`: Returns 10 (`r10`).
+//!   m.indexId();
+//!
+//!   // Reconstruct `idx` stored in mem:
+//!   x86::Gp idx_2 = x86::Gp::fromTypeAndId(m.indexType(), m.indexId());
+//!
+//!   // True, `idx` and `idx_2` are identical.
+//!   idx == idx_2;
+//!
+//!   // Possible - op will still be the same as `m`.
+//!   Operand op = m;
+//!   // True (can be casted to BaseMem or architecture-specific Mem).
+//!   op.isMem();
+//!
+//!   // True, `op` is just a copy of `m`.
+//!   m == op;
+//!
+//!   // Static cast is fine and valid here.
+//!   static_cast<BaseMem&>(op).addOffset(1);
+//!   // However, using `as<T>()` to cast to a derived type is preferred.
+//!   op.as<BaseMem>().addOffset(1);
+//!   // False, `op` now points to [rax + r10 + 2], which is not [rax + r10].
+//!   m == op;
+//!
+//!   // Emitting 'mov' - type safe way.
+//!   a.mov(dst, m);
+//!   // Not possible, `mov` doesn't provide mov(x86::Gp, Operand) overload.
+//!   a.mov(dst, op);
+//!
+//!   // Type-unsafe, but possible.
+//!   a.emit(x86::Inst::kIdMov, dst, m);
+//!   // Also possible, `emit()` is typeless and can be used with raw Operand.
+//!   a.emit(x86::Inst::kIdMov, dst, op);
+//! }
+//! ```
+//!
+//! Some operands have to be created explicitly by emitters. For example labels must be created by \ref
+//! BaseEmitter::newLabel(), which creates a label entry and returns a \ref Label operand with the id that refers
+//! to it. Such label then can be used by emitters.
+//!
+//! ### Memory Operands
+//!
+//! Some architectures like X86 provide a complex memory addressing model that allows to encode addresses having a
+//! BASE register, INDEX register with a possible scale (left shift), and displacement (called offset in AsmJit).
+//! Memory address on X86 can also specify memory segment (segment-override in X86 terminology) and some instructions
+//! (gather / scatter) require INDEX to be a \ref x86::Vec register instead of a general-purpose register.
+//!
+//! AsmJit allows to encode and work with all forms of addresses mentioned and implemented by X86. In addition, it
+//! also allows to construct absolute 64-bit memory address operands, which is only allowed in one form of 'mov'
+//! instruction.
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//!
+//! using namespace asmjit;
+//!
+//! void testX86Mem() {
+//!   // Makes it easier to access x86 stuff...
+//!   using namespace asmjit::x86;
+//!
+//!   // BASE + OFFSET.
+//!   Mem a = ptr(rax);                 // a = [rax]
+//!   Mem b = ptr(rax, 15);             // b = [rax + 15]
+//!
+//!   // BASE + INDEX << SHIFT - Shift is in BITS as used by X86!
+//!   Mem c = ptr(rax, rbx);            // c = [rax + rbx]
+//!   Mem d = ptr(rax, rbx, 2);         // d = [rax + rbx << 2]
+//!   Mem e = ptr(rax, rbx, 2, 15);     // e = [rax + rbx << 2 + 15]
+//!
+//!   // BASE + VM (Vector Index) (encoded as MOD+VSIB).
+//!   Mem f = ptr(rax, xmm1);           // f = [rax + xmm1]
+//!   Mem g = ptr(rax, xmm1, 2);        // g = [rax + xmm1 << 2]
+//!   Mem h = ptr(rax, xmm1, 2, 15);    // h = [rax + xmm1 << 2 + 15]
+//!
+//!   // Absolute address:
+//!   uint64_t addr = (uint64_t)0x1234;
+//!   Mem i = ptr(addr);                // i = [0x1234]
+//!   Mem j = ptr(addr, rbx);           // j = [0x1234 + rbx]
+//!   Mem k = ptr(addr, rbx, 2);        // k = [0x1234 + rbx << 2]
+//!
+//!   // LABEL - Will be encoded as RIP (64-bit) or absolute address (32-bit).
+//!   Label L = ...;
+//!   Mem m = ptr(L);                   // m = [L]
+//!   Mem n = ptr(L, rbx);              // n = [L + rbx]
+//!   Mem o = ptr(L, rbx, 2);           // o = [L + rbx << 2]
+//!   Mem p = ptr(L, rbx, 2, 15);       // p = [L + rbx << 2 + 15]
+//!
+//!   // RIP - 64-bit only (RIP can't use INDEX).
+//!   Mem q = ptr(rip, 24);             // q = [rip + 24]
+//! }
+//! ```
+//!
+//! Memory operands can optionally contain memory size. This is required by instructions where the memory size cannot
+//! be deduced from other operands, like `inc` and `dec` on X86:
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//!
+//! using namespace asmjit;
+//!
+//! void testX86Mem() {
+//!   // The same as: dword ptr [rax + rbx].
+//!   x86::Mem a = x86::dword_ptr(rax, rbx);
+//!
+//!   // The same as: qword ptr [rdx + rsi << 0 + 1].
+//!   x86::Mem b = x86::qword_ptr(rdx, rsi, 0, 1);
+//! }
+//! ```
+//!
+//! Memory operands provide API that can be used to access its properties:
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//!
+//! using namespace asmjit;
+//!
+//! void testX86Mem() {
+//!   // The same as: dword ptr [rax + 12].
+//!   x86::Mem mem = x86::dword_ptr(rax, 12);
+//!
+//!   mem.hasBase();                    // true.
+//!   mem.hasIndex();                   // false.
+//!   mem.size();                       // 4.
+//!   mem.offset();                     // 12.
+//!
+//!   mem.setSize(0);                   // Sets the size to 0 (makes it sizeless).
+//!   mem.addOffset(-1);                // Adds -1 to the offset and makes it 11.
+//!   mem.setOffset(0);                 // Sets the offset to 0.
+//!   mem.setBase(rcx);                 // Changes BASE to RCX.
+//!   mem.setIndex(rax);                // Changes INDEX to RAX.
+//!   mem.hasIndex();                   // true.
+//! }
+//! // ...
+//! ```
+//!
+//! Making changes to memory operand is very comfortable when emitting loads
+//! and stores:
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//!
+//! using namespace asmjit;
+//!
+//! void testX86Mem(CodeHolder& code) {
+//!   x86::Assembler a(&code);          // Your initialized x86::Assembler.
+//!   x86::Mem mSrc = x86::ptr(eax);    // Construct [eax] memory operand.
+//!
+//!   // One way of emitting bunch of loads is to use `mem.adjusted()`, which
+//!   // returns a new memory operand and keeps the source operand unchanged.
+//!   a.movaps(x86::xmm0, mSrc);        // No adjustment needed to load [eax].
+//!   a.movaps(x86::xmm1, mSrc.adjusted(16)); // Loads from [eax + 16].
+//!   a.movaps(x86::xmm2, mSrc.adjusted(32)); // Loads from [eax + 32].
+//!   a.movaps(x86::xmm3, mSrc.adjusted(48)); // Loads from [eax + 48].
+//!
+//!   // ... do something with xmm0-3 ...
+//!
+//!   // Another way of adjusting memory is to change the operand in-place.
+//!   // If you want to keep the original operand you can simply clone it.
+//!   x86::Mem mDst = mSrc.clone();     // Clone mSrc.
+//!
+//!   a.movaps(mDst, x86::xmm0);        // Stores xmm0 to [eax].
+//!   mDst.addOffset(16);               // Adds 16 to `mDst`.
+//!
+//!   a.movaps(mDst, x86::xmm1);        // Stores to [eax + 16] .
+//!   mDst.addOffset(16);               // Adds 16 to `mDst`.
+//!
+//!   a.movaps(mDst, x86::xmm2);        // Stores to [eax + 32].
+//!   mDst.addOffset(16);               // Adds 16 to `mDst`.
+//!
+//!   a.movaps(mDst, x86::xmm3);        // Stores to [eax + 48].
+//! }
+//! ```
+//!
+//! ### Assembler Examples
+//!
+//!   - \ref x86::Assembler provides many X86/X64 examples.
+
+
+//! \defgroup asmjit_builder Builder
+//! \brief Builder interface, nodes, and passes.
+//!
+//! ### Overview
+//!
+//! Both \ref BaseBuilder and \ref BaseCompiler interfaces describe emitters that emit into a representation that
+//! allows further processing. The code stored in such representation is completely safe to be patched, simplified,
+//! reordered, obfuscated, removed, injected, analyzed, or processed some other way. Each instruction, label,
+//! directive, or other building block is stored as \ref BaseNode (or derived class like \ref InstNode or \ref
+//! LabelNode) and contains all the information necessary to pass that node later to the assembler.
+//!
+//! \ref BaseBuilder is an emitter that inherits from \ref BaseEmitter interface. It was designed to provide a maximum
+//! compatibility with the existing \ref BaseAssembler emitter so users can move from assembler to builder when needed,
+//! for example to implement post-processing, which is not possible with Assembler.
+//!
+//! ### Builder Nodes
+//!
+//! \ref BaseBuilder doesn't generate machine code directly, it uses an intermediate representation based on nodes,
+//! however, it allows to serialize to \ref BaseAssembler when the code is ready to be encoded.
+//!
+//! There are multiple node types used by both \ref BaseBuilder and \ref BaseCompiler :
+//!
+//!   - Basic nodes:
+//!     - \ref BaseNode - Base class for all nodes.
+//!     - \ref InstNode - Represents an instruction node.
+//!     - \ref AlignNode - Represents an alignment directive (.align).
+//!     - \ref LabelNode - Represents a location where to bind a \ref Label.
+//!
+//!   - Data nodes:
+//!     - \ref EmbedDataNode - Represents data.
+//!     - \ref EmbedLabelNode - Represents \ref Label address embedded as data.
+//!     - \ref EmbedLabelDeltaNode - Represents a difference of two labels embedded in data.
+//!     - \ref ConstPoolNode - Represents a constant pool data embedded as data.
+//!
+//!   - Informative nodes:
+//!     - \ref CommentNode - Represents a comment string, doesn't affect code generation.
+//!     - \ref SentinelNode - A marker that can be used to remember certain position in code or data, doesn't affect
+//!       code generation. Used by \ref FuncNode to mark the end of a function.
+//!
+//!   - Other nodes are provided by \ref asmjit_compiler infrastructure.
+//!
+//! ### Builder Examples
+//!
+//!   - \ref x86::Builder provides many X86/X64 examples.
+
+
+//! \defgroup asmjit_compiler Compiler
+//! \brief Compiler interface.
+//!
+//! ### Overview
+//!
+//! \ref BaseCompiler is a high-level interface, which provides register allocation and support for defining and
+//! invoking functions, built on top of \ref BaseBuilder interface. At the moment it's the easiest way of generating
+//! code in AsmJit as most architecture and OS specifics is properly abstracted and handled by AsmJit automatically.
+//! However, abstractions also mean restrictions, which means that \ref BaseCompiler has more limitations than \ref
+//! BaseAssembler or \ref BaseBuilder.
+//!
+//! Since \ref BaseCompiler provides register allocation it also establishes the concept of functions - a function
+//! in Compiler sense is a unit in which virtual registers are allocated into physical registers by the register
+//! allocator. In addition, it enables to use such virtual registers in function invocations.
+//!
+//! \ref BaseCompiler automatically handles function calling conventions. It's still architecture dependent, but
+//! makes the code generation much easier. Functions are essential; the first-step to generate some code is to define
+//! a signature of the function to be generated (before generating the function body itself). Function arguments and
+//! return value(s) are handled by assigning virtual registers to them. Similarly, function calls are handled the same
+//! way.
+//!
+//! ### Compiler Nodes
+//!
+//! \ref BaseCompiler adds some nodes that are required for function generation and invocation:
+//!
+//!   - \ref FuncNode - Represents a function definition.
+//!   - \ref FuncRetNode - Represents a function return.
+//!   - \ref InvokeNode - Represents a function invocation.
+//!
+//! \ref BaseCompiler also makes the use of passes (\ref Pass) and automatically adds an architecture-dependent
+//! register allocator pass to the list of passes when attached to \ref CodeHolder.
+//!
+//! ### Compiler Examples
+//!
+//!   - \ref x86::Compiler provides many X86/X64 examples.
+//!
+//! ### Compiler Tips
+//!
+//! Users of AsmJit have made mistakes in the past, this section should provide some useful tips for beginners:
+//!
+//!   - Virtual registers in compiler are bound to a single function. At the moment the implementation doesn't
+//!     care whether a single virtual register is used in multiple functions, but it sees it as two independent
+//!     virtual registers in that case. This means that virtual registers cannot be used to implement global
+//!     variables. Global variables are basically memory addresses which functions can read from and write to,
+//!     and they have to be implemented in the same way.
+//!
+//!   - Compiler provides a useful debugging functionality, which can be turned on through \ref FormatFlags. Use
+//!     \ref Logger::addFlags() to turn on additional logging features when using Compiler.
+
+
+//! \defgroup asmjit_function Function
+//! \brief Function definitions.
+//!
+//! ### Overview
+//!
+//! AsmJit provides functionality that can be used to define function signatures and to calculate automatically
+//! optimal function frame that can be used directly by a prolog and epilog insertion. This feature was exclusive
+//! to AsmJit's Compiler for a very long time, but was abstracted out and is now available for all users regardless
+//! of the emitter they use. The following use cases are possible:
+//!
+//!   - Calculate function frame before the function is generated - this is the only way available to \ref
+//!     BaseAssembler users and it will be described in this section.
+//!
+//!   - Calculate function frame after the function is generated - this way is generally used by \ref BaseBuilder
+//!     and \ref BaseCompiler emitters and this way is generally described in \ref asmjit_compiler section.
+//!
+//! The following concepts are used to describe and create functions in AsmJit:
+//!
+//!   - \ref TypeId - Type-id is an 8-bit value that describes a platform independent type as we know from C/C++.
+//!     It provides abstractions for most common types like `int8_t`, `uint32_t`, `uintptr_t`, `float`, `double`,
+//!     and all possible vector types to match ISAs up to AVX512. \ref TypeId was introduced originally for \ref
+//!     asmjit_compiler, but it's now used by \ref FuncSignature as well.
+//!
+//!   - \ref CallConv - Describes a calling convention - this class contains instructions to assign registers and
+//!     stack addresses to function arguments and return value(s), but doesn't specify any function signature itself.
+//!     Calling conventions are architecture and OS dependent.
+//!
+//!   - \ref FuncSignature - Describes a function signature, for example `int func(int, int)`. FuncSignature contains
+//!     a function calling convention id, return value type, and function arguments. The signature itself is platform
+//!     independent and uses \ref TypeId to describe types of function arguments and function return value(s).
+//!
+//!   - \ref FuncDetail - Architecture and ABI dependent information that describes \ref CallConv and expanded \ref
+//!     FuncSignature. Each function argument and return value is represented as \ref FuncValue that contains the
+//!     original \ref TypeId enriched with additional information that specifies whether the value is passed or
+//!     returned by register (and which register) or by stack. Each value also contains some other metadata that
+//!     provide additional information required to handle it properly (for example whether a vector is passed
+//!     indirectly by a pointer as required by WIN64 calling convention).
+//!
+//!   - \ref FuncFrame - Contains information about the function frame that can be used by prolog/epilog inserter
+//!     (PEI). Holds call stack size and alignment, local stack size and alignment, and various attributes that
+//!     describe how prolog and epilog should be constructed. `FuncFrame` doesn't know anything about function's
+//!     arguments or return values, it holds only information necessary to create valid and ABI conforming function
+//!     prologs and epilogs.
+//!
+//!   - \ref FuncArgsAssignment - A helper class that can be used to reassign function arguments into user specified
+//!     registers. It's architecture and ABI dependent mapping from function arguments described by \ref CallConv
+//!     and \ref FuncDetail into registers specified by the user.
+//!
+//! It's a lot of concepts where each represents one step in a function frame calculation. It can be used to create
+//! function prologs, epilogs, and also to calculate information necessary to perform function calls.
+
+
+//! \defgroup asmjit_logging Logging
+//! \brief Logging and formatting.
+//!
+//! ### Overview
+//!
+//! The initial phase of a project that generates machine code is not always smooth. Failure cases are common not just
+//! at the beginning phase, but also during the development or refactoring. AsmJit provides logging functionality to
+//! address this issue. AsmJit already does a good job of using function overloading to prevent emitting unencodable
+//! instructions, but it can't prevent emitting machine code that is correct at instruction level, but doesn't
+//! work when it's executed as a whole. Logging has always been an important part of AsmJit's infrastructure and looking
+//! at logs can sometimes reveal code generation issues quickly.
+//!
+//! AsmJit provides API for logging and formatting:
+//!
+//!   - \ref Logger - A logger that you can pass to \ref CodeHolder and all emitters that inherit from \ref BaseEmitter.
+//!
+//!   - \ref FormatOptions - Formatting options that can change how instructions and operands are formatted.
+//!
+//!   - \ref Formatter - A namespace that provides functions that can format input data like \ref Operand, \ref BaseReg,
+//!     \ref Label, and \ref BaseNode into \ref String.
+//!
+//! AsmJit's \ref Logger serves the following purposes:
+//!
+//!   - Provides a basic foundation for logging.
+//!
+//!   - Abstract class leaving the implementation on users. The following built-in implementations are provided for
+//!     simplicity:
+//!
+//!     - \ref FileLogger implements logging into a standard `FILE` stream.
+//!     - \ref StringLogger serializes all logs into a \ref String instance.
+//!
+//! AsmJit's \ref FormatOptions provides the following to customize the formatting of instructions and operands:
+//!
+//!   - \ref FormatFlags
+//!   - \ref FormatIndentationGroup
+//!
+//! ### Logging
+//!
+//! A \ref Logger is typically attached to a \ref CodeHolder, which propagates it to all attached emitters
+//! automatically. The example below illustrates how to use \ref FileLogger that outputs to standard output:
+//!
+//! ```
+//! #include <asmjit/core.h>
+//! #include <stdio.h>
+//!
+//! using namespace asmjit;
+//!
+//! int main() {
+//!   JitRuntime rt;               // Runtime specialized for JIT code execution.
+//!   FileLogger logger(stdout);   // Logger should always survive CodeHolder.
+//!
+//!   CodeHolder code;             // Holds code and relocation information.
+//!   code.init(rt.environment()); // Initialize to the same arch as JIT runtime.
+//!   code.setLogger(&logger);     // Attach the `logger` to `code` holder.
+//!
+//!   // ... code as usual, everything emitted will be logged to `stdout` ...
+//!   return 0;
+//! }
+//! ```
+//!
+//! If output to FILE stream is not desired it's possible to use \ref StringLogger, which concatenates everything
+//! into a multi-line string:
+//!
+//! ```
+//! #include <asmjit/core.h>
+//! #include <stdio.h>
+//! #include <utility>
+//!
+//! using namespace asmjit;
+//!
+//! int main() {
+//!   JitRuntime rt;               // Runtime specialized for JIT code execution.
+//!   StringLogger logger;         // Logger should always survive CodeHolder.
+//!
+//!   CodeHolder code;             // Holds code and relocation information.
+//!   code.init(rt.environment()); // Initialize to the same arch as JIT runtime.
+//!   code.setLogger(&logger);     // Attach the `logger` to `code` holder.
+//!
+//!   // ... code as usual, logging will be concatenated to logger string  ...
+//!
+//!   // You can either use the string from StringLogger directly or you can
+//!   // move it. StringLogger::data() returns its content as null terminated char[].
+//!   printf("Logger content: %s\n", logger.data());
+//!
+//!   // It can be moved into your own string like this:
+//!   String content = std::move(logger.content());
+//!   printf("The same content: %s\n", content.data());
+//!
+//!   return 0;
+//! }
+//! ```
+//!
+//! ### Formatting
+//!
+//! AsmJit uses \ref Formatter to format inputs that are then passed to \ref Logger. Formatting is public and can be
+//! used by AsmJit users as well. The most important thing to know regarding formatting is that \ref Formatter always
+//! appends to the output string, so it can be used to build complex strings without having to concatenate
+//! intermediate strings.
+//!
+//! The first example illustrates how to format operands:
+//!
+//! ```
+//! #include <asmjit/core.h>
+//! #include <stdio.h>
+//!
+//! using namespace asmjit;
+//!
+//! void logOperand(Arch arch, const Operand_& op) {
+//!   // The emitter is optional (named labels and virtual registers need it).
+//!   BaseEmitter* emitter = nullptr;
+//!
+//!   // No flags by default.
+//!   FormatFlags formatFlags = FormatFlags::kNone;
+//!
+//!   StringTmp<128> sb;
+//!   Formatter::formatOperand(sb, formatFlags, emitter, arch, op);
+//!   printf("%s\n", sb.data());
+//! }
+//!
+//! void formattingExample() {
+//!   using namespace x86;
+//!
+//!   // Architecture is not part of operand, it must be passed explicitly.
+//!   // Format flags. We pass it explicitly also to 'logOperand' to make
+//!   // compatible with what AsmJit normally does.
+//!   Arch arch = Arch::kX64;
+//!
+//!   logOperand(arch, rax);                    // Prints 'rax'.
+//!   logOperand(arch, ptr(rax, rbx, 2));       // Prints '[rax + rbx * 4]'.
+//!   logOperand(arch, dword_ptr(rax, rbx, 2)); // Prints 'dword [rax + rbx * 4]'.
+//!   logOperand(arch, imm(42));                // Prints '42'.
+//! }
+//! ```
+//!
+//! Next example illustrates how to format whole instructions:
+//!
+//! ```
+//! #include <asmjit/core.h>
+//! #include <stdio.h>
+//! #include <utility>
+//!
+//! using namespace asmjit;
+//!
+//! template<typename... Args>
+//! void logInstruction(Arch arch, const BaseInst& inst, Args&&... args) {
+//!   // The emitter is optional (named labels and virtual registers need it).
+//!   BaseEmitter* emitter = nullptr;
+//!
+//!   // No flags by default.
+//!   FormatFlags formatFlags = FormatFlags::kNone;
+//!
+//!   // The formatter expects operands in an array.
+//!   Operand_ operands[] { std::forward<Args>(args)... };
+//!
+//!   StringTmp<128> sb;
+//!   Formatter::formatInstruction(
+//!     sb, formatFlags, emitter, arch, inst, operands, sizeof...(args));
+//!   printf("%s\n", sb.data());
+//! }
+//!
+//! void formattingExample() {
+//!   using namespace x86;
+//!
+//!   // Architecture is not part of operand, it must be passed explicitly.
+//!   // Format flags. We pass it explicitly also to 'logInstruction' to make
+//!   // compatible with what AsmJit normally does.
+//!   Arch arch = Arch::kX64;
+//!
+//!   // Prints 'mov rax, rcx'.
+//!   logInstruction(arch, BaseInst(Inst::kIdMov), rax, rcx);
+//!
+//!   // Prints 'vaddpd zmm0, zmm1, [rax] {1to8}'.
+//!   logInstruction(arch,
+//!                  BaseInst(Inst::kIdVaddpd),
+//!                  zmm0, zmm1, ptr(rax)._1toN());
+//!
+//!   // BaseInst abstracts instruction id, instruction options, and extraReg.
+//!   // Prints 'lock add [rax], rcx'.
+//!   logInstruction(arch,
+//!                  BaseInst(Inst::kIdAdd, InstOptions::kX86_Lock),
+//!                  x86::ptr(rax), rcx);
+//!
+//!   // Similarly an extra register (like AVX-512 selector) can be used.
+//!   // Prints 'vaddpd zmm0 {k2} {z}, zmm1, [rax]'.
+//!   logInstruction(arch,
+//!                  BaseInst(Inst::kIdVaddpd, InstOptions::kX86_ZMask, k2),
+//!                  zmm0, zmm1, ptr(rax));
+//! }
+//! ```
+//!
+//! And finally, the example below illustrates how to use a built-in function to format the content of
+//! \ref BaseBuilder, which consists of nodes:
+//!
+//! ```
+//! #include <asmjit/core.h>
+//! #include <stdio.h>
+//!
+//! using namespace asmjit;
+//!
+//! void formattingExample(BaseBuilder* builder) {
+//!   FormatFlags formatFlags = FormatFlags::kNone;
+//!
+//!   // This also shows how temporary strings can be used.
+//!   StringTmp<512> sb;
+//!
+//!   // FormatNodeList requires the String for output, formatting flags, which
+//!   // were zero (no extra flags), and the builder instance, which we have
+//!   // provided. An overloaded version also exists, which accepts begin and
+//!   // end nodes, which can be used to only format a range of nodes.
+//!   Formatter::formatNodeList(sb, formatFlags, builder);
+//!
+//!   // You can do whatever else with the string, it's always null terminated,
+//!   // so it can be passed to C functions like printf().
+//!   printf("%s\n", sb.data());
+//! }
+//! ```
+
+
+//! \defgroup asmjit_error_handling Error Handling
+//! \brief Error handling.
+//!
+//! ### Overview
+//!
+//! AsmJit uses error codes to represent and return errors. Every function that can fail returns an \ref Error code.
+//! Exceptions are never thrown by AsmJit itself even in extreme conditions like out-of-memory, but it's possible to
+//! override \ref ErrorHandler::handleError() to throw, in that case no error will be returned and exception will be
+//! thrown instead. All functions where this can happen are not marked `noexcept`.
+//!
+//! Errors should never be ignored, however, checking errors after each AsmJit API call would simply overcomplicate
+//! the whole code generation experience. \ref ErrorHandler exists to make the use of AsmJit API simpler as it allows
+//! to customize how errors can be handled:
+//!
+//!   - Record the error and continue (how the error is recorded is user-implemented).
+//!   - Throw an exception. AsmJit doesn't use exceptions and is completely exception-safe, but it's perfectly legal
+//!     to throw an exception from the error handler.
+//!   - Use plain old C's `setjmp()` and `longjmp()`. AsmJit always puts Assembler, Builder and Compiler to a
+//!     consistent state before calling \ref ErrorHandler::handleError(), so `longjmp()` can be used without issues
+//!     to cancel the code-generation if an error occurred. This method can be used if exception handling in your
+//!     project is turned off and you still want some comfort. In most cases it should be safe as AsmJit uses \ref
+//!     Zone memory and the ownership of memory it allocates always ends with the instance that allocated it. If
+//!     using this approach please never jump outside the life-time of \ref CodeHolder and \ref BaseEmitter.
+//!
+//! ### Using ErrorHandler
+//!
+//! An example of attaching \ref ErrorHandler to \ref CodeHolder.
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//! #include <stdio.h>
+//!
+//! using namespace asmjit;
+//!
+//! // A simple error handler implementation, extend according to your needs.
+//! class MyErrorHandler : public ErrorHandler {
+//! public:
+//!   void handleError(Error err, const char* message, BaseEmitter* origin) override {
+//!     printf("AsmJit error: %s\n", message);
+//!   }
+//! };
+//!
+//! int main() {
+//!   JitRuntime rt;
+//!
+//!   MyErrorHandler myErrorHandler;
+//!   CodeHolder code;
+//!
+//!   code.init(rt.environment());
+//!   code.setErrorHandler(&myErrorHandler);
+//!
+//!   x86::Assembler a(&code);
+//!   // ... code generation ...
+//!
+//!   return 0;
+//! }
+//! ```
+//!
+//! Useful classes in error handling group:
+//!
+//!   - See \ref DebugUtils that provides utilities useful for debugging.
+//!   - See \ref Error that lists error codes that AsmJit uses.
+//!   - See \ref ErrorHandler for more details about error handling.
+
+
+//! \defgroup asmjit_instruction_db Instruction DB
+//! \brief Instruction database (introspection, read/write, validation, ...).
+//!
+//! ### Overview
+//!
+//! AsmJit provides a public instruction database that can be used to query information about a complete instruction.
+//! The instruction database requires the knowledge of the following:
+//!
+//!   - \ref BaseInst - Base instruction that contains instruction id, options, and a possible extra-register that
+//!     represents either REP prefix counter or AVX-512 selector (mask).
+//!
+//!   - \ref Operand - Represents operands of an instruction.
+//!
+//! Each instruction can be then queried for the following information:
+//!
+//!   - \ref InstRWInfo - Read/write information of instruction and its operands (includes \ref OpRWInfo).
+//!
+//!   - \ref CpuFeatures - CPU features required to execute the instruction.
+//!
+//! In addition to query functionality AsmJit is also able to validate whether an instruction and its operands are
+//! valid. This is useful for making sure that what user tries to emit is correct and it can be also used by other
+//! projects that parse user input, like AsmTK project.
+//!
+//! ### Query API
+//!
+//! The instruction query API is provided by \ref InstAPI namespace. The following queries are possible:
+//!
+//!   - \ref InstAPI::queryRWInfo() - queries read/write information of the given instruction and its operands.
+//!     Includes also CPU flags read/written.
+//!
+//!   - \ref InstAPI::queryFeatures() - queries CPU features that are required to execute the given instruction. A full
+//!     instruction with operands must be given as some architectures like X86 may require different features for the
+//!     same instruction based on its operands.
+//!
+//!   - <a href="https://github.com/asmjit/asmjit/blob/master/test/asmjit_test_instinfo.cpp">asmjit_test_instinfo.cpp</a>
+//!     can be also used as a reference about accessing instruction information.
+//!
+//! ### Validation API
+//!
+//! The instruction validation API is provided by \ref InstAPI namespace in a similar fashion to the Query API,
+//! however, validation can also be turned on at \ref BaseEmitter level. The following is possible:
+//!
+//!   - \ref InstAPI::validate() - low-level instruction validation function that is used internally by emitters
+//!     if strict validation is enabled.
+//!
+//!   - \ref BaseEmitter::addDiagnosticOptions() - can be used to enable validation at emitter level, see \ref
+//!     DiagnosticOptions.
+
+
+//! \defgroup asmjit_virtual_memory Virtual Memory
+//! \brief Virtual memory management.
+//!
+//! ### Overview
+//!
+//! AsmJit's virtual memory management is divided into two main categories:
+//!
+//!   - Low level API that provides cross-platform abstractions for virtual memory allocation. Implemented in
+//!     \ref VirtMem namespace.
+//!
+//!   - High level API that makes it very easy to store generated code for execution. See \ref JitRuntime, which is
+//!     used by many examples for its simplicity and easy integration with \ref CodeHolder. There is also \ref
+//!     JitAllocator, which lies somewhere between RAW memory allocation and \ref JitRuntime.
+
+
+//! \defgroup asmjit_zone Zone Memory
+//! \brief Zone memory allocator and containers.
+//!
+//! ### Overview
+//!
+//! AsmJit uses zone memory allocation (also known as Arena allocation) to allocate most of the data it uses. It's a
+//! fast allocator that allows AsmJit to allocate a lot of small data structures fast and without `malloc()` overhead.
+//! Since code generators and all related classes are usually short-lived this approach decreases memory usage and
+//! fragmentation as arena-based allocators always allocate larger blocks of memory, which are then split into smaller
+//! chunks.
+//!
+//! Another advantage of zone memory allocation is that since the whole library uses this strategy it's very easy to
+//! deallocate everything that a particular instance is holding by simply releasing the memory the allocator holds.
+//! This improves destruction time of such objects as there is no destruction at all. Long-lived objects just reset
+//! their data in destructor or in their reset() member function for future reuse. For this purpose all containers in
+//! AsmJit are also zone allocated.
+//!
+//! ### Zone Allocation
+//!
+//!   - \ref Zone - Incremental zone memory allocator with minimum features. It can only allocate memory without the
+//!     possibility to return it back to the allocator.
+//!
+//!   - \ref ZoneTmp - A temporary \ref Zone with some initial static storage. If the allocation requests fit the
+//!     static storage allocated then there will be no dynamic memory allocation during the lifetime of \ref ZoneTmp,
+//!     otherwise it would act as \ref Zone with one preallocated block on the stack.
+//!
+//!   - \ref ZoneAllocator - A wrapper of \ref Zone that provides the capability of returning memory to the allocator.
+//!     Such memory is stored in a pool for later reuse.
+//!
+//! ### Zone Allocated Containers
+//!
+//!   - \ref ZoneString - Zone allocated string.
+//!   - \ref ZoneHash - Zone allocated hash table.
+//!   - \ref ZoneTree - Zone allocated red-black tree.
+//!   - \ref ZoneList - Zone allocated double-linked list.
+//!   - \ref ZoneStack - Zone allocated stack.
+//!   - \ref ZoneVector - Zone allocated vector.
+//!   - \ref ZoneBitVector - Zone allocated vector of bits.
+//!
+//! ### Using Zone Allocated Containers
+//!
+//! The most common data structure exposed by AsmJit is \ref ZoneVector. It's very similar to `std::vector`, but the
+//! implementation doesn't use exceptions and uses the mentioned \ref ZoneAllocator for performance reasons. You don't
+//! have to worry about allocations as you should not need to add items to AsmJit's data structures directly as there
+//! should be API for all required operations.
+//!
+//! The following APIs in \ref CodeHolder return a \ref ZoneVector reference:
+//!
+//! ```
+//! using namespace asmjit;
+//!
+//! void example(CodeHolder& code) {
+//!   // Contains all emitters attached to CodeHolder.
+//!   const ZoneVector<BaseEmitter*>& emitters = code.emitters();
+//!
+//!   // Contains all section entries managed by CodeHolder.
+//!   const ZoneVector<Section*>& sections = code.sections();
+//!
+//!   // Contains all label entries managed by CodeHolder.
+//!   const ZoneVector<LabelEntry*>& labelEntries = code.labelEntries();
+//!
+//!   // Contains all relocation entries managed by CodeHolder.
+//!   const ZoneVector<RelocEntry*>& relocEntries = code.relocEntries();
+//! }
+//! ```
+//!
+//! \ref ZoneVector has overloaded array access operator to make it possible to access its elements through operator[].
+//! Some standard functions like \ref ZoneVector::empty(), \ref ZoneVector::size(), and \ref ZoneVector::data() are
+//! provided as well. Vectors are also iterable through a range-based for loop:
+//!
+//! ```
+//! using namespace asmjit;
+//!
+//! void example(CodeHolder& code) {
+//!   for (LabelEntry* le : code.labelEntries()) {
+//!     printf("Label #%u {Bound=%s Offset=%llu}",
+//!       le->id(),
+//!       le->isBound() ? "true" : "false",
+//!       (unsigned long long)le->offset());
+//!   }
+//! }
+//! ```
+//!
+//! ### Design Considerations
+//!
+//! Zone-allocated containers do not store the allocator within the container. This decision was made to reduce the
+//! footprint of such containers as AsmJit tooling, especially Compiler's register allocation, may use many instances
+//! of such containers to perform code analysis and register allocation.
+//!
+//! For example to append an item into a \ref ZoneVector it's required to pass the allocator as the first argument,
+//! so it can be used in case that the vector needs a reallocation. Such function also returns an error, which must
+//! be propagated to the caller.
+//!
+//! ```
+//! using namespace asmjit;
+//!
+//! Error example(ZoneAllocator* allocator) {
+//!   ZoneVector<int> vector;
+//!
+//!   // Unfortunately, allocator must be provided to all functions that mutate
+//!   // the vector. However, AsmJit users should never need to do this as all
+//!   // manipulation should be done through public API, which takes care of
+//!   // that.
+//!   for (int i = 0; i < 100; i++) {
+//!     ASMJIT_PROPAGATE(vector.append(allocator, i));
+//!   }
+//!
+//!   // By default vector's destructor doesn't release anything as it knows
+//!   // that its content is zone allocated. However, \ref ZoneVector::release
+//!   // can be used to explicitly release the vector data to the allocator if necessary.
+//!   vector.release(allocator);
+//!   return kErrorOk;
+//! }
+//! ```
+//!
+//! Containers like \ref ZoneVector also provide a functionality to reserve a certain number of items before any items
+//! are added to it. This approach is used internally in most places as it allows to prepare space for data that will
+//! be added to some container before the data itself was created.
+//!
+//! ```
+//! using namespace asmjit;
+//!
+//! Error example(ZoneAllocator* allocator) {
+//!   ZoneVector<int> vector;
+//!
+//!   ASMJIT_PROPAGATE(vector.willGrow(allocator, 100));
+//!   for (int i = 0; i < 100; i++) {
+//!     // Cannot fail.
+//!     vector.appendUnsafe(i);
+//!   }
+//!   vector.release(allocator);
+//!   return kErrorOk;
+//! }
+//! ```
+
+
+//! \defgroup asmjit_utilities Utilities
+//! \brief Utility classes and functions.
+//!
+//! ### Overview
+//!
+//! AsmJit uses and provides utility classes and functions, that can be used with AsmJit. The functionality can be
+//! divided into the following topics:
+//!
+//! ### String Functionality
+//!
+//!   - \ref String - AsmJit's string container, which is used internally and which doesn't use exceptions and has
+//!     a stable layout, which is not dependent on C++ standard library.
+//!
+//!   - \ref StringTmp - String that can have base storage allocated on stack. The amount of storage on stack can
+//!     be specified as a template parameter.
+//!
+//!   - \ref FixedString - Fixed string container limited up to N characters.
+//!
+//! ### Code Generation Utilities
+//!
+//!   - \ref ConstPool - Constant pool used by \ref BaseCompiler, but also available to users that may find use of it.
+//!
+//! ### Support Functionality Used by AsmJit
+//!
+//!   - \ref Support namespace provides many other utility functions and classes that are used by AsmJit, and made
+//!     public.
+
+
+//! \defgroup asmjit_x86 X86 Backend
+//! \brief X86/X64 backend.
+
+
+//! \defgroup asmjit_arm ARM Commons
+//! \brief ARM commons shared between AArch32 and AArch64.
+
+
+//! \defgroup asmjit_a64 AArch64 Backend
+//! \brief AArch64 backend.
+
+
+//! \cond INTERNAL
+//! \defgroup asmjit_ra RA
+//! \brief Register allocator internals.
+//! \endcond
+
+} // {asmjit}
+
+#include "asmjit-scope-begin.h"
+#include "core/archtraits.h"
+#include "core/assembler.h"
+#include "core/builder.h"
+#include "core/codeholder.h"
+#include "core/compiler.h"
+#include "core/constpool.h"
+#include "core/cpuinfo.h"
+#include "core/emitter.h"
+#include "core/environment.h"
+#include "core/errorhandler.h"
+#include "core/formatter.h"
+#include "core/func.h"
+#include "core/globals.h"
+#include "core/inst.h"
+#include "core/jitallocator.h"
+#include "core/jitruntime.h"
+#include "core/logger.h"
+#include "core/operand.h"
+#include "core/osutils.h"
+#include "core/string.h"
+#include "core/support.h"
+#include "core/target.h"
+#include "core/type.h"
+#include "core/virtmem.h"
+#include "core/zone.h"
+#include "core/zonehash.h"
+#include "core/zonelist.h"
+#include "core/zonetree.h"
+#include "core/zonestack.h"
+#include "core/zonestring.h"
+#include "core/zonevector.h"
+#include "asmjit-scope-end.h"
+
+#endif // ASMJIT_CORE_H_INCLUDED
diff --git a/lib/lepton/asmjit/core/api-build_p.h b/lib/lepton/asmjit/core/api-build_p.h
new file mode 100644
index 0000000000..6eca971037
--- /dev/null
+++ b/lib/lepton/asmjit/core/api-build_p.h
@@ -0,0 +1,55 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_API_BUILD_P_H_INCLUDED
+#define ASMJIT_CORE_API_BUILD_P_H_INCLUDED
+
+#define ASMJIT_EXPORTS
+
+// Only turn-off these warnings when building asmjit itself.
+#ifdef _MSC_VER
+  #ifndef _CRT_SECURE_NO_DEPRECATE
+    #define _CRT_SECURE_NO_DEPRECATE
+  #endif
+  #ifndef _CRT_SECURE_NO_WARNINGS
+    #define _CRT_SECURE_NO_WARNINGS
+  #endif
+#endif
+
+// Dependencies only required for asmjit build, but never exposed through public headers.
+#ifdef _WIN32
+  #ifndef WIN32_LEAN_AND_MEAN
+    #define WIN32_LEAN_AND_MEAN
+  #endif
+  #ifndef NOMINMAX
+    #define NOMINMAX
+  #endif
+  #include <windows.h>
+#endif
+
+#include "./api-config.h"
+
+#if !defined(ASMJIT_BUILD_DEBUG) && defined(__GNUC__) && !defined(__clang__)
+  #define ASMJIT_FAVOR_SIZE  __attribute__((__optimize__("Os")))
+  #define ASMJIT_FAVOR_SPEED __attribute__((__optimize__("O3")))
+#elif ASMJIT_CXX_HAS_ATTRIBUTE(__minsize__, 0)
+  #define ASMJIT_FAVOR_SIZE __attribute__((__minsize__))
+  #define ASMJIT_FAVOR_SPEED
+#else
+  #define ASMJIT_FAVOR_SIZE
+  #define ASMJIT_FAVOR_SPEED
+#endif
+
+// Make sure '#ifdef'ed unit tests are properly highlighted in IDE.
+#if !defined(ASMJIT_TEST) && defined(__INTELLISENSE__)
+  #define ASMJIT_TEST
+#endif
+
+// Include a unit testing package if this is an `asmjit_test_unit` build.
+#if defined(ASMJIT_TEST)
+  #include "../../../test/broken.h"
+#endif
+
+#endif // ASMJIT_CORE_API_BUILD_P_H_INCLUDED
diff --git a/lib/lepton/asmjit/core/api-config.h b/lib/lepton/asmjit/core/api-config.h
new file mode 100644
index 0000000000..a0fb979eb3
--- /dev/null
+++ b/lib/lepton/asmjit/core/api-config.h
@@ -0,0 +1,613 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_API_CONFIG_H_INCLUDED
+#define ASMJIT_CORE_API_CONFIG_H_INCLUDED
+
+// AsmJit Library & ABI Version
+// ============================
+
+//! \addtogroup asmjit_core
+//! \{
+
+//! AsmJit library version in `(Major << 16) | (Minor << 8) | (Patch)` format.
+#define ASMJIT_LIBRARY_VERSION 0x010900 /* 1.9.0 */
+
+//! \def ASMJIT_ABI_NAMESPACE
+//!
+//! AsmJit ABI namespace is an inline namespace within \ref asmjit namespace.
+//!
+//! It's used to make sure that when a user links to an incompatible version of AsmJit, it won't link. It also has
+//! some additional properties. When `ASMJIT_ABI_NAMESPACE` is defined by the user it overrides the AsmJit default,
+//! which makes it possible to use multiple AsmJit libraries within a single project, totally controlled by the
+//! users. This is especially useful when one of those libraries comes from a third party.
+#ifndef ASMJIT_ABI_NAMESPACE
+  #define ASMJIT_ABI_NAMESPACE _abi_1_9
+#endif
+
+//! \}
+
+// Global Dependencies
+// ===================
+
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdint.h> // We really want std types as globals, not under 'std' namespace.
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <iterator>
+#include <limits>
+#include <new>
+#include <type_traits>
+#include <utility>
+
+#if !defined(_WIN32) && !defined(__EMSCRIPTEN__)
+  #include <pthread.h>
+#endif
+
+// Build Options
+// =============
+
+// NOTE: Doxygen cannot document macros that are not defined, that's why we have to define them and then undefine
+// them immediately, so it won't use the macros with its own preprocessor.
+#ifdef _DOXYGEN
+namespace asmjit {
+
+//! \addtogroup asmjit_build
+//! \{
+
+//! Asmjit is embedded, implies \ref ASMJIT_STATIC.
+#define ASMJIT_EMBED
+
+//! Enables static-library build.
+#define ASMJIT_STATIC
+
+//! Defined when AsmJit's build configuration is 'Debug'.
+//!
+//! \note Can be defined explicitly to bypass autodetection.
+#define ASMJIT_BUILD_DEBUG
+
+//! Defined when AsmJit's build configuration is 'Release'.
+//!
+//! \note Can be defined explicitly to bypass autodetection.
+#define ASMJIT_BUILD_RELEASE
+
+//! Disables X86/X64 backends.
+#define ASMJIT_NO_X86
+
+//! Disables AArch32 backends (both ARM and Thumb).
+#define ASMJIT_NO_AARCH32
+
+//! Disables AArch64 backend.
+#define ASMJIT_NO_AARCH64
+
+//! Disables non-host backends entirely (useful for JIT compilers to minimize the library size).
+#define ASMJIT_NO_FOREIGN
+
+//! Disables deprecated API at compile time (deprecated API won't be available).
+#define ASMJIT_NO_DEPRECATED
+
+//! Disables \ref asmjit_builder functionality completely.
+#define ASMJIT_NO_BUILDER
+
+//! Disables \ref asmjit_compiler functionality completely.
+#define ASMJIT_NO_COMPILER
+
+//! Disables JIT memory management and \ref asmjit::JitRuntime.
+#define ASMJIT_NO_JIT
+
+//! Disables \ref asmjit::Logger and \ref asmjit::Formatter.
+#define ASMJIT_NO_LOGGING
+
+//! Disables everything that contains text.
+#define ASMJIT_NO_TEXT
+
+//! Disables instruction validation API.
+#define ASMJIT_NO_VALIDATION
+
+//! Disables instruction introspection API.
+#define ASMJIT_NO_INTROSPECTION
+
+// Avoid doxygen preprocessor using feature-selection definitions (names must match the #defines above).
+#undef ASMJIT_EMBED
+#undef ASMJIT_STATIC
+#undef ASMJIT_BUILD_DEBUG
+#undef ASMJIT_BUILD_RELEASE
+#undef ASMJIT_NO_X86
+#undef ASMJIT_NO_FOREIGN
+// (keep ASMJIT_NO_DEPRECATED defined, we don't document deprecated APIs).
+#undef ASMJIT_NO_BUILDER
+#undef ASMJIT_NO_COMPILER
+#undef ASMJIT_NO_JIT
+#undef ASMJIT_NO_LOGGING
+#undef ASMJIT_NO_TEXT
+#undef ASMJIT_NO_VALIDATION
+#undef ASMJIT_NO_INTROSPECTION
+
+//! \}
+
+} // {asmjit}
+#endif // _DOXYGEN
+
+// ASMJIT_NO_BUILDER implies ASMJIT_NO_COMPILER.
+#if defined(ASMJIT_NO_BUILDER) && !defined(ASMJIT_NO_COMPILER)
+  #define ASMJIT_NO_COMPILER
+#endif
+
+// Prevent compile-time errors caused by misconfiguration.
+#if defined(ASMJIT_NO_TEXT) && !defined(ASMJIT_NO_LOGGING)
+  #pragma message("'ASMJIT_NO_TEXT' can only be defined when 'ASMJIT_NO_LOGGING' is defined.")
+  #undef ASMJIT_NO_TEXT
+#endif
+
+#if defined(ASMJIT_NO_INTROSPECTION) && !defined(ASMJIT_NO_COMPILER)
+  #pragma message("'ASMJIT_NO_INTROSPECTION' can only be defined when 'ASMJIT_NO_COMPILER' is defined")
+  #undef ASMJIT_NO_INTROSPECTION
+#endif
+
+// Build Mode
+// ==========
+
+// Detect ASMJIT_BUILD_DEBUG and ASMJIT_BUILD_RELEASE if not defined.
+#if !defined(ASMJIT_BUILD_DEBUG) && !defined(ASMJIT_BUILD_RELEASE)
+  #if !defined(NDEBUG)
+    #define ASMJIT_BUILD_DEBUG
+  #else
+    #define ASMJIT_BUILD_RELEASE
+  #endif
+#endif
+
+// Target Architecture Detection
+// =============================
+
+#if defined(_M_X64) || defined(__x86_64__)
+  #define ASMJIT_ARCH_X86 64
+#elif defined(_M_IX86) || defined(__X86__) || defined(__i386__)
+  #define ASMJIT_ARCH_X86 32
+#else
+  #define ASMJIT_ARCH_X86 0
+#endif
+
+#if defined(_M_ARM64) || defined(__arm64__) || defined(__aarch64__) // _M_ARM64: MSVC targeting AArch64.
+  #define ASMJIT_ARCH_ARM 64
+#elif defined(_M_ARM) || defined(_M_ARMT) || defined(__arm__) || defined(__thumb__) || defined(__thumb2__)
+  #define ASMJIT_ARCH_ARM 32
+#else
+  #define ASMJIT_ARCH_ARM 0
+#endif
+
+#if defined(_MIPS_ARCH_MIPS64) || defined(__mips64)
+  #define ASMJIT_ARCH_MIPS 64
+#elif defined(_MIPS_ARCH_MIPS32) || defined(_M_MRX000) || defined(__mips__)
+  #define ASMJIT_ARCH_MIPS 32
+#else
+  #define ASMJIT_ARCH_MIPS 0
+#endif
+
+#define ASMJIT_ARCH_BITS (ASMJIT_ARCH_X86 | ASMJIT_ARCH_ARM | ASMJIT_ARCH_MIPS)
+#if ASMJIT_ARCH_BITS == 0
+  #undef ASMJIT_ARCH_BITS
+  #if defined (__LP64__) || defined(_LP64)
+    #define ASMJIT_ARCH_BITS 64
+  #else
+    #define ASMJIT_ARCH_BITS 32
+  #endif
+#endif
+
+#if (defined(__ARMEB__))  || \
+    (defined(__MIPSEB__)) || \
+    (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))
+  #define ASMJIT_ARCH_LE 0
+  #define ASMJIT_ARCH_BE 1
+#else
+  #define ASMJIT_ARCH_LE 1
+  #define ASMJIT_ARCH_BE 0
+#endif
+
+#if defined(ASMJIT_NO_FOREIGN)
+  #if !ASMJIT_ARCH_X86 && !defined(ASMJIT_NO_X86)
+    #define ASMJIT_NO_X86
+  #endif
+
+  #if !ASMJIT_ARCH_ARM && !defined(ASMJIT_NO_AARCH64)
+    #define ASMJIT_NO_AARCH64
+  #endif
+#endif
+
+
+// C++ Compiler and Features Detection
+// ===================================
+
+#define ASMJIT_CXX_GNU 0
+#define ASMJIT_CXX_MAKE_VER(MAJOR, MINOR) ((MAJOR) * 1000 + (MINOR))
+
+// Intel Compiler [pretends to be GNU or MSC, so it must be checked first]:
+//   - https://software.intel.com/en-us/articles/c0x-features-supported-by-intel-c-compiler
+//   - https://software.intel.com/en-us/articles/c14-features-supported-by-intel-c-compiler
+//   - https://software.intel.com/en-us/articles/c17-features-supported-by-intel-c-compiler
+#if defined(__INTEL_COMPILER)
+
+// MSC Compiler:
+//   - https://msdn.microsoft.com/en-us/library/hh567368.aspx
+//
+// Version List:
+//   - 16.00.0 == VS2010
+//   - 17.00.0 == VS2012
+//   - 18.00.0 == VS2013
+//   - 19.00.0 == VS2015
+//   - 19.10.0 == VS2017
+#elif defined(_MSC_VER) && defined(_MSC_FULL_VER)
+
+// Clang Compiler [Pretends to be GNU, so it must be checked before]:
+//   - https://clang.llvm.org/cxx_status.html
+#elif defined(__clang_major__) && defined(__clang_minor__) && defined(__clang_patchlevel__)
+
+// GNU Compiler:
+//   - https://gcc.gnu.org/projects/cxx-status.html
+#elif defined(__GNUC__) && defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__)
+
+  #undef ASMJIT_CXX_GNU
+  #define ASMJIT_CXX_GNU ASMJIT_CXX_MAKE_VER(__GNUC__, __GNUC_MINOR__)
+
+#endif
+
+// Compiler features detection macros.
+#if defined(__clang__) && defined(__has_attribute)
+  #define ASMJIT_CXX_HAS_ATTRIBUTE(NAME, CHECK) (__has_attribute(NAME))
+#else
+  #define ASMJIT_CXX_HAS_ATTRIBUTE(NAME, CHECK) (!(!(CHECK)))
+#endif
+
+// API Decorators & C++ Extensions
+// ===============================
+
+//! \def ASMJIT_API
+//!
+//! A decorator that is used to decorate API that AsmJit exports when built as a shared library.
+
+// API (Export / Import).
+#if !defined(ASMJIT_STATIC)
+  #if defined(_WIN32) && (defined(_MSC_VER) || defined(__MINGW32__))
+    #ifdef ASMJIT_EXPORTS
+      #define ASMJIT_API __declspec(dllexport)
+    #else
+      #define ASMJIT_API __declspec(dllimport)
+    #endif
+  #elif defined(_WIN32) && defined(__GNUC__)
+    #ifdef ASMJIT_EXPORTS
+      #define ASMJIT_API __attribute__((__dllexport__))
+    #else
+      #define ASMJIT_API __attribute__((__dllimport__))
+    #endif
+  #elif defined(__GNUC__)
+    #define ASMJIT_API __attribute__((__visibility__("default")))
+  #endif
+#endif
+
+#if !defined(ASMJIT_API)
+  #define ASMJIT_API
+#endif
+
+#if !defined(ASMJIT_VARAPI)
+  #define ASMJIT_VARAPI extern ASMJIT_API
+#endif
+
+//! \def ASMJIT_VIRTAPI
+//!
+//! This is basically a workaround. When using MSVC and marking class as DLL export everything gets exported, which
+//! is unwanted in most projects. MSVC automatically exports typeinfo and vtable if at least one symbol of the class
+//! is exported. However, GCC has some strange behavior that even if one or more symbol is exported it doesn't export
+//! typeinfo unless the class itself is decorated with "visibility(default)" (i.e. ASMJIT_API).
+#if !defined(_WIN32) && defined(__GNUC__)
+  #define ASMJIT_VIRTAPI ASMJIT_API
+#else
+  #define ASMJIT_VIRTAPI
+#endif
+
+// Function attributes.
+#if !defined(ASMJIT_BUILD_DEBUG) && defined(__GNUC__)
+  #define ASMJIT_FORCE_INLINE inline __attribute__((__always_inline__))
+#elif !defined(ASMJIT_BUILD_DEBUG) && defined(_MSC_VER)
+  #define ASMJIT_FORCE_INLINE __forceinline
+#else
+  #define ASMJIT_FORCE_INLINE inline
+#endif
+
+#if defined(__GNUC__)
+  #define ASMJIT_NOINLINE __attribute__((__noinline__))
+  #define ASMJIT_NORETURN __attribute__((__noreturn__))
+#elif defined(_MSC_VER)
+  #define ASMJIT_NOINLINE __declspec(noinline)
+  #define ASMJIT_NORETURN __declspec(noreturn)
+#else
+  #define ASMJIT_NOINLINE
+  #define ASMJIT_NORETURN
+#endif
+
+// Calling conventions.
+#if ASMJIT_ARCH_X86 == 32 && defined(__GNUC__)
+  #define ASMJIT_CDECL __attribute__((__cdecl__))
+  #define ASMJIT_STDCALL __attribute__((__stdcall__))
+  #define ASMJIT_FASTCALL __attribute__((__fastcall__))
+  #define ASMJIT_REGPARM(N) __attribute__((__regparm__(N)))
+#elif ASMJIT_ARCH_X86 == 32 && defined(_MSC_VER)
+  #define ASMJIT_CDECL __cdecl
+  #define ASMJIT_STDCALL __stdcall
+  #define ASMJIT_FASTCALL __fastcall
+  #define ASMJIT_REGPARM(N)
+#else
+  #define ASMJIT_CDECL
+  #define ASMJIT_STDCALL
+  #define ASMJIT_FASTCALL
+  #define ASMJIT_REGPARM(N)
+#endif
+
+#if ASMJIT_ARCH_X86 && defined(_WIN32) && defined(_MSC_VER)
+  #define ASMJIT_VECTORCALL __vectorcall
+#elif ASMJIT_ARCH_X86 && defined(_WIN32)
+  #define ASMJIT_VECTORCALL __attribute__((__vectorcall__))
+#else
+  #define ASMJIT_VECTORCALL
+#endif
+
+// Type alignment (not allowed by C++11 'alignas' keyword).
+#if defined(__GNUC__)
+  #define ASMJIT_ALIGN_TYPE(TYPE, N) __attribute__((__aligned__(N))) TYPE
+#elif defined(_MSC_VER)
+  #define ASMJIT_ALIGN_TYPE(TYPE, N) __declspec(align(N)) TYPE
+#else
+  #define ASMJIT_ALIGN_TYPE(TYPE, N) TYPE
+#endif
+
+//! \def ASMJIT_MAY_ALIAS
+//!
+//! Expands to `__attribute__((__may_alias__))` if supported.
+#if defined(__GNUC__)
+  #define ASMJIT_MAY_ALIAS __attribute__((__may_alias__))
+#else
+  #define ASMJIT_MAY_ALIAS
+#endif
+
+//! \def ASMJIT_MAYBE_UNUSED
+//!
+//! Expands to `[[maybe_unused]]` if supported or a compiler attribute instead.
+#if __cplusplus >= 201703L
+  #define ASMJIT_MAYBE_UNUSED [[maybe_unused]]
+#elif defined(__GNUC__)
+  #define ASMJIT_MAYBE_UNUSED __attribute__((unused))
+#else
+  #define ASMJIT_MAYBE_UNUSED
+#endif
+
+#if defined(__clang_major__) && __clang_major__ >= 4 && !defined(_DOXYGEN)
+  // NOTE: Clang allows to apply this attribute to function arguments, which is what we want. Once GCC decides to
+  // support this use, we will enable it for GCC as well. However, until that, it will be clang only, which is
+  // what we need for static analysis.
+  #define ASMJIT_NONNULL(FUNCTION_ARGUMENT) FUNCTION_ARGUMENT __attribute__((__nonnull__))
+#else
+  #define ASMJIT_NONNULL(FUNCTION_ARGUMENT) FUNCTION_ARGUMENT
+#endif
+
+//! \def ASMJIT_NOEXCEPT_TYPE
+//!
+//! Defined to `noexcept` in C++17 mode or nothing otherwise. Used by function typedefs.
+#if __cplusplus >= 201703L
+  #define ASMJIT_NOEXCEPT_TYPE noexcept
+#else
+  #define ASMJIT_NOEXCEPT_TYPE
+#endif
+
+//! \def ASMJIT_ASSUME(...)
+//!
+//! Macro that tells the C/C++ compiler that the expression `...` evaluates to true.
+//!
+//! This macro has two purposes:
+//!
+//!   1. Enable optimizations that would not be possible without the assumption.
+//!   2. Hint static analysis tools that a certain condition is true to prevent false positives.
+#if defined(__clang__)
+  #define ASMJIT_ASSUME(...) __builtin_assume(__VA_ARGS__)
+#elif defined(__GNUC__)
+  #define ASMJIT_ASSUME(...) do { if (!(__VA_ARGS__)) __builtin_unreachable(); } while (0)
+#elif defined(_MSC_VER)
+  #define ASMJIT_ASSUME(...) __assume(__VA_ARGS__)
+#else
+  #define ASMJIT_ASSUME(...) (void)0
+#endif
+
+//! \def ASMJIT_LIKELY(...)
+//!
+//! Condition is likely to be taken (mostly the common, non-error path).
+
+//! \def ASMJIT_UNLIKELY(...)
+//!
+//! Condition is unlikely to be taken (mostly error handling and edge cases).
+#if defined(__GNUC__)
+  #define ASMJIT_LIKELY(...) __builtin_expect(!!(__VA_ARGS__), 1)
+  #define ASMJIT_UNLIKELY(...) __builtin_expect(!!(__VA_ARGS__), 0)
+#else
+  #define ASMJIT_LIKELY(...) (__VA_ARGS__)
+  #define ASMJIT_UNLIKELY(...) (__VA_ARGS__)
+#endif
+
+//! \def ASMJIT_FALLTHROUGH
+//!
+//! Portable [[fallthrough]] attribute.
+#if defined(__clang__) && __cplusplus >= 201103L
+  #define ASMJIT_FALLTHROUGH [[clang::fallthrough]]
+#elif defined(__GNUC__) && __GNUC__ >= 7
+  #define ASMJIT_FALLTHROUGH __attribute__((__fallthrough__))
+#else
+  #define ASMJIT_FALLTHROUGH ((void)0) /* fallthrough */
+#endif
+
+//! \def ASMJIT_DEPRECATED
+//!
+//! Marks function, class, struct, enum, or anything else as deprecated.
+#if defined(__GNUC__)
+  #define ASMJIT_DEPRECATED(MESSAGE) __attribute__((__deprecated__(MESSAGE)))
+  #if defined(__clang__)
+    #define ASMJIT_DEPRECATED_STRUCT(MESSAGE) __attribute__((__deprecated__(MESSAGE)))
+  #else
+    #define ASMJIT_DEPRECATED_STRUCT(MESSAGE) /* not usable if a deprecated function uses it */
+  #endif
+#elif defined(_MSC_VER)
+  #define ASMJIT_DEPRECATED(MESSAGE) __declspec(deprecated(MESSAGE))
+  #define ASMJIT_DEPRECATED_STRUCT(MESSAGE) /* not usable if a deprecated function uses it */
+#else
+  #define ASMJIT_DEPRECATED(MESSAGE)
+  #define ASMJIT_DEPRECATED_STRUCT(MESSAGE)
+#endif
+
+// Utilities.
+#define ASMJIT_OFFSET_OF(STRUCT, MEMBER) ((int)(intptr_t)((const char*)&((const STRUCT*)0x100)->MEMBER) - 0x100)
+#define ASMJIT_ARRAY_SIZE(X) uint32_t(sizeof(X) / sizeof(X[0]))
+
+#if ASMJIT_CXX_HAS_ATTRIBUTE(no_sanitize, 0)
+  #define ASMJIT_ATTRIBUTE_NO_SANITIZE_UNDEF __attribute__((__no_sanitize__("undefined")))
+#elif ASMJIT_CXX_GNU >= ASMJIT_CXX_MAKE_VER(4, 9)
+  #define ASMJIT_ATTRIBUTE_NO_SANITIZE_UNDEF __attribute__((__no_sanitize_undefined__))
+#else
+  #define ASMJIT_ATTRIBUTE_NO_SANITIZE_UNDEF
+#endif
+
+// Begin-Namespace & End-Namespace Macros
+// ======================================
+
+#if defined _DOXYGEN
+  #define ASMJIT_BEGIN_NAMESPACE namespace asmjit {
+  #define ASMJIT_END_NAMESPACE }
+#elif defined(__clang__)
+  #define ASMJIT_BEGIN_NAMESPACE                                              \
+    namespace asmjit { inline namespace ASMJIT_ABI_NAMESPACE {                \
+      _Pragma("clang diagnostic push")                                        \
+      _Pragma("clang diagnostic ignored \"-Wconstant-logical-operand\"")      \
+      _Pragma("clang diagnostic ignored \"-Wunnamed-type-template-args\"")
+  #define ASMJIT_END_NAMESPACE                                                \
+      _Pragma("clang diagnostic pop")                                         \
+    }}
+#elif defined(__GNUC__) && __GNUC__ == 4
+  #define ASMJIT_BEGIN_NAMESPACE                                              \
+    namespace asmjit { inline namespace ASMJIT_ABI_NAMESPACE {                \
+      _Pragma("GCC diagnostic push")                                          \
+      _Pragma("GCC diagnostic ignored \"-Wmissing-field-initializers\"")
+  #define ASMJIT_END_NAMESPACE                                                \
+      _Pragma("GCC diagnostic pop")                                           \
+    }}
+#elif defined(__GNUC__) && __GNUC__ >= 8
+  #define ASMJIT_BEGIN_NAMESPACE                                              \
+    namespace asmjit { inline namespace ASMJIT_ABI_NAMESPACE {                \
+      _Pragma("GCC diagnostic push")                                          \
+      _Pragma("GCC diagnostic ignored \"-Wclass-memaccess\"")
+  #define ASMJIT_END_NAMESPACE                                                \
+      _Pragma("GCC diagnostic pop")                                           \
+    }}
+#elif defined(_MSC_VER) && !defined(__INTEL_COMPILER)
+  #define ASMJIT_BEGIN_NAMESPACE                                              \
+    namespace asmjit { inline namespace ASMJIT_ABI_NAMESPACE {                \
+      __pragma(warning(push))                                                 \
+      __pragma(warning(disable: 4127))  /* conditional expression is const */ \
+      __pragma(warning(disable: 4201))  /* nameless struct/union */
+  #define ASMJIT_END_NAMESPACE                                                \
+      __pragma(warning(pop))                                                  \
+    }}
+#endif
+
+#if !defined(ASMJIT_BEGIN_NAMESPACE) && !defined(ASMJIT_END_NAMESPACE)
+  #define ASMJIT_BEGIN_NAMESPACE namespace asmjit { inline namespace ASMJIT_ABI_NAMESPACE {
+  #define ASMJIT_END_NAMESPACE }}
+#endif
+
+#define ASMJIT_BEGIN_SUB_NAMESPACE(NAMESPACE)                                 \
+  ASMJIT_BEGIN_NAMESPACE                                                      \
+  namespace NAMESPACE {
+
+#define ASMJIT_END_SUB_NAMESPACE                                              \
+  }                                                                           \
+  ASMJIT_END_NAMESPACE
+
+// C++ Utilities
+// =============
+
+#define ASMJIT_NONCOPYABLE(Type)                                              \
+    Type(const Type& other) = delete;                                         \
+    Type& operator=(const Type& other) = delete;
+
+#define ASMJIT_NONCONSTRUCTIBLE(Type)                                         \
+    Type() = delete;                                                          \
+    Type(const Type& other) = delete;                                         \
+    Type& operator=(const Type& other) = delete;
+
+//! \def ASMJIT_DEFINE_ENUM_FLAGS(T)
+//!
+//! Defines bit operations for enumeration flags.
+#ifdef _DOXYGEN
+  #define ASMJIT_DEFINE_ENUM_FLAGS(T)
+#else
+  #define ASMJIT_DEFINE_ENUM_FLAGS(T)                                         \
+    static ASMJIT_FORCE_INLINE constexpr T operator~(T a) noexcept {          \
+      return T(~(std::underlying_type<T>::type)(a));                          \
+    }                                                                         \
+                                                                              \
+    static ASMJIT_FORCE_INLINE constexpr T operator|(T a, T b) noexcept {     \
+      return T((std::underlying_type<T>::type)(a) |                           \
+              (std::underlying_type<T>::type)(b));                            \
+    }                                                                         \
+    static ASMJIT_FORCE_INLINE constexpr T operator&(T a, T b) noexcept {     \
+      return T((std::underlying_type<T>::type)(a) &                           \
+              (std::underlying_type<T>::type)(b));                            \
+    }                                                                         \
+    static ASMJIT_FORCE_INLINE constexpr T operator^(T a, T b) noexcept {     \
+      return T((std::underlying_type<T>::type)(a) ^                           \
+              (std::underlying_type<T>::type)(b));                            \
+    }                                                                         \
+                                                                              \
+    static ASMJIT_FORCE_INLINE T& operator|=(T& a, T b) noexcept {            \
+      a = T((std::underlying_type<T>::type)(a) |                              \
+            (std::underlying_type<T>::type)(b));                              \
+      return a;                                                               \
+    }                                                                         \
+    static ASMJIT_FORCE_INLINE T& operator&=(T& a, T b) noexcept {            \
+      a = T((std::underlying_type<T>::type)(a) &                              \
+            (std::underlying_type<T>::type)(b));                              \
+      return a;                                                               \
+    }                                                                         \
+    static ASMJIT_FORCE_INLINE T& operator^=(T& a, T b) noexcept {            \
+      a = T((std::underlying_type<T>::type)(a) ^                              \
+            (std::underlying_type<T>::type)(b));                              \
+      return a;                                                               \
+    }
+#endif
+
+//! \def ASMJIT_DEFINE_ENUM_COMPARE(T)
+//!
+//! Defines comparison operations for enumeration flags.
+#ifdef _DOXYGEN
+  #define ASMJIT_DEFINE_ENUM_COMPARE(T)
+#else
+  #define ASMJIT_DEFINE_ENUM_COMPARE(T)                                                \
+    static ASMJIT_FORCE_INLINE bool operator<(T a, T b) noexcept {                     \
+      return (std::underlying_type<T>::type)(a) < (std::underlying_type<T>::type)(b);  \
+    }                                                                                  \
+    static ASMJIT_FORCE_INLINE bool operator<=(T a, T b) noexcept {                    \
+      return (std::underlying_type<T>::type)(a) <= (std::underlying_type<T>::type)(b); \
+    }                                                                                  \
+    static ASMJIT_FORCE_INLINE bool operator>(T a, T b) noexcept {                     \
+      return (std::underlying_type<T>::type)(a) > (std::underlying_type<T>::type)(b);  \
+    }                                                                                  \
+    static ASMJIT_FORCE_INLINE bool operator>=(T a, T b) noexcept {                    \
+      return (std::underlying_type<T>::type)(a) >= (std::underlying_type<T>::type)(b); \
+    }
+#endif
+
+// Cleanup Api-Config Specific Macros
+// ==================================
+
+#undef ASMJIT_CXX_GNU
+#undef ASMJIT_CXX_MAKE_VER
+
+#endif // ASMJIT_CORE_API_CONFIG_H_INCLUDED
diff --git a/lib/lepton/asmjit/core/archcommons.h b/lib/lepton/asmjit/core/archcommons.h
new file mode 100644
index 0000000000..e9d2c84d72
--- /dev/null
+++ b/lib/lepton/asmjit/core/archcommons.h
@@ -0,0 +1,229 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_ARCHCOMMONS_H_INCLUDED
+#define ASMJIT_CORE_ARCHCOMMONS_H_INCLUDED
+
+// This file provides architecture-specific classes that are required in the core library. For example, an Imm operand
+// can be created from arm::Shift in a constexpr way, so arm::Shift must be provided here. This header file therefore
+// provides everything architecture-specific that is used by the Core API.
+
+#include "../core/globals.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(arm)
+
+//! \addtogroup asmjit_arm
+//! \{
+
+//! Condition code (both AArch32 & AArch64).
+//!
+//! \note This enumeration doesn't match condition code that is used in AArch32/AArch64 opcodes. In general this
+//! condition code is encoded as `(cc - 2) & 0xF` so that `kAL` condition code is zero and encoded as 0xE in opcode.
+//! This makes it easier to use a condition code as an instruction modifier that defaults to 'al'.
+enum class CondCode : uint8_t {
+  kAL             = 0x00u,      //!< (no condition code) (always)
+  kNA             = 0x01u,      //!< (not available)     (special)
+  kEQ             = 0x02u,      //!<        Z==1         (any_sign ==)
+  kNE             = 0x03u,      //!<        Z==0         (any_sign !=)
+  kCS             = 0x04u,      //!< C==1                (unsigned >=)
+  kHS             = 0x04u,      //!< C==1                (unsigned >=)
+  kCC             = 0x05u,      //!< C==0                (unsigned < )
+  kLO             = 0x05u,      //!< C==0                (unsigned < )
+  kMI             = 0x06u,      //!<               N==1  (is negative)
+  kPL             = 0x07u,      //!<               N==0  (is positive or zero)
+  kVS             = 0x08u,      //!<               V==1  (is overflow)
+  kVC             = 0x09u,      //!<               V==0  (no overflow)
+  kHI             = 0x0Au,      //!< C==1 & Z==0         (unsigned > )
+  kLS             = 0x0Bu,      //!< C==0 | Z==1         (unsigned <=)
+  kGE             = 0x0Cu,      //!<               N==V  (signed   >=)
+  kLT             = 0x0Du,      //!<               N!=V  (signed   < )
+  kGT             = 0x0Eu,      //!<        Z==0 & N==V  (signed   > )
+  kLE             = 0x0Fu,      //!<        Z==1 | N!=V  (signed   <=)
+
+  kSign           = kMI,        //!< Sign.
+  kNotSign        = kPL,        //!< Not sign.
+
+  kOverflow       = kVS,        //!< Signed overflow.
+  kNotOverflow    = kVC,        //!< Not signed overflow.
+
+  kEqual          = kEQ,        //!< Equal     `a == b`.
+  kNotEqual       = kNE,        //!< Not Equal `a != b`.
+
+  kZero           = kEQ,        //!< Zero (alias to equal).
+  kNotZero        = kNE,        //!< Not Zero (alias to Not Equal).
+
+  kNegative       = kMI,        //!< Negative.
+  kPositive       = kPL,        //!< Positive or zero.
+
+  kSignedLT       = kLT,        //!< Signed    `a <  b`.
+  kSignedLE       = kLE,        //!< Signed    `a <= b`.
+  kSignedGT       = kGT,        //!< Signed    `a >  b`.
+  kSignedGE       = kGE,        //!< Signed    `a >= b`.
+
+  kUnsignedLT     = kLO,        //!< Unsigned  `a <  b`.
+  kUnsignedLE     = kLS,        //!< Unsigned  `a <= b`.
+  kUnsignedGT     = kHI,        //!< Unsigned  `a >  b`.
+  kUnsignedGE     = kHS,        //!< Unsigned  `a >= b`.
+
+  kAlways         = kAL,        //!< No condition code (always).
+
+  kMaxValue       = 0x0Fu       //!< Maximum value of `CondCode`.
+};
+
+//! Negates a condition code by flipping its lowest bit (e.g. `kEQ` <-> `kNE`); note that `kAL` maps to `kNA`.
+static inline constexpr CondCode negateCond(CondCode cond) noexcept { return CondCode(uint8_t(cond) ^ uint8_t(1)); }
+
+//! Data type that can be encoded with the instruction (AArch32 only).
+enum class DataType : uint32_t {
+  //! No data type specified (default for all general purpose instructions).
+  kNone = 0,
+  //! 8-bit signed integer, specified as `.s8` in assembly.
+  kS8 = 1,
+  //! 16-bit signed integer, specified as `.s16` in assembly.
+  kS16 = 2,
+  //! 32-bit signed integer, specified as `.s32` in assembly.
+  kS32 = 3,
+  //! 64-bit signed integer, specified as `.s64` in assembly.
+  kS64 = 4,
+  //! 8-bit unsigned integer, specified as `.u8` in assembly.
+  kU8 = 5,
+  //! 16-bit unsigned integer, specified as `.u16` in assembly.
+  kU16 = 6,
+  //! 32-bit unsigned integer, specified as `.u32` in assembly.
+  kU32 = 7,
+  //! 64-bit unsigned integer, specified as `.u64` in assembly.
+  kU64 = 8,
+  //! 16-bit floating point (half precision), specified as `.f16` in assembly.
+  kF16 = 10,
+  //! 32-bit floating point (single precision), specified as `.f32` in assembly.
+  kF32 = 11,
+  //! 64-bit floating point (double precision), specified as `.f64` in assembly.
+  kF64 = 12,
+  //! 8-bit polynomial.
+  kP8 = 13,
+  //! 64-bit polynomial.
+  kP64 = 15,
+
+  //! Maximum value of `DataType`.
+  kMaxValue = 15
+};
+
+//! Shift operation predicate (ARM) describes either SHIFT or EXTEND operation.
+//!
+//! \note The constants are AsmJit specific. The first 5 values describe real constants on ARM32 and AArch64 hardware,
+//! however, the additional constants that describe extend modes are specific to AsmJit and would be translated to the
+//! AArch64 specific constants by the assembler.
+enum class ShiftOp : uint32_t {
+  //! Shift left logical operation (default).
+  //!
+  //! Available to all ARM architectures.
+  kLSL = 0x00u,
+
+  //! Shift right logical operation.
+  //!
+  //! Available to all ARM architectures.
+  kLSR = 0x01u,
+
+  //! Shift right arithmetic operation.
+  //!
+  //! Available to all ARM architectures.
+  kASR = 0x02u,
+
+  //! Rotate right operation (AArch32 only).
+  kROR = 0x03u,
+
+  //! Rotate right with carry operation (encoded as `ShiftOp::kROR` with zero) (AArch32 only).
+  kRRX = 0x04u,
+
+  //! Shift left by filling low order bits with ones.
+  kMSL = 0x05u,
+
+  //! UXTB extend register operation (AArch64 only).
+  kUXTB = 0x06u,
+  //! UXTH extend register operation (AArch64 only).
+  kUXTH = 0x07u,
+  //! UXTW extend register operation (AArch64 only).
+  kUXTW = 0x08u,
+  //! UXTX extend register operation (AArch64 only).
+  kUXTX = 0x09u,
+
+  //! SXTB extend register operation (AArch64 only).
+  kSXTB = 0x0Au,
+  //! SXTH extend register operation (AArch64 only).
+  kSXTH = 0x0Bu,
+  //! SXTW extend register operation (AArch64 only).
+  kSXTW = 0x0Cu,
+  //! SXTX extend register operation (AArch64 only).
+  kSXTX = 0x0Du
+
+  // NOTE: 0xE and 0xF are used by memory operand to specify POST|PRE offset mode.
+};
+
+//! Represents ARM immediate shift operation type and value.
+class Shift {
+public:
+  //! Shift operation.
+  ShiftOp _op;
+  //! Shift value.
+  uint32_t _value;
+
+  //! Default constructed Shift is not initialized.
+  inline Shift() noexcept = default;
+
+  //! Copy constructor (default).
+  constexpr Shift(const Shift& other) noexcept = default;
+
+  //! Constructs Shift from operation `op` and shift `value`.
+  constexpr Shift(ShiftOp op, uint32_t value) noexcept
+    : _op(op),
+      _value(value) {}
+
+  //! Returns the shift operation.
+  constexpr ShiftOp op() const noexcept { return _op; }
+  //! Sets shift operation to `op`.
+  inline void setOp(ShiftOp op) noexcept { _op = op; }
+
+  //! Returns the shift amount.
+  constexpr uint32_t value() const noexcept { return _value; }
+  //! Sets shift amount to `value`.
+  inline void setValue(uint32_t value) noexcept { _value = value; }
+};
+
+//! Constructs a `LSL #value` shift (logical shift left).
+static constexpr Shift lsl(uint32_t value) noexcept { return Shift(ShiftOp::kLSL, value); }
+//! Constructs a `LSR #value` shift (logical shift right).
+static constexpr Shift lsr(uint32_t value) noexcept { return Shift(ShiftOp::kLSR, value); }
+//! Constructs a `ASR #value` shift (arithmetic shift right).
+static constexpr Shift asr(uint32_t value) noexcept { return Shift(ShiftOp::kASR, value); }
+//! Constructs a `ROR #value` shift (rotate right).
+static constexpr Shift ror(uint32_t value) noexcept { return Shift(ShiftOp::kROR, value); }
+//! Constructs a `RRX` shift (rotate with carry by 1).
+static constexpr Shift rrx() noexcept { return Shift(ShiftOp::kRRX, 0); }
+//! Constructs a `MSL #value` shift (logical shift left filling ones).
+static constexpr Shift msl(uint32_t value) noexcept { return Shift(ShiftOp::kMSL, value); }
+
+//! Constructs a `UXTB #value` extend and shift (unsigned byte extend).
+static constexpr Shift uxtb(uint32_t value) noexcept { return Shift(ShiftOp::kUXTB, value); }
+//! Constructs a `UXTH #value` extend and shift (unsigned hword extend).
+static constexpr Shift uxth(uint32_t value) noexcept { return Shift(ShiftOp::kUXTH, value); }
+//! Constructs a `UXTW #value` extend and shift (unsigned word extend).
+static constexpr Shift uxtw(uint32_t value) noexcept { return Shift(ShiftOp::kUXTW, value); }
+//! Constructs a `UXTX #value` extend and shift (unsigned dword extend).
+static constexpr Shift uxtx(uint32_t value) noexcept { return Shift(ShiftOp::kUXTX, value); }
+
+//! Constructs a `SXTB #value` extend and shift (signed byte extend).
+static constexpr Shift sxtb(uint32_t value) noexcept { return Shift(ShiftOp::kSXTB, value); }
+//! Constructs a `SXTH #value` extend and shift (signed hword extend).
+static constexpr Shift sxth(uint32_t value) noexcept { return Shift(ShiftOp::kSXTH, value); }
+//! Constructs a `SXTW #value` extend and shift (signed word extend).
+static constexpr Shift sxtw(uint32_t value) noexcept { return Shift(ShiftOp::kSXTW, value); }
+//! Constructs a `SXTX #value` extend and shift (signed dword extend).
+static constexpr Shift sxtx(uint32_t value) noexcept { return Shift(ShiftOp::kSXTX, value); }
+
+//! \}
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // ASMJIT_CORE_ARCHCOMMONS_H_INCLUDED
diff --git a/lib/lepton/asmjit/core/archtraits.cpp b/lib/lepton/asmjit/core/archtraits.cpp
new file mode 100644
index 0000000000..fc825df800
--- /dev/null
+++ b/lib/lepton/asmjit/core/archtraits.cpp
@@ -0,0 +1,160 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#include "../core/archtraits.h"
+#include "../core/misc_p.h"
+
+#if !defined(ASMJIT_NO_X86)
+  #include "../x86/x86archtraits_p.h"
+#endif
+
+#if !defined(ASMJIT_NO_AARCH64)
+  #include "../arm/a64archtraits_p.h"
+#endif
+
+ASMJIT_BEGIN_NAMESPACE
+
+static const constexpr ArchTraits noArchTraits = {
+  // SP/FP/LR/PC register ids (0xFF for each, i.e. none assigned).
+  0xFF, 0xFF, 0xFF, 0xFF,
+
+  // Reserved.
+  { 0, 0, 0 },
+
+  // HW stack alignment.
+  0,
+
+  // Min/Max stack offset.
+  0, 0,
+
+  // Instruction hints [Gp, Vec, Other0, Other1].
+  {{
+    InstHints::kNoHints,
+    InstHints::kNoHints,
+    InstHints::kNoHints,
+    InstHints::kNoHints
+  }},
+
+  // RegTypeToSignature.
+  #define V(index) OperandSignature{0}
+  {{ ASMJIT_LOOKUP_TABLE_32(V, 0) }},
+  #undef V
+
+  // RegTypeToTypeId.
+  #define V(index) TypeId::kVoid
+  {{ ASMJIT_LOOKUP_TABLE_32(V, 0) }},
+  #undef V
+
+  // TypeIdToRegType.
+  #define V(index) RegType::kNone
+  {{ ASMJIT_LOOKUP_TABLE_32(V, 0) }},
+  #undef V
+
+  // Word names of 8-bit, 16-bit, 32-bit, and 64-bit quantities.
+  {
+    ArchTypeNameId::kByte,
+    ArchTypeNameId::kHalf,
+    ArchTypeNameId::kWord,
+    ArchTypeNameId::kQuad
+  }
+};
+
+ASMJIT_VARAPI const ArchTraits _archTraits[uint32_t(Arch::kMaxValue) + 1] = {
+  // No architecture.
+  noArchTraits,
+
+  // X86/X64 architectures.
+#if !defined(ASMJIT_NO_X86)
+  x86::x86ArchTraits,
+  x86::x64ArchTraits,
+#else
+  noArchTraits,
+  noArchTraits,
+#endif
+
+  // RISCV32/RISCV64 architectures.
+  noArchTraits,
+  noArchTraits,
+
+  // ARM architecture.
+  noArchTraits,
+
+  // AArch64 architecture.
+#if !defined(ASMJIT_NO_AARCH64)
+  a64::a64ArchTraits,
+#else
+  noArchTraits,
+#endif
+
+  // ARM/Thumb architecture.
+  noArchTraits,
+
+  // Reserved.
+  noArchTraits,
+
+  // MIPS32/MIPS64 (indices 9 and 10); the remaining slots up to Arch::kMaxValue have no explicit initializer here.
+  noArchTraits,
+  noArchTraits
+};
+
+ASMJIT_FAVOR_SIZE Error ArchUtils::typeIdToRegSignature(Arch arch, TypeId typeId, TypeId* typeIdOut, OperandSignature* regSignatureOut) noexcept {
+  const ArchTraits& archTraits = ArchTraits::byArch(arch);
+
+  // TODO: Remove this, should never be used like this.
+  // A value <= RegType::kMaxValue is treated as a RegType passed instead of a TypeId and mapped to its TypeId.
+  if (uint32_t(typeId) <= uint32_t(RegType::kMaxValue))
+    typeId = archTraits.regTypeToTypeId(RegType(uint32_t(typeId)));
+
+  if (ASMJIT_UNLIKELY(!TypeUtils::isValid(typeId)))
+    return DebugUtils::errored(kErrorInvalidTypeId);
+
+  // First normalize abstract (architecture dependent) types: kIntPtr becomes Int32/Int64, others UInt32/UInt64.
+  if (TypeUtils::isAbstract(typeId)) {
+    bool is32Bit = Environment::is32Bit(arch);
+    if (typeId == TypeId::kIntPtr)
+      typeId = is32Bit ? TypeId::kInt32 : TypeId::kInt64;
+    else
+      typeId = is32Bit ? TypeId::kUInt32 : TypeId::kUInt64;
+  }
+
+  // Type size helps to construct all groups of registers.
+  // TypeId is invalid if the size is zero.
+  uint32_t size = TypeUtils::sizeOf(typeId);
+  if (ASMJIT_UNLIKELY(!size))
+    return DebugUtils::errored(kErrorInvalidTypeId);
+
+  if (ASMJIT_UNLIKELY(typeId == TypeId::kFloat80))
+    return DebugUtils::errored(kErrorInvalidUseOfF80);
+
+  RegType regType = RegType::kNone;
+  if (TypeUtils::isBetween(typeId, TypeId::_kBaseStart, TypeId::_kVec32Start)) {
+    regType = archTraits._typeIdToRegType[uint32_t(typeId) - uint32_t(TypeId::_kBaseStart)];
+    if (regType == RegType::kNone) {
+      if (typeId == TypeId::kInt64 || typeId == TypeId::kUInt64)
+        return DebugUtils::errored(kErrorInvalidUseOfGpq);
+      else
+        return DebugUtils::errored(kErrorInvalidTypeId);
+    }
+  }
+  else {
+    if (size <= 8 && archTraits._regSignature[RegType::kVec64].isValid())
+      regType = RegType::kVec64;
+    else if (size <= 16 && archTraits._regSignature[RegType::kVec128].isValid())
+      regType = RegType::kVec128;
+    else if (size == 32 && archTraits._regSignature[RegType::kVec256].isValid())
+      regType = RegType::kVec256;
+    else if (archTraits._regSignature[RegType::kVec512].isValid())
+      regType = RegType::kVec512;
+    else
+      return DebugUtils::errored(kErrorInvalidTypeId);
+  }
+
+  *typeIdOut = typeId;
+  *regSignatureOut = archTraits.regTypeToSignature(regType);
+  return kErrorOk;
+}
+
+ASMJIT_END_NAMESPACE
diff --git a/lib/lepton/asmjit/core/archtraits.h b/lib/lepton/asmjit/core/archtraits.h
new file mode 100644
index 0000000000..192a826e51
--- /dev/null
+++ b/lib/lepton/asmjit/core/archtraits.h
@@ -0,0 +1,290 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_ARCHTRAITS_H_INCLUDED
+#define ASMJIT_CORE_ARCHTRAITS_H_INCLUDED
+
+#include "../core/operand.h"
+#include "../core/support.h"
+#include "../core/type.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \addtogroup asmjit_core
+//! \{
+
+//! Instruction set architecture (ISA).
+enum class Arch : uint8_t {
+  //! Unknown or uninitialized ISA.
+  kUnknown = 0,
+
+  //! 32-bit X86 ISA.
+  kX86 = 1,
+  //! 64-bit X86 ISA also known as X64, X86_64, and AMD64.
+  kX64 = 2,
+
+  //! 32-bit RISC-V ISA.
+  kRISCV32 = 3,
+  //! 64-bit RISC-V ISA.
+  kRISCV64 = 4,
+
+  //! 32-bit ARM ISA (little endian).
+  kARM = 5,
+  //! 64-bit ARM ISA (little endian).
+  kAArch64 = 6,
+  //! 32-bit ARM ISA in Thumb mode (little endian).
+  kThumb = 7,
+
+  // 8 is not used at the moment, even numbers are 64-bit architectures.
+
+  //! 32-bit MIPS ISA (little endian).
+  kMIPS32_LE = 9,
+  //! 64-bit MIPS ISA (little endian).
+  kMIPS64_LE = 10,
+
+  //! 32-bit ARM ISA (big endian).
+  kARM_BE = 11,
+  //! 64-bit ARM ISA (big endian).
+  kAArch64_BE = 12,
+  //! 32-bit ARM ISA in Thumb mode (big endian).
+  kThumb_BE = 13,
+
+  // 14 is not used at the moment, even numbers are 64-bit architectures.
+
+  //! 32-bit MIPS ISA (big endian).
+  kMIPS32_BE = 15,
+  //! 64-bit MIPS ISA (big endian).
+  kMIPS64_BE = 16,
+
+  //! Maximum value of `Arch`.
+  kMaxValue = kMIPS64_BE,
+
+  //! Mask used by 32-bit ISAs (odd are 32-bit, even are 64-bit).
+  k32BitMask = 0x01,
+  //! First big-endian architecture.
+  kBigEndian = kARM_BE,
+
+  //! ISA detected at compile-time (ISA of the host).
+  kHost =
+#if defined(_DOXYGEN)
+    DETECTED_AT_COMPILE_TIME
+#else
+    ASMJIT_ARCH_X86 == 32 ? kX86 :
+    ASMJIT_ARCH_X86 == 64 ? kX64 :
+
+    ASMJIT_ARCH_ARM == 32 && ASMJIT_ARCH_LE ? kARM :
+    ASMJIT_ARCH_ARM == 32 && ASMJIT_ARCH_BE ? kARM_BE :
+    ASMJIT_ARCH_ARM == 64 && ASMJIT_ARCH_LE ? kAArch64 :
+    ASMJIT_ARCH_ARM == 64 && ASMJIT_ARCH_BE ? kAArch64_BE :
+
+    ASMJIT_ARCH_MIPS == 32 && ASMJIT_ARCH_LE ? kMIPS32_LE :
+    ASMJIT_ARCH_MIPS == 32 && ASMJIT_ARCH_BE ? kMIPS32_BE :
+    ASMJIT_ARCH_MIPS == 64 && ASMJIT_ARCH_LE ? kMIPS64_LE :
+    ASMJIT_ARCH_MIPS == 64 && ASMJIT_ARCH_BE ? kMIPS64_BE :
+
+    kUnknown
+#endif
+};
+
+//! Sub-architecture.
+enum class SubArch : uint8_t {
+  //! Unknown or uninitialized architecture sub-type.
+  kUnknown = 0,
+
+  //! Maximum value of `SubArch`.
+  kMaxValue = kUnknown,
+
+  //! Sub-architecture detected at compile-time (sub-architecture of the host).
+  kHost =
+#if defined(_DOXYGEN)
+    DETECTED_AT_COMPILE_TIME
+#else
+    kUnknown
+#endif
+};
+
+//! Identifier used to represent names of different data types across architectures.
+enum class ArchTypeNameId : uint8_t {
+  //! Describes 'db' (X86/X86_64 convention, always 8-bit quantity).
+  kDB = 0,
+  //! Describes 'dw' (X86/X86_64 convention, always 16-bit word).
+  kDW,
+  //! Describes 'dd' (X86/X86_64 convention, always 32-bit word).
+  kDD,
+  //! Describes 'dq' (X86/X86_64 convention, always 64-bit word).
+  kDQ,
+  //! Describes 'byte' (always 8-bit quantity).
+  kByte,
+  //! Describes 'half' (most likely 16-bit word).
+  kHalf,
+  //! Describes 'word' (either 16-bit or 32-bit word).
+  kWord,
+  //! Describes 'hword' (most likely 16-bit word).
+  kHWord,
+  //! Describes 'dword' (either 32-bit or 64-bit word).
+  kDWord,
+  //! Describes 'qword' (64-bit word).
+  kQWord,
+  //! Describes 'xword' (64-bit word).
+  kXWord,
+  //! Describes 'short' (always 16-bit word).
+  kShort,
+  //! Describes 'long' (most likely 32-bit word).
+  kLong,
+  //! Describes 'quad' (64-bit word).
+  kQuad,
+
+  //! Maximum value of `ArchTypeNameId`.
+  kMaxValue = kQuad
+};
+
+//! Instruction feature hints for each register group provided by \ref ArchTraits.
+//!
+//! Instruction feature hints describe miscellaneous instructions provided by the architecture that can be used by
+//! register allocator to make certain things simpler - like register swaps or emitting register push/pop sequences.
+//!
+//! \remarks Instruction feature hints are only defined for register groups that can be used with \ref
+//! asmjit_compiler infrastructure. Register groups that are not managed by Compiler are not provided by
+//! \ref ArchTraits and cannot be queried.
+enum class InstHints : uint8_t {
+  //! No feature hints.
+  kNoHints = 0,
+
+  //! Architecture supports a register swap by using a single instruction.
+  kRegSwap = 0x01u,
+  //! Architecture provides push/pop instructions.
+  kPushPop = 0x02u
+};
+ASMJIT_DEFINE_ENUM_FLAGS(InstHints)
+
+//! Architecture traits used by Function API and Compiler's register allocator.
+struct ArchTraits {
+  //! \name Members
+  //! \{
+
+  //! Stack pointer register id.
+  uint8_t _spRegId;
+  //! Frame pointer register id.
+  uint8_t _fpRegId;
+  //! Link register id.
+  uint8_t _linkRegId;
+  //! Instruction pointer (or program counter) register id, if accessible.
+  uint8_t _ipRegId;
+
+  // Reserved.
+  uint8_t _reserved[3];
+  //! Hardware stack alignment requirement.
+  uint8_t _hwStackAlignment;
+
+  //! Minimum addressable offset on stack guaranteed for all instructions.
+  uint32_t _minStackOffset;
+  //! Maximum addressable offset on stack depending on specific instruction.
+  uint32_t _maxStackOffset;
+
+  //! Flags for each virtual register group.
+  Support::Array<InstHints, Globals::kNumVirtGroups> _instHints;
+
+  //! Maps register type into a signature, that provides group, size and can be used to construct register operands.
+  Support::Array<OperandSignature, uint32_t(RegType::kMaxValue) + 1> _regSignature;
+  //! Maps a register to type-id, see \ref TypeId.
+  Support::Array<TypeId, uint32_t(RegType::kMaxValue) + 1> _regTypeToTypeId;
+  //! Maps scalar TypeId values (from TypeId::_kBaseStart) to register types, see \ref TypeId.
+  Support::Array<RegType, 32> _typeIdToRegType;
+
+  //! Word name identifiers of 8-bit, 16-bit, 32-bit, and 64-bit quantities that appear in formatted text.
+  ArchTypeNameId _typeNameIdTable[4];
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns stack pointer register id.
+  inline uint32_t spRegId() const noexcept { return _spRegId; }
+  //! Returns frame pointer register id.
+  inline uint32_t fpRegId() const noexcept { return _fpRegId; }
+  //! Returns link register id, if the architecture provides it.
+  inline uint32_t linkRegId() const noexcept { return _linkRegId; }
+  //! Returns instruction pointer register id, if the architecture provides it.
+  inline uint32_t ipRegId() const noexcept { return _ipRegId; }
+
+  //! Returns a hardware stack alignment requirement.
+  //!
+  //! \note This is a hardware constraint. Architectures that don't constrain it would return the lowest alignment
+  //! (1), however, some architectures may constrain the alignment, for example AArch64 requires 16-byte alignment.
+  inline uint32_t hwStackAlignment() const noexcept { return _hwStackAlignment; }
+
+  //! Tests whether the architecture provides link register, which is used across function calls. If the link
+  //! register is not provided then a function call pushes the return address on stack (X86/X64).
+  inline bool hasLinkReg() const noexcept { return _linkRegId != BaseReg::kIdBad; }
+
+  //! Returns minimum addressable offset on stack guaranteed for all instructions.
+  inline uint32_t minStackOffset() const noexcept { return _minStackOffset; }
+  //! Returns maximum addressable offset on stack depending on specific instruction.
+  inline uint32_t maxStackOffset() const noexcept { return _maxStackOffset; }
+
+  //! Returns instruction feature hints of the given register `group`.
+  inline InstHints instFeatureHints(RegGroup group) const noexcept { return _instHints[group]; }
+  //! Tests whether the given register `group` has the given `feature` hint set.
+  inline bool hasInstHint(RegGroup group, InstHints feature) const noexcept { return Support::test(_instHints[group], feature); }
+  //! Tests whether the ISA provides register swap instruction for the given register `group`.
+  inline bool hasInstRegSwap(RegGroup group) const noexcept { return hasInstHint(group, InstHints::kRegSwap); }
+  //! Tests whether the ISA provides push/pop instructions for the given register `group`.
+  inline bool hasInstPushPop(RegGroup group) const noexcept { return hasInstHint(group, InstHints::kPushPop); }
+
+  inline bool hasRegType(RegType type) const noexcept {
+    return type <= RegType::kMaxValue && _regSignature[type].isValid();
+  }
+
+  //! Returns an operand signature from the given register `type` of this architecture.
+  inline OperandSignature regTypeToSignature(RegType type) const noexcept { return _regSignature[type]; }
+  //! Returns a register group from the given register `type` of this architecture.
+  inline RegGroup regTypeToGroup(RegType type) const noexcept { return _regSignature[type].regGroup(); }
+  //! Returns a register size of the given register `type` of this architecture.
+  inline uint32_t regTypeToSize(RegType type) const noexcept { return _regSignature[type].size(); }
+  //! Returns a corresponding `TypeId` from the given register `type` of this architecture.
+  inline TypeId regTypeToTypeId(RegType type) const noexcept { return _regTypeToTypeId[type]; }
+
+  //! Returns a table of ISA word names that appear in formatted text. Word names are ISA dependent.
+  //!
+  //! The index of this table is log2 of the size:
+  //!   - [0] 8-bits
+  //!   - [1] 16-bits
+  //!   - [2] 32-bits
+  //!   - [3] 64-bits
+  inline const ArchTypeNameId* typeNameIdTable() const noexcept { return _typeNameIdTable; }
+
+  //! Returns an ISA word name identifier of the given `index`, see \ref typeNameIdTable() for more details.
+  inline ArchTypeNameId typeNameIdByIndex(uint32_t index) const noexcept { return _typeNameIdTable[index]; }
+
+  //! \}
+
+  //! \name Statics
+  //! \{
+
+  //! Returns a const reference to `ArchTraits` for the given architecture `arch`.
+  static inline const ArchTraits& byArch(Arch arch) noexcept;
+
+  //! \}
+};
+
+ASMJIT_VARAPI const ArchTraits _archTraits[uint32_t(Arch::kMaxValue) + 1];
+
+//! \cond
+inline const ArchTraits& ArchTraits::byArch(Arch arch) noexcept { return _archTraits[uint32_t(arch)]; }
+//! \endcond
+
+//! Architecture utilities.
+namespace ArchUtils {
+
+ASMJIT_API Error typeIdToRegSignature(Arch arch, TypeId typeId, TypeId* typeIdOut, OperandSignature* regSignatureOut) noexcept;
+
+} // {ArchUtils}
+
+//! \}
+
+ASMJIT_END_NAMESPACE
+
+#endif // ASMJIT_CORE_ARCHTRAITS_H_INCLUDED
diff --git a/lib/lepton/asmjit/core/assembler.cpp b/lib/lepton/asmjit/core/assembler.cpp
new file mode 100644
index 0000000000..d6c87627ec
--- /dev/null
+++ b/lib/lepton/asmjit/core/assembler.cpp
@@ -0,0 +1,406 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#include "../core/assembler.h"
+#include "../core/codewriter_p.h"
+#include "../core/constpool.h"
+#include "../core/emitterutils_p.h"
+#include "../core/formatter.h"
+#include "../core/logger.h"
+#include "../core/support.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+// BaseAssembler - Construction & Destruction
+// ==========================================
+
+BaseAssembler::BaseAssembler() noexcept
+  : BaseEmitter(EmitterType::kAssembler) {}
+
+BaseAssembler::~BaseAssembler() noexcept {}
+
+// BaseAssembler - Buffer Management
+// =================================
+
+Error BaseAssembler::setOffset(size_t offset) {
+  if (ASMJIT_UNLIKELY(!_code))
+    return reportError(DebugUtils::errored(kErrorNotInitialized));
+
+  size_t size = Support::max<size_t>(_section->bufferSize(), this->offset());
+  if (ASMJIT_UNLIKELY(offset > size))
+    return reportError(DebugUtils::errored(kErrorInvalidArgument));
+
+  _bufferPtr = _bufferData + offset;
+  return kErrorOk;
+}
+
+// BaseAssembler - Section Management
+// ==================================
+
+static void BaseAssembler_initSection(BaseAssembler* self, Section* section) noexcept {
+  uint8_t* p = section->_buffer._data;
+
+  self->_section = section;
+  self->_bufferData = p;
+  self->_bufferPtr  = p + section->_buffer._size;
+  self->_bufferEnd  = p + section->_buffer._capacity;
+}
+
+Error BaseAssembler::section(Section* section) {
+  if (ASMJIT_UNLIKELY(!_code))
+    return reportError(DebugUtils::errored(kErrorNotInitialized));
+
+  if (!_code->isSectionValid(section->id()) || _code->_sections[section->id()] != section)
+    return reportError(DebugUtils::errored(kErrorInvalidSection));
+
+#ifndef ASMJIT_NO_LOGGING
+  if (_logger)
+    _logger->logf(".section %s {#%u}\n", section->name(), section->id());
+#endif
+
+  BaseAssembler_initSection(this, section);
+  return kErrorOk;
+}
+
+// BaseAssembler - Label Management
+// ================================
+
+Label BaseAssembler::newLabel() {
+  uint32_t labelId = Globals::kInvalidId;
+  if (ASMJIT_LIKELY(_code)) {
+    LabelEntry* le;
+    Error err = _code->newLabelEntry(&le);
+    if (ASMJIT_UNLIKELY(err))
+      reportError(err);
+    else
+      labelId = le->id();
+  }
+  return Label(labelId);
+}
+
+Label BaseAssembler::newNamedLabel(const char* name, size_t nameSize, LabelType type, uint32_t parentId) {
+  uint32_t labelId = Globals::kInvalidId;
+  if (ASMJIT_LIKELY(_code)) {
+    LabelEntry* le;
+    Error err = _code->newNamedLabelEntry(&le, name, nameSize, type, parentId);
+    if (ASMJIT_UNLIKELY(err))
+      reportError(err);
+    else
+      labelId = le->id();
+  }
+  return Label(labelId);
+}
+
+Error BaseAssembler::bind(const Label& label) {
+  if (ASMJIT_UNLIKELY(!_code))
+    return reportError(DebugUtils::errored(kErrorNotInitialized));
+
+  Error err = _code->bindLabel(label, _section->id(), offset());
+
+#ifndef ASMJIT_NO_LOGGING
+  if (_logger)
+    EmitterUtils::logLabelBound(this, label);
+#endif
+
+  resetInlineComment();
+  if (err)
+    return reportError(err);
+
+  return kErrorOk;
+}
+
+// BaseAssembler - Embed
+// =====================
+
+Error BaseAssembler::embed(const void* data, size_t dataSize) {
+  if (ASMJIT_UNLIKELY(!_code))
+    return reportError(DebugUtils::errored(kErrorNotInitialized));
+
+  if (dataSize == 0)
+    return kErrorOk;
+
+  CodeWriter writer(this);
+  ASMJIT_PROPAGATE(writer.ensureSpace(this, dataSize));
+
+  writer.emitData(data, dataSize);
+  writer.done(this);
+
+#ifndef ASMJIT_NO_LOGGING
+  if (_logger) {
+    StringTmp<512> sb;
+    Formatter::formatData(sb, _logger->flags(), arch(), TypeId::kUInt8, data, dataSize, 1);
+    sb.append('\n');
+    _logger->log(sb);
+  }
+#endif
+
+  return kErrorOk;
+}
+
+Error BaseAssembler::embedDataArray(TypeId typeId, const void* data, size_t itemCount, size_t repeatCount) {
+  // Without an attached CodeHolder there is no section/buffer to write to (see onDetach()), so fail early.
+  if (ASMJIT_UNLIKELY(!_code))
+    return reportError(DebugUtils::errored(kErrorNotInitialized));
+
+  uint32_t deabstractDelta = TypeUtils::deabstractDeltaOfSize(registerSize());
+  TypeId finalTypeId = TypeUtils::deabstract(typeId, deabstractDelta);
+  if (ASMJIT_UNLIKELY(!TypeUtils::isValid(finalTypeId)))
+    return reportError(DebugUtils::errored(kErrorInvalidArgument));
+
+  if (itemCount == 0 || repeatCount == 0)
+    return kErrorOk;
+
+  uint32_t typeSize = TypeUtils::sizeOf(finalTypeId);
+  Support::FastUInt8 of = 0;
+  size_t dataSize = Support::mulOverflow(itemCount, size_t(typeSize), &of);
+  size_t totalSize = Support::mulOverflow(dataSize, repeatCount, &of);
+  if (ASMJIT_UNLIKELY(of))
+    return reportError(DebugUtils::errored(kErrorOutOfMemory));
+
+  CodeWriter writer(this);
+  ASMJIT_PROPAGATE(writer.ensureSpace(this, totalSize));
+
+  for (size_t i = 0; i < repeatCount; i++)
+    writer.emitData(data, dataSize);
+  writer.done(this);
+
+#ifndef ASMJIT_NO_LOGGING
+  if (_logger) {
+    StringTmp<512> sb;
+    Formatter::formatData(sb, _logger->flags(), arch(), typeId, data, itemCount, repeatCount);
+    sb.append('\n');
+    _logger->log(sb);
+  }
+#endif
+
+  return kErrorOk;
+}
+
+#ifndef ASMJIT_NO_LOGGING
+static const TypeId dataTypeIdBySize[9] = {
+  TypeId::kVoid,   // [0] (invalid)
+  TypeId::kUInt8,  // [1] (uint8_t)
+  TypeId::kUInt16, // [2] (uint16_t)
+  TypeId::kVoid,   // [3] (invalid)
+  TypeId::kUInt32, // [4] (uint32_t)
+  TypeId::kVoid,   // [5] (invalid)
+  TypeId::kVoid,   // [6] (invalid)
+  TypeId::kVoid,   // [7] (invalid)
+  TypeId::kUInt64  // [8] (uint64_t)
+};
+#endif
+
+Error BaseAssembler::embedConstPool(const Label& label, const ConstPool& pool) {
+  if (ASMJIT_UNLIKELY(!_code))
+    return reportError(DebugUtils::errored(kErrorNotInitialized));
+
+  if (ASMJIT_UNLIKELY(!isLabelValid(label)))
+    return reportError(DebugUtils::errored(kErrorInvalidLabel));
+
+  ASMJIT_PROPAGATE(align(AlignMode::kData, uint32_t(pool.alignment())));
+  ASMJIT_PROPAGATE(bind(label));
+
+  size_t size = pool.size();
+  if (!size)
+    return kErrorOk;
+
+  CodeWriter writer(this);
+  ASMJIT_PROPAGATE(writer.ensureSpace(this, size));
+
+#ifndef ASMJIT_NO_LOGGING
+  uint8_t* data = writer.cursor();
+#endif
+
+  pool.fill(writer.cursor());
+  writer.advance(size);
+  writer.done(this);
+
+#ifndef ASMJIT_NO_LOGGING
+  if (_logger) {
+    uint32_t dataSizeLog2 = Support::min<uint32_t>(Support::ctz(pool.minItemSize()), 3);
+    uint32_t dataSize = 1 << dataSizeLog2;
+
+    StringTmp<512> sb;
+    Formatter::formatData(sb, _logger->flags(), arch(), dataTypeIdBySize[dataSize], data, size >> dataSizeLog2);
+    sb.append('\n');
+    _logger->log(sb);
+  }
+#endif
+
+  return kErrorOk;
+}
+
+Error BaseAssembler::embedLabel(const Label& label, size_t dataSize) {
+  if (ASMJIT_UNLIKELY(!_code))
+    return reportError(DebugUtils::errored(kErrorNotInitialized));
+
+  ASMJIT_ASSERT(_code != nullptr);
+  RelocEntry* re;
+  LabelEntry* le = _code->labelEntry(label);
+
+  if (ASMJIT_UNLIKELY(!le))
+    return reportError(DebugUtils::errored(kErrorInvalidLabel));
+
+  if (dataSize == 0)
+    dataSize = registerSize();
+
+  if (ASMJIT_UNLIKELY(!Support::isPowerOf2(dataSize) || dataSize > 8))
+    return reportError(DebugUtils::errored(kErrorInvalidOperandSize));
+
+  CodeWriter writer(this);
+  ASMJIT_PROPAGATE(writer.ensureSpace(this, dataSize));
+
+#ifndef ASMJIT_NO_LOGGING
+  if (_logger) {
+    StringTmp<256> sb;
+    sb.append('.');
+    Formatter::formatDataType(sb, _logger->flags(), arch(), dataTypeIdBySize[dataSize]);
+    sb.append(' ');
+    Formatter::formatLabel(sb, FormatFlags::kNone, this, label.id());
+    sb.append('\n');
+    _logger->log(sb);
+  }
+#endif
+
+  Error err = _code->newRelocEntry(&re, RelocType::kRelToAbs);
+  if (ASMJIT_UNLIKELY(err))
+    return reportError(err);
+
+  re->_sourceSectionId = _section->id();
+  re->_sourceOffset = offset();
+  re->_format.resetToSimpleValue(OffsetType::kUnsignedOffset, dataSize);
+
+  if (le->isBound()) {
+    re->_targetSectionId = le->section()->id();
+    re->_payload = le->offset();
+  }
+  else {
+    OffsetFormat of;
+    of.resetToSimpleValue(OffsetType::kUnsignedOffset, dataSize);
+
+    LabelLink* link = _code->newLabelLink(le, _section->id(), offset(), 0, of);
+    if (ASMJIT_UNLIKELY(!link))
+      return reportError(DebugUtils::errored(kErrorOutOfMemory));
+
+    link->relocId = re->id();
+  }
+
+  // Emit a zero-filled placeholder of `dataSize` bytes (1, 2, 4, or 8).
+  writer.emitZeros(dataSize);
+  writer.done(this);
+
+  return kErrorOk;
+}
+
+Error BaseAssembler::embedLabelDelta(const Label& label, const Label& base, size_t dataSize) {
+  if (ASMJIT_UNLIKELY(!_code))
+    return reportError(DebugUtils::errored(kErrorNotInitialized));
+
+  LabelEntry* labelEntry = _code->labelEntry(label);
+  LabelEntry* baseEntry = _code->labelEntry(base);
+
+  if (ASMJIT_UNLIKELY(!labelEntry || !baseEntry))
+    return reportError(DebugUtils::errored(kErrorInvalidLabel));
+
+  if (dataSize == 0)
+    dataSize = registerSize();
+
+  if (ASMJIT_UNLIKELY(!Support::isPowerOf2(dataSize) || dataSize > 8))
+    return reportError(DebugUtils::errored(kErrorInvalidOperandSize));
+
+  CodeWriter writer(this);
+  ASMJIT_PROPAGATE(writer.ensureSpace(this, dataSize));
+
+#ifndef ASMJIT_NO_LOGGING
+  if (_logger) {
+    StringTmp<256> sb;
+    sb.append('.');
+    Formatter::formatDataType(sb, _logger->flags(), arch(), dataTypeIdBySize[dataSize]);
+    sb.append(" (");
+    Formatter::formatLabel(sb, FormatFlags::kNone, this, label.id());
+    sb.append(" - ");
+    Formatter::formatLabel(sb, FormatFlags::kNone, this, base.id());
+    sb.append(")\n");
+    _logger->log(sb);
+  }
+#endif
+
+  // If both labels are bound within the same section it means the delta can be calculated now.
+  if (labelEntry->isBound() && baseEntry->isBound() && labelEntry->section() == baseEntry->section()) {
+    uint64_t delta = labelEntry->offset() - baseEntry->offset();
+    writer.emitValueLE(delta, dataSize);
+  }
+  else {
+    RelocEntry* re;
+    Error err = _code->newRelocEntry(&re, RelocType::kExpression);
+    if (ASMJIT_UNLIKELY(err))
+      return reportError(err);
+
+    Expression* exp = _code->_zone.newT<Expression>();
+    if (ASMJIT_UNLIKELY(!exp))
+      return reportError(DebugUtils::errored(kErrorOutOfMemory));
+
+    exp->reset();
+    exp->opType = ExpressionOpType::kSub;
+    exp->setValueAsLabel(0, labelEntry);
+    exp->setValueAsLabel(1, baseEntry);
+
+    re->_format.resetToSimpleValue(OffsetType::kSignedOffset, dataSize);
+    re->_sourceSectionId = _section->id();
+    re->_sourceOffset = offset();
+    re->_payload = (uint64_t)(uintptr_t)exp;
+
+    writer.emitZeros(dataSize);
+  }
+
+  writer.done(this);
+  return kErrorOk;
+}
+
+// BaseAssembler - Comment
+// =======================
+
+Error BaseAssembler::comment(const char* data, size_t size) {
+  if (!hasEmitterFlag(EmitterFlags::kLogComments)) {
+    if (!hasEmitterFlag(EmitterFlags::kAttached))
+      return reportError(DebugUtils::errored(kErrorNotInitialized));
+    return kErrorOk;
+  }
+
+#ifndef ASMJIT_NO_LOGGING
+  // Logger cannot be NULL if `EmitterFlags::kLogComments` is set.
+  ASMJIT_ASSERT(_logger != nullptr);
+
+  _logger->log(data, size);
+  _logger->log("\n", 1);
+  return kErrorOk;
+#else
+  DebugUtils::unused(data, size);
+  return kErrorOk;
+#endif
+}
+
+// BaseAssembler - Events
+// ======================
+
+Error BaseAssembler::onAttach(CodeHolder* code) noexcept {
+  ASMJIT_PROPAGATE(Base::onAttach(code));
+
+  // Attach to the end of the .text section.
+  BaseAssembler_initSection(this, code->_sections[0]);
+
+  return kErrorOk;
+}
+
+Error BaseAssembler::onDetach(CodeHolder* code) noexcept {
+  _section    = nullptr;
+  _bufferData = nullptr;
+  _bufferEnd  = nullptr;
+  _bufferPtr  = nullptr;
+  return Base::onDetach(code);
+}
+
+ASMJIT_END_NAMESPACE
diff --git a/lib/lepton/asmjit/core/assembler.h b/lib/lepton/asmjit/core/assembler.h
new file mode 100644
index 0000000000..7ea2505f04
--- /dev/null
+++ b/lib/lepton/asmjit/core/assembler.h
@@ -0,0 +1,129 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_ASSEMBLER_H_INCLUDED
+#define ASMJIT_CORE_ASSEMBLER_H_INCLUDED
+
+#include "../core/codeholder.h"
+#include "../core/emitter.h"
+#include "../core/operand.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \addtogroup asmjit_assembler
+//! \{
+
+//! Base assembler.
+//!
+//! This is a base class that provides interface used by architecture specific
+//! assembler implementations. Assembler doesn't hold any data, instead it's
+//! attached to \ref CodeHolder, which provides all the data that Assembler
+//! needs and which can be altered by it.
+//!
+//! Check out architecture specific assemblers for more details and examples:
+//!
+//!   - \ref x86::Assembler - X86/X64 assembler implementation.
+class ASMJIT_VIRTAPI BaseAssembler : public BaseEmitter {
+public:
+  ASMJIT_NONCOPYABLE(BaseAssembler)
+  typedef BaseEmitter Base;
+
+  //! Current section where the assembling happens.
+  Section* _section = nullptr;
+  //! Start of the CodeBuffer of the current section.
+  uint8_t* _bufferData = nullptr;
+  //! End (first invalid byte) of the current section.
+  uint8_t* _bufferEnd = nullptr;
+  //! Pointer in the CodeBuffer of the current section.
+  uint8_t* _bufferPtr = nullptr;
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Creates a new `BaseAssembler` instance.
+  ASMJIT_API BaseAssembler() noexcept;
+  //! Destroys the `BaseAssembler` instance.
+  ASMJIT_API virtual ~BaseAssembler() noexcept;
+
+  //! \}
+
+  //! \name Code-Buffer Management
+  //! \{
+
+  //! Returns the capacity of the current CodeBuffer.
+  inline size_t bufferCapacity() const noexcept { return (size_t)(_bufferEnd - _bufferData); }
+  //! Returns the number of remaining bytes in the current CodeBuffer.
+  inline size_t remainingSpace() const noexcept { return (size_t)(_bufferEnd - _bufferPtr); }
+
+  //! Returns the current position in the CodeBuffer.
+  inline size_t offset() const noexcept { return (size_t)(_bufferPtr - _bufferData); }
+
+  //! Sets the current position in the CodeBuffer to `offset`.
+  //!
+  //! \note The `offset` cannot be greater than buffer size even if it's
+  //! within the buffer's capacity.
+  ASMJIT_API Error setOffset(size_t offset);
+
+  //! Returns the start of the CodeBuffer in the current section.
+  inline uint8_t* bufferData() const noexcept { return _bufferData; }
+  //! Returns the end (first invalid byte) in the current section.
+  inline uint8_t* bufferEnd() const noexcept { return _bufferEnd; }
+  //! Returns the current pointer in the CodeBuffer in the current section.
+  inline uint8_t* bufferPtr() const noexcept { return _bufferPtr; }
+
+  //! \}
+
+  //! \name Section Management
+  //! \{
+
+  //! Returns the current section.
+  inline Section* currentSection() const noexcept { return _section; }
+
+  ASMJIT_API Error section(Section* section) override;
+
+  //! \}
+
+  //! \name Label Management
+  //! \{
+
+  ASMJIT_API Label newLabel() override;
+  ASMJIT_API Label newNamedLabel(const char* name, size_t nameSize = SIZE_MAX, LabelType type = LabelType::kGlobal, uint32_t parentId = Globals::kInvalidId) override;
+  ASMJIT_API Error bind(const Label& label) override;
+
+  //! \}
+
+  //! \name Embed
+  //! \{
+
+  ASMJIT_API Error embed(const void* data, size_t dataSize) override;
+  ASMJIT_API Error embedDataArray(TypeId typeId, const void* data, size_t itemCount, size_t repeatCount = 1) override;
+  ASMJIT_API Error embedConstPool(const Label& label, const ConstPool& pool) override;
+
+  ASMJIT_API Error embedLabel(const Label& label, size_t dataSize = 0) override;
+  ASMJIT_API Error embedLabelDelta(const Label& label, const Label& base, size_t dataSize = 0) override;
+
+  //! \}
+
+  //! \name Comment
+  //! \{
+
+  ASMJIT_API Error comment(const char* data, size_t size = SIZE_MAX) override;
+
+  //! \}
+
+  //! \name Events
+  //! \{
+
+  ASMJIT_API Error onAttach(CodeHolder* code) noexcept override;
+  ASMJIT_API Error onDetach(CodeHolder* code) noexcept override;
+
+  //! \}
+};
+
+//! \}
+
+ASMJIT_END_NAMESPACE
+
+#endif // ASMJIT_CORE_ASSEMBLER_H_INCLUDED
diff --git a/lib/lepton/asmjit/core/builder.cpp b/lib/lepton/asmjit/core/builder.cpp
new file mode 100644
index 0000000000..5df243e7b8
--- /dev/null
+++ b/lib/lepton/asmjit/core/builder.cpp
@@ -0,0 +1,889 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#ifndef ASMJIT_NO_BUILDER
+
+#include "../core/builder.h"
+#include "../core/emitterutils_p.h"
+#include "../core/errorhandler.h"
+#include "../core/formatter.h"
+#include "../core/logger.h"
+#include "../core/support.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+// PostponedErrorHandler (Internal)
+// ================================
+
+//! Postponed error handler that never throws. Used as a temporary error handler
+//! to run passes. If an error occurs, the caller is notified and will call the
+//! real error handler, which can throw.
+class PostponedErrorHandler : public ErrorHandler {
+public:
+  void handleError(Error err, const char* message, BaseEmitter* origin) override {
+    DebugUtils::unused(err, origin);
+    _message.assign(message);
+  }
+
+  StringTmp<128> _message;
+};
+
+// BaseBuilder - Utilities
+// =======================
+
+static void BaseBuilder_deletePasses(BaseBuilder* self) noexcept {
+  for (Pass* pass : self->_passes)
+    pass->~Pass();
+  self->_passes.reset();
+}
+
+// BaseBuilder - Construction & Destruction
+// ========================================
+
+BaseBuilder::BaseBuilder() noexcept
+  : BaseEmitter(EmitterType::kBuilder),
+    _codeZone(32768 - Zone::kBlockOverhead),
+    _dataZone(16384 - Zone::kBlockOverhead),
+    _passZone(65536 - Zone::kBlockOverhead),
+    _allocator(&_codeZone) {}
+
+BaseBuilder::~BaseBuilder() noexcept {
+  BaseBuilder_deletePasses(this);
+}
+
+// BaseBuilder - Node Management
+// =============================
+
+Error BaseBuilder::newInstNode(InstNode** out, InstId instId, InstOptions instOptions, uint32_t opCount) {
+  uint32_t opCapacity = InstNode::capacityOfOpCount(opCount);
+  ASMJIT_ASSERT(opCapacity >= InstNode::kBaseOpCapacity);
+
+  InstNode* node = _allocator.allocT<InstNode>(InstNode::nodeSizeOfOpCapacity(opCapacity));
+  if (ASMJIT_UNLIKELY(!node))
+    return reportError(DebugUtils::errored(kErrorOutOfMemory));
+
+  *out = new(node) InstNode(this, instId, instOptions, opCount, opCapacity);
+  return kErrorOk;
+}
+
+
+Error BaseBuilder::newLabelNode(LabelNode** out) {
+  *out = nullptr;
+
+  ASMJIT_PROPAGATE(_newNodeT<LabelNode>(out));
+  return registerLabelNode(*out);
+}
+
+Error BaseBuilder::newAlignNode(AlignNode** out, AlignMode alignMode, uint32_t alignment) {
+  *out = nullptr;
+  return _newNodeT<AlignNode>(out, alignMode, alignment);
+}
+
+Error BaseBuilder::newEmbedDataNode(EmbedDataNode** out, TypeId typeId, const void* data, size_t itemCount, size_t repeatCount) {
+  *out = nullptr;
+
+  uint32_t deabstractDelta = TypeUtils::deabstractDeltaOfSize(registerSize());
+  TypeId finalTypeId = TypeUtils::deabstract(typeId, deabstractDelta);
+
+  if (ASMJIT_UNLIKELY(!TypeUtils::isValid(finalTypeId)))
+    return reportError(DebugUtils::errored(kErrorInvalidArgument));
+
+  uint32_t typeSize = TypeUtils::sizeOf(finalTypeId);
+  Support::FastUInt8 of = 0;
+
+  size_t dataSize = Support::mulOverflow(itemCount, size_t(typeSize), &of);
+  if (ASMJIT_UNLIKELY(of))
+    return reportError(DebugUtils::errored(kErrorOutOfMemory));
+
+  EmbedDataNode* node;
+  ASMJIT_PROPAGATE(_newNodeT<EmbedDataNode>(&node));
+
+  node->_embed._typeId = typeId;
+  node->_embed._typeSize = uint8_t(typeSize);
+  node->_itemCount = itemCount;
+  node->_repeatCount = repeatCount;
+
+  uint8_t* dstData = node->_inlineData;
+  if (dataSize > EmbedDataNode::kInlineBufferSize) {
+    dstData = static_cast<uint8_t*>(_dataZone.alloc(dataSize, 8));
+    if (ASMJIT_UNLIKELY(!dstData))
+      return reportError(DebugUtils::errored(kErrorOutOfMemory));
+    node->_externalData = dstData;
+  }
+
+  if (data)
+    memcpy(dstData, data, dataSize);
+
+  *out = node;
+  return kErrorOk;
+}
+
+Error BaseBuilder::newConstPoolNode(ConstPoolNode** out) {
+  *out = nullptr;
+
+  ASMJIT_PROPAGATE(_newNodeT<ConstPoolNode>(out));
+  return registerLabelNode(*out);
+}
+
+Error BaseBuilder::newCommentNode(CommentNode** out, const char* data, size_t size) {
+  *out = nullptr;
+
+  if (data) {
+    if (size == SIZE_MAX)
+      size = strlen(data);
+
+    if (size > 0) {
+      data = static_cast<char*>(_dataZone.dup(data, size, true));
+      if (ASMJIT_UNLIKELY(!data))
+        return reportError(DebugUtils::errored(kErrorOutOfMemory));
+    }
+  }
+
+  return _newNodeT<CommentNode>(out, data);
+}
+
+BaseNode* BaseBuilder::addNode(BaseNode* node) noexcept {
+  ASMJIT_ASSERT(!node->_prev);
+  ASMJIT_ASSERT(!node->_next);
+  ASMJIT_ASSERT(!node->isActive());
+
+  if (!_cursor) {
+    if (!_firstNode) {
+      _firstNode = node;
+      _lastNode = node;
+    }
+    else {
+      node->_next = _firstNode;
+      _firstNode->_prev = node;
+      _firstNode = node;
+    }
+  }
+  else {
+    BaseNode* prev = _cursor;
+    BaseNode* next = _cursor->next();
+
+    node->_prev = prev;
+    node->_next = next;
+
+    prev->_next = node;
+    if (next)
+      next->_prev = node;
+    else
+      _lastNode = node;
+  }
+
+  node->addFlags(NodeFlags::kIsActive);
+  if (node->isSection())
+    _dirtySectionLinks = true;
+
+  _cursor = node;
+  return node;
+}
+
+BaseNode* BaseBuilder::addAfter(BaseNode* node, BaseNode* ref) noexcept {
+  ASMJIT_ASSERT(!node->_prev);
+  ASMJIT_ASSERT(!node->_next);
+
+  BaseNode* prev = ref;
+  BaseNode* next = ref->next();
+
+  node->_prev = prev;
+  node->_next = next;
+
+  node->addFlags(NodeFlags::kIsActive);
+  if (node->isSection())
+    _dirtySectionLinks = true;
+
+  prev->_next = node;
+  if (next)
+    next->_prev = node;
+  else
+    _lastNode = node;
+
+  return node;
+}
+
+BaseNode* BaseBuilder::addBefore(BaseNode* node, BaseNode* ref) noexcept {
+  ASMJIT_ASSERT(!node->_prev);
+  ASMJIT_ASSERT(!node->_next);
+  ASMJIT_ASSERT(!node->isActive());
+  ASMJIT_ASSERT(ref->isActive());
+
+  BaseNode* prev = ref->prev();
+  BaseNode* next = ref;
+
+  node->_prev = prev;
+  node->_next = next;
+
+  node->addFlags(NodeFlags::kIsActive);
+  if (node->isSection())
+    _dirtySectionLinks = true;
+
+  next->_prev = node;
+  if (prev)
+    prev->_next = node;
+  else
+    _firstNode = node;
+
+  return node;
+}
+
+BaseNode* BaseBuilder::removeNode(BaseNode* node) noexcept {
+  if (!node->isActive())
+    return node;
+
+  BaseNode* prev = node->prev();
+  BaseNode* next = node->next();
+
+  if (_firstNode == node)
+    _firstNode = next;
+  else
+    prev->_next = next;
+
+  if (_lastNode == node)
+    _lastNode  = prev;
+  else
+    next->_prev = prev;
+
+  node->_prev = nullptr;
+  node->_next = nullptr;
+  node->clearFlags(NodeFlags::kIsActive);
+  if (node->isSection())
+    _dirtySectionLinks = true;
+
+  if (_cursor == node)
+    _cursor = prev;
+
+  return node;
+}
+
+void BaseBuilder::removeNodes(BaseNode* first, BaseNode* last) noexcept {
+  if (first == last) {
+    removeNode(first);
+    return;
+  }
+
+  if (!first->isActive())
+    return;
+  // Unlink the whole [first, last] range from the list by joining its outer neighbors.
+  BaseNode* prev = first->prev();
+  BaseNode* next = last->next();
+
+  if (_firstNode == first)
+    _firstNode = next;
+  else
+    prev->_next = next;
+
+  if (_lastNode == last)
+    _lastNode  = prev;
+  else
+    next->_prev = prev;
+  // Reset every removed node; a cursor that pointed into the range moves to the node before it.
+  BaseNode* node = first;
+  uint32_t didRemoveSection = false;
+
+  for (;;) {
+    next = node->next();
+    // Only `last` may lack a successor - it is allowed to be the tail of the list.
+    ASMJIT_ASSERT(next != nullptr || node == last);
+    node->_prev = nullptr;
+    node->_next = nullptr;
+    node->clearFlags(NodeFlags::kIsActive);
+    didRemoveSection |= uint32_t(node->isSection());
+
+    if (_cursor == node)
+      _cursor = prev;
+
+    if (node == last)
+      break;
+    node = next;
+  }
+
+  if (didRemoveSection)
+    _dirtySectionLinks = true;
+}
+
+BaseNode* BaseBuilder::setCursor(BaseNode* node) noexcept {
+  BaseNode* old = _cursor;
+  _cursor = node;
+  return old;
+}
+
+// BaseBuilder - Sections
+// ======================
+
+Error BaseBuilder::sectionNodeOf(SectionNode** out, uint32_t sectionId) {
+  *out = nullptr;
+
+  if (ASMJIT_UNLIKELY(!_code))
+    return DebugUtils::errored(kErrorNotInitialized);
+
+  if (ASMJIT_UNLIKELY(!_code->isSectionValid(sectionId)))
+    return reportError(DebugUtils::errored(kErrorInvalidSection));
+
+  if (sectionId >= _sectionNodes.size()) {
+    Error err = _sectionNodes.reserve(&_allocator, sectionId + 1);
+    if (ASMJIT_UNLIKELY(err != kErrorOk))
+      return reportError(err);
+  }
+
+  SectionNode* node = nullptr;
+  if (sectionId < _sectionNodes.size())
+    node = _sectionNodes[sectionId];
+
+  if (!node) {
+    ASMJIT_PROPAGATE(_newNodeT<SectionNode>(&node, sectionId));
+
+    // We have already reserved enough space, this cannot fail now.
+    if (sectionId >= _sectionNodes.size())
+      _sectionNodes.resize(&_allocator, sectionId + 1);
+
+    _sectionNodes[sectionId] = node;
+  }
+
+  *out = node;
+  return kErrorOk;
+}
+
+Error BaseBuilder::section(Section* section) {
+  SectionNode* node;
+  ASMJIT_PROPAGATE(sectionNodeOf(&node, section->id()));
+  ASMJIT_ASSUME(node != nullptr);
+
+  if (!node->isActive()) {
+    // Insert the section at the end if it was not part of the code.
+    addAfter(node, lastNode());
+    _cursor = node;
+  }
+  else {
+    // This is a bit tricky. We cache section links to make sure that
+    // switching sections doesn't involve traversal in linked-list unless
+    // the position of the section has changed.
+    if (hasDirtySectionLinks())
+      updateSectionLinks();
+
+    if (node->_nextSection)
+      _cursor = node->_nextSection->_prev;
+    else
+      _cursor = _lastNode;
+  }
+
+  return kErrorOk;
+}
+
+void BaseBuilder::updateSectionLinks() noexcept {
+  if (!_dirtySectionLinks)
+    return;
+
+  BaseNode* node_ = _firstNode;
+  SectionNode* currentSection = nullptr;
+
+  while (node_) {
+    if (node_->isSection()) {
+      if (currentSection)
+        currentSection->_nextSection = node_->as<SectionNode>();
+      currentSection = node_->as<SectionNode>();
+    }
+    node_ = node_->next();
+  }
+
+  if (currentSection)
+    currentSection->_nextSection = nullptr;
+
+  _dirtySectionLinks = false;
+}
+
+// BaseBuilder - Labels
+// ====================
+
+Error BaseBuilder::labelNodeOf(LabelNode** out, uint32_t labelId) {
+  *out = nullptr;
+
+  if (ASMJIT_UNLIKELY(!_code))
+    return DebugUtils::errored(kErrorNotInitialized);
+
+  uint32_t index = labelId;
+  if (ASMJIT_UNLIKELY(index >= _code->labelCount()))
+    return DebugUtils::errored(kErrorInvalidLabel);
+
+  if (index >= _labelNodes.size())
+    ASMJIT_PROPAGATE(_labelNodes.resize(&_allocator, index + 1));
+
+  LabelNode* node = _labelNodes[index];
+  if (!node) {
+    ASMJIT_PROPAGATE(_newNodeT<LabelNode>(&node, labelId));
+    _labelNodes[index] = node;
+  }
+
+  *out = node;
+  return kErrorOk;
+}
+
+Error BaseBuilder::registerLabelNode(LabelNode* node) {
+  if (ASMJIT_UNLIKELY(!_code))
+    return DebugUtils::errored(kErrorNotInitialized);
+
+  LabelEntry* le;
+  ASMJIT_PROPAGATE(_code->newLabelEntry(&le));
+  uint32_t labelId = le->id();
+
+  // We just added one label so it must be true.
+  ASMJIT_ASSERT(_labelNodes.size() < labelId + 1);
+  ASMJIT_PROPAGATE(_labelNodes.resize(&_allocator, labelId + 1));
+
+  _labelNodes[labelId] = node;
+  node->_labelId = labelId;
+
+  return kErrorOk;
+}
+
+static Error BaseBuilder_newLabelInternal(BaseBuilder* self, uint32_t labelId) {
+  ASMJIT_ASSERT(self->_labelNodes.size() < labelId + 1);
+  // Reserve all slots up to and including `labelId`; the unchecked resize() below relies on it.
+  uint32_t growBy = labelId + 1 - self->_labelNodes.size();
+  Error err = self->_labelNodes.willGrow(&self->_allocator, growBy);
+
+  if (ASMJIT_UNLIKELY(err))
+    return self->reportError(err);
+
+  LabelNode* node;
+  ASMJIT_PROPAGATE(self->_newNodeT<LabelNode>(&node, labelId));
+
+  self->_labelNodes.resize(&self->_allocator, labelId + 1);
+  self->_labelNodes[labelId] = node;
+  node->_labelId = labelId;
+  return kErrorOk;
+}
+
+Label BaseBuilder::newLabel() {
+  uint32_t labelId = Globals::kInvalidId;
+  LabelEntry* le;
+
+  if (_code &&
+      _code->newLabelEntry(&le) == kErrorOk &&
+      BaseBuilder_newLabelInternal(this, le->id()) == kErrorOk) {
+    labelId = le->id();
+  }
+
+  return Label(labelId);
+}
+
+Label BaseBuilder::newNamedLabel(const char* name, size_t nameSize, LabelType type, uint32_t parentId) {
+  uint32_t labelId = Globals::kInvalidId;
+  LabelEntry* le;
+
+  if (_code &&
+      _code->newNamedLabelEntry(&le, name, nameSize, type, parentId) == kErrorOk &&
+      BaseBuilder_newLabelInternal(this, le->id()) == kErrorOk) {
+    labelId = le->id();
+  }
+
+  return Label(labelId);
+}
+
+Error BaseBuilder::bind(const Label& label) {
+  LabelNode* node;
+  ASMJIT_PROPAGATE(labelNodeOf(&node, label));
+
+  addNode(node);
+  return kErrorOk;
+}
+
+// BaseBuilder - Passes
+// ====================
+
+ASMJIT_FAVOR_SIZE Pass* BaseBuilder::passByName(const char* name) const noexcept {
+  for (Pass* pass : _passes)
+    if (strcmp(pass->name(), name) == 0)
+      return pass;
+  return nullptr;
+}
+
+ASMJIT_FAVOR_SIZE Error BaseBuilder::addPass(Pass* pass) noexcept {
+  if (ASMJIT_UNLIKELY(!_code))
+    return DebugUtils::errored(kErrorNotInitialized);
+
+  if (ASMJIT_UNLIKELY(pass == nullptr)) {
+    // Since this is directly called by `addPassT()` we treat `null` argument
+    // as out-of-memory condition. Otherwise it would be API misuse.
+    return DebugUtils::errored(kErrorOutOfMemory);
+  }
+  else if (ASMJIT_UNLIKELY(pass->_cb)) {
+    // The pass is already attached to a builder; re-adding it to this builder is a no-op.
+    if (pass->_cb == this)
+      return kErrorOk;
+    return DebugUtils::errored(kErrorInvalidState);
+  }
+
+  ASMJIT_PROPAGATE(_passes.append(&_allocator, pass));
+  pass->_cb = this;
+  return kErrorOk;
+}
+
+ASMJIT_FAVOR_SIZE Error BaseBuilder::deletePass(Pass* pass) noexcept {
+  if (ASMJIT_UNLIKELY(!_code))
+    return DebugUtils::errored(kErrorNotInitialized);
+
+  if (ASMJIT_UNLIKELY(pass == nullptr))
+    return DebugUtils::errored(kErrorInvalidArgument);
+
+  if (pass->_cb != nullptr) {
+    if (pass->_cb != this)
+      return DebugUtils::errored(kErrorInvalidState);
+
+    uint32_t index = _passes.indexOf(pass);
+    ASMJIT_ASSERT(index != Globals::kNotFound);
+
+    pass->_cb = nullptr;
+    _passes.removeAt(index);
+  }
+
+  pass->~Pass();
+  return kErrorOk;
+}
+
+Error BaseBuilder::runPasses() {
+  if (ASMJIT_UNLIKELY(!_code))
+    return DebugUtils::errored(kErrorNotInitialized);
+
+  if (_passes.empty())
+    return kErrorOk;
+
+  ErrorHandler* prev = errorHandler();
+  PostponedErrorHandler postponed;
+
+  Error err = kErrorOk;
+  setErrorHandler(&postponed);
+
+  for (Pass* pass : _passes) {
+    _passZone.reset();
+    err = pass->run(&_passZone, _logger);
+    if (err)
+      break;
+  }
+  _passZone.reset();
+  setErrorHandler(prev);
+
+  if (ASMJIT_UNLIKELY(err))
+    return reportError(err, !postponed._message.empty() ? postponed._message.data() : nullptr);
+
+  return kErrorOk;
+}
+
+// BaseBuilder - Emit
+// ==================
+
+Error BaseBuilder::_emit(InstId instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_* opExt) {
+  uint32_t opCount = EmitterUtils::opCountFromEmitArgs(o0, o1, o2, opExt);
+  InstOptions options = instOptions() | forcedInstOptions(); // Per-instruction options merged with the forced ones.
+
+  if (Support::test(options, InstOptions::kReserved)) { // Extra checks run only when the reserved option bit is set.
+    if (ASMJIT_UNLIKELY(!_code)) // NOTE(review): checked only on this path; presumably kReserved is forced while detached - confirm.
+      return DebugUtils::errored(kErrorNotInitialized);
+
+#ifndef ASMJIT_NO_VALIDATION
+    // Strict validation.
+    if (hasDiagnosticOption(DiagnosticOptions::kValidateIntermediate)) {
+      Operand_ opArray[Globals::kMaxOpCount];
+      EmitterUtils::opArrayFromEmitArgs(opArray, o0, o1, o2, opExt);
+
+      ValidationFlags validationFlags = isCompiler() ? ValidationFlags::kEnableVirtRegs : ValidationFlags::kNone;
+      Error err = _funcs.validate(arch(), BaseInst(instId, options, _extraReg), opArray, opCount, validationFlags);
+
+      if (ASMJIT_UNLIKELY(err)) { // Reset options, extra register and inline comment before reporting the failure.
+        resetInstOptions();
+        resetExtraReg();
+        resetInlineComment();
+        return reportError(err);
+      }
+    }
+#endif
+
+    // Clear instruction options that should never be part of a regular instruction.
+    options &= ~InstOptions::kReserved;
+  }
+
+  uint32_t opCapacity = InstNode::capacityOfOpCount(opCount);
+  ASMJIT_ASSERT(opCapacity >= InstNode::kBaseOpCapacity);
+
+  InstNode* node = _allocator.allocT<InstNode>(InstNode::nodeSizeOfOpCapacity(opCapacity)); // Raw memory; constructed below.
+  const char* comment = inlineComment(); // Read before resetInlineComment() below.
+
+  resetInstOptions();
+  resetInlineComment();
+
+  if (ASMJIT_UNLIKELY(!node)) {
+    resetExtraReg();
+    return reportError(DebugUtils::errored(kErrorOutOfMemory));
+  }
+
+  node = new(node) InstNode(this, instId, options, opCount, opCapacity); // Placement-new into the memory allocated above.
+  node->setExtraReg(extraReg());
+  node->setOp(0, o0); // Operands 0..2 are stored unconditionally; presumably kBaseOpCapacity >= 3 - confirm.
+  node->setOp(1, o1);
+  node->setOp(2, o2);
+  for (uint32_t i = 3; i < opCount; i++) // Operands 3 and up come from the `opExt` array.
+    node->setOp(i, opExt[i - 3]);
+  node->resetOpRange(opCount, opCapacity);
+
+  if (comment) // The comment text is duplicated into `_dataZone` and the copy is attached to the node.
+    node->setInlineComment(static_cast<char*>(_dataZone.dup(comment, strlen(comment), true)));
+
+  addNode(node);
+  resetExtraReg(); // The extra register was already copied into the node above.
+  return kErrorOk;
+}
+
+// BaseBuilder - Align
+// ===================
+
+Error BaseBuilder::align(AlignMode alignMode, uint32_t alignment) { // Creates an AlignNode and adds it via addNode().
+  if (ASMJIT_UNLIKELY(!_code))
+    return DebugUtils::errored(kErrorNotInitialized);
+
+  AlignNode* node;
+  ASMJIT_PROPAGATE(newAlignNode(&node, alignMode, alignment)); // Any creation error is returned to the caller.
+  ASMJIT_ASSUME(node != nullptr);
+
+  addNode(node);
+  return kErrorOk;
+}
+
+// BaseBuilder - Embed
+// ===================
+
+Error BaseBuilder::embed(const void* data, size_t dataSize) { // Adds an EmbedDataNode of `dataSize` items typed as kUInt8.
+  if (ASMJIT_UNLIKELY(!_code))
+    return DebugUtils::errored(kErrorNotInitialized);
+
+  EmbedDataNode* node;
+  ASMJIT_PROPAGATE(newEmbedDataNode(&node, TypeId::kUInt8, data, dataSize)); // `repeatCount` keeps its default of 1.
+  ASMJIT_ASSUME(node != nullptr);
+
+  addNode(node);
+  return kErrorOk;
+}
+
+Error BaseBuilder::embedDataArray(TypeId typeId, const void* data, size_t itemCount, size_t itemRepeat) { // Adds a typed EmbedDataNode.
+  if (ASMJIT_UNLIKELY(!_code))
+    return DebugUtils::errored(kErrorNotInitialized);
+
+  EmbedDataNode* node;
+  ASMJIT_PROPAGATE(newEmbedDataNode(&node, typeId, data, itemCount, itemRepeat)); // `itemRepeat` becomes the node's repeat count.
+  ASMJIT_ASSUME(node != nullptr);
+
+  addNode(node);
+  return kErrorOk;
+}
+
+Error BaseBuilder::embedConstPool(const Label& label, const ConstPool& pool) { // Emits `pool`: align, bind `label`, then its bytes.
+  if (ASMJIT_UNLIKELY(!_code))
+    return DebugUtils::errored(kErrorNotInitialized);
+
+  if (!isLabelValid(label))
+    return reportError(DebugUtils::errored(kErrorInvalidLabel));
+
+  ASMJIT_PROPAGATE(align(AlignMode::kData, uint32_t(pool.alignment()))); // Adds an AlignNode using the pool's alignment.
+  ASMJIT_PROPAGATE(bind(label));
+
+  EmbedDataNode* node;
+  ASMJIT_PROPAGATE(newEmbedDataNode(&node, TypeId::kUInt8, nullptr, pool.size())); // No source buffer; filled below.
+  ASMJIT_ASSUME(node != nullptr);
+
+  pool.fill(node->data()); // The pool writes its content into the node's data buffer.
+  addNode(node);
+  return kErrorOk;
+}
+
+// BaseBuilder - EmbedLabel & EmbedLabelDelta
+// ==========================================
+//
+// If dataSize is zero it means that the size is the same as target register width, however,
+// if it's provided we really want to validate whether it's within the possible range.
+
+static inline bool BaseBuilder_checkDataSize(size_t dataSize) noexcept {
+  return !dataSize || (Support::isPowerOf2(dataSize) && dataSize <= 8); // Valid: 0, or a power of two not above 8.
+}
+
+Error BaseBuilder::embedLabel(const Label& label, size_t dataSize) { // Adds an EmbedLabelNode referring to `label`.
+  if (ASMJIT_UNLIKELY(!_code))
+    return DebugUtils::errored(kErrorNotInitialized);
+
+  if (!BaseBuilder_checkDataSize(dataSize)) // Zero is accepted and means "same as target register width".
+    return reportError(DebugUtils::errored(kErrorInvalidArgument));
+
+  EmbedLabelNode* node;
+  ASMJIT_PROPAGATE(_newNodeT<EmbedLabelNode>(&node, label.id(), uint32_t(dataSize))); // The node stores the label id.
+
+  addNode(node);
+  return kErrorOk;
+}
+
+Error BaseBuilder::embedLabelDelta(const Label& label, const Label& base, size_t dataSize) { // Adds an EmbedLabelDeltaNode.
+  if (ASMJIT_UNLIKELY(!_code))
+    return DebugUtils::errored(kErrorNotInitialized);
+
+  if (!BaseBuilder_checkDataSize(dataSize)) // Zero is accepted and means "same as target register width".
+    return reportError(DebugUtils::errored(kErrorInvalidArgument));
+
+  EmbedLabelDeltaNode* node;
+  ASMJIT_PROPAGATE(_newNodeT<EmbedLabelDeltaNode>(&node, label.id(), base.id(), uint32_t(dataSize))); // Stores both label ids.
+
+  addNode(node);
+  return kErrorOk;
+}
+
+// BaseBuilder - Comment
+// =====================
+
+Error BaseBuilder::comment(const char* data, size_t size) { // Creates a CommentNode from `data`/`size` and adds it via addNode().
+  if (ASMJIT_UNLIKELY(!_code))
+    return DebugUtils::errored(kErrorNotInitialized);
+
+  CommentNode* node;
+  ASMJIT_PROPAGATE(newCommentNode(&node, data, size)); // Any creation error is returned to the caller.
+  ASMJIT_ASSUME(node != nullptr);
+
+  addNode(node);
+  return kErrorOk;
+}
+
+// BaseBuilder - SerializeTo
+// =========================
+
+Error BaseBuilder::serializeTo(BaseEmitter* dst) {
+  Error err = kErrorOk;
+  BaseNode* node_ = _firstNode;
+
+  Operand_ opArray[Globals::kMaxOpCount];
+
+  do {
+    dst->setInlineComment(node_->inlineComment());
+
+    if (node_->isInst()) {
+      InstNode* node = node_->as<InstNode>();
+
+      // NOTE: Inlined to remove one additional call per instruction.
+      dst->setInstOptions(node->options());
+      dst->setExtraReg(node->extraReg());
+
+      const Operand_* op = node->operands();
+      const Operand_* opExt = EmitterUtils::noExt;
+
+      uint32_t opCount = node->opCount();
+      if (opCount > 3) {
+        uint32_t i = 4;
+        opArray[3] = op[3];
+
+        while (i < opCount) {
+          opArray[i].copyFrom(op[i]);
+          i++;
+        }
+        while (i < Globals::kMaxOpCount) {
+          opArray[i].reset();
+          i++;
+        }
+        opExt = opArray + 3;
+      }
+
+      err = dst->_emit(node->id(), op[0], op[1], op[2], opExt);
+    }
+    else if (node_->isLabel()) {
+      if (node_->isConstPool()) {
+        ConstPoolNode* node = node_->as<ConstPoolNode>();
+        err = dst->embedConstPool(node->label(), node->constPool());
+      }
+      else {
+        LabelNode* node = node_->as<LabelNode>();
+        err = dst->bind(node->label());
+      }
+    }
+    else if (node_->isAlign()) {
+      AlignNode* node = node_->as<AlignNode>();
+      err = dst->align(node->alignMode(), node->alignment());
+    }
+    else if (node_->isEmbedData()) {
+      EmbedDataNode* node = node_->as<EmbedDataNode>();
+      err = dst->embedDataArray(node->typeId(), node->data(), node->itemCount(), node->repeatCount());
+    }
+    else if (node_->isEmbedLabel()) {
+      EmbedLabelNode* node = node_->as<EmbedLabelNode>();
+      err = dst->embedLabel(node->label(), node->dataSize());
+    }
+    else if (node_->isEmbedLabelDelta()) {
+      EmbedLabelDeltaNode* node = node_->as<EmbedLabelDeltaNode>();
+      err = dst->embedLabelDelta(node->label(), node->baseLabel(), node->dataSize());
+    }
+    else if (node_->isSection()) {
+      SectionNode* node = node_->as<SectionNode>();
+      err = dst->section(_code->sectionById(node->id()));
+    }
+    else if (node_->isComment()) {
+      CommentNode* node = node_->as<CommentNode>();
+      err = dst->comment(node->inlineComment());
+    }
+
+    if (err) break;
+    node_ = node_->next();
+  } while (node_);
+
+  return err;
+}
+
+// BaseBuilder - Events
+// ====================
+
+Error BaseBuilder::onAttach(CodeHolder* code) noexcept { // Attaches the base emitter, then sets up the initial section node.
+  ASMJIT_PROPAGATE(Base::onAttach(code)); // `Base` is BaseEmitter; its error, if any, is returned unchanged.
+
+  SectionNode* initialSection;
+  Error err = sectionNodeOf(&initialSection, 0); // Gets or creates the SectionNode of section id 0.
+
+  if (!err)
+    err = _passes.willGrow(&_allocator, 8); // presumably pre-reserves room for 8 passes - confirm against ZoneVector.
+
+  if (ASMJIT_UNLIKELY(err)) {
+    onDetach(code); // Undo the partial setup before returning the error.
+    return err;
+  }
+
+  ASMJIT_ASSUME(initialSection != nullptr);
+  _cursor = initialSection; // The initial section is the cursor and both ends of the node list.
+  _firstNode = initialSection;
+  _lastNode = initialSection;
+  initialSection->setFlags(NodeFlags::kIsActive); // setFlags() replaces the whole flag set, leaving only kIsActive.
+
+  return kErrorOk;
+}
+
+Error BaseBuilder::onDetach(CodeHolder* code) noexcept { // Drops all passes, nodes and zones, then detaches the base emitter.
+  BaseBuilder_deletePasses(this); // Runs first, before the zones are reset below.
+  _sectionNodes.reset();
+  _labelNodes.reset();
+
+  _allocator.reset(&_codeZone); // The allocator is reset against `_codeZone`, the zone it is documented to use.
+  _codeZone.reset();
+  _dataZone.reset();
+  _passZone.reset();
+
+  _nodeFlags = NodeFlags::kNone;
+  _cursor = nullptr; // The node list is gone, so the cursor and both list ends are cleared.
+  _firstNode = nullptr;
+  _lastNode = nullptr;
+
+  return Base::onDetach(code); // `Base` is BaseEmitter; its result is the result of the detach.
+}
+
+// Pass - Construction & Destruction
+// =================================
+
+Pass::Pass(const char* name) noexcept
+  : _name(name) {} // Only `_name` is set in this initializer list.
+Pass::~Pass() noexcept {} // Empty body, defined out of line.
+
+ASMJIT_END_NAMESPACE
+
+#endif // !ASMJIT_NO_BUILDER
diff --git a/lib/lepton/asmjit/core/builder.h b/lib/lepton/asmjit/core/builder.h
new file mode 100644
index 0000000000..3575de2fbb
--- /dev/null
+++ b/lib/lepton/asmjit/core/builder.h
@@ -0,0 +1,1391 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_BUILDER_H_INCLUDED
+#define ASMJIT_CORE_BUILDER_H_INCLUDED
+
+#include "../core/api-config.h"
+#ifndef ASMJIT_NO_BUILDER
+
+#include "../core/assembler.h"
+#include "../core/codeholder.h"
+#include "../core/constpool.h"
+#include "../core/formatter.h"
+#include "../core/inst.h"
+#include "../core/operand.h"
+#include "../core/string.h"
+#include "../core/support.h"
+#include "../core/type.h"
+#include "../core/zone.h"
+#include "../core/zonevector.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \addtogroup asmjit_builder
+//! \{
+
+class BaseBuilder;
+class Pass;
+
+class BaseNode;
+class InstNode;
+class SectionNode;
+class LabelNode;
+class AlignNode;
+class EmbedDataNode;
+class EmbedLabelNode;
+class ConstPoolNode;
+class CommentNode;
+class SentinelNode;
+class LabelDeltaNode;
+
+//! Type of node used by \ref BaseBuilder and \ref BaseCompiler.
+enum class NodeType : uint8_t {
+  //! Invalid node (internal, don't use).
+  kNone = 0,
+
+  // [BaseBuilder] - ids 1..10.
+
+  //! Node is \ref InstNode or \ref InstExNode.
+  kInst = 1,
+  //! Node is \ref SectionNode.
+  kSection = 2,
+  //! Node is \ref LabelNode.
+  kLabel = 3,
+  //! Node is \ref AlignNode.
+  kAlign = 4,
+  //! Node is \ref EmbedDataNode.
+  kEmbedData = 5,
+  //! Node is \ref EmbedLabelNode.
+  kEmbedLabel = 6,
+  //! Node is \ref EmbedLabelDeltaNode.
+  kEmbedLabelDelta = 7,
+  //! Node is \ref ConstPoolNode.
+  kConstPool = 8,
+  //! Node is \ref CommentNode.
+  kComment = 9,
+  //! Node is \ref SentinelNode.
+  kSentinel = 10,
+
+  // [BaseCompiler] - ids 15..18 (11..14 are not assigned in this enum).
+
+  //! Node is \ref JumpNode (acts as InstNode).
+  kJump = 15,
+  //! Node is \ref FuncNode (acts as LabelNode).
+  kFunc = 16,
+  //! Node is \ref FuncRetNode (acts as InstNode).
+  kFuncRet = 17,
+  //! Node is \ref InvokeNode (acts as InstNode).
+  kInvoke = 18,
+
+  // [UserDefined] - ids 32 and up (19..31 are not assigned in this enum).
+
+  //! First id of a user-defined node.
+  kUser = 32
+};
+
+//! Node flags, specify what the node is and/or does.
+enum class NodeFlags : uint8_t { // All eight bits of the underlying type are assigned below.
+  //! No flags.
+  kNone = 0,
+  //! Node is code that can be executed (instruction, label, align, etc...).
+  kIsCode = 0x01u,
+  //! Node is data that cannot be executed (data, const-pool, etc...).
+  kIsData = 0x02u,
+  //! Node is informative, can be removed and ignored.
+  kIsInformative = 0x04u,
+  //! Node can be safely removed if unreachable.
+  kIsRemovable = 0x08u,
+  //! Node does nothing when executed (label, align, explicit nop).
+  kHasNoEffect = 0x10u,
+  //! Node is an instruction or acts as it.
+  kActsAsInst = 0x20u,
+  //! Node is a label or acts as it.
+  kActsAsLabel = 0x40u,
+  //! Node is active (part of the code).
+  kIsActive = 0x80u
+};
+ASMJIT_DEFINE_ENUM_FLAGS(NodeFlags) // presumably supplies the `|`, `&`, `~` operators this header applies to NodeFlags - confirm.
+
+//! Type of the sentinel (purely informative purpose).
+enum class SentinelType : uint8_t {
+  //! Type of the sentinel is not known.
+  kUnknown = 0u,
+  //! This is a sentinel used at the end of \ref FuncNode.
+  kFuncEnd = 1u
+};
+
+//! Builder interface.
+//!
+//! `BaseBuilder` interface was designed to be used as a \ref BaseAssembler replacement in case pre-processing or
+//! post-processing of the generated code is required. The code can be modified during or after code generation.
+//! Pre processing or post processing can be done manually or through a \ref Pass object. \ref BaseBuilder stores
+//! the emitted code as a double-linked list of nodes, which allows O(1) insertion and removal during processing.
+//!
+//! Check out architecture specific builders for more details and examples:
+//!
+//!   - \ref x86::Builder - X86/X64 builder implementation.
+class ASMJIT_VIRTAPI BaseBuilder : public BaseEmitter {
+public:
+  ASMJIT_NONCOPYABLE(BaseBuilder)
+  typedef BaseEmitter Base;
+
+  //! \name Members
+  //! \{
+
+  //! Base zone used to allocate nodes and passes.
+  Zone _codeZone;
+  //! Data zone used to allocate data and names.
+  Zone _dataZone;
+  //! Pass zone, passed to `Pass::run()`.
+  Zone _passZone;
+  //! Allocator that uses `_codeZone`.
+  ZoneAllocator _allocator;
+
+  //! Array of `Pass` objects.
+  ZoneVector<Pass*> _passes {};
+  //! Maps section indexes to `SectionNode` nodes.
+  ZoneVector<SectionNode*> _sectionNodes {};
+  //! Maps label indexes to `LabelNode` nodes.
+  ZoneVector<LabelNode*> _labelNodes {};
+
+  //! Current node (cursor).
+  BaseNode* _cursor = nullptr;
+  //! First node of the current section.
+  BaseNode* _firstNode = nullptr;
+  //! Last node of the current section.
+  BaseNode* _lastNode = nullptr;
+
+  //! Flags assigned to each new node.
+  NodeFlags _nodeFlags = NodeFlags::kNone;
+  //! The sections links are dirty (used internally).
+  bool _dirtySectionLinks = false;
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Creates a new `BaseBuilder` instance.
+  ASMJIT_API BaseBuilder() noexcept;
+  //! Destroys the `BaseBuilder` instance.
+  ASMJIT_API virtual ~BaseBuilder() noexcept;
+
+  //! \}
+
+  //! \name Node Management
+  //! \{
+
+  //! Returns the first node.
+  inline BaseNode* firstNode() const noexcept { return _firstNode; }
+  //! Returns the last node.
+  inline BaseNode* lastNode() const noexcept { return _lastNode; }
+
+  //! Allocates and instantiates a new node of type `T` and stores it in `out`. If the allocation fails `*out` is
+  //! `nullptr` and `kErrorOutOfMemory` is reported and returned.
+  //!
+  //! The template argument `T` must be a type that extends \ref BaseNode.
+  //!
+  //! \remarks The pointer returned (if non-null) is owned by the Builder or Compiler. When the Builder/Compiler
+  //! is destroyed it destroys all nodes it created so no manual memory management is required.
+  template<typename T, typename... Args>
+  inline Error _newNodeT(T** ASMJIT_NONNULL(out), Args&&... args) {
+    *out = _allocator.newT<T>(this, std::forward<Args>(args)...);
+    if (ASMJIT_UNLIKELY(!*out))
+      return reportError(DebugUtils::errored(kErrorOutOfMemory));
+    return kErrorOk;
+  }
+
+  //! Creates a new \ref InstNode.
+  ASMJIT_API Error newInstNode(InstNode** ASMJIT_NONNULL(out), InstId instId, InstOptions instOptions, uint32_t opCount);
+  //! Creates a new \ref LabelNode.
+  ASMJIT_API Error newLabelNode(LabelNode** ASMJIT_NONNULL(out));
+  //! Creates a new \ref AlignNode.
+  ASMJIT_API Error newAlignNode(AlignNode** ASMJIT_NONNULL(out), AlignMode alignMode, uint32_t alignment);
+  //! Creates a new \ref EmbedDataNode.
+  ASMJIT_API Error newEmbedDataNode(EmbedDataNode** ASMJIT_NONNULL(out), TypeId typeId, const void* data, size_t itemCount, size_t repeatCount = 1);
+  //! Creates a new \ref ConstPoolNode.
+  ASMJIT_API Error newConstPoolNode(ConstPoolNode** ASMJIT_NONNULL(out));
+  //! Creates a new \ref CommentNode.
+  ASMJIT_API Error newCommentNode(CommentNode** ASMJIT_NONNULL(out), const char* data, size_t size);
+
+  //! Adds `node` after the current and sets the current node to the given `node`.
+  ASMJIT_API BaseNode* addNode(BaseNode* ASMJIT_NONNULL(node)) noexcept;
+  //! Inserts the given `node` after `ref`.
+  ASMJIT_API BaseNode* addAfter(BaseNode* ASMJIT_NONNULL(node), BaseNode* ASMJIT_NONNULL(ref)) noexcept;
+  //! Inserts the given `node` before `ref`.
+  ASMJIT_API BaseNode* addBefore(BaseNode* ASMJIT_NONNULL(node), BaseNode* ASMJIT_NONNULL(ref)) noexcept;
+  //! Removes the given `node`.
+  ASMJIT_API BaseNode* removeNode(BaseNode* ASMJIT_NONNULL(node)) noexcept;
+  //! Removes multiple nodes.
+  ASMJIT_API void removeNodes(BaseNode* first, BaseNode* last) noexcept;
+
+  //! Returns the cursor.
+  //!
+  //! When the Builder/Compiler is created it automatically creates a '.text' \ref SectionNode, which will be the
+  //! initial one. When instructions are added they are always added after the cursor and the cursor is changed
+  //! to be that newly added node. Use `setCursor()` to change where new nodes are inserted.
+  inline BaseNode* cursor() const noexcept { return _cursor; }
+
+  //! Sets the current node to `node` and returns the previous one.
+  ASMJIT_API BaseNode* setCursor(BaseNode* node) noexcept;
+
+  //! Sets the current node without returning the previous node.
+  //!
+  //! Only use this function if you are concerned about performance and want this inlined (for example if you set
+  //! the cursor in a loop, etc...).
+  inline void _setCursor(BaseNode* node) noexcept { _cursor = node; }
+
+  //! \}
+
+  //! \name Section Management
+  //! \{
+
+  //! Returns a vector of SectionNode objects.
+  //!
+  //! \note If a section of some id is not associated with the Builder/Compiler it would be null, so always check
+  //! for nulls if you iterate over the vector.
+  inline const ZoneVector<SectionNode*>& sectionNodes() const noexcept {
+    return _sectionNodes;
+  }
+
+  //! Tests whether the `SectionNode` of the given `sectionId` was registered.
+  inline bool hasRegisteredSectionNode(uint32_t sectionId) const noexcept {
+    return sectionId < _sectionNodes.size() && _sectionNodes[sectionId] != nullptr;
+  }
+
+  //! Returns or creates a `SectionNode` that matches the given `sectionId`.
+  //!
+  //! \remarks This function will either get the existing `SectionNode` or create it in case it wasn't created before.
+  //! You can check whether a section has a registered `SectionNode` by using `BaseBuilder::hasRegisteredSectionNode()`.
+  ASMJIT_API Error sectionNodeOf(SectionNode** ASMJIT_NONNULL(out), uint32_t sectionId);
+
+  ASMJIT_API Error section(Section* ASMJIT_NONNULL(section)) override;
+
+  //! Returns whether the section links of active section nodes are dirty. You can update these links by calling
+  //! `updateSectionLinks()` in such case.
+  inline bool hasDirtySectionLinks() const noexcept { return _dirtySectionLinks; }
+
+  //! Updates links of all active section nodes.
+  ASMJIT_API void updateSectionLinks() noexcept;
+
+  //! \}
+
+  //! \name Label Management
+  //! \{
+
+  //! Returns a vector of \ref LabelNode nodes.
+  //!
+  //! \note If a label of some id is not associated with the Builder/Compiler it would be null, so always check for
+  //! nulls if you iterate over the vector.
+  inline const ZoneVector<LabelNode*>& labelNodes() const noexcept { return _labelNodes; }
+
+  //! Tests whether the `LabelNode` of the given `labelId` was registered.
+  inline bool hasRegisteredLabelNode(uint32_t labelId) const noexcept {
+    return labelId < _labelNodes.size() && _labelNodes[labelId] != nullptr;
+  }
+
+  //! \overload
+  inline bool hasRegisteredLabelNode(const Label& label) const noexcept {
+    return hasRegisteredLabelNode(label.id());
+  }
+
+  //! Gets or creates a \ref LabelNode that matches the given `labelId`.
+  //!
+  //! \remarks This function will either get the existing `LabelNode` or create it in case it wasn't created before.
+  //! You can check whether a label has a registered `LabelNode` by calling \ref BaseBuilder::hasRegisteredLabelNode().
+  ASMJIT_API Error labelNodeOf(LabelNode** ASMJIT_NONNULL(out), uint32_t labelId);
+
+  //! \overload
+  inline Error labelNodeOf(LabelNode** ASMJIT_NONNULL(out), const Label& label) {
+    return labelNodeOf(out, label.id());
+  }
+
+  //! Registers this \ref LabelNode (internal).
+  //!
+  //! This function is used internally to register a newly created `LabelNode` with this instance of Builder/Compiler.
+  //! Use \ref labelNodeOf() functions to get back \ref LabelNode from a label or its identifier.
+  ASMJIT_API Error registerLabelNode(LabelNode* ASMJIT_NONNULL(node));
+
+  ASMJIT_API Label newLabel() override;
+  ASMJIT_API Label newNamedLabel(const char* name, size_t nameSize = SIZE_MAX, LabelType type = LabelType::kGlobal, uint32_t parentId = Globals::kInvalidId) override;
+  ASMJIT_API Error bind(const Label& label) override;
+
+  //! \}
+
+  //! \name Passes
+  //! \{
+
+  //! Returns a vector of `Pass` instances that will be executed by `runPasses()`.
+  inline const ZoneVector<Pass*>& passes() const noexcept { return _passes; }
+
+  //! Allocates and instantiates a new pass of type `T` and returns its instance. If the allocation fails `nullptr` is
+  //! returned.
+  //!
+  //! The template argument `T` must be a type that extends \ref Pass.
+  //!
+  //! \remarks The pointer returned (if non-null) is owned by the Builder or Compiler. When the Builder/Compiler is
+  //! destroyed it destroys all passes it created so no manual memory management is required.
+  template<typename T>
+  inline T* newPassT() noexcept { return _codeZone.newT<T>(); }
+
+  //! \overload
+  template<typename T, typename... Args>
+  inline T* newPassT(Args&&... args) noexcept { return _codeZone.newT<T>(std::forward<Args>(args)...); }
+
+  template<typename T>
+  inline Error addPassT() { return addPass(newPassT<T>()); }
+
+  template<typename T, typename... Args>
+  inline Error addPassT(Args&&... args) { return addPass(newPassT<T, Args...>(std::forward<Args>(args)...)); }
+
+  //! Returns `Pass` by name.
+  //!
+  //! If the pass having the given `name` doesn't exist `nullptr` is returned.
+  ASMJIT_API Pass* passByName(const char* name) const noexcept;
+  //! Adds `pass` to the list of passes.
+  ASMJIT_API Error addPass(Pass* pass) noexcept;
+  //! Removes `pass` from the list of passes and deletes it.
+  ASMJIT_API Error deletePass(Pass* pass) noexcept;
+
+  //! Runs all passes in order.
+  ASMJIT_API Error runPasses();
+
+  //! \}
+
+  //! \name Emit
+  //! \{
+
+  ASMJIT_API Error _emit(InstId instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_* opExt) override;
+
+  //! \}
+
+  //! \name Align
+  //! \{
+
+  ASMJIT_API Error align(AlignMode alignMode, uint32_t alignment) override;
+
+  //! \}
+
+  //! \name Embed
+  //! \{
+
+  ASMJIT_API Error embed(const void* data, size_t dataSize) override;
+  ASMJIT_API Error embedDataArray(TypeId typeId, const void* data, size_t count, size_t repeat = 1) override;
+  ASMJIT_API Error embedConstPool(const Label& label, const ConstPool& pool) override;
+
+  ASMJIT_API Error embedLabel(const Label& label, size_t dataSize = 0) override;
+  ASMJIT_API Error embedLabelDelta(const Label& label, const Label& base, size_t dataSize = 0) override;
+
+  //! \}
+
+  //! \name Comment
+  //! \{
+
+  ASMJIT_API Error comment(const char* data, size_t size = SIZE_MAX) override;
+
+  //! \}
+
+  //! \name Serialization
+  //! \{
+
+  //! Serializes everything to the given emitter `dst`.
+  //!
+  //! Although not explicitly required the emitter will most probably be of Assembler type. The reason is that
+  //! there is no known use of serializing nodes held by Builder/Compiler into another Builder-like emitter.
+  ASMJIT_API Error serializeTo(BaseEmitter* dst);
+
+  //! \}
+
+  //! \name Events
+  //! \{
+
+  ASMJIT_API Error onAttach(CodeHolder* code) noexcept override;
+  ASMJIT_API Error onDetach(CodeHolder* code) noexcept override;
+
+  //! \}
+};
+
+//! Base node.
+//!
+//! Every node represents a building-block used by \ref BaseBuilder. It can be instruction, data, label, comment,
+//! directive, or any other high-level representation that can be transformed to the building blocks mentioned.
+//! Every class that inherits \ref BaseBuilder can define its own high-level nodes that can be later lowered to
+//! basic nodes like instructions.
+class BaseNode {
+public:
+  ASMJIT_NONCOPYABLE(BaseNode)
+
+  //! \name Members
+  //! \{
+
+  union {
+    struct {
+      //! Previous node.
+      BaseNode* _prev;
+      //! Next node.
+      BaseNode* _next;
+    };
+    //! Links (an alternative view to previous and next nodes).
+    BaseNode* _links[2];
+  };
+
+  //! Data shared between all types of nodes.
+  struct AnyData {
+    //! Node type.
+    NodeType _nodeType;
+    //! Node flags.
+    NodeFlags _nodeFlags;
+    //! Not used by BaseNode.
+    uint8_t _reserved0;
+    //! Not used by BaseNode.
+    uint8_t _reserved1;
+  };
+
+  //! Data used by \ref AlignNode.
+  struct AlignData {
+    //! Node type.
+    NodeType _nodeType;
+    //! Node flags.
+    NodeFlags _nodeFlags;
+    //! Align mode.
+    AlignMode _alignMode;
+    //! Not used by AlignNode.
+    uint8_t _reserved;
+  };
+
+  //! Data used by \ref InstNode.
+  struct InstData {
+    //! Node type.
+    NodeType _nodeType;
+    //! Node flags.
+    NodeFlags _nodeFlags;
+    //! Instruction operands count (used).
+    uint8_t _opCount;
+    //! Instruction operands capacity (allocated).
+    uint8_t _opCapacity;
+  };
+
+  //! Data used by \ref EmbedDataNode.
+  struct EmbedData {
+    //! Node type.
+    NodeType _nodeType;
+    //! Node flags.
+    NodeFlags _nodeFlags;
+    //! Type id.
+    TypeId _typeId;
+    //! Size of `_typeId`.
+    uint8_t _typeSize;
+  };
+
+  //! Data used by \ref SentinelNode.
+  struct SentinelData {
+    //! Node type.
+    NodeType _nodeType;
+    //! Node flags.
+    NodeFlags _nodeFlags;
+    //! Sentinel type.
+    SentinelType _sentinelType;
+    //! Not used by BaseNode.
+    uint8_t _reserved1;
+  };
+
+  //! Data that can have different meaning depending on \ref NodeType.
+  union {
+    //! Data useful by any node type.
+    AnyData _any;
+    //! Data specific to \ref AlignNode.
+    AlignData _alignData;
+    //! Data specific to \ref InstNode.
+    InstData _inst;
+    //! Data specific to \ref EmbedDataNode.
+    EmbedData _embed;
+    //! Data specific to \ref SentinelNode.
+    SentinelData _sentinel;
+  };
+
+  //! Node position in code (should be unique).
+  uint32_t _position;
+
+  //! Value reserved for AsmJit users never touched by AsmJit itself.
+  union {
+    //! User data as 64-bit integer.
+    uint64_t _userDataU64;
+    //! User data as pointer.
+    void* _userDataPtr;
+  };
+
+  //! Data used exclusively by the current `Pass`.
+  void* _passData;
+
+  //! Inline comment/annotation or nullptr if not used.
+  const char* _inlineComment;
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Creates a new `BaseNode` - always use `BaseBuilder` to allocate nodes.
+  inline BaseNode(BaseBuilder* cb, NodeType nodeType, NodeFlags nodeFlags = NodeFlags::kNone) noexcept {
+    _prev = nullptr;
+    _next = nullptr;
+    _any._nodeType = nodeType;
+    _any._nodeFlags = nodeFlags | cb->_nodeFlags;
+    _any._reserved0 = 0;
+    _any._reserved1 = 0;
+    _position = 0;
+    _userDataU64 = 0;
+    _passData = nullptr;
+    _inlineComment = nullptr;
+  }
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Casts this node to `T*`.
+  template<typename T>
+  inline T* as() noexcept { return static_cast<T*>(this); }
+  //! Casts this node to `const T*`.
+  template<typename T>
+  inline const T* as() const noexcept { return static_cast<const T*>(this); }
+
+  //! Returns previous node or `nullptr` if this node is either first or not
+  //! part of Builder/Compiler node-list.
+  inline BaseNode* prev() const noexcept { return _prev; }
+  //! Returns next node or `nullptr` if this node is either last or not part
+  //! of Builder/Compiler node-list.
+  inline BaseNode* next() const noexcept { return _next; }
+
+  //! Returns the type of the node, see `NodeType`.
+  inline NodeType type() const noexcept { return _any._nodeType; }
+
+  //! Sets the type of the node, see `NodeType` (internal).
+  //!
+  //! \remarks You should never set a type of a node to anything else than the initial value. This function is only
+  //! provided for users that use custom nodes and need to change the type either during construction or later.
+  inline void setType(NodeType type) noexcept { _any._nodeType = type; }
+
+  //! Tests whether this node is either `InstNode` or extends it.
+  inline bool isInst() const noexcept { return hasFlag(NodeFlags::kActsAsInst); }
+  //! Tests whether this node is `SectionNode`.
+  inline bool isSection() const noexcept { return type() == NodeType::kSection; }
+  //! Tests whether this node is either `LabelNode` or extends it.
+  inline bool isLabel() const noexcept { return hasFlag(NodeFlags::kActsAsLabel); }
+  //! Tests whether this node is `AlignNode`.
+  inline bool isAlign() const noexcept { return type() == NodeType::kAlign; }
+  //! Tests whether this node is `EmbedDataNode`.
+  inline bool isEmbedData() const noexcept { return type() == NodeType::kEmbedData; }
+  //! Tests whether this node is `EmbedLabelNode`.
+  inline bool isEmbedLabel() const noexcept { return type() == NodeType::kEmbedLabel; }
+  //! Tests whether this node is `EmbedLabelDeltaNode`.
+  inline bool isEmbedLabelDelta() const noexcept { return type() == NodeType::kEmbedLabelDelta; }
+  //! Tests whether this node is `ConstPoolNode`.
+  inline bool isConstPool() const noexcept { return type() == NodeType::kConstPool; }
+  //! Tests whether this node is `CommentNode`.
+  inline bool isComment() const noexcept { return type() == NodeType::kComment; }
+  //! Tests whether this node is `SentinelNode`.
+  inline bool isSentinel() const noexcept { return type() == NodeType::kSentinel; }
+
+  //! Tests whether this node is `FuncNode`.
+  inline bool isFunc() const noexcept { return type() == NodeType::kFunc; }
+  //! Tests whether this node is `FuncRetNode`.
+  inline bool isFuncRet() const noexcept { return type() == NodeType::kFuncRet; }
+  //! Tests whether this node is `InvokeNode`.
+  inline bool isInvoke() const noexcept { return type() == NodeType::kInvoke; }
+
+  //! Returns the node flags.
+  inline NodeFlags flags() const noexcept { return _any._nodeFlags; }
+  //! Tests whether the node has the given `flag` set.
+  inline bool hasFlag(NodeFlags flag) const noexcept { return Support::test(_any._nodeFlags, flag); }
+  //! Replaces node flags with `flags`.
+  inline void setFlags(NodeFlags flags) noexcept { _any._nodeFlags = flags; }
+  //! Adds the given `flags` to node flags.
+  inline void addFlags(NodeFlags flags) noexcept { _any._nodeFlags |= flags; }
+  //! Clears the given `flags` from node flags.
+  inline void clearFlags(NodeFlags flags) noexcept { _any._nodeFlags &= ~flags; }
+
+  //! Tests whether the node is code that can be executed.
+  inline bool isCode() const noexcept { return hasFlag(NodeFlags::kIsCode); }
+  //! Tests whether the node is data that cannot be executed.
+  inline bool isData() const noexcept { return hasFlag(NodeFlags::kIsData); }
+  //! Tests whether the node is informative only (is never encoded like comment, etc...).
+  inline bool isInformative() const noexcept { return hasFlag(NodeFlags::kIsInformative); }
+  //! Tests whether the node is removable if it's in an unreachable code block.
+  inline bool isRemovable() const noexcept { return hasFlag(NodeFlags::kIsRemovable); }
+  //! Tests whether the node has no effect when executed (label, .align, nop, ...).
+  inline bool hasNoEffect() const noexcept { return hasFlag(NodeFlags::kHasNoEffect); }
+  //! Tests whether the node is part of the code.
+  inline bool isActive() const noexcept { return hasFlag(NodeFlags::kIsActive); }
+
+  //! Tests whether the node has a position assigned.
+  //!
+  //! \remarks Returns `true` if node position is non-zero.
+  inline bool hasPosition() const noexcept { return _position != 0; }
+  //! Returns node position.
+  inline uint32_t position() const noexcept { return _position; }
+  //! Sets node position.
+  //!
+  //! Node position is a 32-bit unsigned integer that is used by Compiler to track where the node is relatively to
+  //! the start of the function. It doesn't describe a byte position in a binary, instead it's just a pseudo position
+  //! used by liveness analysis and other tools around Compiler.
+  //!
+  //! If you don't use Compiler then you may use `position()` and `setPosition()` freely for your own purposes if
+  //! the 32-bit value limit is okay for you.
+  inline void setPosition(uint32_t position) noexcept { _position = position; }
+
+  //! Returns user data casted to `T*`.
+  //!
+  //! User data is dedicated to be used only by AsmJit users and not touched by the library. The data has a pointer
+  //! size so you can either store a pointer or an integer through `setUserDataAsInt64()` / `setUserDataAsUInt64()`.
+  template<typename T>
+  inline T* userDataAsPtr() const noexcept { return static_cast<T*>(_userDataPtr); }
+  //! Returns user data casted to `int64_t`.
+  inline int64_t userDataAsInt64() const noexcept { return int64_t(_userDataU64); }
+  //! Returns user data casted to `uint64_t`.
+  inline uint64_t userDataAsUInt64() const noexcept { return _userDataU64; }
+
+  //! Sets user data to `data`.
+  template<typename T>
+  inline void setUserDataAsPtr(T* data) noexcept { _userDataPtr = static_cast<void*>(data); }
+  //! Sets user data to the given 64-bit signed `value`.
+  inline void setUserDataAsInt64(int64_t value) noexcept { _userDataU64 = uint64_t(value); }
+  //! Sets user data to the given 64-bit unsigned `value`.
+  inline void setUserDataAsUInt64(uint64_t value) noexcept { _userDataU64 = value; }
+
+  //! Resets user data to zero / nullptr.
+  inline void resetUserData() noexcept { _userDataU64 = 0; }
+
+  //! Tests whether the node has an associated pass data.
+  inline bool hasPassData() const noexcept { return _passData != nullptr; }
+  //! Returns the node pass data - data used during processing & transformations.
+  template<typename T>
+  inline T* passData() const noexcept { return (T*)_passData; }
+  //! Sets the node pass data to `data`.
+  template<typename T>
+  inline void setPassData(T* data) noexcept { _passData = (void*)data; }
+  //! Resets the node pass data to nullptr.
+  inline void resetPassData() noexcept { _passData = nullptr; }
+
+  //! Tests whether the node has an inline comment/annotation.
+  inline bool hasInlineComment() const noexcept { return _inlineComment != nullptr; }
+  //! Returns an inline comment/annotation string.
+  inline const char* inlineComment() const noexcept { return _inlineComment; }
+  //! Sets an inline comment/annotation string to `s`.
+  inline void setInlineComment(const char* s) noexcept { _inlineComment = s; }
+  //! Resets an inline comment/annotation string to nullptr.
+  inline void resetInlineComment() noexcept { _inlineComment = nullptr; }
+
+  //! \}
+};
+
+//! Instruction node.
+//!
+//! Wraps an instruction with its options and operands.
+class InstNode : public BaseNode {
+public:
+  ASMJIT_NONCOPYABLE(InstNode)
+
+  //! \name Constants
+  //! \{
+
+  enum : uint32_t {
+    //! Count of embedded operands per `InstNode` that are always allocated as a part of the instruction. Minimum
+    //! embedded operands is 4, but in 32-bit mode pointers are smaller and we can embed 5. The rest (up to 6 operands)
+    //! is always stored in `InstExNode`.
+    kBaseOpCapacity = uint32_t((128 - sizeof(BaseNode) - sizeof(BaseInst)) / sizeof(Operand_))
+  };
+
+  //! \}
+
+  //! \name Members
+  //! \{
+
+  //! Base instruction data.
+  BaseInst _baseInst;
+  //! First 4 or 5 operands (indexed from 0).
+  Operand_ _opArray[kBaseOpCapacity];
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Creates a new `InstNode` instance (`opCount` and `opCapacity` are stored as 8-bit values).
+  inline InstNode(BaseBuilder* cb, InstId instId, InstOptions options, uint32_t opCount, uint32_t opCapacity = kBaseOpCapacity) noexcept
+    : BaseNode(cb, NodeType::kInst, NodeFlags::kIsCode | NodeFlags::kIsRemovable | NodeFlags::kActsAsInst),
+      _baseInst(instId, options) {
+    _inst._opCapacity = uint8_t(opCapacity);
+    _inst._opCount = uint8_t(opCount);
+  }
+
+  //! \cond INTERNAL
+  //! Resets the extra register and all operands in `[0, opCapacity())`.
+  inline void _resetOps() noexcept {
+    _baseInst.resetExtraReg();
+    resetOpRange(0, opCapacity());
+  }
+  //! \endcond
+
+  //! \}
+
+  //! \name Instruction Object
+  //! \{
+
+  inline BaseInst& baseInst() noexcept { return _baseInst; }
+  inline const BaseInst& baseInst() const noexcept { return _baseInst; }
+
+  //! \}
+
+  //! \name Instruction Id
+  //! \{
+
+  //! Returns the instruction id, see `BaseInst::Id`.
+  inline InstId id() const noexcept { return _baseInst.id(); }
+  //! Returns the instruction real id, see `BaseInst::Id`.
+  inline InstId realId() const noexcept { return _baseInst.realId(); }
+
+  //! Sets the instruction id to `id`, see `BaseInst::Id`.
+  inline void setId(InstId id) noexcept { _baseInst.setId(id); }
+
+  //! \}
+
+  //! \name Instruction Options
+  //! \{
+
+  inline InstOptions options() const noexcept { return _baseInst.options(); }
+  inline bool hasOption(InstOptions option) const noexcept { return _baseInst.hasOption(option); }
+  inline void setOptions(InstOptions options) noexcept { _baseInst.setOptions(options); }
+  inline void addOptions(InstOptions options) noexcept { _baseInst.addOptions(options); }
+  inline void clearOptions(InstOptions options) noexcept { _baseInst.clearOptions(options); }
+  inline void resetOptions() noexcept { _baseInst.resetOptions(); }
+
+  //! \}
+
+  //! \name Extra Register
+  //! \{
+
+  //! Tests whether the node has an extra register operand.
+  inline bool hasExtraReg() const noexcept { return _baseInst.hasExtraReg(); }
+  //! Returns extra register operand.
+  inline RegOnly& extraReg() noexcept { return _baseInst.extraReg(); }
+  //! \overload
+  inline const RegOnly& extraReg() const noexcept { return _baseInst.extraReg(); }
+  //! Sets extra register operand to `reg`.
+  inline void setExtraReg(const BaseReg& reg) noexcept { _baseInst.setExtraReg(reg); }
+  //! Sets extra register operand to `reg`.
+  inline void setExtraReg(const RegOnly& reg) noexcept { _baseInst.setExtraReg(reg); }
+  //! Resets extra register operand.
+  inline void resetExtraReg() noexcept { _baseInst.resetExtraReg(); }
+
+  //! \}
+
+  //! \name Instruction Operands
+  //! \{
+
+  //! Returns operand count.
+  inline uint32_t opCount() const noexcept { return _inst._opCount; }
+  //! Returns operand capacity.
+  inline uint32_t opCapacity() const noexcept { return _inst._opCapacity; }
+
+  //! Sets operand count (the value is stored as an 8-bit integer).
+  inline void setOpCount(uint32_t opCount) noexcept { _inst._opCount = uint8_t(opCount); }
+
+  //! Returns operands array.
+  inline Operand* operands() noexcept { return (Operand*)_opArray; }
+  //! Returns operands array (const).
+  inline const Operand* operands() const noexcept { return (const Operand*)_opArray; }
+
+  //! Returns operand at the given `index`, which must be less than `opCapacity()`.
+  inline Operand& op(uint32_t index) noexcept {
+    ASMJIT_ASSERT(index < opCapacity());
+    return _opArray[index].as<Operand>();
+  }
+
+  //! Returns operand at the given `index` (const), which must be less than `opCapacity()`.
+  inline const Operand& op(uint32_t index) const noexcept {
+    ASMJIT_ASSERT(index < opCapacity());
+    return _opArray[index].as<Operand>();
+  }
+
+  //! Sets operand at the given `index` to `op`; `index` must be less than `opCapacity()`.
+  inline void setOp(uint32_t index, const Operand_& op) noexcept {
+    ASMJIT_ASSERT(index < opCapacity());
+    _opArray[index].copyFrom(op);
+  }
+
+  //! Resets operand at the given `index` to none; `index` must be less than `opCapacity()`.
+  inline void resetOp(uint32_t index) noexcept {
+    ASMJIT_ASSERT(index < opCapacity());
+    _opArray[index].reset();
+  }
+
+  //! Resets operands at `[start, end)` range (the range is not checked against `opCapacity()`).
+  inline void resetOpRange(uint32_t start, uint32_t end) noexcept {
+    for (uint32_t i = start; i < end; i++)
+      _opArray[i].reset();
+  }
+
+  //! \}
+
+  //! \name Utilities
+  //! \{
+
+  inline bool hasOpType(OperandType opType) const noexcept {
+    for (uint32_t i = 0, count = opCount(); i < count; i++)
+      if (_opArray[i].opType() == opType)
+        return true;
+    return false;
+  }
+
+  inline bool hasRegOp() const noexcept { return hasOpType(OperandType::kReg); }
+  inline bool hasMemOp() const noexcept { return hasOpType(OperandType::kMem); }
+  inline bool hasImmOp() const noexcept { return hasOpType(OperandType::kImm); }
+  inline bool hasLabelOp() const noexcept { return hasOpType(OperandType::kLabel); }
+
+  inline uint32_t indexOfOpType(OperandType opType) const noexcept {
+    uint32_t i = 0;
+    uint32_t count = opCount();
+
+    while (i < count) {
+      if (_opArray[i].opType() == opType)
+        break;
+      i++;
+    }
+    // `i == opCount()` at this point means that no operand of `opType` was found.
+    return i;
+  }
+
+  inline uint32_t indexOfMemOp() const noexcept { return indexOfOpType(OperandType::kMem); }
+  inline uint32_t indexOfImmOp() const noexcept { return indexOfOpType(OperandType::kImm); }
+  inline uint32_t indexOfLabelOp() const noexcept { return indexOfOpType(OperandType::kLabel); }
+
+  //! \}
+
+  //! \name Rewriting
+  //! \{
+
+  //! \cond INTERNAL
+  inline uint32_t* _getRewriteArray() noexcept { return &_baseInst._extraReg._id; }
+  inline const uint32_t* _getRewriteArray() const noexcept { return &_baseInst._extraReg._id; }
+
+  inline uint32_t getRewriteIndex(const uint32_t* id) const noexcept {
+    const uint32_t* array = _getRewriteArray();
+    ASMJIT_ASSERT(array <= id);
+
+    size_t index = (size_t)(id - array);
+    ASMJIT_ASSERT(index < 32);
+
+    return uint32_t(index);
+  }
+
+  inline void rewriteIdAtIndex(uint32_t index, uint32_t id) noexcept {
+    uint32_t* array = _getRewriteArray();
+    array[index] = id;
+  }
+  //! \endcond
+
+  //! \}
+
+  //! \name Static Functions
+  //! \{
+
+  //! \cond INTERNAL
+  static inline uint32_t capacityOfOpCount(uint32_t opCount) noexcept {
+    return opCount <= kBaseOpCapacity ? kBaseOpCapacity : Globals::kMaxOpCount;
+  }
+
+  static inline size_t nodeSizeOfOpCapacity(uint32_t opCapacity) noexcept {
+    size_t base = sizeof(InstNode) - kBaseOpCapacity * sizeof(Operand);
+    return base + opCapacity * sizeof(Operand);
+  }
+  //! \endcond
+
+  //! \}
+};
+
+//! Instruction node with maximum number of operands.
+//!
+//! This node is created automatically by Builder/Compiler in case that the required number of operands exceeds
+//! the default capacity of `InstNode`.
+class InstExNode : public InstNode {
+public:
+  ASMJIT_NONCOPYABLE(InstExNode)
+
+  //! \name Members
+  //! \{
+
+  //! Continued `_opArray[]` to hold up to `kMaxOpCount` operands.
+  Operand_ _opArrayEx[Globals::kMaxOpCount - kBaseOpCapacity];
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Creates a new `InstExNode` instance having both operand count and operand capacity set to `opCapacity`.
+  inline InstExNode(BaseBuilder* cb, InstId instId, InstOptions options, uint32_t opCapacity = Globals::kMaxOpCount) noexcept
+    : InstNode(cb, instId, options, opCapacity, opCapacity) {} // 5th argument would default to `kBaseOpCapacity`.
+
+  //! \}
+};
+
+//! Section node (stores a section id and a link to the next section node).
+class SectionNode : public BaseNode {
+public:
+  ASMJIT_NONCOPYABLE(SectionNode)
+
+  //! \name Members
+  //! \{
+
+  //! Section id.
+  uint32_t _id;
+
+  //! Next section node that follows this section.
+  //!
+  //! This link is only valid when the section is active (is part of the code) and when `Builder::hasDirtySectionLinks()`
+  //! returns `false`. If you intend to use this field you should always call `Builder::updateSectionLinks()` before you
+  //! do so.
+  SectionNode* _nextSection;
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Creates a new `SectionNode` instance having the given section id and a null next-section link.
+  inline SectionNode(BaseBuilder* cb, uint32_t secionId = 0) noexcept
+    : BaseNode(cb, NodeType::kSection, NodeFlags::kHasNoEffect),
+      _id(secionId),
+      _nextSection(nullptr) {}
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns the section id.
+  inline uint32_t id() const noexcept { return _id; }
+
+  //! \}
+};
+
+//! Label node (stores the id of the label it refers to).
+class LabelNode : public BaseNode {
+public:
+  ASMJIT_NONCOPYABLE(LabelNode)
+
+  //! \name Members
+  //! \{
+
+  //! Label identifier.
+  uint32_t _labelId;
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Creates a new `LabelNode` instance that refers to the label identified by `labelId`.
+  inline LabelNode(BaseBuilder* cb, uint32_t labelId = 0) noexcept
+    : BaseNode(cb, NodeType::kLabel, NodeFlags::kHasNoEffect | NodeFlags::kActsAsLabel),
+      _labelId(labelId) {}
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns \ref Label representation of the \ref LabelNode.
+  inline Label label() const noexcept { return Label(_labelId); }
+  //! Returns the id of the label.
+  inline uint32_t labelId() const noexcept { return _labelId; }
+
+  //! \}
+};
+
+//! Align directive (BaseBuilder).
+//!
+//! Wraps `.align` directive.
+class AlignNode : public BaseNode {
+public:
+  ASMJIT_NONCOPYABLE(AlignNode)
+
+  //! \name Members
+  //! \{
+
+  //! Alignment (in bytes).
+  uint32_t _alignment;
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Creates a new `AlignNode` instance having the given `alignMode` and `alignment` (in bytes).
+  inline AlignNode(BaseBuilder* cb, AlignMode alignMode, uint32_t alignment) noexcept
+    : BaseNode(cb, NodeType::kAlign, NodeFlags::kIsCode | NodeFlags::kHasNoEffect) {
+
+    _alignData._alignMode = alignMode;
+    _alignment = alignment;
+  }
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns align mode.
+  inline AlignMode alignMode() const noexcept { return _alignData._alignMode; }
+  //! Sets align mode to `alignMode`.
+  inline void setAlignMode(AlignMode alignMode) noexcept { _alignData._alignMode = alignMode; }
+
+  //! Returns the alignment in bytes.
+  inline uint32_t alignment() const noexcept { return _alignment; }
+  //! Sets the alignment in bytes to `alignment`.
+  inline void setAlignment(uint32_t alignment) noexcept { _alignment = alignment; }
+
+  //! \}
+};
+
+//! Embed data node.
+//!
+//! Wraps `.data` directive. The node contains data that will be placed at the node's position in the assembler
+//! stream. The data is considered to be RAW; no analysis nor byte-order conversion is performed on RAW data.
+class EmbedDataNode : public BaseNode {
+public:
+  ASMJIT_NONCOPYABLE(EmbedDataNode)
+
+  //! \cond INTERNAL
+  enum : uint32_t {
+    kInlineBufferSize = 128 - (sizeof(BaseNode) + sizeof(size_t) * 2) // Bytes left after BaseNode and both counters.
+  };
+  //! \endcond
+
+  //! \name Members
+  //! \{
+
+  size_t _itemCount;   //!< Number of (typed) items, see `itemCount()`.
+  size_t _repeatCount; //!< How many times the data is repeated, see `repeatCount()`.
+
+  union {
+    uint8_t* _externalData;                 //!< Used by `data()` when `dataSize() > kInlineBufferSize`.
+    uint8_t _inlineData[kInlineBufferSize]; //!< Used by `data()` when `dataSize() <= kInlineBufferSize`.
+  };
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Creates a new `EmbedDataNode` instance with no items, `TypeId::kUInt8` item type, and a zeroed inline buffer.
+  inline EmbedDataNode(BaseBuilder* cb) noexcept
+    : BaseNode(cb, NodeType::kEmbedData, NodeFlags::kIsData),
+      _itemCount(0),
+      _repeatCount(0) {
+    _embed._typeId = TypeId::kUInt8;
+    _embed._typeSize = uint8_t(1);
+    memset(_inlineData, 0, kInlineBufferSize);
+  }
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns data type as \ref TypeId.
+  inline TypeId typeId() const noexcept { return _embed._typeId; }
+  //! Returns the size of a single data element.
+  inline uint32_t typeSize() const noexcept { return _embed._typeSize; }
+
+  //! Returns a pointer to the data casted to `uint8_t` (inline buffer if the data fits, external pointer otherwise).
+  inline uint8_t* data() const noexcept {
+    return dataSize() <= kInlineBufferSize ? const_cast<uint8_t*>(_inlineData) : _externalData;
+  }
+
+  //! Returns a pointer to the data casted to `T`.
+  template<typename T>
+  inline T* dataAs() const noexcept { return reinterpret_cast<T*>(data()); }
+
+  //! Returns the number of (typed) items in the array.
+  inline size_t itemCount() const noexcept { return _itemCount; }
+
+  //! Returns how many times the data is repeated (the constructor initializes it to 0).
+  //!
+  //! Repeated data is useful when defining constants for SIMD, for example.
+  inline size_t repeatCount() const noexcept { return _repeatCount; }
+
+  //! Returns the size of the data, not considering the number of times it repeats.
+  //!
+  //! \note The returned value is the same as `typeSize() * itemCount()`.
+  inline size_t dataSize() const noexcept { return typeSize() * _itemCount; }
+
+  //! \}
+};
+
+//! Label data node (stores a label id and the size of the data to embed).
+class EmbedLabelNode : public BaseNode {
+public:
+  ASMJIT_NONCOPYABLE(EmbedLabelNode)
+
+  //! \name Members
+  //! \{
+
+  uint32_t _labelId;  //!< Id of the label to embed, see `labelId()`.
+  uint32_t _dataSize; //!< Data size, see `dataSize()`.
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Creates a new `EmbedLabelNode` instance having the given `labelId` and `dataSize`.
+  inline EmbedLabelNode(BaseBuilder* cb, uint32_t labelId = 0, uint32_t dataSize = 0) noexcept
+    : BaseNode(cb, NodeType::kEmbedLabel, NodeFlags::kIsData),
+      _labelId(labelId),
+      _dataSize(dataSize) {}
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns the label to embed as \ref Label operand.
+  inline Label label() const noexcept { return Label(_labelId); }
+  //! Returns the id of the label.
+  inline uint32_t labelId() const noexcept { return _labelId; }
+
+  //! Sets the label id from `label` operand.
+  inline void setLabel(const Label& label) noexcept { setLabelId(label.id()); }
+  //! Sets the label id (use with caution, improper use can break a lot of things).
+  inline void setLabelId(uint32_t labelId) noexcept { _labelId = labelId; }
+
+  //! Returns the data size.
+  inline uint32_t dataSize() const noexcept { return _dataSize; }
+  //! Sets the data size.
+  inline void setDataSize(uint32_t dataSize) noexcept { _dataSize = dataSize; }
+
+  //! \}
+};
+
+//! Label delta data node (stores a label id, a base label id, and the size of the data to embed).
+class EmbedLabelDeltaNode : public BaseNode {
+public:
+  ASMJIT_NONCOPYABLE(EmbedLabelDeltaNode)
+
+  //! \name Members
+  //! \{
+
+  uint32_t _labelId;     //!< Id of the label, see `labelId()`.
+  uint32_t _baseLabelId; //!< Id of the base label, see `baseLabelId()`.
+  uint32_t _dataSize;    //!< Data size, see `dataSize()`.
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Creates a new `EmbedLabelDeltaNode` instance having the given `labelId`, `baseLabelId`, and `dataSize`.
+  inline EmbedLabelDeltaNode(BaseBuilder* cb, uint32_t labelId = 0, uint32_t baseLabelId = 0, uint32_t dataSize = 0) noexcept
+    : BaseNode(cb, NodeType::kEmbedLabelDelta, NodeFlags::kIsData),
+      _labelId(labelId),
+      _baseLabelId(baseLabelId),
+      _dataSize(dataSize) {}
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns the label as `Label` operand.
+  inline Label label() const noexcept { return Label(_labelId); }
+  //! Returns the id of the label.
+  inline uint32_t labelId() const noexcept { return _labelId; }
+
+  //! Sets the label id from `label` operand.
+  inline void setLabel(const Label& label) noexcept { setLabelId(label.id()); }
+  //! Sets the label id.
+  inline void setLabelId(uint32_t labelId) noexcept { _labelId = labelId; }
+
+  //! Returns the base label as `Label` operand.
+  inline Label baseLabel() const noexcept { return Label(_baseLabelId); }
+  //! Returns the id of the base label.
+  inline uint32_t baseLabelId() const noexcept { return _baseLabelId; }
+
+  //! Sets the base label id from `baseLabel` operand.
+  inline void setBaseLabel(const Label& baseLabel) noexcept { setBaseLabelId(baseLabel.id()); }
+  //! Sets the base label id.
+  inline void setBaseLabelId(uint32_t baseLabelId) noexcept { _baseLabelId = baseLabelId; }
+
+  //! Returns the size of the embedded label address.
+  inline uint32_t dataSize() const noexcept { return _dataSize; }
+  //! Sets the size of the embedded label address.
+  inline void setDataSize(uint32_t dataSize) noexcept { _dataSize = dataSize; }
+
+  //! \}
+};
+
+//! A node that wraps `ConstPool` (it derives from `LabelNode`, so it also carries a label id).
+class ConstPoolNode : public LabelNode {
+public:
+  ASMJIT_NONCOPYABLE(ConstPoolNode)
+
+  //! \name Members
+  //! \{
+
+  ConstPool _constPool; //!< The wrapped constant pool, constructed with the builder's `_codeZone`.
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Creates a new `ConstPoolNode` instance; the node is retyped to `kConstPool` and flagged as data.
+  inline ConstPoolNode(BaseBuilder* cb, uint32_t id = 0) noexcept
+    : LabelNode(cb, id),
+      _constPool(&cb->_codeZone) {
+
+    setType(NodeType::kConstPool);
+    addFlags(NodeFlags::kIsData);
+    clearFlags(NodeFlags::kIsCode | NodeFlags::kHasNoEffect);
+  }
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Tests whether the constant-pool is empty.
+  inline bool empty() const noexcept { return _constPool.empty(); }
+  //! Returns the size of the constant-pool in bytes.
+  inline size_t size() const noexcept { return _constPool.size(); }
+  //! Returns minimum alignment.
+  inline size_t alignment() const noexcept { return _constPool.alignment(); }
+
+  //! Returns the wrapped `ConstPool` instance.
+  inline ConstPool& constPool() noexcept { return _constPool; }
+  //! Returns the wrapped `ConstPool` instance (const).
+  inline const ConstPool& constPool() const noexcept { return _constPool; }
+
+  //! \}
+
+  //! \name Utilities
+  //! \{
+
+  //! Forwards to `ConstPool::add()` of the wrapped pool and returns its error code.
+  inline Error add(const void* data, size_t size, size_t& dstOffset) noexcept {
+    return _constPool.add(data, size, dstOffset);
+  }
+
+  //! \}
+};
+
+//! Comment node (the comment text is kept as the node's inline comment).
+class CommentNode : public BaseNode {
+public:
+  ASMJIT_NONCOPYABLE(CommentNode)
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Creates a new `CommentNode` instance; the `comment` pointer is stored as is, the string is not copied here.
+  inline CommentNode(BaseBuilder* cb, const char* comment) noexcept
+    : BaseNode(cb, NodeType::kComment, NodeFlags::kIsInformative | NodeFlags::kHasNoEffect | NodeFlags::kIsRemovable) {
+    _inlineComment = comment;
+  }
+
+  //! \}
+};
+
+//! Sentinel node.
+//!
+//! Sentinel is a marker that is completely ignored by the code builder. It's used to remember a position in a code
+//! as it never gets removed by any pass.
+class SentinelNode : public BaseNode {
+public:
+  ASMJIT_NONCOPYABLE(SentinelNode)
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Creates a new `SentinelNode` instance of the given `sentinelType` (`SentinelType::kUnknown` by default).
+  inline SentinelNode(BaseBuilder* cb, SentinelType sentinelType = SentinelType::kUnknown) noexcept
+    : BaseNode(cb, NodeType::kSentinel, NodeFlags::kIsInformative | NodeFlags::kHasNoEffect) {
+
+    _sentinel._sentinelType = sentinelType;
+  }
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns the type of the sentinel.
+  inline SentinelType sentinelType() const noexcept {
+    return _sentinel._sentinelType;
+  }
+
+  //! Sets the type of the sentinel to `type`.
+  inline void setSentinelType(SentinelType type) noexcept {
+    _sentinel._sentinelType = type;
+  }
+
+  //! \}
+};
+
+//! Pass can be used to implement code transformations, analysis, and lowering.
+class ASMJIT_VIRTAPI Pass {
+public:
+  ASMJIT_BASE_CLASS(Pass)
+  ASMJIT_NONCOPYABLE(Pass)
+
+  //! \name Members
+  //! \{
+
+  //! BaseBuilder this pass is assigned to (null by default).
+  BaseBuilder* _cb = nullptr;
+  //! Name of the pass (null by default).
+  const char* _name = nullptr;
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  ASMJIT_API Pass(const char* name) noexcept;
+  ASMJIT_API virtual ~Pass() noexcept;
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns \ref BaseBuilder associated with the pass (as a pointer to const).
+  inline const BaseBuilder* cb() const noexcept { return _cb; }
+  //! Returns the name of the pass.
+  inline const char* name() const noexcept { return _name; }
+
+  //! \}
+
+  //! \name Pass Interface
+  //! \{
+
+  //! Processes the code stored in Builder or Compiler.
+  //!
+  //! This is the only function that is called by the `BaseBuilder` to process the code. It passes `zone`,
+  //! which will be reset after the `run()` finishes.
+  virtual Error run(Zone* zone, Logger* logger) = 0;
+
+  //! \}
+};
+
+//! \}
+
+ASMJIT_END_NAMESPACE
+
+#endif // !ASMJIT_NO_BUILDER
+#endif // ASMJIT_CORE_BUILDER_H_INCLUDED
diff --git a/lib/lepton/asmjit/core/codebuffer.h b/lib/lepton/asmjit/core/codebuffer.h
new file mode 100644
index 0000000000..4946e7a06a
--- /dev/null
+++ b/lib/lepton/asmjit/core/codebuffer.h
@@ -0,0 +1,113 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_CODEBUFFER_H_INCLUDED
+#define ASMJIT_CORE_CODEBUFFER_H_INCLUDED
+
+#include "../core/globals.h"
+#include "../core/support.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \addtogroup asmjit_core
+//! \{
+
+//! Flags used by \ref CodeBuffer (each flag occupies a distinct bit).
+enum class CodeBufferFlags : uint32_t {
+  //! No flags.
+  kNone = 0,
+  //! Buffer is external (not allocated by asmjit).
+  kIsExternal = 0x00000001u,
+  //! Buffer is fixed (cannot be reallocated).
+  kIsFixed = 0x00000002u
+};
+ASMJIT_DEFINE_ENUM_FLAGS(CodeBufferFlags)
+
+//! Code or data buffer.
+struct CodeBuffer {
+  //! \name Members
+  //! \{
+
+  //! The content of the buffer (data).
+  uint8_t* _data;
+  //! Number of bytes of `data` used.
+  size_t _size;
+  //! Buffer capacity (in bytes).
+  size_t _capacity;
+  //! Buffer flags.
+  CodeBufferFlags _flags;
+
+  //! \}
+
+  //! \name Overloaded Operators
+  //! \{
+
+  //! Returns a reference to the byte at the given `index`, which must be less than `size()`.
+  inline uint8_t& operator[](size_t index) noexcept {
+    ASMJIT_ASSERT(index < _size);
+    return _data[index];
+  }
+  //! \overload
+  inline const uint8_t& operator[](size_t index) const noexcept {
+    ASMJIT_ASSERT(index < _size);
+    return _data[index];
+  }
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns code buffer flags.
+  inline CodeBufferFlags flags() const noexcept { return _flags; }
+  //! Tests whether the code buffer has the given `flag` set.
+  inline bool hasFlag(CodeBufferFlags flag) const noexcept { return Support::test(_flags, flag); }
+
+  //! Tests whether this code buffer has a fixed size.
+  //!
+  //! Fixed size means that the code buffer is fixed and cannot grow.
+  inline bool isFixed() const noexcept { return hasFlag(CodeBufferFlags::kIsFixed); }
+
+  //! Tests whether the data in this code buffer is external.
+  //!
+  //! External data can only be provided by users, it's never allocated by AsmJit.
+  inline bool isExternal() const noexcept { return hasFlag(CodeBufferFlags::kIsExternal); }
+
+  //! Tests whether the data in this code buffer is allocated (non-null).
+  inline bool isAllocated() const noexcept { return _data != nullptr; }
+
+  //! Tests whether the code buffer is empty.
+  inline bool empty() const noexcept { return !_size; }
+
+  //! Returns the size of the data.
+  inline size_t size() const noexcept { return _size; }
+  //! Returns the capacity of the data.
+  inline size_t capacity() const noexcept { return _capacity; }
+
+  //! Returns the pointer to the data the buffer references.
+  inline uint8_t* data() noexcept { return _data; }
+  //! \overload
+  inline const uint8_t* data() const noexcept { return _data; }
+
+  //! \}
+
+  //! \name Iterators
+  //! \{
+
+  inline uint8_t* begin() noexcept { return _data; }
+  inline const uint8_t* begin() const noexcept { return _data; }
+
+  inline uint8_t* end() noexcept { return _data + _size; }
+  inline const uint8_t* end() const noexcept { return _data + _size; }
+
+  //! \}
+};
+
+//! \}
+
+ASMJIT_END_NAMESPACE
+
+#endif // ASMJIT_CORE_CODEBUFFER_H_INCLUDED
+
diff --git a/lib/lepton/asmjit/core/codeholder.cpp b/lib/lepton/asmjit/core/codeholder.cpp
new file mode 100644
index 0000000000..cf763cfff1
--- /dev/null
+++ b/lib/lepton/asmjit/core/codeholder.cpp
@@ -0,0 +1,1149 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#include "../core/assembler.h"
+#include "../core/codewriter_p.h"
+#include "../core/logger.h"
+#include "../core/support.h"
+
+#include <algorithm>
+#include <tuple>
+
+ASMJIT_BEGIN_NAMESPACE
+
+// Globals
+// =======
+
+static const char CodeHolder_addrTabName[] = ".addrtab";
+
+//! Encodes a MOD byte as `(m << 6) | (o << 3) | rm`; the arguments are combined as is, without masking.
+static inline uint32_t x86EncodeMod(uint32_t m, uint32_t o, uint32_t rm) noexcept {
+  return (m << 6) | (o << 3) | rm;
+}
+
+// LabelLinkIterator
+// =================
+
+class LabelLinkIterator {
+public:
+  inline LabelLinkIterator(LabelEntry* le) noexcept { reset(le); }
+
+  inline explicit operator bool() const noexcept { return isValid(); }
+  inline bool isValid() const noexcept { return _link != nullptr; }
+
+  inline LabelLink* link() const noexcept { return _link; }
+  inline LabelLink* operator->() const noexcept { return _link; }
+
+  inline void reset(LabelEntry* le) noexcept { // Starts iterating at the first link of `le`.
+    _pPrev = &le->_links;
+    _link = *_pPrev;
+  }
+
+  inline void next() noexcept { // Advances and keeps the current link in the list.
+    _pPrev = &_link->next;
+    _link = *_pPrev;
+  }
+
+  inline void resolveAndNext(CodeHolder* code) noexcept {
+    LabelLink* linkToDelete = _link;
+    // Advance first, then unlink the old link by redirecting the pointer that referenced it.
+    _link = _link->next;
+    *_pPrev = _link;
+    // One less unresolved link; its memory goes back to the CodeHolder's allocator.
+    code->_unresolvedLinkCount--;
+    code->_allocator.release(linkToDelete, sizeof(LabelLink));
+  }
+
+  LabelLink** _pPrev; // Address of the pointer that references `_link` (list head or the previous link's `next`).
+  LabelLink* _link;   // Current link; null when the iteration has finished.
+};
+
+// CodeHolder - Utilities
+// ======================
+
+static void CodeHolder_resetInternal(CodeHolder* self, ResetPolicy resetPolicy) noexcept {
+  uint32_t i;
+  const ZoneVector<BaseEmitter*>& emitters = self->emitters();
+  // Detach every attached emitter, walking the emitters array from its end.
+  i = emitters.size();
+  while (i)
+    self->detach(emitters[--i]);
+
+  // Reset everything into its construction state.
+  self->_environment.reset();
+  self->_baseAddress = Globals::kNoBaseAddress;
+  self->_logger = nullptr;
+  self->_errorHandler = nullptr;
+
+  // Reset all sections - free every section buffer that is not external.
+  uint32_t numSections = self->_sections.size();
+  for (i = 0; i < numSections; i++) {
+    Section* section = self->_sections[i];
+    if (section->_buffer.data() && !section->_buffer.isExternal())
+      ::free(section->_buffer._data);
+    section->_buffer._data = nullptr;
+    section->_buffer._capacity = 0;
+  }
+
+  // Reset zone allocator and all containers using it.
+  ZoneAllocator* allocator = self->allocator();
+
+  self->_emitters.reset();
+  self->_namedLabels.reset();
+  self->_relocations.reset();
+  self->_labelEntries.reset();
+  self->_sections.reset();
+  self->_sectionsByOrder.reset();
+
+  self->_unresolvedLinkCount = 0;
+  self->_addressTableSection = nullptr;
+  self->_addressTableEntries.reset();
+  // The allocator is reset first and only then the zone it was given, using the requested policy.
+  allocator->reset(&self->_zone);
+  self->_zone.reset(resetPolicy);
+}
+
+static void CodeHolder_onSettingsUpdated(CodeHolder* self) noexcept {
+  // Forward the settings change to each emitter that is attached to `self`.
+  const ZoneVector<BaseEmitter*>& attached = self->emitters();
+  for (uint32_t idx = 0, n = attached.size(); idx < n; idx++)
+    attached[idx]->onSettingsUpdated();
+}
+
+// CodeHolder - Construction & Destruction
+// =======================================
+
+CodeHolder::CodeHolder(const Support::Temporary* temporary) noexcept
+  : _environment(),
+    _baseAddress(Globals::kNoBaseAddress),
+    _logger(nullptr),
+    _errorHandler(nullptr),
+    _zone(16384 - Zone::kBlockOverhead, 1, temporary), // NOTE(review): presumably block size and alignment - confirm.
+    _allocator(&_zone),                                // The allocator is bound to `_zone`.
+    _unresolvedLinkCount(0),
+    _addressTableSection(nullptr) {}
+
+CodeHolder::~CodeHolder() noexcept {
+  CodeHolder_resetInternal(this, ResetPolicy::kHard); // Detaches emitters and frees non-external section buffers.
+}
+
+// CodeHolder - Init & Reset
+// =========================
+
+inline void CodeHolder_setSectionDefaultName(
+  Section* section,
+  char c0 = 0, char c1 = 0, char c2 = 0, char c3 = 0,
+  char c4 = 0, char c5 = 0, char c6 = 0, char c7 = 0) noexcept {
+  // Pack up to 8 characters (omitted ones are zero) into the two 32-bit words of the section name.
+  section->_name.u32[0] = Support::bytepack32_4x8(uint8_t(c0), uint8_t(c1), uint8_t(c2), uint8_t(c3));
+  section->_name.u32[1] = Support::bytepack32_4x8(uint8_t(c4), uint8_t(c5), uint8_t(c6), uint8_t(c7));
+}
+
+Error CodeHolder::init(const Environment& environment, uint64_t baseAddress) noexcept {
+  // An already initialized CodeHolder cannot be initialized again.
+  if (isInitialized())
+    return DebugUtils::errored(kErrorAlreadyInitialized);
+
+  // If we are just initializing there should be no emitters attached.
+  ASMJIT_ASSERT(_emitters.empty());
+
+  // Create a default `.text` section and insert it to both `_sections` and `_sectionsByOrder` arrays.
+  Error err = _sections.willGrow(&_allocator) |
+              _sectionsByOrder.willGrow(&_allocator);
+  if (err == kErrorOk) {
+    Section* section = _allocator.allocZeroedT<Section>();
+    if (ASMJIT_LIKELY(section)) {
+      section->_flags = SectionFlags::kExecutable | SectionFlags::kReadOnly;
+      CodeHolder_setSectionDefaultName(section, '.', 't', 'e', 'x', 't');
+      _sections.appendUnsafe(section);
+      _sectionsByOrder.appendUnsafe(section);
+      _environment = environment;
+      _baseAddress = baseAddress;
+      return kErrorOk;
+    }
+    err = DebugUtils::errored(kErrorOutOfMemory);
+  }
+
+  // Failure - nothing was appended, but the section arrays may already own storage obtained through `_allocator`.
+  // Drop that storage and the allocator state before resetting the zone, otherwise a later `init()` would reuse
+  // pointers into memory that the zone hands out again.
+  _sections.reset();
+  _sectionsByOrder.reset();
+  _allocator.reset(&_zone);
+  _zone.reset();
+  return err;
+}
+
+void CodeHolder::reset(ResetPolicy resetPolicy) noexcept {
+  CodeHolder_resetInternal(this, resetPolicy);  // All work is done by the internal helper the destructor also uses.
+}
+
+// CodeHolder - Attach / Detach
+// ============================
+
+Error CodeHolder::attach(BaseEmitter* emitter) noexcept {
+  // A null emitter is a misuse of the API.
+  if (ASMJIT_UNLIKELY(emitter == nullptr))
+    return DebugUtils::errored(kErrorInvalidArgument);
+
+  // An emitter without a valid emitter type should not be possible.
+  const EmitterType emitterType = emitter->emitterType();
+  if (ASMJIT_UNLIKELY(emitterType == EmitterType::kNone || uint32_t(emitterType) > uint32_t(EmitterType::kMaxValue)))
+    return DebugUtils::errored(kErrorInvalidState);
+
+  // The architecture of this CodeHolder must be present in the emitter's architecture mask.
+  const uint64_t archBit = uint64_t(1) << uint32_t(arch());
+  if (ASMJIT_UNLIKELY((emitter->_archMask & archBit) == 0))
+    return DebugUtils::errored(kErrorInvalidArch);
+
+  // Attaching an emitter that is already attached to this CodeHolder is
+  // suspicious and not recommended, but it is not treated as an error.
+  if (emitter->_code == this)
+    return kErrorOk;
+
+  // An emitter attached to a different CodeHolder is refused.
+  if (emitter->_code != nullptr)
+    return DebugUtils::errored(kErrorInvalidState);
+
+  // Reserve the space now as we cannot fail after `onAttach()` succeeded.
+  ASMJIT_PROPAGATE(_emitters.willGrow(&_allocator, 1));
+  ASMJIT_PROPAGATE(emitter->onAttach(this));
+
+  // Connect CodeHolder <-> BaseEmitter.
+  ASMJIT_ASSERT(emitter->_code == this);
+  _emitters.appendUnsafe(emitter);
+  return kErrorOk;
+}
+
+Error CodeHolder::detach(BaseEmitter* emitter) noexcept {
+  if (ASMJIT_UNLIKELY(emitter == nullptr))
+    return DebugUtils::errored(kErrorInvalidArgument);
+
+  if (ASMJIT_UNLIKELY(emitter->_code != this))
+    return DebugUtils::errored(kErrorInvalidState);
+
+  // NOTE: Detaching always happens once it was requested. An error reported by
+  // `emitter->onDetach()` is remembered and returned after the emitter has been
+  // disconnected. An emitter that is being destroyed is not notified at all.
+  Error detachResult = kErrorOk;
+  if (!emitter->isDestroyed())
+    detachResult = emitter->onDetach(this);
+
+  // Disconnect CodeHolder <-> BaseEmitter.
+  const uint32_t emitterIndex = _emitters.indexOf(emitter);
+  ASMJIT_ASSERT(emitterIndex != Globals::kNotFound);
+
+  _emitters.removeAt(emitterIndex);
+  emitter->_code = nullptr;
+
+  return detachResult;
+}
+
+// CodeHolder - Logging
+// ====================
+
+void CodeHolder::setLogger(Logger* logger) noexcept {
+#ifndef ASMJIT_NO_LOGGING
+  _logger = logger;
+  CodeHolder_onSettingsUpdated(this);  // Attached emitters are notified about the changed logger.
+#else
+  DebugUtils::unused(logger);          // Logging is compiled out, so the argument is ignored.
+#endif
+}
+
+// CodeHolder - Error Handling
+// ===========================
+
+void CodeHolder::setErrorHandler(ErrorHandler* errorHandler) noexcept {
+  _errorHandler = errorHandler;
+  CodeHolder_onSettingsUpdated(this);  // Attached emitters are notified about the changed error handler.
+}
+
+// CodeHolder - Code Buffer
+// ========================
+
+static Error CodeHolder_reserveInternal(CodeHolder* self, CodeBuffer* cb, size_t n) noexcept {
+  uint8_t* const previousData = cb->_data;
+
+  // A buffer that already has data and is not external is resized through
+  // realloc(), in every other case a new block is obtained from malloc().
+  const bool canRealloc = previousData != nullptr && !cb->isExternal();
+  uint8_t* const resizedData = static_cast<uint8_t*>(canRealloc ? ::realloc(previousData, n) : ::malloc(n));
+  if (ASMJIT_UNLIKELY(resizedData == nullptr))
+    return DebugUtils::errored(kErrorOutOfMemory);
+
+  cb->_data = resizedData;
+  cb->_capacity = n;
+
+  // Update pointers used by assemblers, if attached. Only an assembler whose
+  // current section owns `cb` is affected.
+  for (BaseEmitter* emitter : self->emitters()) {
+    if (!emitter->isAssembler())
+      continue;
+
+    BaseAssembler* assembler = static_cast<BaseAssembler*>(emitter);
+    if (&assembler->_section->_buffer != cb)
+      continue;
+
+    const size_t writeOffset = assembler->offset();
+    assembler->_bufferData = resizedData;
+    assembler->_bufferEnd  = resizedData + n;
+    assembler->_bufferPtr  = resizedData + writeOffset;
+  }
+
+  return kErrorOk;
+}
+
+Error CodeHolder::growBuffer(CodeBuffer* cb, size_t n) noexcept {
+  // Reject a request where `size + n` would not be representable.
+  const size_t currentSize = cb->size();
+  if (ASMJIT_UNLIKELY(n > std::numeric_limits<uintptr_t>::max() - currentSize))
+    return DebugUtils::errored(kErrorOutOfMemory);
+
+  // Growing is only necessary when the buffer cannot hold `n` more bytes. It's
+  // unlikely that this function is called while there is still enough room.
+  const size_t requiredCapacity = currentSize + n;
+  size_t newCapacity = cb->capacity();
+  if (ASMJIT_UNLIKELY(requiredCapacity <= newCapacity))
+    return kErrorOk;
+
+  if (cb->isFixed())
+    return DebugUtils::errored(kErrorTooLarge);
+
+  // Start either at the minimum capacity or at the current capacity plus
+  // `kAllocOverhead`; the overhead is subtracted again from the final size.
+  const size_t kMinimumCapacity = 8096;
+  if (newCapacity >= kMinimumCapacity)
+    newCapacity += Globals::kAllocOverhead;
+  else
+    newCapacity = kMinimumCapacity;
+
+  // Double the capacity below `kGrowThreshold`, grow by `kGrowThreshold` above it.
+  do {
+    const size_t beforeGrow = newCapacity;
+    newCapacity += (beforeGrow < Globals::kGrowThreshold) ? beforeGrow : size_t(Globals::kGrowThreshold);
+
+    // Overflow.
+    if (ASMJIT_UNLIKELY(beforeGrow > newCapacity))
+      return DebugUtils::errored(kErrorOutOfMemory);
+  } while (newCapacity - Globals::kAllocOverhead < requiredCapacity);
+
+  return CodeHolder_reserveInternal(this, cb, newCapacity - Globals::kAllocOverhead);
+}
+
+Error CodeHolder::reserveBuffer(CodeBuffer* cb, size_t n) noexcept {
+  // Nothing to do when the buffer already has room for `n` bytes.
+  if (cb->capacity() >= n)
+    return kErrorOk;
+
+  // A fixed buffer is never resized.
+  if (cb->isFixed())
+    return DebugUtils::errored(kErrorTooLarge);
+
+  return CodeHolder_reserveInternal(this, cb, n);
+}
+
+// CodeHolder - Sections
+// =====================
+
+Error CodeHolder::newSection(Section** sectionOut, const char* name, size_t nameSize, SectionFlags flags, uint32_t alignment, int32_t order) noexcept {
+  *sectionOut = nullptr;
+
+  // `SIZE_MAX` selects a null-terminated name, an alignment of zero is treated as 1.
+  const size_t nameLength = (nameSize == SIZE_MAX) ? strlen(name) : nameSize;
+  const uint32_t sectionAlignment = (alignment == 0) ? 1u : alignment;
+
+  if (ASMJIT_UNLIKELY(!Support::isPowerOf2(sectionAlignment)))
+    return DebugUtils::errored(kErrorInvalidArgument);
+
+  if (ASMJIT_UNLIKELY(nameLength > Globals::kMaxSectionNameSize))
+    return DebugUtils::errored(kErrorInvalidSectionName);
+
+  const uint32_t newId = _sections.size();
+  if (ASMJIT_UNLIKELY(newId == Globals::kInvalidId))
+    return DebugUtils::errored(kErrorTooManySections);
+
+  // Grow both arrays up front, the unsafe insertions below rely on it.
+  ASMJIT_PROPAGATE(_sections.willGrow(&_allocator));
+  ASMJIT_PROPAGATE(_sectionsByOrder.willGrow(&_allocator));
+
+  Section* created = _allocator.allocZeroedT<Section>();
+  if (ASMJIT_UNLIKELY(created == nullptr))
+    return DebugUtils::errored(kErrorOutOfMemory);
+
+  created->_id = newId;
+  created->_flags = flags;
+  created->_alignment = sectionAlignment;
+  created->_order = order;
+  memcpy(created->_name.str, name, nameLength);
+
+  // The position in `_sectionsByOrder` is found by comparing (order, id) pairs.
+  auto orderedBefore = [](const Section* lhs, const Section* rhs) {
+    return std::make_tuple(lhs->order(), lhs->id()) < std::make_tuple(rhs->order(), rhs->id());
+  };
+  Section** slot = std::lower_bound(_sectionsByOrder.begin(), _sectionsByOrder.end(), created, orderedBefore);
+  _sections.appendUnsafe(created);
+  _sectionsByOrder.insertUnsafe((size_t)(slot - _sectionsByOrder.data()), created);
+
+  *sectionOut = created;
+  return kErrorOk;
+}
+
+Section* CodeHolder::sectionByName(const char* name, size_t nameSize) const noexcept {
+  if (nameSize == SIZE_MAX)
+    nameSize = strlen(name);
+
+  // A name longer than the maximum section name size is never searched for.
+  if (nameSize > Globals::kMaxSectionNameSize)
+    return nullptr;
+
+  // Linear search - no hash table like labels have, the number of sections should be pretty low.
+  for (Section* candidate : _sections) {
+    if (memcmp(candidate->_name.str, name, nameSize) == 0 && candidate->_name.str[nameSize] == '\0')
+      return candidate;
+  }
+  return nullptr;
+}
+
+Section* CodeHolder::ensureAddressTableSection() noexcept {
+  // Created on first use; a failed newSection() leaves `_addressTableSection` null.
+  if (!_addressTableSection) {
+    newSection(&_addressTableSection,
+               CodeHolder_addrTabName,
+               sizeof(CodeHolder_addrTabName) - 1,
+               SectionFlags::kNone,
+               _environment.registerSize(),
+               std::numeric_limits<int32_t>::max());
+  }
+  return _addressTableSection;
+}
+
+Error CodeHolder::addAddressToAddressTable(uint64_t address) noexcept {
+  // An address that already has an entry is not added again.
+  if (_addressTableEntries.get(address) != nullptr)
+    return kErrorOk;
+
+  Section* addrTab = ensureAddressTableSection();
+  if (ASMJIT_UNLIKELY(addrTab == nullptr))
+    return DebugUtils::errored(kErrorOutOfMemory);
+
+  AddressTableEntry* newEntry = _zone.newT<AddressTableEntry>(address);
+  if (ASMJIT_UNLIKELY(newEntry == nullptr))
+    return DebugUtils::errored(kErrorOutOfMemory);
+
+  // Register the entry and account for one register-sized slot in the section.
+  _addressTableEntries.insert(newEntry);
+  addrTab->_virtualSize += _environment.registerSize();
+  return kErrorOk;
+}
+
+// CodeHolder - Labels & Symbols
+// =============================
+
+//! Lookup key that is only used to find a label in `_namedLabels`.
+class LabelByName {
+public:
+  inline LabelByName(const char* key, size_t keySize, uint32_t hashCode, uint32_t parentId) noexcept
+    : _key(key),
+      _keySize(uint32_t(keySize)),
+      _hashCode(hashCode),
+      _parentId(parentId) {}
+
+  inline uint32_t hashCode() const noexcept { return _hashCode; }
+  // Returns true if `entry` has the same name size, the same parent id, and the same name bytes as this key.
+  inline bool matches(const LabelEntry* entry) const noexcept {
+    return entry->nameSize() == _keySize &&
+           entry->parentId() == _parentId &&
+           ::memcmp(entry->name(), _key, _keySize) == 0;
+  }
+
+  const char* _key;     // Name to look for; the pointer is stored, the characters are not copied.
+  uint32_t _keySize;    // Size of `_key`, narrowed from `size_t` to 32 bits by the constructor.
+  uint32_t _hashCode;   // Hash code supplied by the caller.
+  uint32_t _parentId;   // Parent label id that the matching entry must have.
+};
+
+// Returns a hash of `name` and replaces `nameSize` by the real size if it's `SIZE_MAX` (null-terminated name).
+static uint32_t CodeHolder_hashNameAndGetSize(const char* name, size_t& nameSize) noexcept {
+  uint32_t hashCode = 0;
+  if (nameSize == SIZE_MAX) {
+    size_t i = 0;
+    for (;;) {
+      uint8_t c = uint8_t(name[i]);
+      if (!c) break;  // The terminating zero is neither hashed nor counted.
+      hashCode = Support::hashRound(hashCode, c);
+      i++;
+    }
+    nameSize = i;
+  }
+  else {
+    for (size_t i = 0; i < nameSize; i++) {
+      uint8_t c = uint8_t(name[i]);
+      if (ASMJIT_UNLIKELY(!c)) return DebugUtils::errored(kErrorInvalidLabelName);  // NOTE(review): the error code is returned as the hash value, callers cannot tell it apart from a hash.
+      hashCode = Support::hashRound(hashCode, c);
+    }
+  }
+  return hashCode;
+}
+
+LabelLink* CodeHolder::newLabelLink(LabelEntry* le, uint32_t sectionId, size_t offset, intptr_t rel, const OffsetFormat& format) noexcept {
+  LabelLink* created = _allocator.allocT<LabelLink>();
+  if (ASMJIT_UNLIKELY(created == nullptr)) return nullptr;
+
+  created->sectionId = sectionId;
+  created->relocId = Globals::kInvalidId;
+  created->offset = offset;
+  created->rel = rel;
+  created->format = format;
+
+  // Push the new link to the front of the label's list of links.
+  created->next = le->_links;
+  le->_links = created;
+  _unresolvedLinkCount++;
+  return created;
+}
+
+Error CodeHolder::newLabelEntry(LabelEntry** entryOut) noexcept {
+  *entryOut = nullptr;
+
+  // The id of the new label is its index in `_labelEntries`.
+  const uint32_t newId = _labelEntries.size();
+  if (ASMJIT_UNLIKELY(newId == Globals::kInvalidId))
+    return DebugUtils::errored(kErrorTooManyLabels);
+
+  ASMJIT_PROPAGATE(_labelEntries.willGrow(&_allocator));
+  LabelEntry* created = _allocator.allocZeroedT<LabelEntry>();
+  if (ASMJIT_UNLIKELY(created == nullptr))
+    return DebugUtils::errored(kErrorOutOfMemory);
+
+  created->_setId(newId);
+  created->_parentId = Globals::kInvalidId;
+  created->_offset = 0;
+  _labelEntries.appendUnsafe(created);
+
+  *entryOut = created;
+  return kErrorOk;
+}
+
+Error CodeHolder::newNamedLabelEntry(LabelEntry** entryOut, const char* name, size_t nameSize, LabelType type, uint32_t parentId) noexcept {
+  *entryOut = nullptr;
+  uint32_t hashCode = CodeHolder_hashNameAndGetSize(name, nameSize);  // Also resolves `nameSize` when SIZE_MAX was passed.
+  // An empty name is only accepted for anonymous labels, which are then created as unnamed entries.
+  if (ASMJIT_UNLIKELY(nameSize == 0)) {
+    if (type == LabelType::kAnonymous)
+      return newLabelEntry(entryOut);
+    else
+      return DebugUtils::errored(kErrorInvalidLabelName);
+  }
+
+  if (ASMJIT_UNLIKELY(nameSize > Globals::kMaxLabelNameSize))
+    return DebugUtils::errored(kErrorLabelNameTooLong);
+
+  switch (type) {
+    case LabelType::kAnonymous: {
+      // Anonymous labels cannot have a parent (or more specifically, parent is useless here).
+      if (ASMJIT_UNLIKELY(parentId != Globals::kInvalidId))
+        return DebugUtils::errored(kErrorInvalidParentLabel);
+
+      uint32_t labelId = _labelEntries.size();
+      if (ASMJIT_UNLIKELY(labelId == Globals::kInvalidId))
+        return DebugUtils::errored(kErrorTooManyLabels);
+
+      ASMJIT_PROPAGATE(_labelEntries.willGrow(&_allocator));
+      LabelEntry* le = _allocator.allocZeroedT<LabelEntry>();
+
+      if (ASMJIT_UNLIKELY(!le))
+        return DebugUtils::errored(kErrorOutOfMemory);
+
+      // NOTE: This LabelEntry has a name, but we leave its hashCode as zero as it's anonymous.
+      le->_setId(labelId);
+      le->_parentId = Globals::kInvalidId;
+      le->_offset = 0;
+      ASMJIT_PROPAGATE(le->_name.setData(&_zone, name, nameSize));
+
+      _labelEntries.appendUnsafe(le);  // Not inserted into `_namedLabels`, so it cannot be found by name.
+
+      *entryOut = le;
+      return kErrorOk;
+    }
+
+    case LabelType::kLocal: {
+      // The parent id of a local label must be the id of an existing label entry.
+      if (ASMJIT_UNLIKELY(parentId >= _labelEntries.size()))
+        return DebugUtils::errored(kErrorInvalidParentLabel);
+
+      hashCode ^= parentId;  // Mix the parent id into the hash; labelIdByName() does the same for lookups.
+      break;
+    }
+
+    case LabelType::kGlobal:
+    case LabelType::kExternal: {
+      // Global and external labels must not specify a parent.
+      if (ASMJIT_UNLIKELY(parentId != Globals::kInvalidId))
+        return DebugUtils::errored(kErrorInvalidParentLabel);
+      break;
+    }
+
+    default: {
+      return DebugUtils::errored(kErrorInvalidArgument);
+    }
+  }
+
+  // Don't allow to insert duplicates. Local labels allow duplicates that have
+  // different id, this is already accomplished by having a different hashes
+  // between the same label names having different parent labels.
+  LabelEntry* le = _namedLabels.get(LabelByName(name, nameSize, hashCode, parentId));
+  if (ASMJIT_UNLIKELY(le))
+    return DebugUtils::errored(kErrorLabelAlreadyDefined);
+
+  Error err = kErrorOk;  // Never modified below, the function returns kErrorOk through it.
+  uint32_t labelId = _labelEntries.size();
+
+  if (ASMJIT_UNLIKELY(labelId == Globals::kInvalidId))
+    return DebugUtils::errored(kErrorTooManyLabels);
+
+  ASMJIT_PROPAGATE(_labelEntries.willGrow(&_allocator));
+  le = _allocator.allocZeroedT<LabelEntry>();
+
+  if (ASMJIT_UNLIKELY(!le))
+    return DebugUtils::errored(kErrorOutOfMemory);
+
+  le->_hashCode = hashCode;
+  le->_setId(labelId);
+  le->_type = type;
+  le->_parentId = parentId;
+  le->_offset = 0;
+  ASMJIT_PROPAGATE(le->_name.setData(&_zone, name, nameSize));
+
+  _labelEntries.appendUnsafe(le);
+  _namedLabels.insert(allocator(), le);  // Makes the label discoverable through labelIdByName().
+
+  *entryOut = le;
+  return err;
+}
+
+uint32_t CodeHolder::labelIdByName(const char* name, size_t nameSize, uint32_t parentId) noexcept {
+  uint32_t hashCode = CodeHolder_hashNameAndGetSize(name, nameSize);  // Also resolves `nameSize` when SIZE_MAX was passed.
+  if (ASMJIT_UNLIKELY(!nameSize))
+    return uint32_t(Globals::kInvalidId);  // No label has an empty name; returning 0 would report the first label's id.
+  // Labels registered with a parent have the parent id mixed into their hash, the lookup has to do the same.
+  if (parentId != Globals::kInvalidId)
+    hashCode ^= parentId;
+
+  LabelEntry* le = _namedLabels.get(LabelByName(name, nameSize, hashCode, parentId));
+  return le ? le->id() : uint32_t(Globals::kInvalidId);  // `Globals::kInvalidId` means "not found".
+}
+
+ASMJIT_API Error CodeHolder::resolveUnresolvedLinks() noexcept {
+  if (!hasUnresolvedLinks())  // Nothing to do when no link is waiting to be resolved.
+    return kErrorOk;
+
+  Error err = kErrorOk;  // Stays kErrorOk unless at least one link could not be patched.
+  for (LabelEntry* le : labelEntries()) {
+    if (!le->isBound())  // Links to a label that is not bound are left untouched.
+      continue;
+
+    LabelLinkIterator link(le);
+    if (link) {
+      Support::FastUInt8 of = 0;
+      Section* toSection = le->section();
+      uint64_t toOffset = Support::addOverflow(toSection->offset(), le->offset(), &of);
+
+      do {
+        uint32_t linkSectionId = link->sectionId;
+        if (link->relocId == Globals::kInvalidId) {  // Links that carry a relocation id are skipped here.
+          Section* fromSection = sectionById(linkSectionId);
+          size_t linkOffset = link->offset;
+
+          CodeBuffer& buf = _sections[linkSectionId]->buffer();
+          ASMJIT_ASSERT(linkOffset < buf.size());
+
+          // Calculate the offset relative to the start of the virtual base.
+          Support::FastUInt8 localOF = of;  // Starts from the overflow state of the target offset computed above.
+          uint64_t fromOffset = Support::addOverflow<uint64_t>(fromSection->offset(), linkOffset, &localOF);
+          int64_t displacement = int64_t(toOffset - fromOffset + uint64_t(int64_t(link->rel)));
+
+          if (!localOF) {
+            ASMJIT_ASSERT(size_t(linkOffset) < buf.size());
+            ASMJIT_ASSERT(buf.size() - size_t(linkOffset) >= link->format.valueSize());
+
+            // Overwrite a real displacement in the CodeBuffer.
+            if (CodeWriterUtils::writeOffset(buf._data + linkOffset, displacement, link->format)) {
+              link.resolveAndNext(this);
+              continue;  // The iterator has already advanced; `continue` jumps to the `while (link)` check.
+            }
+          }
+
+          err = DebugUtils::errored(kErrorInvalidDisplacement);  // Overflow, or writeOffset() rejected the displacement.
+          // Falls through to `link.next()`.
+        }
+
+        link.next();
+      } while (link);
+    }
+  }
+
+  return err;
+}
+
+ASMJIT_API Error CodeHolder::bindLabel(const Label& label, uint32_t toSectionId, uint64_t toOffset) noexcept {
+  LabelEntry* le = labelEntry(label);
+  if (ASMJIT_UNLIKELY(!le))
+    return DebugUtils::errored(kErrorInvalidLabel);
+  // The section id indexes `_sections` below, so it must be strictly less than the number of sections.
+  if (ASMJIT_UNLIKELY(toSectionId >= _sections.size()))
+    return DebugUtils::errored(kErrorInvalidSection);
+
+  // Label can be bound only once.
+  if (ASMJIT_UNLIKELY(le->isBound()))
+    return DebugUtils::errored(kErrorLabelAlreadyBound);
+
+  // Bind the label.
+  Section* section = _sections[toSectionId];
+  le->_section = section;
+  le->_offset = toOffset;
+
+  Error err = kErrorOk;  // Stays kErrorOk unless at least one displacement could not be written.
+  CodeBuffer& buf = section->buffer();
+
+  // Fix all links to this label we have collected so far if they are within
+  // the same section. We ignore any inter-section links as these have to be
+  // fixed later.
+  LabelLinkIterator link(le);
+  while (link) {
+    uint32_t linkSectionId = link->sectionId;
+    size_t linkOffset = link->offset;
+
+    uint32_t relocId = link->relocId;
+    if (relocId != Globals::kInvalidId) {
+      // Adjust relocation data only.
+      RelocEntry* re = _relocations[relocId];
+      re->_payload += toOffset;
+      re->_targetSectionId = toSectionId;
+    }
+    else {
+      if (linkSectionId != toSectionId) {
+        link.next();  // Inter-section link, kept unresolved.
+        continue;
+      }
+
+      ASMJIT_ASSERT(linkOffset < buf.size());
+      int64_t displacement = int64_t(toOffset - uint64_t(linkOffset) + uint64_t(int64_t(link->rel)));
+
+      // Size of the value we are going to patch. Only BYTE/DWORD is allowed.
+      ASMJIT_ASSERT(buf.size() - size_t(linkOffset) >= link->format.regionSize());
+
+      // Overwrite a real displacement in the CodeBuffer.
+      if (!CodeWriterUtils::writeOffset(buf._data + linkOffset, displacement, link->format)) {
+        err = DebugUtils::errored(kErrorInvalidDisplacement);
+        link.next();  // The failed link is kept unresolved and the remaining links are still processed.
+        continue;
+      }
+    }
+
+    link.resolveAndNext(this);
+  }
+
+  return err;
+}
+
+// CodeHolder - Relocations
+// ========================
+
+Error CodeHolder::newRelocEntry(RelocEntry** dst, RelocType relocType) noexcept {
+  ASMJIT_PROPAGATE(_relocations.willGrow(&_allocator));
+
+  const uint32_t newId = _relocations.size();
+  if (ASMJIT_UNLIKELY(newId == Globals::kInvalidId))
+    return DebugUtils::errored(kErrorTooManyRelocations);
+
+  RelocEntry* created = _allocator.allocZeroedT<RelocEntry>();
+  if (ASMJIT_UNLIKELY(created == nullptr))
+    return DebugUtils::errored(kErrorOutOfMemory);
+
+  // The id is the index in `_relocations`, both section ids start out invalid.
+  created->_id = newId;
+  created->_relocType = relocType;
+  created->_sourceSectionId = Globals::kInvalidId;
+  created->_targetSectionId = Globals::kInvalidId;
+  _relocations.appendUnsafe(created);
+  *dst = created;
+  return kErrorOk;
+}
+
+// CodeHolder - Expression Evaluation
+// ==================================
+
+static Error CodeHolder_evaluateExpression(CodeHolder* self, Expression* exp, uint64_t* out) noexcept {
+  uint64_t operands[2];
+  for (size_t slot = 0; slot < 2; slot++) {
+    uint64_t resolved;
+    switch (exp->valueType[slot]) {
+      case ExpressionValueType::kNone: {
+        resolved = 0;
+        break;
+      }
+
+      case ExpressionValueType::kConstant: {
+        resolved = exp->value[slot].constant;
+        break;
+      }
+
+      case ExpressionValueType::kLabel: {
+        LabelEntry* label = exp->value[slot].label;
+        if (!label->isBound())
+          return DebugUtils::errored(kErrorExpressionLabelNotBound);
+        resolved = label->section()->offset() + label->offset();
+        break;
+      }
+
+      case ExpressionValueType::kExpression: {
+        Expression* nested = exp->value[slot].expression;
+        ASMJIT_PROPAGATE(CodeHolder_evaluateExpression(self, nested, &resolved));
+        break;
+      }
+
+      default:
+        return DebugUtils::errored(kErrorInvalidState);
+    }
+
+    operands[slot] = resolved;
+  }
+
+  // Both operands are plain 64-bit values now, `out` is only written on success.
+  const uint64_t lhs = operands[0];
+  const uint64_t rhs = operands[1];
+  uint64_t combined;
+  switch (exp->opType) {
+    case ExpressionOpType::kAdd:
+      combined = lhs + rhs;
+      break;
+
+    case ExpressionOpType::kSub:
+      combined = lhs - rhs;
+      break;
+
+    case ExpressionOpType::kMul:
+      combined = lhs * rhs;
+      break;
+    // Logical shifts by 64 or more bits produce zero.
+    case ExpressionOpType::kSll:
+      combined = (rhs <= 63) ? uint64_t(lhs << rhs) : uint64_t(0);
+      break;
+
+    case ExpressionOpType::kSrl:
+      combined = (rhs <= 63) ? uint64_t(lhs >> rhs) : uint64_t(0);
+      break;
+    // The arithmetic shift count is clamped to 63.
+    case ExpressionOpType::kSra:
+      combined = Support::sar(lhs, Support::min<uint64_t>(rhs, 63));
+      break;
+
+    default:
+      return DebugUtils::errored(kErrorInvalidState);
+  }
+
+  *out = combined;
+  return kErrorOk;
+}
+
+// CodeHolder - Utilities
+// ======================
+
+Error CodeHolder::flatten() noexcept {
+  // First pass - only verify that laying out all sections does not overflow.
+  uint64_t cursor = 0;
+  for (Section* section : _sectionsByOrder) {
+    const uint64_t sectionSize = section->realSize();
+    if (!sectionSize)
+      continue;
+    const uint64_t alignedCursor = Support::alignUp(cursor, section->alignment());
+    if (ASMJIT_UNLIKELY(alignedCursor < cursor))
+      return DebugUtils::errored(kErrorTooLarge);
+
+    Support::FastUInt8 overflowed = 0;
+    cursor = Support::addOverflow(alignedCursor, sectionSize, &overflowed);
+    if (ASMJIT_UNLIKELY(overflowed))
+      return DebugUtils::errored(kErrorTooLarge);
+  }
+
+  // Second pass - now we know that we can assign offsets of all sections properly.
+  Section* previous = nullptr;
+  cursor = 0;
+  for (Section* section : _sectionsByOrder) {
+    const uint64_t sectionSize = section->realSize();
+    if (sectionSize)
+      cursor = Support::alignUp(cursor, section->alignment());
+    section->_offset = cursor;
+
+    // Make sure the previous section extends a bit to cover the alignment.
+    if (previous)
+      previous->_virtualSize = cursor - previous->_offset;
+
+    previous = section;
+    cursor += sectionSize;
+  }
+
+  return kErrorOk;
+}
+
+size_t CodeHolder::codeSize() const noexcept {
+  Support::FastUInt8 overflowed = 0;
+  uint64_t total = 0;
+
+  // Walk sections in order; a section with a real size of zero contributes nothing.
+  for (Section* section : _sectionsByOrder) {
+    const uint64_t sectionSize = section->realSize();
+    if (!sectionSize)
+      continue;
+
+    const uint64_t alignedTotal = Support::alignUp(total, section->alignment());
+    ASMJIT_ASSERT(alignedTotal >= total);
+    total = Support::addOverflow(alignedTotal, sectionSize, &overflowed);
+  }
+
+  // SIZE_MAX is returned when the size overflowed or does not fit into `size_t`.
+  const bool exceedsSizeT = sizeof(uint64_t) > sizeof(size_t) && total > SIZE_MAX;
+  return (exceedsSizeT || overflowed) ? SIZE_MAX : size_t(total);
+}
+
+Error CodeHolder::relocateToBase(uint64_t baseAddress) noexcept {
+  // Base address must be provided.
+  if (ASMJIT_UNLIKELY(baseAddress == Globals::kNoBaseAddress))
+    return DebugUtils::errored(kErrorInvalidArgument);
+
+  _baseAddress = baseAddress;  // Remembered even if relocating fails later.
+  uint32_t addressSize = _environment.registerSize();
+
+  Section* addressTableSection = _addressTableSection;
+  uint32_t addressTableEntryCount = 0;  // Number of address table slots handed out by this call.
+  uint8_t* addressTableEntryData = nullptr;
+
+  if (addressTableSection) {
+    // Make sure the address table buffer can hold its whole virtual size before slots are written.
+    ASMJIT_PROPAGATE(
+      reserveBuffer(&addressTableSection->_buffer, size_t(addressTableSection->virtualSize())));
+    addressTableEntryData = addressTableSection->_buffer.data();
+  }
+
+  // Relocate all recorded locations.
+  for (const RelocEntry* re : _relocations) {
+    // Possibly deleted or optimized-out entry.
+    if (re->relocType() == RelocType::kNone)
+      continue;
+
+    Section* sourceSection = sectionById(re->sourceSectionId());
+    Section* targetSection = nullptr;  // Stays null when the entry has no target section.
+
+    if (re->targetSectionId() != Globals::kInvalidId)
+      targetSection = sectionById(re->targetSectionId());
+
+    uint64_t value = re->payload();
+    uint64_t sectionOffset = sourceSection->offset();
+    uint64_t sourceOffset = re->sourceOffset();
+
+    // Make sure that the `RelocEntry` doesn't go out of bounds.
+    size_t regionSize = re->format().regionSize();
+    if (ASMJIT_UNLIKELY(re->sourceOffset() >= sourceSection->bufferSize() ||
+                        sourceSection->bufferSize() - size_t(re->sourceOffset()) < regionSize))
+      return DebugUtils::errored(kErrorInvalidRelocEntry);
+
+    uint8_t* buffer = sourceSection->data();
+
+    switch (re->relocType()) {
+      case RelocType::kExpression: {
+        // The payload holds a pointer to the expression, its evaluated result becomes the value.
+        Expression* expression = (Expression*)(uintptr_t(value));
+        ASMJIT_PROPAGATE(CodeHolder_evaluateExpression(this, expression, &value));
+        break;
+      }
+
+      case RelocType::kAbsToAbs: {
+        break;  // The payload is written as is.
+      }
+
+      case RelocType::kRelToAbs: {
+        // Value is currently a relative offset from the start of its section.
+        // We have to convert it to an absolute offset (including base address).
+        if (ASMJIT_UNLIKELY(!targetSection))
+          return DebugUtils::errored(kErrorInvalidRelocEntry);
+
+        //value += baseAddress + sectionOffset + sourceOffset + regionSize;
+        value += baseAddress + targetSection->offset();
+        break;
+      }
+
+      case RelocType::kAbsToRel: {
+        // Make the value relative to the address that follows the patched region.
+        value -= baseAddress + sectionOffset + sourceOffset + regionSize;
+
+        // Sign extend as we are not interested in the high 32-bit word in a 32-bit address space.
+        if (addressSize <= 4)
+          value = uint64_t(int64_t(int32_t(value & 0xFFFFFFFFu)));
+        else if (!Support::isInt32(int64_t(value)))
+          return DebugUtils::errored(kErrorRelocOffsetOutOfRange);
+
+        break;
+      }
+
+      case RelocType::kX64AddressEntry: {
+        // The value must be 4 bytes wide and preceded by at least 2 bytes, which may get rewritten below.
+        size_t valueOffset = size_t(re->sourceOffset()) + re->format().valueOffset();
+        if (re->format().valueSize() != 4 || valueOffset < 2)
+          return DebugUtils::errored(kErrorInvalidRelocEntry);
+
+        // First try whether a relative 32-bit displacement would work.
+        value -= baseAddress + sectionOffset + sourceOffset + regionSize;
+        if (!Support::isInt32(int64_t(value))) {
+          // Relative 32-bit displacement is not possible, use '.addrtab' section.
+          AddressTableEntry* atEntry = _addressTableEntries.get(re->payload());
+          if (ASMJIT_UNLIKELY(!atEntry))
+            return DebugUtils::errored(kErrorInvalidRelocEntry);
+
+          // Cannot be null as we have just matched the `AddressTableEntry`.
+          ASMJIT_ASSERT(addressTableSection != nullptr);
+
+          if (!atEntry->hasAssignedSlot())
+            atEntry->_slot = addressTableEntryCount++;  // Slots are handed out in the order they are first needed.
+
+          size_t atEntryIndex = size_t(atEntry->slot()) * addressSize;
+          uint64_t addrSrc = sectionOffset + sourceOffset + regionSize;
+          uint64_t addrDst = addressTableSection->offset() + uint64_t(atEntryIndex);
+
+          value = addrDst - addrSrc;  // Displacement from the end of the patched region to the table slot.
+          if (!Support::isInt32(int64_t(value)))
+            return DebugUtils::errored(kErrorRelocOffsetOutOfRange);
+
+          // Bytes that replace [REX, OPCODE] bytes.
+          uint32_t byte0 = 0xFF;
+          uint32_t byte1 = buffer[valueOffset - 1];
+
+          if (byte1 == 0xE8) {
+            // Patch CALL/MOD byte to FF /2 (-> 0x15).
+            byte1 = x86EncodeMod(0, 2, 5);
+          }
+          else if (byte1 == 0xE9) {
+            // Patch JMP/MOD byte to FF /4 (-> 0x25).
+            byte1 = x86EncodeMod(0, 4, 5);
+          }
+          else {
+            return DebugUtils::errored(kErrorInvalidRelocEntry);
+          }
+
+          // Patch `jmp/call` instruction.
+          buffer[valueOffset - 2] = uint8_t(byte0);
+          buffer[valueOffset - 1] = uint8_t(byte1);
+
+          Support::writeU64uLE(addressTableEntryData + atEntryIndex, re->payload());  // NOTE(review): writes 8 bytes per slot - presumably `addressSize` is always 8 for this relocation type, confirm.
+        }
+        break;
+      }
+
+      default:
+        return DebugUtils::errored(kErrorInvalidRelocEntry);
+    }
+
+    // Store the final value into the code buffer using the format of the relocation.
+    if (!CodeWriterUtils::writeOffset(buffer + re->sourceOffset(), int64_t(value), re->format())) {
+      return DebugUtils::errored(kErrorInvalidRelocEntry);
+    }
+  }
+
+  // Fixup the virtual size of the address table if it's the last section.
+  if (_sectionsByOrder.last() == addressTableSection) {
+    ASMJIT_ASSERT(addressTableSection != nullptr);
+
+    // Only the slots that were actually assigned above count towards the section size.
+    size_t addressTableSize = addressTableEntryCount * addressSize;
+    addressTableSection->_buffer._size = addressTableSize;
+    addressTableSection->_virtualSize = addressTableSize;
+  }
+
+  return kErrorOk;
+}
+
+Error CodeHolder::copySectionData(void* dst, size_t dstSize, uint32_t sectionId, CopySectionFlags copyFlags) noexcept {
+  if (ASMJIT_UNLIKELY(!isSectionValid(sectionId)))
+    return DebugUtils::errored(kErrorInvalidSection);
+
+  Section* source = sectionById(sectionId);
+  const size_t dataSize = source->bufferSize();
+  if (ASMJIT_UNLIKELY(dataSize > dstSize))
+    return DebugUtils::errored(kErrorInvalidArgument);
+
+  uint8_t* out = static_cast<uint8_t*>(dst);
+  memcpy(out, source->data(), dataSize);
+
+  // Zero the remainder of the destination when section padding was requested.
+  const bool padRequested = Support::test(copyFlags, CopySectionFlags::kPadSectionBuffer);
+  if (dataSize < dstSize && padRequested)
+    memset(out + dataSize, 0, dstSize - dataSize);
+
+  return kErrorOk;
+}
+
+Error CodeHolder::copyFlattenedData(void* dst, size_t dstSize, CopySectionFlags copyFlags) noexcept {
+  uint8_t* const out = static_cast<uint8_t*>(dst);
+  const bool padSections = Support::test(copyFlags, CopySectionFlags::kPadSectionBuffer);
+  size_t writtenEnd = 0;
+  for (Section* section : _sectionsByOrder) {
+    if (section->offset() > dstSize)
+      return DebugUtils::errored(kErrorInvalidArgument);
+
+    const size_t dataSize = section->bufferSize();
+    const size_t sectionStart = size_t(section->offset());
+    const size_t available = dstSize - sectionStart;
+    if (ASMJIT_UNLIKELY(available < dataSize))
+      return DebugUtils::errored(kErrorInvalidArgument);
+
+    uint8_t* target = out + sectionStart;
+    memcpy(target, section->data(), dataSize);
+    // Zero-fill up to the virtual size, clamped to what is left of `dst`.
+    size_t padding = 0;
+    if (padSections && dataSize < section->virtualSize()) {
+      padding = Support::min<size_t>(available, size_t(section->virtualSize())) - dataSize;
+      memset(target + dataSize, 0, padding);
+    }
+    writtenEnd = Support::max(writtenEnd, sectionStart + dataSize + padding);
+  }
+
+  // Zero everything after the last written byte when the target buffer should be padded.
+  if (writtenEnd < dstSize && Support::test(copyFlags, CopySectionFlags::kPadTargetBuffer))
+    memset(out + writtenEnd, 0, dstSize - writtenEnd);
+  return kErrorOk;
+}
+
+// CodeHolder - Tests
+// ==================
+
+#if defined(ASMJIT_TEST)
+UNIT(code_holder) {
+  CodeHolder code;
+
+  INFO("Verifying CodeHolder::init()");
+  Environment env;
+  env.init(Arch::kX86);
+  // Every check below relies on a successfully initialized CodeHolder, so init() must not fail silently.
+  EXPECT(code.init(env) == kErrorOk);
+  EXPECT(code.arch() == Arch::kX86);
+
+  INFO("Verifying named labels");
+  LabelEntry* le;
+  EXPECT(code.newNamedLabelEntry(&le, "NamedLabel", SIZE_MAX, LabelType::kGlobal) == kErrorOk);
+  EXPECT(strcmp(le->name(), "NamedLabel") == 0);
+  EXPECT(code.labelIdByName("NamedLabel") == le->id());
+
+  INFO("Verifying section ordering");
+  Section* section1;
+  EXPECT(code.newSection(&section1, "high-priority", SIZE_MAX, SectionFlags::kNone, 1, -1) == kErrorOk);
+  EXPECT(code.sections()[1] == section1);
+  EXPECT(code.sectionsByOrder()[0] == section1);
+  // A lower order value sorts first, so order -2 goes in front of order -1.
+  Section* section0;
+  EXPECT(code.newSection(&section0, "higher-priority", SIZE_MAX, SectionFlags::kNone, 1, -2) == kErrorOk);
+  EXPECT(code.sections()[2] == section0);
+  EXPECT(code.sectionsByOrder()[0] == section0);
+  EXPECT(code.sectionsByOrder()[1] == section1);
+  // Order 2 sorts after the default section, which puts it last.
+  Section* section3;
+  EXPECT(code.newSection(&section3, "low-priority", SIZE_MAX, SectionFlags::kNone, 1, 2) == kErrorOk);
+  EXPECT(code.sections()[3] == section3);
+  EXPECT(code.sectionsByOrder()[3] == section3);
+}
+#endif
+
+ASMJIT_END_NAMESPACE
diff --git a/lib/lepton/asmjit/core/codeholder.h b/lib/lepton/asmjit/core/codeholder.h
new file mode 100644
index 0000000000..6ed2ddf942
--- /dev/null
+++ b/lib/lepton/asmjit/core/codeholder.h
@@ -0,0 +1,1035 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_CODEHOLDER_H_INCLUDED
+#define ASMJIT_CORE_CODEHOLDER_H_INCLUDED
+
+#include "../core/archtraits.h"
+#include "../core/codebuffer.h"
+#include "../core/errorhandler.h"
+#include "../core/operand.h"
+#include "../core/string.h"
+#include "../core/support.h"
+#include "../core/target.h"
+#include "../core/zone.h"
+#include "../core/zonehash.h"
+#include "../core/zonestring.h"
+#include "../core/zonetree.h"
+#include "../core/zonevector.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \addtogroup asmjit_core
+//! \{
+
+class BaseEmitter;
+class CodeHolder;
+class LabelEntry;
+class Logger;
+
+//! Operator type that can be used within an \ref Expression (stored in `Expression::opType`).
+enum class ExpressionOpType : uint8_t {
+  //! Addition.
+  kAdd = 0,
+  //! Subtraction.
+  kSub = 1,
+  //! Multiplication.
+  kMul = 2,
+  //! Logical left shift.
+  kSll = 3,
+  //! Logical right shift.
+  kSrl = 4,
+  //! Arithmetic right shift.
+  kSra = 5
+};
+
+//! Value type that can be used within an \ref Expression (selects the active member of `Expression::Value`).
+enum class ExpressionValueType : uint8_t {
+  //! No value or invalid.
+  kNone = 0,
+  //! Value is a 64-bit unsigned integer (constant).
+  kConstant = 1,
+  //! Value is a pointer to \ref LabelEntry, which references a \ref Label.
+  kLabel = 2,
+  //! Value is a pointer to another \ref Expression.
+  kExpression = 3
+};
+
+//! Expression node (one operator and two values) that can reference constants, labels, and other expressions.
+struct Expression {
+  //! Expression value, the active member is selected by the matching entry in \ref valueType.
+  union Value {
+    //! Constant.
+    uint64_t constant;
+    //! Pointer to another expression.
+    Expression* expression;
+    //! Pointer to \ref LabelEntry.
+    LabelEntry* label;
+  };
+
+  //! \name Members
+  //! \{
+
+  //! Operation type.
+  ExpressionOpType opType;
+  //! Value types of \ref value (index 0 describes `value[0]`, index 1 describes `value[1]`).
+  ExpressionValueType valueType[2];
+  //! Reserved for future use, should be initialized to zero.
+  uint8_t reserved[5];
+  //! Expression left and right values.
+  Value value[2];
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Resets the whole expression by zeroing all of its members.
+  //!
+  //! Changes both values to \ref ExpressionValueType::kNone and `opType` to \ref ExpressionOpType::kAdd (both are zero).
+  inline void reset() noexcept { memset(this, 0, sizeof(*this)); }
+
+  //! Sets the value type at `index` (0 or 1, not checked) to \ref ExpressionValueType::kConstant and its content to `constant`.
+  inline void setValueAsConstant(size_t index, uint64_t constant) noexcept {
+    valueType[index] = ExpressionValueType::kConstant;
+    value[index].constant = constant;
+  }
+
+  //! Sets the value type at `index` (0 or 1, not checked) to \ref ExpressionValueType::kLabel and its content to `labelEntry`.
+  inline void setValueAsLabel(size_t index, LabelEntry* labelEntry) noexcept {
+    valueType[index] = ExpressionValueType::kLabel;
+    value[index].label = labelEntry;
+  }
+
+  //! Sets the value type at `index` (0 or 1, not checked) to \ref ExpressionValueType::kExpression and its content to `expression`.
+  inline void setValueAsExpression(size_t index, Expression* expression) noexcept {
+    valueType[index] = ExpressionValueType::kExpression;
+    value[index].expression = expression;
+  }
+
+  //! \}
+};
+
+//! Section flags, used by \ref Section (a bit mask, individual flags can be combined).
+enum class SectionFlags : uint32_t {
+  //! No flags.
+  kNone = 0,
+  //! Executable (.text sections).
+  kExecutable = 0x00000001u,
+  //! Read-only (.text and .data sections).
+  kReadOnly = 0x00000002u,
+  //! Zero initialized by the loader (BSS).
+  kZeroInitialized = 0x00000004u,
+  //! Info / comment flag.
+  kComment = 0x00000008u,
+  //! Section created implicitly, can be deleted by \ref Target.
+  kImplicit = 0x80000000u
+};
+ASMJIT_DEFINE_ENUM_FLAGS(SectionFlags) // Presumably provides the bitwise operators that `Section::addFlags()` / `clearFlags()` rely on.
+
+//! Flags that can be used with \ref CodeHolder::copySectionData() and \ref CodeHolder::copyFlattenedData().
+enum class CopySectionFlags : uint32_t {
+  //! No flags.
+  kNone = 0,
+
+  //! If the virtual size of a section is greater than the size of its \ref CodeBuffer then all bytes between the
+  //! buffer size and the virtual size will be zeroed. If this option is not set then those bytes are left as is,
+  //! which means that if the user didn't initialize them they keep their previous content, which may be unwanted.
+  kPadSectionBuffer = 0x00000001u,
+
+  //! Clears the rest of the target buffer if the flattened data is smaller than the destination size. This option
+  //! works only with \ref CodeHolder::copyFlattenedData() as it processes multiple sections. It is ignored by
+  //! \ref CodeHolder::copySectionData().
+  kPadTargetBuffer = 0x00000002u
+};
+ASMJIT_DEFINE_ENUM_FLAGS(CopySectionFlags)
+
+//! Section entry - holds section id, flags, alignment, order, offset, virtual size, name, and its code/data buffer.
+class Section {
+public:
+  //! \name Members
+  //! \{
+
+  //! Section id.
+  uint32_t _id;
+  //! Section flags.
+  SectionFlags _flags;
+  //! Section alignment requirements (0 if no requirements).
+  uint32_t _alignment;
+  //! Order (lower value means higher priority).
+  int32_t _order;
+  //! Offset of this section from base-address.
+  uint64_t _offset;
+  //! Virtual size of the section (zero initialized sections).
+  uint64_t _virtualSize;
+  //! Section name (max 35 characters, PE allows max 8).
+  FixedString<Globals::kMaxSectionNameSize + 1> _name;
+  //! Code or data buffer.
+  CodeBuffer _buffer;
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns the section id.
+  inline uint32_t id() const noexcept { return _id; }
+  //! Returns the section name, as a null terminated string.
+  inline const char* name() const noexcept { return _name.str; }
+
+  //! Returns the section data (pointer to the data held by the section's buffer).
+  inline uint8_t* data() noexcept { return _buffer.data(); }
+  //! \overload
+  inline const uint8_t* data() const noexcept { return _buffer.data(); }
+
+  //! Returns the section flags.
+  inline SectionFlags flags() const noexcept { return _flags; }
+  //! Tests whether the section has the given `flag`.
+  inline bool hasFlag(SectionFlags flag) const noexcept { return Support::test(_flags, flag); }
+  //! Adds `flags` to the section flags.
+  inline void addFlags(SectionFlags flags) noexcept { _flags |= flags; }
+  //! Removes `flags` from the section flags.
+  inline void clearFlags(SectionFlags flags) noexcept { _flags &= ~flags; }
+
+  //! Returns the minimum section alignment.
+  inline uint32_t alignment() const noexcept { return _alignment; }
+  //! Sets the minimum section alignment.
+  inline void setAlignment(uint32_t alignment) noexcept { _alignment = alignment; }
+
+  //! Returns the section order, which has a higher priority than section id.
+  inline int32_t order() const noexcept { return _order; }
+
+  //! Returns the section offset, relative to base.
+  inline uint64_t offset() const noexcept { return _offset; }
+  //! Sets the section offset.
+  inline void setOffset(uint64_t offset) noexcept { _offset = offset; }
+
+  //! Returns the virtual size of the section.
+  //!
+  //! Virtual size is initially zero and is never changed by AsmJit. It's normal if virtual size is smaller than
+  //! size returned by `bufferSize()` as the buffer stores real data emitted by assemblers or appended by users.
+  //!
+  //! Use `realSize()` to get the real and final size of this section.
+  inline uint64_t virtualSize() const noexcept { return _virtualSize; }
+  //! Sets the virtual size of the section.
+  inline void setVirtualSize(uint64_t virtualSize) noexcept { _virtualSize = virtualSize; }
+
+  //! Returns the buffer size of the section.
+  inline size_t bufferSize() const noexcept { return _buffer.size(); }
+  //! Returns the real size of the section, which is the greater of `virtualSize()` and `bufferSize()`.
+  inline uint64_t realSize() const noexcept { return Support::max<uint64_t>(virtualSize(), bufferSize()); }
+
+  //! Returns the `CodeBuffer` used by this section.
+  inline CodeBuffer& buffer() noexcept { return _buffer; }
+  //! Returns the `CodeBuffer` used by this section (const).
+  inline const CodeBuffer& buffer() const noexcept { return _buffer; }
+
+  //! \}
+};
+
+//! Entry in an address table - a tree node that pairs an address with a slot and is ordered by address.
+class AddressTableEntry : public ZoneTreeNodeT<AddressTableEntry> {
+public:
+  ASMJIT_NONCOPYABLE(AddressTableEntry)
+
+  //! \name Members
+  //! \{
+
+  //! Address.
+  uint64_t _address;
+  //! Slot (0xFFFFFFFF means that no slot has been assigned, see \ref hasAssignedSlot()).
+  uint32_t _slot;
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  inline explicit AddressTableEntry(uint64_t address) noexcept // Creates an entry for `address` with no slot assigned.
+    : _address(address),
+      _slot(0xFFFFFFFFu) {}
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  inline uint64_t address() const noexcept { return _address; } // Returns the address of this entry.
+  inline uint32_t slot() const noexcept { return _slot; } // Returns the slot (0xFFFFFFFF if not assigned).
+
+  inline bool hasAssignedSlot() const noexcept { return _slot != 0xFFFFFFFFu; } // Tests whether a slot has been assigned.
+
+  inline bool operator<(const AddressTableEntry& other) const noexcept { return _address < other._address; } // Entries compare by address.
+  inline bool operator>(const AddressTableEntry& other) const noexcept { return _address > other._address; }
+
+  inline bool operator<(uint64_t queryAddress) const noexcept { return _address < queryAddress; } // Comparison against a raw address.
+  inline bool operator>(uint64_t queryAddress) const noexcept { return _address > queryAddress; }
+
+  //! \}
+};
+
+//! Offset format type, used by \ref OffsetFormat.
+enum class OffsetType : uint8_t {
+  //! A signed value having `_immBitCount` bits and shifted by `_immBitShift`.
+  //!
+  //! This offset type is sufficient for many targets that store offset as a continuous set of bits within an
+  //! instruction word / sequence of bytes.
+  kSignedOffset,
+
+  //! An unsigned value having `_immBitCount` bits and shifted by `_immBitShift`.
+  kUnsignedOffset,
+
+  // AArch64 Specific Offset Formats
+  // -------------------------------
+
+  //! AARCH64 ADR format of `[.|immlo:2|.....|immhi:19|.....]`.
+  kAArch64_ADR,
+
+  //! AARCH64 ADRP format of `[.|immlo:2|.....|immhi:19|.....]` (4kB pages).
+  kAArch64_ADRP,
+
+  //! Maximum value of `OffsetType`.
+  kMaxValue = kAArch64_ADRP
+};
+
+//! Provides information about formatting offsets, absolute addresses, or their parts. Offset format is used by both
+//! \ref RelocEntry and \ref LabelLink. The illustration below describes the relation of region size and offset size.
+//! Region size is the size of the whole unit whereas offset size is the size of the unit that will be patched.
+//!
+//! ```
+//! +-> Code buffer |   The subject of the relocation (region)  |
+//! |               | (Word-Offset)  (Word-Size)                |
+//! |xxxxxxxxxxxxxxx|................|*PATCHED*|................|xxxxxxxxxxxx->
+//!                                  |         |
+//!     [Word Offset points here]----+         +--- [WordOffset + WordSize]
+//! ```
+//!
+//! Once the offset word has been located it can be patched like this:
+//!
+//! ```
+//!                               |ImmDiscardLSB (discard LSB bits).
+//!                               |..
+//! [0000000000000iiiiiiiiiiiiiiiiiDD] - Offset value (32-bit)
+//! [000000000000000iiiiiiiiiiiiiiiii] - Offset value after discard LSB.
+//! [00000000000iiiiiiiiiiiiiiiii0000] - Offset value shifted by ImmBitShift.
+//! [xxxxxxxxxxxiiiiiiiiiiiiiiiiixxxx] - Patched word (32-bit)
+//!             |...............|
+//!               (ImmBitCount) +- ImmBitShift
+//! ```
+struct OffsetFormat {
+  //! \name Members
+  //! \{
+
+  //! Type of the offset.
+  OffsetType _type;
+  //! Encoding flags.
+  uint8_t _flags;
+  //! Size of the region (in bytes) containing the offset value, if the offset value is part of an instruction,
+  //! otherwise it would be the same as `_valueSize`.
+  uint8_t _regionSize;
+  //! Size of the offset value, in bytes (1, 2, 4, or 8).
+  uint8_t _valueSize;
+  //! Offset of the offset value, in bytes, relative to the start of the region or data. Value offset would be
+  //! zero if both region size and value size are equal.
+  uint8_t _valueOffset;
+  //! Size of the offset immediate value in bits.
+  uint8_t _immBitCount;
+  //! Shift of the offset immediate value in bits in the target word.
+  uint8_t _immBitShift;
+  //! Number of least significant bits to discard before writing the immediate to the destination. All discarded
+  //! bits must be zero otherwise the value is invalid.
+  uint8_t _immDiscardLsb;
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns the type of the offset.
+  inline OffsetType type() const noexcept { return _type; }
+
+  //! Returns the encoding flags.
+  inline uint32_t flags() const noexcept { return _flags; }
+
+  //! Returns the size of the region/instruction where the offset is encoded.
+  inline uint32_t regionSize() const noexcept { return _regionSize; }
+
+  //! Returns the offset of the word relative to the start of the region where the offset is.
+  inline uint32_t valueOffset() const noexcept { return _valueOffset; }
+
+  //! Returns the size of the data-type (word) that contains the offset, in bytes.
+  inline uint32_t valueSize() const noexcept { return _valueSize; }
+  //! Returns the count of bits of the offset value in the data it's stored in.
+  inline uint32_t immBitCount() const noexcept { return _immBitCount; }
+  //! Returns the bit-shift of the offset value in the data it's stored in.
+  inline uint32_t immBitShift() const noexcept { return _immBitShift; }
+  //! Returns the number of least significant bits of the offset value, that must be zero and that are not part of
+  //! the encoded data.
+  inline uint32_t immDiscardLsb() const noexcept { return _immDiscardLsb; }
+
+  //! Resets this offset format to a simple data value of `valueSize` bytes.
+  //!
+  //! The region will be the same size as the value and immediate bits would correspond to `valueSize * 8`. There
+  //! will be no immediate bit shift or discarded bits.
+  inline void resetToSimpleValue(OffsetType type, size_t valueSize) noexcept {
+    ASMJIT_ASSERT(valueSize <= 8u);
+
+    _type = type;
+    _flags = uint8_t(0);
+    _regionSize = uint8_t(valueSize);
+    _valueSize = uint8_t(valueSize);
+    _valueOffset = uint8_t(0);
+    _immBitCount = uint8_t(valueSize * 8u);
+    _immBitShift = uint8_t(0);
+    _immDiscardLsb = uint8_t(0);
+  }
+
+  inline void resetToImmValue(OffsetType type, size_t valueSize, uint32_t immBitShift, uint32_t immBitCount, uint32_t immDiscardLsb) noexcept {
+    ASMJIT_ASSERT(valueSize <= 8u); // Resets to an immediate of `immBitCount` bits stored in a value of `valueSize` bytes.
+    ASMJIT_ASSERT(immBitShift < valueSize * 8u); // The shift must fall inside the value word.
+    ASMJIT_ASSERT(immBitCount <= 64u);
+    ASMJIT_ASSERT(immDiscardLsb <= 64u);
+
+    _type = type;
+    _flags = uint8_t(0);
+    _regionSize = uint8_t(valueSize); // Region equals the value; use setRegion() / setLeadingAndTrailingSize() to widen it.
+    _valueSize = uint8_t(valueSize);
+    _valueOffset = uint8_t(0);
+    _immBitCount = uint8_t(immBitCount);
+    _immBitShift = uint8_t(immBitShift);
+    _immDiscardLsb = uint8_t(immDiscardLsb);
+  }
+
+  inline void setRegion(size_t regionSize, size_t valueOffset) noexcept { // Sets region size and value offset (narrowed to 8 bits, not range-checked).
+    _regionSize = uint8_t(regionSize);
+    _valueOffset = uint8_t(valueOffset);
+  }
+
+  inline void setLeadingAndTrailingSize(size_t leadingSize, size_t trailingSize) noexcept { // Derives the region from bytes before/after the value.
+    _regionSize = uint8_t(leadingSize + trailingSize + _valueSize);
+    _valueOffset = uint8_t(leadingSize);
+  }
+
+  //! \}
+};
+
+//! Relocation type, stored in \ref RelocEntry.
+enum class RelocType : uint32_t {
+  //! None/deleted (no relocation).
+  kNone = 0,
+  //! Expression evaluation, `_payload` is a pointer to \ref Expression.
+  kExpression = 1,
+  //! Relocate absolute to absolute.
+  kAbsToAbs = 2,
+  //! Relocate relative to absolute.
+  kRelToAbs = 3,
+  //! Relocate absolute to relative.
+  kAbsToRel = 4,
+  //! Relocate absolute to relative or use trampoline.
+  kX64AddressEntry = 5
+};
+
+//! Relocation entry - describes what to patch (source section and offset), how (type and format), and with what (payload).
+struct RelocEntry {
+  //! \name Members
+  //! \{
+
+  //! Relocation id.
+  uint32_t _id;
+  //! Type of the relocation.
+  RelocType _relocType;
+  //! Format of the relocated value.
+  OffsetFormat _format;
+  //! Source section id.
+  uint32_t _sourceSectionId;
+  //! Target section id.
+  uint32_t _targetSectionId;
+  //! Source offset (relative to start of the section).
+  uint64_t _sourceOffset;
+  //! Payload (target offset, target address, expression, etc).
+  uint64_t _payload;
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  inline uint32_t id() const noexcept { return _id; } // Returns the relocation id.
+
+  inline RelocType relocType() const noexcept { return _relocType; } // Returns the type of the relocation.
+  inline const OffsetFormat& format() const noexcept { return _format; } // Returns the format of the relocated value.
+
+  inline uint32_t sourceSectionId() const noexcept { return _sourceSectionId; }
+  inline uint32_t targetSectionId() const noexcept { return _targetSectionId; }
+
+  inline uint64_t sourceOffset() const noexcept { return _sourceOffset; }
+  inline uint64_t payload() const noexcept { return _payload; }
+
+  Expression* payloadAsExpression() const noexcept { // Reinterprets `_payload` as a pointer; meaningful for RelocType::kExpression.
+    return reinterpret_cast<Expression*>(uintptr_t(_payload));
+  }
+
+  //! \}
+};
+
+//! Type of the \ref Label, stored in \ref LabelEntry.
+enum class LabelType : uint8_t {
+  //! Anonymous label that can optionally have a name, which is only used for debugging purposes.
+  kAnonymous = 0,
+  //! Local label (always has parentId).
+  kLocal = 1,
+  //! Global label (never has parentId).
+  kGlobal = 2,
+  //! External label (references an external symbol).
+  kExternal = 3,
+
+  //! Maximum value of `LabelType`.
+  kMaxValue = kExternal
+};
+
+//! Data structure used to link either unbound labels or cross-section links (a node of a singly-linked list).
+struct LabelLink {
+  //! Next link (single-linked list).
+  LabelLink* next;
+  //! Section id where the label is bound.
+  uint32_t sectionId;
+  //! Relocation id or `Globals::kInvalidId`.
+  uint32_t relocId;
+  //! Label offset relative to the start of the section.
+  size_t offset;
+  //! Inlined rel8/rel32.
+  intptr_t rel;
+  //! Offset format information.
+  OffsetFormat format;
+};
+
+//! Label entry.
+//!
+//! Contains the following properties:
+//!   - Label id - This is the only thing that is set to the `Label` operand.
+//!   - Label name - Optional, used mostly to create executables and libraries.
+//!   - Label type - Type of the label, default `LabelType::kAnonymous`.
+//!   - Label parent id - Derived from many assemblers that allow to define a local label that falls under a global
+//!     label. This allows to define many labels of the same name that have different parent (global) label.
+//!   - Offset - offset of the label bound by `Assembler`.
+//!   - Links - single-linked list that contains locations of code that has to be patched when the label gets bound.
+//!     Every use of unbound label adds one link to `_links` list.
+//!   - HVal - Hash value of label's name and optionally parentId.
+//!   - HashNext - Hash-table implementation detail.
+class LabelEntry : public ZoneHashNode {
+public:
+  //! \name Constants
+  //! \{
+
+  enum : uint32_t {
+    //! SSO size of \ref _name.
+    //!
+    //! \cond INTERNAL
+    //! Let's round the size of `LabelEntry` to 64 bytes (as `ZoneAllocator` has granularity of 32 bytes anyway). This
+    //! gives `_name` the remaining space, which should be 16 bytes on 64-bit and 28 bytes on 32-bit architectures.
+    //! \endcond
+    kStaticNameSize = 64 - (sizeof(ZoneHashNode) + 8 + sizeof(Section*) + sizeof(size_t) + sizeof(LabelLink*)) // NOTE(review): `_offset` is `uint64_t`, yet `sizeof(size_t)` is used here - these differ on 32-bit targets; confirm.
+  };
+
+  //! \}
+
+  //! \name Members
+  //! \{
+
+  //! Type of the label.
+  LabelType _type;
+  //! Must be zero.
+  uint8_t _reserved[3];
+  //! Label parent id or `Globals::kInvalidId` if the label has no parent (see \ref hasParent()).
+  uint32_t _parentId;
+  //! Label offset relative to the start of the `_section`.
+  uint64_t _offset;
+  //! Section where the label was bound (`nullptr` if the label is not bound, see \ref isBound()).
+  Section* _section;
+  //! Label links.
+  LabelLink* _links;
+  //! Label name.
+  ZoneString<kStaticNameSize> _name;
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  // NOTE: Label id is stored in `_customData`, which is provided by ZoneHashNode to fill a padding that a C++
+  // compiler targeting 64-bit CPU will add to align the structure to 64-bits.
+
+  //! Returns label id.
+  inline uint32_t id() const noexcept { return _customData; }
+  //! Sets label id (internal, used only by `CodeHolder`).
+  inline void _setId(uint32_t id) noexcept { _customData = id; }
+
+  //! Returns label type.
+  inline LabelType type() const noexcept { return _type; }
+
+  //! Tests whether the label has a parent label.
+  inline bool hasParent() const noexcept { return _parentId != Globals::kInvalidId; }
+  //! Returns label's parent id.
+  inline uint32_t parentId() const noexcept { return _parentId; }
+
+  //! Returns the section where the label was bound.
+  //!
+  //! If the label was not yet bound the return value is `nullptr`.
+  inline Section* section() const noexcept { return _section; }
+
+  //! Tests whether the label has name.
+  inline bool hasName() const noexcept { return !_name.empty(); }
+
+  //! Returns the label's name.
+  //!
+  //! \note Local labels will return their local name without their parent part, for example ".L1".
+  inline const char* name() const noexcept { return _name.data(); }
+
+  //! Returns size of label's name.
+  //!
+  //! \note Label name is always null terminated, so you can use `strlen()` to get it, however, it's also cached in
+  //! `LabelEntry` itself, so if you want to know the size the fastest way is to call `LabelEntry::nameSize()`.
+  inline uint32_t nameSize() const noexcept { return _name.size(); }
+
+  //! Returns links associated with this label.
+  inline LabelLink* links() const noexcept { return _links; }
+
+  //! Tests whether the label is bound.
+  inline bool isBound() const noexcept { return _section != nullptr; }
+  //! Tests whether the label is bound to the given `section`.
+  inline bool isBoundTo(Section* section) const noexcept { return _section == section; }
+
+  //! Returns the label offset (only useful if the label is bound).
+  inline uint64_t offset() const noexcept { return _offset; }
+
+  //! Returns the hash-value of label's name and its parent label (if any).
+  //!
+  //! Label hash is calculated as `HASH(Name) ^ ParentId`. The hash function is implemented in `Support::hashString()`
+  //! and `Support::hashRound()`.
+  inline uint32_t hashCode() const noexcept { return _hashCode; }
+
+  //! \}
+};
+
+//! Holds assembled code and data (including sections, labels, and relocation information).
+//!
+//! CodeHolder connects emitters with their targets. It provides them interface that can be used to query information
+//! about the target environment (architecture, etc...) and API to create labels, sections, relocations, and to write
+//! data to a \ref CodeBuffer, which is always part of \ref Section. More than one emitter can be attached to a single
+//! CodeHolder instance at a time, which is used in practice.
+//!
+//! CodeHolder provides interface for all emitter types. Assemblers use CodeHolder to write into \ref CodeBuffer, and
+//! higher level emitters like Builder and Compiler use CodeHolder to manage labels and sections so higher level code
+//! can be serialized to Assembler by \ref BaseEmitter::finalize() and \ref BaseBuilder::serializeTo().
+//!
+//! In order to use CodeHolder, it must be first initialized by \ref init(). After the CodeHolder has been successfully
+//! initialized it can be used to hold assembled code, sections, labels, relocations, and to attach / detach code
+//! emitters. After the end of code generation it can be used to query physical locations of labels and to relocate
+//! the assembled code into the right address.
+//!
+//! \note \ref CodeHolder has an ability to attach an \ref ErrorHandler, however, the error handler is not triggered
+//! by \ref CodeHolder itself, it's instead propagated to all emitters that attach to it.
+class CodeHolder {
+public:
+  ASMJIT_NONCOPYABLE(CodeHolder)
+
+  //! \name Members
+  //! \{
+
+  //! Environment information.
+  Environment _environment;
+  //! Base address or \ref Globals::kNoBaseAddress.
+  uint64_t _baseAddress;
+
+  //! Attached `Logger`, used by all consumers.
+  Logger* _logger;
+  //! Attached `ErrorHandler`.
+  ErrorHandler* _errorHandler;
+
+  //! Code zone (used to allocate core structures).
+  Zone _zone;
+  //! Zone allocator, used to manage internal containers.
+  ZoneAllocator _allocator;
+
+  //! Attached emitters.
+  ZoneVector<BaseEmitter*> _emitters;
+  //! Section entries.
+  ZoneVector<Section*> _sections;
+  //! Section entries sorted by section order and then section id.
+  ZoneVector<Section*> _sectionsByOrder;
+  //! Label entries.
+  ZoneVector<LabelEntry*> _labelEntries;
+  //! Relocation entries.
+  ZoneVector<RelocEntry*> _relocations;
+  //! Label name -> LabelEntry (only named labels).
+  ZoneHash<LabelEntry> _namedLabels;
+
+  //! Count of label links, which are not resolved.
+  size_t _unresolvedLinkCount;
+  //! Pointer to an address table section (or null if this section doesn't exist).
+  Section* _addressTableSection;
+  //! Address table entries.
+  ZoneTree<AddressTableEntry> _addressTableEntries;
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Creates an uninitialized CodeHolder (you must init() it before it can be used).
+  //!
+  //! An optional `temporary` argument can be used to initialize the first block of \ref Zone that the CodeHolder
+  //! uses into a temporary memory provided by the user.
+  ASMJIT_API explicit CodeHolder(const Support::Temporary* temporary = nullptr) noexcept;
+
+  //! \overload
+  inline explicit CodeHolder(const Support::Temporary& temporary) noexcept
+    : CodeHolder(&temporary) {}
+
+  //! Destroys the CodeHolder and frees all resources it has allocated.
+  ASMJIT_API ~CodeHolder() noexcept;
+
+  //! Tests whether the `CodeHolder` has been initialized.
+  //!
+  //! Emitters can be only attached to initialized `CodeHolder` instances.
+  inline bool isInitialized() const noexcept { return _environment.isInitialized(); }
+
+  //! Initializes CodeHolder to hold code described by the given `environment` and `baseAddress`.
+  ASMJIT_API Error init(const Environment& environment, uint64_t baseAddress = Globals::kNoBaseAddress) noexcept;
+  //! Detaches all code-generators attached and resets the `CodeHolder`.
+  ASMJIT_API void reset(ResetPolicy resetPolicy = ResetPolicy::kSoft) noexcept;
+
+  //! \}
+
+  //! \name Attach & Detach
+  //! \{
+
+  //! Attaches an emitter to this `CodeHolder`.
+  ASMJIT_API Error attach(BaseEmitter* emitter) noexcept;
+  //! Detaches an emitter from this `CodeHolder`.
+  ASMJIT_API Error detach(BaseEmitter* emitter) noexcept;
+
+  //! \}
+
+  //! \name Allocators
+  //! \{
+
+  //! Returns the allocator that the `CodeHolder` uses.
+  //!
+  //! \note This should be only used for AsmJit's purposes. Code holder uses arena allocator to allocate everything,
+  //! so anything allocated through this allocator will be invalidated by \ref CodeHolder::reset() or by CodeHolder's
+  //! destructor.
+  inline ZoneAllocator* allocator() const noexcept { return const_cast<ZoneAllocator*>(&_allocator); }
+
+  //! \}
+
+  //! \name Code & Architecture
+  //! \{
+
+  //! Returns the target environment information.
+  inline const Environment& environment() const noexcept { return _environment; }
+
+  //! Returns the target architecture.
+  inline Arch arch() const noexcept { return environment().arch(); }
+  //! Returns the target sub-architecture.
+  inline SubArch subArch() const noexcept { return environment().subArch(); }
+
+  //! Tests whether a static base-address is set.
+  inline bool hasBaseAddress() const noexcept { return _baseAddress != Globals::kNoBaseAddress; }
+  //! Returns a static base-address or \ref Globals::kNoBaseAddress, if not set.
+  inline uint64_t baseAddress() const noexcept { return _baseAddress; }
+
+  //! \}
+
+  //! \name Emitters
+  //! \{
+
+  //! Returns a vector of attached emitters.
+  inline const ZoneVector<BaseEmitter*>& emitters() const noexcept { return _emitters; }
+
+  //! \}
+
+  //! \name Logging
+  //! \{
+
+  //! Returns the attached logger.
+  inline Logger* logger() const noexcept { return _logger; }
+  //! Attaches a `logger` to CodeHolder and propagates it to all attached emitters.
+  ASMJIT_API void setLogger(Logger* logger) noexcept;
+  //! Resets the logger to none.
+  inline void resetLogger() noexcept { setLogger(nullptr); }
+
+  //! \name Error Handling
+  //! \{
+
+  //! Tests whether the CodeHolder has an attached error handler, see \ref ErrorHandler.
+  inline bool hasErrorHandler() const noexcept { return _errorHandler != nullptr; }
+  //! Returns the attached error handler.
+  inline ErrorHandler* errorHandler() const noexcept { return _errorHandler; }
+  //! Attach an error handler to this `CodeHolder`.
+  ASMJIT_API void setErrorHandler(ErrorHandler* errorHandler) noexcept;
+  //! Resets the error handler to none.
+  inline void resetErrorHandler() noexcept { setErrorHandler(nullptr); }
+
+  //! \}
+
+  //! \name Code Buffer
+  //! \{
+
+  //! Makes sure that at least `n` bytes can be added to CodeHolder's buffer `cb`.
+  //!
+  //! \note The buffer `cb` must be managed by `CodeHolder` - otherwise the behavior of the function is undefined.
+  ASMJIT_API Error growBuffer(CodeBuffer* cb, size_t n) noexcept;
+
+  //! Reserves the size of `cb` to at least `n` bytes.
+  //!
+  //! \note The buffer `cb` must be managed by `CodeHolder` - otherwise the behavior of the function is undefined.
+  ASMJIT_API Error reserveBuffer(CodeBuffer* cb, size_t n) noexcept;
+
+  //! \}
+
+  //! \name Sections
+  //! \{
+
+  //! Returns an array of `Section*` records.
+  inline const ZoneVector<Section*>& sections() const noexcept { return _sections; }
+  //! Returns an array of `Section*` records sorted according to section order first, then section id.
+  inline const ZoneVector<Section*>& sectionsByOrder() const noexcept { return _sectionsByOrder; }
+  //! Returns the number of sections.
+  inline uint32_t sectionCount() const noexcept { return _sections.size(); }
+
+  //! Tests whether the given `sectionId` is valid.
+  inline bool isSectionValid(uint32_t sectionId) const noexcept { return sectionId < _sections.size(); }
+
+  //! Creates a new section and return its pointer in `sectionOut`.
+  //!
+  //! Returns `Error`, does not report a possible error to `ErrorHandler`.
+  ASMJIT_API Error newSection(Section** sectionOut, const char* name, size_t nameSize = SIZE_MAX, SectionFlags flags = SectionFlags::kNone, uint32_t alignment = 1, int32_t order = 0) noexcept;
+
+  //! Returns a section entry of the given index.
+  inline Section* sectionById(uint32_t sectionId) const noexcept { return _sections[sectionId]; }
+
+  //! Returns the section that matches the given `name`.
+  //!
+  //! NOTE(review): the return type is `Section*`, not a section id - presumably `nullptr` is returned if there is no such section; confirm.
+  ASMJIT_API Section* sectionByName(const char* name, size_t nameSize = SIZE_MAX) const noexcept;
+
+  //! Returns '.text' section (section that commonly represents code).
+  //!
+  //! \note Text section is always the first section in \ref CodeHolder::sections() array.
+  inline Section* textSection() const noexcept { return _sections[0]; }
+
+  //! Tests whether '.addrtab' section exists.
+  inline bool hasAddressTable() const noexcept { return _addressTableSection != nullptr; }
+
+  //! Returns '.addrtab' section.
+  //!
+  //! This section is used exclusively by AsmJit to store absolute 64-bit
+  //! addresses that cannot be encoded in instructions like 'jmp' or 'call'.
+  //!
+  //! \note This section is created on demand, the returned pointer can be null.
+  inline Section* addressTableSection() const noexcept { return _addressTableSection; }
+
+  //! Ensures that '.addrtab' section exists (creates it if it doesn't) and
+  //! returns it. Can return `nullptr` on out of memory condition.
+  ASMJIT_API Section* ensureAddressTableSection() noexcept;
+
+  //! Used to add an address to an address table.
+  //!
+  //! This implicitly calls `ensureAddressTableSection()` and then creates `AddressTableEntry` that is inserted
+  //! to `_addressTableEntries`. If the address already exists this operation does nothing as the same addresses
+  //! use the same slot.
+  //!
+  //! This function should be considered internal as it's used by assemblers to insert an absolute address into the
+  //! address table. Inserting address into address table without creating a particular relocation entry makes no sense.
+  ASMJIT_API Error addAddressToAddressTable(uint64_t address) noexcept;
+
+  //! \}
+
+  //! \name Labels & Symbols
+  //! \{
+
+  //! Returns array of `LabelEntry*` records.
+  inline const ZoneVector<LabelEntry*>& labelEntries() const noexcept { return _labelEntries; }
+
+  //! Returns number of labels created.
+  inline uint32_t labelCount() const noexcept { return _labelEntries.size(); }
+
+  //! Tests whether the label having `id` is valid (i.e. created by `newLabelEntry()`).
+  inline bool isLabelValid(uint32_t labelId) const noexcept {
+    return labelId < _labelEntries.size();
+  }
+
+  //! Tests whether the `label` is valid (i.e. created by `newLabelEntry()`).
+  inline bool isLabelValid(const Label& label) const noexcept {
+    return label.id() < _labelEntries.size();
+  }
+
+  //! \overload
+  inline bool isLabelBound(uint32_t labelId) const noexcept {
+    return isLabelValid(labelId) && _labelEntries[labelId]->isBound();
+  }
+
+  //! Tests whether the `label` is already bound.
+  //!
+  //! Returns `false` if the `label` is not valid.
+  inline bool isLabelBound(const Label& label) const noexcept {
+    return isLabelBound(label.id());
+  }
+
+  //! Returns LabelEntry of the given label `id`.
+  inline LabelEntry* labelEntry(uint32_t labelId) const noexcept {
+    return isLabelValid(labelId) ? _labelEntries[labelId] : static_cast<LabelEntry*>(nullptr);
+  }
+
+  //! Returns LabelEntry of the given `label`.
+  inline LabelEntry* labelEntry(const Label& label) const noexcept {
+    return labelEntry(label.id());
+  }
+
+  //! Returns offset of a `Label` by its `labelId`.
+  //!
+  //! The offset returned is relative to the start of the section. Zero offset is returned for unbound labels,
+  //! which is their initial offset value.
+  inline uint64_t labelOffset(uint32_t labelId) const noexcept {
+    ASMJIT_ASSERT(isLabelValid(labelId));
+    return _labelEntries[labelId]->offset();
+  }
+
+  //! \overload
+  inline uint64_t labelOffset(const Label& label) const noexcept {
+    return labelOffset(label.id());
+  }
+
+  //! Returns offset of a label by its `labelId` relative to the base offset.
+  //!
+  //! \remarks The offset of the section where the label is bound must be valid in order to use this function,
+  //! otherwise the value returned will not be reliable.
+  inline uint64_t labelOffsetFromBase(uint32_t labelId) const noexcept {
+    ASMJIT_ASSERT(isLabelValid(labelId));
+    const LabelEntry* le = _labelEntries[labelId];
+    return (le->isBound() ? le->section()->offset() : uint64_t(0)) + le->offset();
+  }
+
+  //! \overload
+  inline uint64_t labelOffsetFromBase(const Label& label) const noexcept {
+    return labelOffsetFromBase(label.id());
+  }
+
+  //! Creates a new anonymous label and returns its entry in `entryOut`.
+  //!
+  //! Returns `Error`, does not report error to `ErrorHandler`.
+  ASMJIT_API Error newLabelEntry(LabelEntry** entryOut) noexcept;
+
+  //! Creates a new named \ref LabelEntry of the given label `type`.
+  //!
+  //! \param entryOut Where to store the created \ref LabelEntry.
+  //! \param name The name of the label.
+  //! \param nameSize The length of `name` argument, or `SIZE_MAX` if `name` is a null terminated string, which
+  //!        means that the `CodeHolder` will use `strlen()` to determine the length.
+  //! \param type The type of the label to create, see \ref LabelType.
+  //! \param parentId Parent id of a local label, otherwise it must be \ref Globals::kInvalidId.
+  //! \retval Always returns \ref Error, does not report a possible error to the attached \ref ErrorHandler.
+  //!
+  //! AsmJit has a support for local labels (\ref LabelType::kLocal) which require a parent label id (parentId).
+  //! The names of local labels can conflict with names of other local labels that have a different parent. In
+  //! addition, AsmJit supports named anonymous labels, which are useful only for debugging purposes as the
+  //! anonymous name will have a name, which will be formatted, but the label itself cannot be queried by such
+  //! name.
+  ASMJIT_API Error newNamedLabelEntry(LabelEntry** entryOut, const char* name, size_t nameSize, LabelType type, uint32_t parentId = Globals::kInvalidId) noexcept;
+
+  //! Returns a label by name.
+  //!
+  //! If the named label doesn't exist a default constructed \ref Label is returned,
+  //! which has its id set to \ref Globals::kInvalidId.
+  inline Label labelByName(const char* name, size_t nameSize = SIZE_MAX, uint32_t parentId = Globals::kInvalidId) noexcept {
+    return Label(labelIdByName(name, nameSize, parentId));
+  }
+
+  //! Returns a label id by name.
+  //!
+  //! If the named label doesn't exist \ref Globals::kInvalidId is returned.
+  ASMJIT_API uint32_t labelIdByName(const char* name, size_t nameSize = SIZE_MAX, uint32_t parentId = Globals::kInvalidId) noexcept;
+
+  //! Tests whether there are any unresolved label links.
+  inline bool hasUnresolvedLinks() const noexcept { return _unresolvedLinkCount != 0; }
+  //! Returns the number of label links, which are unresolved.
+  inline size_t unresolvedLinkCount() const noexcept { return _unresolvedLinkCount; }
+
+  //! Creates a new label-link used to store information about yet unbound labels.
+  //!
+  //! Returns `null` if the allocation failed.
+  ASMJIT_API LabelLink* newLabelLink(LabelEntry* le, uint32_t sectionId, size_t offset, intptr_t rel, const OffsetFormat& format) noexcept;
+
+  //! Resolves cross-section links (`LabelLink`) associated with each label that was used as a destination in code
+  //! of a different section. It's only useful to people that use multiple sections as it will do nothing if the code
+  //! only contains a single section in which cross-section links are not possible.
+  ASMJIT_API Error resolveUnresolvedLinks() noexcept;
+
+  //! Binds a label to a given `sectionId` and `offset` (relative to start of the section).
+  //!
+  //! This function is generally used by `BaseAssembler::bind()` to do the heavy lifting.
+  ASMJIT_API Error bindLabel(const Label& label, uint32_t sectionId, uint64_t offset) noexcept;
+
+  //! \}
+
+  //! \name Relocations
+  //! \{
+
+  //! Tests whether the code contains relocation entries.
+  inline bool hasRelocEntries() const noexcept { return !_relocations.empty(); }
+  //! Returns array of `RelocEntry*` records.
+  inline const ZoneVector<RelocEntry*>& relocEntries() const noexcept { return _relocations; }
+
+  //! Returns a RelocEntry of the given `id`.
+  inline RelocEntry* relocEntry(uint32_t id) const noexcept { return _relocations[id]; }
+
+  //! Creates a new relocation entry of type `relocType`.
+  //!
+  //! Additional fields can be set after the relocation entry was created.
+  ASMJIT_API Error newRelocEntry(RelocEntry** dst, RelocType relocType) noexcept;
+
+  //! \}
+
+  //! \name Utilities
+  //! \{
+
+  //! Flattens all sections by recalculating their offsets, starting at 0.
+  //!
+  //! \note This should never be called more than once.
+  ASMJIT_API Error flatten() noexcept;
+
+  //! Returns the computed size of code & data of all sections.
+  //!
+  //! \note All sections will be iterated over and the code size returned would represent the minimum code size of
+  //! all combined sections after applying minimum alignment. Code size may decrease after calling `flatten()` and
+  //! `relocateToBase()`.
+  ASMJIT_API size_t codeSize() const noexcept;
+
+  //! Relocates the code to the given `baseAddress`.
+  //!
+  //! \param baseAddress Absolute base address where the code will be relocated to. Please note that nothing is
+  //! copied to such base address, it's just an absolute value used by the relocator to resolve all stored relocations.
+  //!
+  //! \note This should never be called more than once.
+  ASMJIT_API Error relocateToBase(uint64_t baseAddress) noexcept;
+
+  //! Copies a single section into `dst`.
+  ASMJIT_API Error copySectionData(void* dst, size_t dstSize, uint32_t sectionId, CopySectionFlags copyFlags = CopySectionFlags::kNone) noexcept;
+
+  //! Copies all sections into `dst`.
+  //!
+  //! This should only be used if the data was flattened and there are no gaps between the sections. The `dstSize`
+  //! is always checked and the copy will never write anything outside the provided buffer.
+  ASMJIT_API Error copyFlattenedData(void* dst, size_t dstSize, CopySectionFlags copyFlags = CopySectionFlags::kNone) noexcept;
+
+  //! \}
+};
+
+//! \}
+
+ASMJIT_END_NAMESPACE
+
+#endif // ASMJIT_CORE_CODEHOLDER_H_INCLUDED
diff --git a/lib/lepton/asmjit/core/codewriter.cpp b/lib/lepton/asmjit/core/codewriter.cpp
new file mode 100644
index 0000000000..1babc5f172
--- /dev/null
+++ b/lib/lepton/asmjit/core/codewriter.cpp
@@ -0,0 +1,175 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#include "../core/codeholder.h"
+#include "../core/codewriter_p.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+bool CodeWriterUtils::encodeOffset32(uint32_t* dst, int64_t offset64, const OffsetFormat& format) noexcept {
+  uint32_t bitCount = format.immBitCount();
+  uint32_t bitShift = format.immBitShift();
+  uint32_t discardLsb = format.immDiscardLsb();
+  // Encodes `offset64` as described by `format`; on success the positioned bits are stored to `*dst` and true is returned.
+  // Invalid offset (should not happen) - no immediate bits, or more bits than the patched value has.
+  if (!bitCount || bitCount > format.valueSize() * 8u)
+    return false;
+
+  uint32_t value;
+
+  // First handle all unsigned offset types.
+  if (format.type() == OffsetType::kUnsignedOffset) {
+    if (discardLsb) {
+      ASMJIT_ASSERT(discardLsb <= 32);
+      if ((offset64 & Support::lsbMask<uint32_t>(discardLsb)) != 0)
+        return false;
+      offset64 = int64_t(uint64_t(offset64) >> discardLsb);
+    }
+    // Fail if anything outside of the low `bitCount` bits is set.
+    value = uint32_t(offset64 & Support::lsbMask<uint32_t>(bitCount));
+    if (value != offset64)
+      return false;
+  }
+  else {
+    // The rest of OffsetType options are all signed.
+    if (discardLsb) {
+      ASMJIT_ASSERT(discardLsb <= 32);
+      if ((offset64 & Support::lsbMask<uint32_t>(discardLsb)) != 0)
+        return false;
+      offset64 >>= discardLsb;
+    }
+    // The offset has to pass both `isInt32()` and `isEncodableOffset32()` for `bitCount` bits.
+    if (!Support::isInt32(offset64))
+      return false;
+
+    value = uint32_t(int32_t(offset64));
+    if (!Support::isEncodableOffset32(int32_t(value), bitCount))
+      return false;
+  }
+  // Place the checked value into the layout selected by the offset type; unknown types are rejected.
+  switch (format.type()) {
+    case OffsetType::kSignedOffset:
+    case OffsetType::kUnsignedOffset: {
+      *dst = (value & Support::lsbMask<uint32_t>(bitCount)) << bitShift;
+      return true;
+    }
+
+    case OffsetType::kAArch64_ADR:
+    case OffsetType::kAArch64_ADRP: {
+      // Sanity checks - only a 4-byte value with a 21-bit immediate shifted by 5 is accepted.
+      if (format.valueSize() != 4 || bitCount != 21 || bitShift != 5)
+        return false;
+
+      uint32_t immLo = value & 0x3u;
+      uint32_t immHi = (value >> 2) & Support::lsbMask<uint32_t>(19);
+      // The low 2 bits are stored at bit 29, the following 19 bits at bit 5.
+      *dst = (immLo << 29) | (immHi << 5);
+      return true;
+    }
+
+    default:
+      return false;
+  }
+}
+
+bool CodeWriterUtils::encodeOffset64(uint64_t* dst, int64_t offset64, const OffsetFormat& format) noexcept {
+  uint32_t bitCount = format.immBitCount();
+  uint32_t discardLsb = format.immDiscardLsb();
+  // Encodes `offset64` as described by `format`; rejects a format with no immediate bits or more bits than the value has.
+  if (!bitCount || bitCount > format.valueSize() * 8u)
+    return false;
+
+  uint64_t value;
+
+  // First handle all unsigned offset types.
+  if (format.type() == OffsetType::kUnsignedOffset) {
+    if (discardLsb) {
+      ASMJIT_ASSERT(discardLsb <= 32);
+      if ((offset64 & Support::lsbMask<uint32_t>(discardLsb)) != 0)
+        return false;
+      offset64 = int64_t(uint64_t(offset64) >> discardLsb);
+    }
+    // Fail if anything outside of the low `bitCount` bits is set.
+    value = uint64_t(offset64) & Support::lsbMask<uint64_t>(bitCount);
+    if (value != uint64_t(offset64))
+      return false;
+  }
+  else {
+    // The rest of OffsetType options are all signed.
+    if (discardLsb) {
+      ASMJIT_ASSERT(discardLsb <= 32);
+      if ((offset64 & Support::lsbMask<uint32_t>(discardLsb)) != 0)
+        return false;
+      offset64 >>= discardLsb;
+    }
+
+    if (!Support::isEncodableOffset64(offset64, bitCount))
+      return false;
+
+    value = uint64_t(offset64);
+  }
+  // Only plain signed/unsigned offsets are handled here; every other offset type is rejected.
+  switch (format.type()) {
+    case OffsetType::kSignedOffset:
+    case OffsetType::kUnsignedOffset: {
+      *dst = (value & Support::lsbMask<uint64_t>(bitCount)) << format.immBitShift();
+      return true;
+    }
+
+    default:
+      return false;
+  }
+}
+
+bool CodeWriterUtils::writeOffset(void* dst, int64_t offset64, const OffsetFormat& format) noexcept {
+  // `dst` is the beginning of the patched region; the word that receives
+  // the encoded offset starts `valueOffset()` bytes into that region.
+  void* word = static_cast<char*>(dst) + format.valueOffset();
+
+  switch (format.valueSize()) {
+    case 1: {
+      // 8-bit word - the encoded bits are OR-ed into the existing byte.
+      uint32_t bits;
+      bool encoded = encodeOffset32(&bits, offset64, format);
+      if (encoded)
+        Support::writeU8(word, uint8_t(Support::readU8(word) | bits));
+      return encoded;
+    }
+
+    case 2: {
+      // 16-bit word accessed through the `U16uLE` helpers.
+      uint32_t bits;
+      bool encoded = encodeOffset32(&bits, offset64, format);
+      if (encoded)
+        Support::writeU16uLE(word, uint16_t(Support::readU16uLE(word) | bits));
+      return encoded;
+    }
+
+    case 4: {
+      // 32-bit word accessed through the `U32uLE` helpers; this is the
+      // only size for which the AArch64 ADR/ADRP layouts can be encoded.
+      uint32_t bits;
+      bool encoded = encodeOffset32(&bits, offset64, format);
+      if (encoded)
+        Support::writeU32uLE(word, Support::readU32uLE(word) | bits);
+      return encoded;
+    }
+
+    case 8: {
+      // 64-bit word - the only size that goes through `encodeOffset64()`.
+      uint64_t bits;
+      bool encoded = encodeOffset64(&bits, offset64, format);
+      if (encoded)
+        Support::writeU64uLE(word, Support::readU64uLE(word) | bits);
+      return encoded;
+    }
+
+    default:
+      return false;
+  }
+}
+
+ASMJIT_END_NAMESPACE
diff --git a/lib/lepton/asmjit/core/codewriter_p.h b/lib/lepton/asmjit/core/codewriter_p.h
new file mode 100644
index 0000000000..c799241490
--- /dev/null
+++ b/lib/lepton/asmjit/core/codewriter_p.h
@@ -0,0 +1,179 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_CODEBUFFERWRITER_P_H_INCLUDED
+#define ASMJIT_CORE_CODEBUFFERWRITER_P_H_INCLUDED
+
+#include "../core/assembler.h"
+#include "../core/codebuffer.h"
+#include "../core/support.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \cond INTERNAL
+//! \addtogroup asmjit_assembler
+//! \{
+
+struct OffsetFormat;
+
+//! Helper that is used to write into a \ref CodeBuffer held by \ref BaseAssembler.
+class CodeWriter {
+public:
+  uint8_t* _cursor;
+  //! Creates a writer that starts at the assembler's current buffer position (`a->_bufferPtr`).
+  ASMJIT_FORCE_INLINE explicit CodeWriter(BaseAssembler* a) noexcept
+    : _cursor(a->_bufferPtr) {}
+  //! Asks `a->_code` to grow the section buffer when fewer than `n` bytes remain before `a->_bufferEnd`.
+  ASMJIT_FORCE_INLINE Error ensureSpace(BaseAssembler* a, size_t n) noexcept {
+    size_t remainingSpace = (size_t)(a->_bufferEnd - _cursor);
+    if (ASMJIT_UNLIKELY(remainingSpace < n)) {
+      CodeBuffer& buffer = a->_section->_buffer;
+      Error err = a->_code->growBuffer(&buffer, n);
+      if (ASMJIT_UNLIKELY(err))
+        return a->reportError(err);
+      _cursor = a->_bufferPtr;
+    }
+    return kErrorOk;
+  }
+  //! Cursor accessors - none of them checks the position against the buffer bounds.
+  ASMJIT_FORCE_INLINE uint8_t* cursor() const noexcept { return _cursor; }
+  ASMJIT_FORCE_INLINE void setCursor(uint8_t* cursor) noexcept { _cursor = cursor; }
+  ASMJIT_FORCE_INLINE void advance(size_t n) noexcept { _cursor += n; }
+  //! Returns the distance in bytes between `from` and the cursor (`from` must not be past the cursor).
+  ASMJIT_FORCE_INLINE size_t offsetFrom(uint8_t* from) const noexcept {
+    ASMJIT_ASSERT(_cursor >= from);
+    return (size_t)(_cursor - from);
+  }
+  //! Writes the low 8 bits of `val` and advances the cursor by one byte.
+  template<typename T>
+  ASMJIT_FORCE_INLINE void emit8(T val) noexcept {
+    typedef typename std::make_unsigned<T>::type U;
+    _cursor[0] = uint8_t(U(val) & U(0xFF));
+    _cursor++;
+  }
+  //! Stores the low 8 bits of `val` unconditionally, but advances the cursor only by `cond` (0 or 1).
+  template<typename T, typename Y>
+  ASMJIT_FORCE_INLINE void emit8If(T val, Y cond) noexcept {
+    typedef typename std::make_unsigned<T>::type U;
+    ASMJIT_ASSERT(size_t(cond) <= 1u);
+
+    _cursor[0] = uint8_t(U(val) & U(0xFF));
+    _cursor += size_t(cond);
+  }
+  //! Writes the low 16 bits of `val` through `Support::writeU16uLE()` and advances by 2 bytes.
+  template<typename T>
+  ASMJIT_FORCE_INLINE void emit16uLE(T val) noexcept {
+    typedef typename std::make_unsigned<T>::type U;
+    Support::writeU16uLE(_cursor, uint16_t(U(val) & 0xFFFFu));
+    _cursor += 2;
+  }
+  //! Writes the low 16 bits of `val` through `Support::writeU16uBE()` and advances by 2 bytes.
+  template<typename T>
+  ASMJIT_FORCE_INLINE void emit16uBE(T val) noexcept {
+    typedef typename std::make_unsigned<T>::type U;
+    Support::writeU16uBE(_cursor, uint16_t(U(val) & 0xFFFFu));
+    _cursor += 2;
+  }
+  //! Writes the low 32 bits of `val` through `Support::writeU32uLE()` and advances by 4 bytes.
+  template<typename T>
+  ASMJIT_FORCE_INLINE void emit32uLE(T val) noexcept {
+    typedef typename std::make_unsigned<T>::type U;
+    Support::writeU32uLE(_cursor, uint32_t(U(val) & 0xFFFFFFFFu));
+    _cursor += 4;
+  }
+  //! Writes the low 32 bits of `val` through `Support::writeU32uBE()` and advances by 4 bytes.
+  template<typename T>
+  ASMJIT_FORCE_INLINE void emit32uBE(T val) noexcept {
+    typedef typename std::make_unsigned<T>::type U;
+    Support::writeU32uBE(_cursor, uint32_t(U(val) & 0xFFFFFFFFu));
+    _cursor += 4;
+  }
+  //! Copies `size` bytes from `data` to the cursor and advances by `size` (`size` must not be zero).
+  ASMJIT_FORCE_INLINE void emitData(const void* data, size_t size) noexcept {
+    ASMJIT_ASSERT(size != 0);
+    memcpy(_cursor, data, size);
+    _cursor += size;
+  }
+  //! Writes the `size` least significant bytes of `value`, lowest byte first (`size <= sizeof(T)`).
+  template<typename T>
+  ASMJIT_FORCE_INLINE void emitValueLE(const T& value, size_t size) noexcept {
+    typedef typename std::make_unsigned<T>::type U;
+    ASMJIT_ASSERT(size <= sizeof(T));
+
+    U v = U(value);
+    for (uint32_t i = 0; i < size; i++) {
+      _cursor[i] = uint8_t(v & 0xFFu);
+      v >>= 8;
+    }
+    _cursor += size;
+  }
+  //! Writes the `size` least significant bytes of `value`, highest byte first (`size <= sizeof(T)`).
+  template<typename T>
+  ASMJIT_FORCE_INLINE void emitValueBE(const T& value, size_t size) noexcept {
+    typedef typename std::make_unsigned<T>::type U;
+    ASMJIT_ASSERT(size <= sizeof(T));
+    // Fill the bytes back to front so the least significant byte ends up last.
+    U v = U(value);
+    for (size_t i = size; i != 0; i--) {
+      _cursor[i - 1] = uint8_t(v & 0xFFu);
+      v >>= 8;
+    }
+    _cursor += size;
+  }
+  //! Writes `size` zero bytes at the cursor and advances by `size` (`size` must not be zero).
+  ASMJIT_FORCE_INLINE void emitZeros(size_t size) noexcept {
+    ASMJIT_ASSERT(size != 0);
+    memset(_cursor, 0, size);
+    _cursor += size;
+  }
+  //! Removes the byte at `where` by moving the bytes after it down by one, then steps the cursor back.
+  ASMJIT_FORCE_INLINE void remove8(uint8_t* where) noexcept {
+    ASMJIT_ASSERT(where < _cursor);
+
+    uint8_t* p = where;
+    while (++p != _cursor)
+      p[-1] = p[0];
+    _cursor--;
+  }
+  //! Inserts the low 8 bits of `val` at `where` by moving [where, cursor) up by one byte, then advances the cursor.
+  template<typename T>
+  ASMJIT_FORCE_INLINE void insert8(uint8_t* where, T val) noexcept {
+    uint8_t* p = _cursor;
+
+    while (p != where) {
+      p[0] = p[-1];
+      p--;
+    }
+
+    *p = uint8_t(val & 0xFF);
+    _cursor++;
+  }
+  //! Stores the cursor to `a->_bufferPtr` and raises the section buffer size to the written size if that is larger.
+  ASMJIT_FORCE_INLINE void done(BaseAssembler* a) noexcept {
+    CodeBuffer& buffer = a->_section->_buffer;
+    size_t newSize = (size_t)(_cursor - a->_bufferData);
+    ASMJIT_ASSERT(newSize <= buffer.capacity());
+
+    a->_bufferPtr = _cursor;
+    buffer._size = Support::max(buffer._size, newSize);
+  }
+};
+
+//! Code writer utilities.
+namespace CodeWriterUtils {
+
+bool encodeOffset32(uint32_t* dst, int64_t offset64, const OffsetFormat& format) noexcept;
+bool encodeOffset64(uint64_t* dst, int64_t offset64, const OffsetFormat& format) noexcept;
+
+bool writeOffset(void* dst, int64_t offset64, const OffsetFormat& format) noexcept;
+
+} // {CodeWriterUtils}
+
+//! \}
+//! \endcond
+
+ASMJIT_END_NAMESPACE
+
+#endif // ASMJIT_CORE_CODEBUFFERWRITER_P_H_INCLUDED
diff --git a/lib/lepton/asmjit/core/compiler.cpp b/lib/lepton/asmjit/core/compiler.cpp
new file mode 100644
index 0000000000..b1c6b803b2
--- /dev/null
+++ b/lib/lepton/asmjit/core/compiler.cpp
@@ -0,0 +1,582 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#ifndef ASMJIT_NO_COMPILER
+
+#include "../core/assembler.h"
+#include "../core/compiler.h"
+#include "../core/cpuinfo.h"
+#include "../core/logger.h"
+#include "../core/rapass_p.h"
+#include "../core/rastack_p.h"
+#include "../core/support.h"
+#include "../core/type.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+// GlobalConstPoolPass
+// ===================
+
+class GlobalConstPoolPass : public Pass {
+public:
+  using Base = Pass;
+  ASMJIT_NONCOPYABLE(GlobalConstPoolPass)
+
+  GlobalConstPoolPass() noexcept : Pass("GlobalConstPoolPass") {}
+
+  //! Adds the pending global constant pool node (if any) after the last node and clears its slot.
+  Error run(Zone* zone, Logger* logger) override {
+    DebugUtils::unused(zone, logger);
+
+    BaseCompiler* cc = static_cast<BaseCompiler*>(_cb);
+    ConstPoolNode*& slot = cc->_constPools[uint32_t(ConstPoolScope::kGlobal)];
+    // Nothing to flush when the global slot is empty.
+    ConstPoolNode* pool = slot;
+    if (!pool)
+      return kErrorOk;
+
+    cc->addAfter(pool, cc->lastNode());
+    slot = nullptr;
+    return kErrorOk;
+  }
+};
+
+// BaseCompiler - Construction & Destruction
+// =========================================
+
+BaseCompiler::BaseCompiler() noexcept
+  : BaseBuilder(),
+    _func(nullptr),                         // No function is current until `addFunc()` sets one.
+    _vRegZone(4096 - Zone::kBlockOverhead), // Zone from which `newVirtReg()` allocates `VirtReg` records.
+    _vRegArray(),
+    _constPools { nullptr, nullptr } {      // Constant pool nodes are created on demand by `_newConst()`.
+  _emitterType = EmitterType::kCompiler;
+  _validationFlags = ValidationFlags::kEnableVirtRegs;
+}
+BaseCompiler::~BaseCompiler() noexcept {}
+
+// BaseCompiler - Function Management
+// ==================================
+
+Error BaseCompiler::newFuncNode(FuncNode** out, const FuncSignature& signature) {
+  *out = nullptr;
+
+  // The function node comes with two companion nodes - an exit label and an end sentinel.
+  FuncNode* fn;
+  ASMJIT_PROPAGATE(_newNodeT<FuncNode>(&fn));
+  ASMJIT_PROPAGATE(newLabelNode(&fn->_exitNode));
+  ASMJIT_PROPAGATE(_newNodeT<SentinelNode>(&fn->_end, SentinelType::kFuncEnd));
+
+  // Initialize the function's detail info from the signature and the environment.
+  Error err = fn->detail().init(signature, environment());
+  if (ASMJIT_UNLIKELY(err))
+    return reportError(err);
+
+  // If the Target guarantees greater stack alignment than required by the calling convention
+  // then override it as we can prevent having to perform dynamic stack alignment
+  auto& callConv = fn->_funcDetail._callConv;
+  const uint32_t targetStackAlignment = _environment.stackAlignment();
+  if (callConv.naturalStackAlignment() < targetStackAlignment)
+    callConv.setNaturalStackAlignment(targetStackAlignment);
+
+  // Initialize the function frame.
+  err = fn->_frame.init(fn->_funcDetail);
+  if (ASMJIT_UNLIKELY(err))
+    return reportError(err);
+
+  // Argument packs are zero-filled; a function without arguments keeps a null `_args`.
+  fn->_args = nullptr;
+  const size_t argBytes = fn->argCount() * sizeof(FuncNode::ArgPack);
+  if (argBytes != 0) {
+    fn->_args = _allocator.allocT<FuncNode::ArgPack>(argBytes);
+    if (ASMJIT_UNLIKELY(!fn->_args))
+      return reportError(DebugUtils::errored(kErrorOutOfMemory));
+    memset(fn->_args, 0, argBytes);
+  }
+
+  ASMJIT_PROPAGATE(registerLabelNode(fn));
+  *out = fn;
+  return kErrorOk;
+}
+
+Error BaseCompiler::addFuncNode(FuncNode** out, const FuncSignature& signature) {
+  // Creates the function node through `newFuncNode()` and then hands it to `addFunc()`.
+  ASMJIT_PROPAGATE(newFuncNode(out, signature));
+  ASMJIT_ASSUME(*out != nullptr);
+  addFunc(*out);
+  return kErrorOk;
+}
+
+Error BaseCompiler::newFuncRetNode(FuncRetNode** out, const Operand_& o0, const Operand_& o1) {
+  // The operand count is given by the last operand that is not none: `o1` -> 2, `o0` -> 1, neither -> 0.
+  const uint32_t usedOps = o1.isNone() ? (o0.isNone() ? 0u : 1u) : 2u;
+
+  FuncRetNode* ret;
+  ASMJIT_PROPAGATE(_newNodeT<FuncRetNode>(&ret));
+  ASMJIT_ASSUME(ret != nullptr);
+
+  ret->setOpCount(usedOps);
+  ret->setOp(0, o0);
+  ret->setOp(1, o1);
+  ret->resetOpRange(2, ret->opCapacity());
+  *out = ret;
+  return kErrorOk;
+}
+
+Error BaseCompiler::addFuncRetNode(FuncRetNode** out, const Operand_& o0, const Operand_& o1) {
+  ASMJIT_PROPAGATE(newFuncRetNode(out, o0, o1)); // Propagates a creation failure to the caller.
+  addNode(*out);                                 // Adds the created return node via `addNode()`.
+  return kErrorOk;
+}
+
+FuncNode* BaseCompiler::addFunc(FuncNode* func) {
+  _func = func;
+  // Remember the cursor right after adding the function node; it is restored once the exit label and end sentinel are added.
+  addNode(func);                 // Function node.
+  BaseNode* prev = cursor();     // {CURSOR}.
+  addNode(func->exitNode());     // Function exit label.
+  addNode(func->endNode());      // Function end sentinel.
+
+  _setCursor(prev);
+  return func;
+}
+
+Error BaseCompiler::endFunc() {
+  FuncNode* current = _func;
+
+  if (ASMJIT_UNLIKELY(!current))
+    return reportError(DebugUtils::errored(kErrorInvalidState));
+
+  // Add the local constant pool at the end of the function (if exists).
+  ConstPoolNode*& localSlot = _constPools[uint32_t(ConstPoolScope::kLocal)];
+  if (ConstPoolNode* pool = localSlot) {
+    setCursor(current->endNode()->prev());
+    addNode(pool);
+    localSlot = nullptr;
+  }
+
+  // Mark as finished - no function is current anymore and the cursor is
+  // moved to the end sentinel of the function that has just been ended.
+  _func = nullptr;
+
+  setCursor(current->endNode());
+
+  return kErrorOk;
+}
+
+// BaseCompiler - Function Invocation
+// ==================================
+
+Error BaseCompiler::newInvokeNode(InvokeNode** out, InstId instId, const Operand_& o0, const FuncSignature& signature) {
+  InvokeNode* invoke;
+  ASMJIT_PROPAGATE(_newNodeT<InvokeNode>(&invoke, instId, InstOptions::kNone));
+
+  // `o0` is the only instruction operand, the remaining operand slots are reset.
+  invoke->setOpCount(1);
+  invoke->setOp(0, o0);
+  invoke->resetOpRange(1, invoke->opCapacity());
+
+  Error err = invoke->detail().init(signature, environment());
+  if (ASMJIT_UNLIKELY(err))
+    return reportError(err);
+
+  // Zero-filled operand packs are only allocated for signatures that have arguments.
+  const size_t packBytes = signature.argCount() * sizeof(InvokeNode::OperandPack);
+  if (packBytes != 0) {
+    invoke->_args = static_cast<InvokeNode::OperandPack*>(_allocator.alloc(packBytes));
+    if (!invoke->_args)
+      return reportError(DebugUtils::errored(kErrorOutOfMemory));
+    memset(invoke->_args, 0, packBytes);
+  }
+  *out = invoke;
+  return kErrorOk;
+}
+
+Error BaseCompiler::addInvokeNode(InvokeNode** out, InstId instId, const Operand_& o0, const FuncSignature& signature) {
+  ASMJIT_PROPAGATE(newInvokeNode(out, instId, o0, signature)); // Propagates a creation failure to the caller.
+  addNode(*out);                                               // Adds the created invoke node via `addNode()`.
+  return kErrorOk;
+}
+
+// BaseCompiler - Virtual Registers
+// ================================
+
+static void BaseCompiler_assignGenericName(BaseCompiler* self, VirtReg* vReg) {
+  // The generic name is "%" followed by the register's virtual index, e.g. "%0".
+  char name[64];
+  unsigned regIndex = unsigned(Operand::virtIdToIndex(vReg->_id));
+  int length = snprintf(name, ASMJIT_ARRAY_SIZE(name), "%%%u", regIndex);
+
+  ASMJIT_ASSERT(length > 0 && length < int(ASMJIT_ARRAY_SIZE(name)));
+  vReg->_name.setData(&self->_dataZone, name, unsigned(length));
+}
+
+Error BaseCompiler::newVirtReg(VirtReg** out, TypeId typeId, OperandSignature signature, const char* name) {
+  *out = nullptr;
+
+  const uint32_t regIndex = _vRegArray.size();
+  if (ASMJIT_UNLIKELY(regIndex >= uint32_t(Operand::kVirtIdCount)))
+    return reportError(DebugUtils::errored(kErrorTooManyVirtRegs));
+
+  // Grow the array first - presumably `appendUnsafe()` below relies on the capacity reserved here.
+  if (ASMJIT_UNLIKELY(_vRegArray.willGrow(&_allocator) != kErrorOk))
+    return reportError(DebugUtils::errored(kErrorOutOfMemory));
+
+  void* storage = _vRegZone.allocZeroedT<VirtReg>();
+  if (ASMJIT_UNLIKELY(!storage))
+    return reportError(DebugUtils::errored(kErrorOutOfMemory));
+
+  // The alignment follows the size of the type, but never exceeds 64.
+  const uint32_t typeSize = TypeUtils::sizeOf(typeId);
+  VirtReg* reg = new(storage) VirtReg(signature, Operand::indexToVirtId(regIndex), typeSize, Support::min<uint32_t>(typeSize, 64), typeId);
+
+#ifndef ASMJIT_NO_LOGGING
+  const bool hasName = name != nullptr && name[0] != '\0';
+  if (!hasName)
+    BaseCompiler_assignGenericName(this, reg);
+  else
+    reg->_name.setData(&_dataZone, name, SIZE_MAX);
+#else
+  DebugUtils::unused(name);
+#endif
+
+  _vRegArray.appendUnsafe(reg);
+  *out = reg;
+  return kErrorOk;
+}
+
+Error BaseCompiler::_newReg(BaseReg* out, TypeId typeId, const char* name) {
+  out->reset();
+
+  // Resolve the register signature for the target architecture; the call may also rewrite `typeId`.
+  OperandSignature signature;
+  Error err = ArchUtils::typeIdToRegSignature(arch(), typeId, &typeId, &signature);
+  if (ASMJIT_UNLIKELY(err))
+    return reportError(err);
+
+  VirtReg* created;
+  ASMJIT_PROPAGATE(newVirtReg(&created, typeId, signature, name));
+  ASMJIT_ASSUME(created != nullptr);
+  out->_initReg(signature, created->id());
+  return kErrorOk;
+}
+
+Error BaseCompiler::_newRegFmt(BaseReg* out, TypeId typeId, const char* fmt, ...) {
+  va_list ap;
+  StringTmp<256> sb;
+  // Format `fmt` with the variadic arguments into `sb` and use the result as the register name.
+  va_start(ap, fmt);
+  sb.appendVFormat(fmt, ap);
+  va_end(ap);
+
+  return _newReg(out, typeId, sb.data());
+}
+
+Error BaseCompiler::_newReg(BaseReg* out, const BaseReg& ref, const char* name) {
+  out->reset();
+
+  OperandSignature regSignature;
+  TypeId typeId;
+  // Pick the TypeId: a valid virtual `ref` provides the one it was created with, adjusted to the size of `ref`.
+  if (isVirtRegValid(ref)) {
+    VirtReg* vRef = virtRegByReg(ref);
+    typeId = vRef->typeId();
+
+    // NOTE: It's possible to cast one register type to another if it's the same register group. However, VirtReg
+    // always contains the TypeId that was used to create the register. This means that in some cases we may end
+    // up having different size of `ref` and `vRef`. In such case we adjust the TypeId to match the `ref` register
+    // type instead of the original register type, which should be the expected behavior.
+    uint32_t typeSize = TypeUtils::sizeOf(typeId);
+    uint32_t refSize = ref.size();
+
+    if (typeSize != refSize) {
+      if (TypeUtils::isInt(typeId)) {
+        // GP register - change TypeId to match `ref`, but keep sign of `vRef`.
+        switch (refSize) {
+          case  1: typeId = TypeId(uint32_t(TypeId::kInt8 ) | (uint32_t(typeId) & 1)); break;
+          case  2: typeId = TypeId(uint32_t(TypeId::kInt16) | (uint32_t(typeId) & 1)); break;
+          case  4: typeId = TypeId(uint32_t(TypeId::kInt32) | (uint32_t(typeId) & 1)); break;
+          case  8: typeId = TypeId(uint32_t(TypeId::kInt64) | (uint32_t(typeId) & 1)); break;
+          default: typeId = TypeId::kVoid; break;
+        }
+      }
+      else if (TypeUtils::isMmx(typeId)) {
+        // MMX register - always use 64-bit.
+        typeId = TypeId::kMmx64;
+      }
+      else if (TypeUtils::isMask(typeId)) {
+        // Mask register - change TypeId to match `ref` size.
+        switch (refSize) {
+          case  1: typeId = TypeId::kMask8; break;
+          case  2: typeId = TypeId::kMask16; break;
+          case  4: typeId = TypeId::kMask32; break;
+          case  8: typeId = TypeId::kMask64; break;
+          default: typeId = TypeId::kVoid; break;
+        }
+      }
+      else {
+        // Vector register - change TypeId to match `ref` size, keep vector metadata.
+        TypeId scalarTypeId = TypeUtils::scalarOf(typeId);
+        switch (refSize) {
+          case 16: typeId = TypeUtils::scalarToVector(scalarTypeId, TypeId::_kVec128Start); break;
+          case 32: typeId = TypeUtils::scalarToVector(scalarTypeId, TypeId::_kVec256Start); break;
+          case 64: typeId = TypeUtils::scalarToVector(scalarTypeId, TypeId::_kVec512Start); break;
+          default: typeId = TypeId::kVoid; break;
+        }
+      }
+      // kVoid means that `refSize` has no TypeId of the matching kind - reported as an invalid state.
+      if (typeId == TypeId::kVoid)
+        return reportError(DebugUtils::errored(kErrorInvalidState));
+    }
+  }
+  else {
+    typeId = ArchTraits::byArch(arch()).regTypeToTypeId(ref.type());
+  }
+  // Resolve the final register signature and create the virtual register that backs `out`.
+  Error err = ArchUtils::typeIdToRegSignature(arch(), typeId, &typeId, &regSignature);
+  if (ASMJIT_UNLIKELY(err))
+    return reportError(err);
+
+  VirtReg* vReg;
+  ASMJIT_PROPAGATE(newVirtReg(&vReg, typeId, regSignature, name));
+  ASMJIT_ASSUME(vReg != nullptr);
+
+  out->_initReg(regSignature, vReg->id());
+  return kErrorOk;
+}
+
+Error BaseCompiler::_newRegFmt(BaseReg* out, const BaseReg& ref, const char* fmt, ...) {
+  va_list ap;
+  StringTmp<256> sb;
+  // Format `fmt` with the variadic arguments into `sb` and use the result as the register name.
+  va_start(ap, fmt);
+  sb.appendVFormat(fmt, ap);
+  va_end(ap);
+
+  return _newReg(out, ref, sb.data());
+}
+
+Error BaseCompiler::_newStack(BaseMem* out, uint32_t size, uint32_t alignment, const char* name) {
+  out->reset();
+
+  // A zero size is rejected, a zero alignment is treated as an alignment of 1.
+  if (size == 0)
+    return reportError(DebugUtils::errored(kErrorInvalidArgument));
+
+  uint32_t slotAlignment = alignment ? alignment : 1u;
+  if (!Support::isPowerOf2(slotAlignment))
+    return reportError(DebugUtils::errored(kErrorInvalidArgument));
+
+  // The alignment is capped at 64.
+  slotAlignment = Support::min<uint32_t>(slotAlignment, 64);
+
+  // The stack slot is represented by a void-typed virtual register flagged as stack.
+  VirtReg* slot;
+  ASMJIT_PROPAGATE(newVirtReg(&slot, TypeId::kVoid, OperandSignature{0}, name));
+  ASMJIT_ASSUME(slot != nullptr);
+
+  slot->_isStack = true;
+  slot->_virtSize = size;
+  slot->_alignment = uint8_t(slotAlignment);
+
+  // Set the memory operand to GPD/GPQ and its id to VirtReg.
+  const auto memSignature = OperandSignature::fromOpType(OperandType::kMem) |
+                            OperandSignature::fromMemBaseType(_gpSignature.regType()) |
+                            OperandSignature::fromBits(OperandSignature::kMemRegHomeFlag);
+  *out = BaseMem(memSignature, slot->id(), 0, 0);
+  return kErrorOk;
+}
+
+Error BaseCompiler::setStackSize(uint32_t virtId, uint32_t newSize, uint32_t newAlignment) {
+  if (!isVirtIdValid(virtId))
+    return DebugUtils::errored(kErrorInvalidVirtId);
+
+  if (newAlignment && !Support::isPowerOf2(newAlignment))
+    return reportError(DebugUtils::errored(kErrorInvalidArgument));
+
+  if (newAlignment > 64)
+    newAlignment = 64;
+
+  VirtReg* vReg = virtRegById(virtId);
+  if (newSize)
+    vReg->_virtSize = newSize;
+
+  if (newAlignment)
+    vReg->_alignment = uint8_t(newAlignment);
+
+  // This is required if the RAPass is already running. There is a chance that a stack-slot has been already
+  // allocated and in that case it has to be updated as well, otherwise we would allocate wrong amount of memory.
+  RAWorkReg* workReg = vReg->_workReg;
+  if (workReg && workReg->_stackSlot) {
+    workReg->_stackSlot->_size = vReg->_virtSize;
+    workReg->_stackSlot->_alignment = vReg->_alignment;
+  }
+
+  return kErrorOk;
+}
+
+Error BaseCompiler::_newConst(BaseMem* out, ConstPoolScope scope, const void* data, size_t size) { // Adds `size` bytes of `data` to the pool selected by `scope` and stores a label-based operand to `out`.
+  out->reset(); // `out` stays reset if anything below fails.
+
+  if (uint32_t(scope) > 1)
+    return reportError(DebugUtils::errored(kErrorInvalidArgument)); // `_constPools` has two entries, so only indexes 0 and 1 are valid.
+
+  if (!_constPools[uint32_t(scope)])
+    ASMJIT_PROPAGATE(newConstPoolNode(&_constPools[uint32_t(scope)])); // The pool node is created lazily on first use.
+
+  ConstPoolNode* pool = _constPools[uint32_t(scope)];
+  size_t off; // Receives the offset passed back by pool->add(); used as the operand's displacement below.
+  Error err = pool->add(data, size, off);
+
+  if (ASMJIT_UNLIKELY(err))
+    return reportError(err);
+
+  *out = BaseMem(OperandSignature::fromOpType(OperandType::kMem) |
+                 OperandSignature::fromMemBaseType(RegType::kLabelTag) |
+                 OperandSignature::fromSize(uint32_t(size)),
+                 pool->labelId(), 0, int32_t(off)); // Base is the pool's label; `size` and `off` are narrowed to 32 bits.
+  return kErrorOk;
+}
+
+void BaseCompiler::rename(const BaseReg& reg, const char* fmt, ...) { // Renames `reg`; a null or empty `fmt` assigns a generic name instead.
+  if (!isVirtRegValid(reg)) return; // Ignore ids that don't map to an entry of `_vRegArray` (virtRegById() only asserts on that).
+
+  VirtReg* vReg = virtRegById(reg.id());
+  if (!vReg) return;
+
+  if (fmt && fmt[0] != '\0') {
+    char buf[128]; // vsnprintf() truncates formatted names that don't fit into this buffer.
+    va_list ap;
+
+    va_start(ap, fmt);
+    vsnprintf(buf, ASMJIT_ARRAY_SIZE(buf), fmt, ap);
+    va_end(ap);
+
+    vReg->_name.setData(&_dataZone, buf, SIZE_MAX); // NOTE(review): SIZE_MAX presumably means "measure the NUL-terminated string" - confirm in ZoneString.
+  }
+  else {
+    BaseCompiler_assignGenericName(this, vReg);
+  }
+}
+
+// BaseCompiler - Jump Annotations
+// ===============================
+
+Error BaseCompiler::newJumpNode(JumpNode** out, InstId instId, InstOptions instOptions, const Operand_& o0, JumpAnnotation* annotation) { // Allocates a one-operand JumpNode; the node is not added to the stream here.
+  JumpNode* node = _allocator.allocT<JumpNode>(); // Raw storage only - the node is constructed by the placement-new below.
+  uint32_t opCount = 1;
+
+  *out = node; // `*out` is written before the null check, so it is nullptr when the allocation failed.
+  if (ASMJIT_UNLIKELY(!node))
+    return reportError(DebugUtils::errored(kErrorOutOfMemory));
+
+  node = new(node) JumpNode(this, instId, instOptions, opCount, annotation);
+  node->setOp(0, o0);
+  node->resetOpRange(opCount, JumpNode::kBaseOpCapacity); // Resets the operand slots from `opCount` up to the base capacity.
+
+  return kErrorOk;
+}
+
+Error BaseCompiler::emitAnnotatedJump(InstId instId, const Operand_& o0, JumpAnnotation* annotation) { // Creates a JumpNode for `instId`/`o0` and adds it to the stream.
+  InstOptions options = instOptions() | forcedInstOptions(); // Snapshot the pending per-instruction state before it is reset below.
+  RegOnly extra = extraReg();
+  const char* comment = inlineComment();
+
+  resetInstOptions(); // The pending state is cleared even if node creation fails afterwards.
+  resetInlineComment();
+  resetExtraReg();
+
+  JumpNode* node;
+  ASMJIT_PROPAGATE(newJumpNode(&node, instId, options, o0, annotation));
+
+  node->setExtraReg(extra);
+  if (comment)
+    node->setInlineComment(static_cast<char*>(_dataZone.dup(comment, strlen(comment), true))); // The comment is duplicated into `_dataZone`; a failed dup() is not reported.
+
+  addNode(node);
+  return kErrorOk;
+}
+
+JumpAnnotation* BaseCompiler::newJumpAnnotation() { // Returns a new annotation registered in `_jumpAnnotations`, or nullptr (after reporting out-of-memory).
+  if (_jumpAnnotations.grow(&_allocator, 1) != kErrorOk) { // Make room for one more entry first; appendUnsafe() below relies on it.
+    reportError(DebugUtils::errored(kErrorOutOfMemory));
+    return nullptr;
+  }
+
+  uint32_t id = _jumpAnnotations.size(); // The annotation id is the index it will occupy in `_jumpAnnotations`.
+  JumpAnnotation* jumpAnnotation = _allocator.newT<JumpAnnotation>(this, id);
+
+  if (!jumpAnnotation) {
+    reportError(DebugUtils::errored(kErrorOutOfMemory));
+    return nullptr;
+  }
+
+  _jumpAnnotations.appendUnsafe(jumpAnnotation);
+  return jumpAnnotation;
+}
+
+// BaseCompiler - Events
+// =====================
+
+Error BaseCompiler::onAttach(CodeHolder* code) noexcept { // Attaches to `code`: picks the native GP signature and registers GlobalConstPoolPass.
+  ASMJIT_PROPAGATE(Base::onAttach(code));
+
+  const ArchTraits& archTraits = ArchTraits::byArch(code->arch());
+  RegType nativeRegType = Environment::is32Bit(code->arch()) ? RegType::kGp32 : RegType::kGp64; // Native GP register type: 32-bit or 64-bit.
+  _gpSignature = archTraits.regTypeToSignature(nativeRegType); // Read later by _newStack() to pick the memory base type.
+
+  Error err = addPassT<GlobalConstPoolPass>();
+  if (ASMJIT_UNLIKELY(err)) {
+    onDetach(code); // Undo the attach done so far before returning the error.
+    return err;
+  }
+
+  return kErrorOk;
+}
+
+Error BaseCompiler::onDetach(CodeHolder* code) noexcept { // Drops compiler-specific state, then lets the base class detach.
+  _func = nullptr; // No current function anymore.
+  _constPools[uint32_t(ConstPoolScope::kLocal)] = nullptr;
+  _constPools[uint32_t(ConstPoolScope::kGlobal)] = nullptr;
+
+  _vRegArray.reset(); // NOTE(review): `_jumpAnnotations` is not reset here - confirm that Base::onDetach() makes this safe.
+  _vRegZone.reset();
+
+  return Base::onDetach(code);
+}
+
+// FuncPass - Construction & Destruction
+// =====================================
+
+FuncPass::FuncPass(const char* name) noexcept
+  : Pass(name) {} // Only forwards `name` to the Pass base class.
+
+// FuncPass - Run
+// ==============
+
+Error FuncPass::run(Zone* zone, Logger* logger) { // Walks the node list and calls runOnFunction() for every FuncNode found.
+  BaseNode* node = cb()->firstNode();
+  if (!node) return kErrorOk; // Empty node list - nothing to do.
+
+  do {
+    if (node->type() == NodeType::kFunc) {
+      FuncNode* func = node->as<FuncNode>();
+      node = func->endNode(); // Continue after the end sentinel; taken before runOnFunction() is called. Assumes endNode() is non-null.
+      ASMJIT_PROPAGATE(runOnFunction(zone, logger, func));
+    }
+
+    // Find a function by skipping all nodes that are not `NodeType::kFunc`.
+    do {
+      node = node->next();
+    } while (node && node->type() != NodeType::kFunc);
+  } while (node);
+
+  return kErrorOk;
+}
+
+ASMJIT_END_NAMESPACE
+
+#endif // !ASMJIT_NO_COMPILER
diff --git a/lib/lepton/asmjit/core/compiler.h b/lib/lepton/asmjit/core/compiler.h
new file mode 100644
index 0000000000..709fd952ac
--- /dev/null
+++ b/lib/lepton/asmjit/core/compiler.h
@@ -0,0 +1,737 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_COMPILER_H_INCLUDED
+#define ASMJIT_CORE_COMPILER_H_INCLUDED
+
+#include "../core/api-config.h"
+#ifndef ASMJIT_NO_COMPILER
+
+#include "../core/assembler.h"
+#include "../core/builder.h"
+#include "../core/constpool.h"
+#include "../core/compilerdefs.h"
+#include "../core/func.h"
+#include "../core/inst.h"
+#include "../core/operand.h"
+#include "../core/support.h"
+#include "../core/zone.h"
+#include "../core/zonevector.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+class JumpAnnotation;
+class JumpNode;
+class FuncNode;
+class FuncRetNode;
+class InvokeNode;
+
+//! \addtogroup asmjit_compiler
+//! \{
+
+//! Code emitter that uses virtual registers and performs register allocation.
+//!
+//! Compiler is a high-level code-generation tool that provides register allocation and automatic handling of function
+//! calling conventions. It was primarily designed for merging multiple parts of code into a function without worrying
+//! about registers and function calling conventions.
+//!
+//! BaseCompiler can be used, with a minimum effort, to handle 32-bit and 64-bit code generation within a single code
+//! base.
+//!
+//! BaseCompiler is based on BaseBuilder and contains all the features it provides. It means that the code it stores
+//! can be modified (removed, added, injected) and analyzed. When the code is finalized the compiler can emit the code
+//! into an Assembler to translate the abstract representation into a machine code.
+//!
+//! Check out architecture specific compilers for more details and examples:
+//!
+//!   - \ref x86::Compiler - X86/X64 compiler implementation.
+class ASMJIT_VIRTAPI BaseCompiler : public BaseBuilder {
+public:
+  ASMJIT_NONCOPYABLE(BaseCompiler)
+  typedef BaseBuilder Base;
+
+  //! \name Members
+  //! \{
+
+  //! Current function.
+  FuncNode* _func;
+  //! Allocates `VirtReg` objects.
+  Zone _vRegZone;
+  //! Stores array of `VirtReg` pointers.
+  ZoneVector<VirtReg*> _vRegArray;
+  //! Stores jump annotations.
+  ZoneVector<JumpAnnotation*> _jumpAnnotations;
+
+  //! Local and global constant pools.
+  //!
+  //! Local constant pool is flushed with each function, global constant pool is flushed only by \ref finalize().
+  ConstPoolNode* _constPools[2];
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Creates a new `BaseCompiler` instance.
+  ASMJIT_API BaseCompiler() noexcept;
+  //! Destroys the `BaseCompiler` instance.
+  ASMJIT_API virtual ~BaseCompiler() noexcept;
+
+  //! \}
+
+  //! \name Function Management
+  //! \{
+
+  //! Creates a new \ref FuncNode.
+  ASMJIT_API Error newFuncNode(FuncNode** ASMJIT_NONNULL(out), const FuncSignature& signature);
+  //! Creates a new \ref FuncNode and adds it to the instruction stream.
+  ASMJIT_API Error addFuncNode(FuncNode** ASMJIT_NONNULL(out), const FuncSignature& signature);
+
+  //! Creates a new \ref FuncRetNode.
+  ASMJIT_API Error newFuncRetNode(FuncRetNode** ASMJIT_NONNULL(out), const Operand_& o0, const Operand_& o1);
+  //! Creates a new \ref FuncRetNode and adds it to the instruction stream.
+  ASMJIT_API Error addFuncRetNode(FuncRetNode** ASMJIT_NONNULL(out), const Operand_& o0, const Operand_& o1);
+
+  //! Returns the current function.
+  inline FuncNode* func() const noexcept { return _func; }
+
+  //! Creates a new \ref FuncNode with the given `signature` and returns it.
+  inline FuncNode* newFunc(const FuncSignature& signature) {
+    FuncNode* node;
+    newFuncNode(&node, signature); // The returned Error is dropped; callers only see the node pointer.
+    return node;
+  }
+
+  //! Creates a new \ref FuncNode with the given `signature`, adds it to the instruction stream by using
+  //! the \ref addFunc(FuncNode*) overload, and returns it.
+  inline FuncNode* addFunc(const FuncSignature& signature) {
+    FuncNode* node;
+    addFuncNode(&node, signature); // The returned Error is dropped; callers only see the node pointer.
+    return node;
+  }
+
+  //! Adds a function node `func` to the instruction stream.
+  ASMJIT_API FuncNode* addFunc(FuncNode* ASMJIT_NONNULL(func));
+  //! Emits a sentinel that marks the end of the current function.
+  ASMJIT_API Error endFunc();
+
+#if !defined(ASMJIT_NO_DEPRECATED)
+  inline Error _setArg(size_t argIndex, size_t valueIndex, const BaseReg& reg); // Backs the deprecated setArg() overloads below.
+
+  //! Sets a function argument at `argIndex` to `reg`.
+  ASMJIT_DEPRECATED("Setting arguments through Compiler is deprecated, use FuncNode->setArg() instead")
+  inline Error setArg(size_t argIndex, const BaseReg& reg) { return _setArg(argIndex, 0, reg); }
+
+  //! Sets a function argument at `argIndex` at `valueIndex` to `reg`.
+  ASMJIT_DEPRECATED("Setting arguments through Compiler is deprecated, use FuncNode->setArg() instead")
+  inline Error setArg(size_t argIndex, size_t valueIndex, const BaseReg& reg) { return _setArg(argIndex, valueIndex, reg); }
+#endif
+
+  inline Error addRet(const Operand_& o0, const Operand_& o1) { // Adds a FuncRetNode with operands `o0`/`o1`; the created node is not returned.
+    FuncRetNode* node;
+    return addFuncRetNode(&node, o0, o1);
+  }
+
+  //! \}
+
+  //! \name Function Invocation
+  //! \{
+
+  //! Creates a new \ref InvokeNode.
+  ASMJIT_API Error newInvokeNode(InvokeNode** ASMJIT_NONNULL(out), InstId instId, const Operand_& o0, const FuncSignature& signature);
+  //! Creates a new \ref InvokeNode and adds it to the instruction stream.
+  ASMJIT_API Error addInvokeNode(InvokeNode** ASMJIT_NONNULL(out), InstId instId, const Operand_& o0, const FuncSignature& signature);
+
+  //! \}
+
+  //! \name Virtual Registers
+  //! \{
+
+  //! Creates a new virtual register representing the given `typeId` and `signature`.
+  //!
+  //! \note This function is public, but it's not generally recommended to be used by AsmJit users, use architecture
+  //! specific `newReg()` functionality instead or functions like \ref _newReg() and \ref _newRegFmt().
+  ASMJIT_API Error newVirtReg(VirtReg** ASMJIT_NONNULL(out), TypeId typeId, OperandSignature signature, const char* name);
+
+  //! Creates a new virtual register of the given `typeId` and stores it to `out` operand.
+  ASMJIT_API Error _newReg(BaseReg* ASMJIT_NONNULL(out), TypeId typeId, const char* name = nullptr);
+
+  //! Creates a new virtual register of the given `typeId` and stores it to `out` operand.
+  //!
+  //! \note This version accepts a snprintf() format `fmt` followed by variadic arguments.
+  ASMJIT_API Error _newRegFmt(BaseReg* ASMJIT_NONNULL(out), TypeId typeId, const char* fmt, ...);
+
+  //! Creates a new virtual register compatible with the provided reference register `ref`.
+  ASMJIT_API Error _newReg(BaseReg* ASMJIT_NONNULL(out), const BaseReg& ref, const char* name = nullptr);
+
+  //! Creates a new virtual register compatible with the provided reference register `ref`.
+  //!
+  //! \note This version accepts a snprintf() format `fmt` followed by variadic arguments.
+  ASMJIT_API Error _newRegFmt(BaseReg* ASMJIT_NONNULL(out), const BaseReg& ref, const char* fmt, ...);
+
+  //! Tests whether the given `id` is a valid virtual register id.
+  inline bool isVirtIdValid(uint32_t id) const noexcept {
+    uint32_t index = Operand::virtIdToIndex(id);
+    return index < _vRegArray.size(); // Valid means the id maps to an existing slot of `_vRegArray`.
+  }
+  //! Tests whether the given `reg` is a virtual register having a valid id.
+  inline bool isVirtRegValid(const BaseReg& reg) const noexcept {
+    return isVirtIdValid(reg.id());
+  }
+
+  //! Returns \ref VirtReg associated with the given `id`.
+  inline VirtReg* virtRegById(uint32_t id) const noexcept {
+    ASMJIT_ASSERT(isVirtIdValid(id)); // Validity is only asserted - check isVirtIdValid() first when `id` is untrusted.
+    return _vRegArray[Operand::virtIdToIndex(id)];
+  }
+
+  //! Returns \ref VirtReg associated with the given `reg`.
+  inline VirtReg* virtRegByReg(const BaseReg& reg) const noexcept { return virtRegById(reg.id()); }
+
+  //! Returns \ref VirtReg associated with the given virtual register `index`.
+  //!
+  //! \note This is not the same as virtual register id. The conversion between id and its index is implemented
+  //! by \ref Operand_::virtIdToIndex() and \ref Operand_::indexToVirtId() functions.
+  inline VirtReg* virtRegByIndex(uint32_t index) const noexcept { return _vRegArray[index]; }
+
+  //! Returns an array of all virtual registers managed by the Compiler.
+  inline const ZoneVector<VirtReg*>& virtRegs() const noexcept { return _vRegArray; }
+
+  //! \name Stack
+  //! \{
+
+  //! Creates a new stack of the given `size` and `alignment` and stores it to `out`.
+  //!
+  //! \note `name` can be used to give the stack a name, for debugging purposes.
+  ASMJIT_API Error _newStack(BaseMem* ASMJIT_NONNULL(out), uint32_t size, uint32_t alignment, const char* name = nullptr);
+
+  //! Updates the stack size of a stack created by `_newStack()` by its `virtId`.
+  ASMJIT_API Error setStackSize(uint32_t virtId, uint32_t newSize, uint32_t newAlignment = 0);
+
+  //! Updates the stack size of a stack created by `_newStack()`.
+  inline Error setStackSize(const BaseMem& mem, uint32_t newSize, uint32_t newAlignment = 0) {
+    return setStackSize(mem.id(), newSize, newAlignment);
+  }
+
+  //! \}
+
+  //! \name Constants
+  //! \{
+
+  //! Creates a new constant of the given `scope` (see \ref ConstPoolScope).
+  //!
+  //! This function adds a constant of the given `size` to the built-in \ref ConstPool and stores the reference to that
+  //! constant to the `out` operand.
+  ASMJIT_API Error _newConst(BaseMem* ASMJIT_NONNULL(out), ConstPoolScope scope, const void* data, size_t size);
+
+  //! \}
+
+  //! \name Miscellaneous
+  //! \{
+
+  //! Rename the given virtual register `reg` to a formatted string `fmt`.
+  ASMJIT_API void rename(const BaseReg& reg, const char* fmt, ...);
+
+  //! \}
+
+  //! \name Jump Annotations
+  //! \{
+
+  inline const ZoneVector<JumpAnnotation*>& jumpAnnotations() const noexcept { // Returns the `_jumpAnnotations` vector.
+    return _jumpAnnotations;
+  }
+
+  ASMJIT_API Error newJumpNode(JumpNode** ASMJIT_NONNULL(out), InstId instId, InstOptions instOptions, const Operand_& o0, JumpAnnotation* annotation);
+  ASMJIT_API Error emitAnnotatedJump(InstId instId, const Operand_& o0, JumpAnnotation* annotation);
+
+  //! Returns a new `JumpAnnotation` instance, which can be used to aggregate possible targets of a jump where the
+  //! target is not a label, for example to implement jump tables.
+  ASMJIT_API JumpAnnotation* newJumpAnnotation();
+
+  //! \}
+
+  //! \name Events
+  //! \{
+
+  ASMJIT_API Error onAttach(CodeHolder* code) noexcept override;
+  ASMJIT_API Error onDetach(CodeHolder* code) noexcept override;
+
+  //! \}
+};
+
+//! Jump annotation used to annotate jumps.
+//!
+//! \ref BaseCompiler allows emitting jumps where the target is either register or memory operand. Such jumps cannot
+//! be trivially inspected, so instead of doing heuristics AsmJit allows annotating such jumps with possible targets.
+//! Register allocator then uses the annotation to construct control-flow, which is then used by liveness analysis and
+//! other tools to prepare ground for register allocation.
+class JumpAnnotation {
+public:
+  ASMJIT_NONCOPYABLE(JumpAnnotation)
+
+  //! \name Members
+  //! \{
+
+  //! Compiler that owns this JumpAnnotation.
+  BaseCompiler* _compiler;
+  //! Annotation identifier.
+  uint32_t _annotationId;
+  //! Vector of label identifiers, see \ref labelIds().
+  ZoneVector<uint32_t> _labelIds;
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  inline JumpAnnotation(BaseCompiler* ASMJIT_NONNULL(compiler), uint32_t annotationId) noexcept
+    : _compiler(compiler),
+      _annotationId(annotationId) {} // `_labelIds` is left default-constructed.
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns the compiler that owns this JumpAnnotation.
+  inline BaseCompiler* compiler() const noexcept { return _compiler; }
+  //! Returns the annotation id.
+  inline uint32_t annotationId() const noexcept { return _annotationId; }
+  //! Returns a vector of label identifiers that lists all targets of the jump.
+  const ZoneVector<uint32_t>& labelIds() const noexcept { return _labelIds; }
+
+  //! Tests whether the given `label` is a target of this JumpAnnotation.
+  inline bool hasLabel(const Label& label) const noexcept { return hasLabelId(label.id()); }
+  //! Tests whether the given `labelId` is a target of this JumpAnnotation.
+  inline bool hasLabelId(uint32_t labelId) const noexcept { return _labelIds.contains(labelId); }
+
+  //! \}
+
+  //! \name Annotation Building API
+  //! \{
+
+  //! Adds the `label` to the list of targets of this JumpAnnotation.
+  inline Error addLabel(const Label& label) noexcept { return addLabelId(label.id()); }
+  //! Appends the `labelId` to the list of targets of this JumpAnnotation, using the owning compiler's allocator.
+  inline Error addLabelId(uint32_t labelId) noexcept { return _labelIds.append(&_compiler->_allocator, labelId); }
+
+  //! \}
+};
+
+//! Jump instruction with \ref JumpAnnotation.
+//!
+//! \note This node should be only used to represent a jump where the jump target cannot be deduced by examining
+//! instruction operands. For example if the jump target is a register or memory location. This pattern is often
+//! used to perform indirect jumps that use a jump table, e.g. to implement a `switch{}` statement.
+class JumpNode : public InstNode {
+public:
+  ASMJIT_NONCOPYABLE(JumpNode)
+
+  //! \name Members
+  //! \{
+
+  JumpAnnotation* _annotation; //!< Annotation associated with this jump, can be `nullptr` (see \ref hasAnnotation()).
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  inline JumpNode(BaseCompiler* ASMJIT_NONNULL(cc), InstId instId, InstOptions options, uint32_t opCount, JumpAnnotation* annotation) noexcept
+    : InstNode(cc, instId, options, opCount, kBaseOpCapacity),
+      _annotation(annotation) {
+    setType(NodeType::kJump); // Re-tags the node as a jump node after the InstNode base is constructed.
+  }
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Tests whether this JumpNode has associated a \ref JumpAnnotation.
+  inline bool hasAnnotation() const noexcept { return _annotation != nullptr; }
+  //! Returns the \ref JumpAnnotation associated with this jump, or `nullptr`.
+  inline JumpAnnotation* annotation() const noexcept { return _annotation; }
+  //! Sets the \ref JumpAnnotation associated with this jump to `annotation`.
+  inline void setAnnotation(JumpAnnotation* annotation) noexcept { _annotation = annotation; }
+
+  //! \}
+};
+
+//! Function node represents a function used by \ref BaseCompiler.
+//!
+//! A function is composed of the following:
+//!
+//!   - Function entry, \ref FuncNode acts as a label, so the entry is implicit. To get the entry, simply use
+//!     \ref FuncNode::label(), which is the same as \ref LabelNode::label().
+//!
+//!   - Function exit, which is represented by \ref FuncNode::exitNode(). A helper function
+//!     \ref FuncNode::exitLabel() exists and returns an exit label instead of node.
+//!
+//!   - Function \ref FuncNode::endNode() sentinel. This node marks the end of a function - there should be no
+//!     code that belongs to the function after this node, but the Compiler doesn't enforce that at the moment.
+//!
+//!   - Function detail, see \ref FuncNode::detail().
+//!
+//!   - Function frame, see \ref FuncNode::frame().
+//!
+//!   - Function arguments mapped to virtual registers, see \ref FuncNode::argPacks().
+//!
+//! In a node list, the function and its body looks like the following:
+//!
+//! \code{.unparsed}
+//! [...]       - Anything before the function.
+//!
+//! [FuncNode]  - Entry point of the function, acts as a label as well.
+//!   <Prolog>  - Prolog inserted by the register allocator.
+//!   {...}     - Function body - user code basically.
+//! [ExitLabel] - Exit label
+//!   <Epilog>  - Epilog inserted by the register allocator.
+//!   <Return>  - Return inserted by the register allocator.
+//!   {...}     - Can contain data or user code (error handling, special cases, ...).
+//! [FuncEnd]   - End sentinel
+//!
+//! [...]       - Anything after the function.
+//! \endcode
+//!
+//! When a function is added to the instruction stream by \ref BaseCompiler::addFunc() it actually inserts 3 nodes
+//! (FuncNode, ExitLabel, and FuncEnd) and sets the current cursor to be FuncNode. When \ref BaseCompiler::endFunc()
+//! is called the cursor is set to FuncEnd. This guarantees that user can use ExitLabel as a marker after which
+//! additional code or data can be placed, which is a common practice.
+class FuncNode : public LabelNode {
+public:
+  ASMJIT_NONCOPYABLE(FuncNode)
+
+  //! Arguments pack.
+  struct ArgPack {
+    RegOnly _data[Globals::kMaxValuePack]; //!< One register slot per value of the pack.
+
+    inline void reset() noexcept { // Resets every slot of the pack.
+      for (size_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++)
+        _data[valueIndex].reset();
+    }
+
+    inline RegOnly& operator[](size_t valueIndex) noexcept { return _data[valueIndex]; } // `valueIndex` is not range-checked here.
+    inline const RegOnly& operator[](size_t valueIndex) const noexcept { return _data[valueIndex]; }
+  };
+
+  //! \name Members
+  //! \{
+
+  //! Function detail.
+  FuncDetail _funcDetail;
+  //! Function frame.
+  FuncFrame _frame;
+  //! Function exit label.
+  LabelNode* _exitNode;
+  //! Function end (sentinel).
+  SentinelNode* _end;
+  //! Argument packs.
+  ArgPack* _args;
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Creates a new `FuncNode` instance.
+  //!
+  //! Always use `BaseCompiler::addFunc()` to create a new `FuncNode`.
+  inline FuncNode(BaseBuilder* ASMJIT_NONNULL(cb)) noexcept
+    : LabelNode(cb),
+      _funcDetail(),
+      _frame(),
+      _exitNode(nullptr),
+      _end(nullptr),
+      _args(nullptr) {
+    setType(NodeType::kFunc); // Exit node, end sentinel and argument packs stay null after construction.
+  }
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns function exit `LabelNode`.
+  inline LabelNode* exitNode() const noexcept { return _exitNode; }
+  //! Returns function exit label.
+  inline Label exitLabel() const noexcept { return _exitNode->label(); }
+
+  //! Returns "End of Func" sentinel node.
+  inline SentinelNode* endNode() const noexcept { return _end; }
+
+  //! Returns function detail.
+  inline FuncDetail& detail() noexcept { return _funcDetail; }
+  //! Returns function detail.
+  inline const FuncDetail& detail() const noexcept { return _funcDetail; }
+
+  //! Returns function frame.
+  inline FuncFrame& frame() noexcept { return _frame; }
+  //! Returns function frame.
+  inline const FuncFrame& frame() const noexcept { return _frame; }
+
+  //! Returns function attributes.
+  inline FuncAttributes attributes() const noexcept { return _frame.attributes(); }
+  //! Adds `attrs` to the function attributes.
+  inline void addAttributes(FuncAttributes attrs) noexcept { _frame.addAttributes(attrs); }
+
+  //! Returns arguments count.
+  inline uint32_t argCount() const noexcept { return _funcDetail.argCount(); }
+  //! Returns argument packs.
+  inline ArgPack* argPacks() const noexcept { return _args; }
+
+  //! Tests whether the function has a return value.
+  inline bool hasRet() const noexcept { return _funcDetail.hasRet(); }
+
+  //! Returns argument pack at `argIndex`.
+  inline ArgPack& argPack(size_t argIndex) const noexcept {
+    ASMJIT_ASSERT(argIndex < argCount());
+    return _args[argIndex];
+  }
+
+  //! Sets the first value (`valueIndex` 0) of the argument at `argIndex`.
+  inline void setArg(size_t argIndex, const BaseReg& vReg) noexcept {
+    ASMJIT_ASSERT(argIndex < argCount());
+    _args[argIndex][0].init(vReg);
+  }
+
+  //! \overload
+  inline void setArg(size_t argIndex, const RegOnly& vReg) noexcept {
+    ASMJIT_ASSERT(argIndex < argCount());
+    _args[argIndex][0].init(vReg);
+  }
+
+  //! Sets argument at `argIndex` and `valueIndex` (only `argIndex` is asserted).
+  inline void setArg(size_t argIndex, size_t valueIndex, const BaseReg& vReg) noexcept {
+    ASMJIT_ASSERT(argIndex < argCount());
+    _args[argIndex][valueIndex].init(vReg);
+  }
+
+  //! \overload
+  inline void setArg(size_t argIndex, size_t valueIndex, const RegOnly& vReg) noexcept {
+    ASMJIT_ASSERT(argIndex < argCount());
+    _args[argIndex][valueIndex].init(vReg);
+  }
+
+  //! Resets argument pack at `argIndex`.
+  inline void resetArg(size_t argIndex) noexcept {
+    ASMJIT_ASSERT(argIndex < argCount());
+    _args[argIndex].reset();
+  }
+
+  //! Resets a single argument value at `argIndex` and `valueIndex`.
+  inline void resetArg(size_t argIndex, size_t valueIndex) noexcept {
+    ASMJIT_ASSERT(argIndex < argCount());
+    _args[argIndex][valueIndex].reset();
+  }
+
+  //! \}
+};
+
+//! Function return, used by \ref BaseCompiler.
+class FuncRetNode : public InstNode {
+public:
+  ASMJIT_NONCOPYABLE(FuncRetNode)
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Creates a new `FuncRetNode` instance (abstract instruction id, no options, zero operands).
+  inline FuncRetNode(BaseBuilder* ASMJIT_NONNULL(cb)) noexcept : InstNode(cb, BaseInst::kIdAbstract, InstOptions::kNone, 0) {
+    _any._nodeType = NodeType::kFuncRet; // Written directly, where the other node constructors in this file call setType().
+  }
+
+  //! \}
+};
+
+//! Function invocation, used by \ref BaseCompiler.
+class InvokeNode : public InstNode {
+public:
+  ASMJIT_NONCOPYABLE(InvokeNode)
+
+  //! Operand pack provides multiple operands that can be associated with a single return value or function
+  //! argument. Sometimes this is necessary to express an argument or return value that requires multiple
+  //! registers, for example 64-bit value in 32-bit mode or passing / returning homogeneous data structures.
+  struct OperandPack {
+    //! Operands.
+    Operand_ _data[Globals::kMaxValuePack];
+
+    //! Reset the pack by resetting all operands in the pack.
+    inline void reset() noexcept {
+      for (size_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++)
+        _data[valueIndex].reset();
+    }
+
+    //! Returns an operand at the given `valueIndex`.
+    inline Operand& operator[](size_t valueIndex) noexcept {
+      ASMJIT_ASSERT(valueIndex < Globals::kMaxValuePack);
+      return _data[valueIndex].as<Operand>();
+    }
+
+    //! Returns an operand at the given `valueIndex` (const).
+    const inline Operand& operator[](size_t valueIndex) const noexcept {
+      ASMJIT_ASSERT(valueIndex < Globals::kMaxValuePack);
+      return _data[valueIndex].as<Operand>();
+    }
+  };
+
+  //! \name Members
+  //! \{
+
+  //! Function detail.
+  FuncDetail _funcDetail;
+  //! Function return value(s).
+  OperandPack _rets;
+  //! Function arguments.
+  OperandPack* _args;
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Creates a new `InvokeNode` instance.
+  inline InvokeNode(BaseBuilder* ASMJIT_NONNULL(cb), InstId instId, InstOptions options) noexcept
+    : InstNode(cb, instId, options, kBaseOpCapacity),
+      _funcDetail(),
+      _args(nullptr) {
+    setType(NodeType::kInvoke);
+    _resetOps();
+    _rets.reset();
+    addFlags(NodeFlags::kIsRemovable); // `_args` stays null here; it is not allocated by this constructor.
+  }
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Initializes the function detail from the given `signature` and `environment`.
+  inline Error init(const FuncSignature& signature, const Environment& environment) noexcept {
+    return _funcDetail.init(signature, environment);
+  }
+
+  //! Returns the function detail.
+  inline FuncDetail& detail() noexcept { return _funcDetail; }
+  //! Returns the function detail.
+  inline const FuncDetail& detail() const noexcept { return _funcDetail; }
+
+  //! Returns the target operand.
+  inline Operand& target() noexcept { return _opArray[0].as<Operand>(); }
+  //! \overload
+  inline const Operand& target() const noexcept { return _opArray[0].as<Operand>(); }
+
+  //! Tests whether the function has a return value.
+  inline bool hasRet() const noexcept { return _funcDetail.hasRet(); }
+  //! Returns the number of function arguments.
+  inline uint32_t argCount() const noexcept { return _funcDetail.argCount(); }
+
+  //! Returns operand pack representing function return value(s).
+  inline OperandPack& retPack() noexcept { return _rets; }
+  //! Returns operand pack representing function return value(s).
+  inline const OperandPack& retPack() const noexcept { return _rets; }
+
+  //! Returns the return value at the given `valueIndex`.
+  inline Operand& ret(size_t valueIndex = 0) noexcept { return _rets[valueIndex]; }
+  //! \overload
+  inline const Operand& ret(size_t valueIndex = 0) const noexcept { return _rets[valueIndex]; }
+
+  //! Returns operand pack representing the function argument at `argIndex`.
+  inline OperandPack& argPack(size_t argIndex) noexcept {
+    ASMJIT_ASSERT(argIndex < argCount());
+    return _args[argIndex];
+  }
+  //! \overload
+  inline const OperandPack& argPack(size_t argIndex) const noexcept {
+    ASMJIT_ASSERT(argIndex < argCount());
+    return _args[argIndex];
+  }
+
+  //! Returns a function argument at the given `argIndex` and `valueIndex`.
+  inline Operand& arg(size_t argIndex, size_t valueIndex) noexcept {
+    ASMJIT_ASSERT(argIndex < argCount());
+    return _args[argIndex][valueIndex];
+  }
+  //! \overload
+  inline const Operand& arg(size_t argIndex, size_t valueIndex) const noexcept {
+    ASMJIT_ASSERT(argIndex < argCount());
+    return _args[argIndex][valueIndex];
+  }
+
+  //! Sets the function return value at `valueIndex` to `op`.
+  inline void _setRet(size_t valueIndex, const Operand_& op) noexcept { _rets[valueIndex] = op; }
+  //! Sets the function argument at `argIndex` and `valueIndex` to `op`.
+  inline void _setArg(size_t argIndex, size_t valueIndex, const Operand_& op) noexcept {
+    ASMJIT_ASSERT(argIndex < argCount());
+    _args[argIndex][valueIndex] = op;
+  }
+
+  //! Sets the function return value at `valueIndex` to `reg`.
+  inline void setRet(size_t valueIndex, const BaseReg& reg) noexcept { _setRet(valueIndex, reg); }
+
+  //! Sets the first function argument in a value-pack at `argIndex` to `reg`.
+  inline void setArg(size_t argIndex, const BaseReg& reg) noexcept { _setArg(argIndex, 0, reg); }
+  //! Sets the first function argument in a value-pack at `argIndex` to `imm`.
+  inline void setArg(size_t argIndex, const Imm& imm) noexcept { _setArg(argIndex, 0, imm); }
+
+  //! Sets the function argument at `argIndex` and `valueIndex` to `reg`.
+  inline void setArg(size_t argIndex, size_t valueIndex, const BaseReg& reg) noexcept { _setArg(argIndex, valueIndex, reg); }
+  //! Sets the function argument at `argIndex` and `valueIndex` to `imm`.
+  inline void setArg(size_t argIndex, size_t valueIndex, const Imm& imm) noexcept { _setArg(argIndex, valueIndex, imm); }
+
+  //! \}
+};
+
+//! Function pass extends \ref Pass with \ref FuncPass::runOnFunction().
+class ASMJIT_VIRTAPI FuncPass : public Pass {
+public:
+  ASMJIT_NONCOPYABLE(FuncPass)
+  typedef Pass Base;
+
+  //! \name Construction & Destruction
+  //! \{
+
+  ASMJIT_API FuncPass(const char* name) noexcept;
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns the associated `BaseCompiler` (the pass' `_cb` cast to `BaseCompiler*`).
+  inline BaseCompiler* cc() const noexcept { return static_cast<BaseCompiler*>(_cb); }
+
+  //! \}
+
+  //! \name Pass Interface
+  //! \{
+
+  //! Calls `runOnFunction()` on each `FuncNode` node found, in node-list order.
+  ASMJIT_API Error run(Zone* zone, Logger* logger) override;
+
+  //! Called once per `FuncNode` by \ref run(); pure virtual, so every derived pass must implement it.
+  virtual Error runOnFunction(Zone* zone, Logger* logger, FuncNode* func) = 0;
+
+  //! \}
+};
+
+#if !defined(ASMJIT_NO_DEPRECATED)
+inline Error BaseCompiler::_setArg(size_t argIndex, size_t valueIndex, const BaseReg& reg) { // Backs the deprecated BaseCompiler::setArg() overloads.
+  FuncNode* func = _func;
+
+  if (ASMJIT_UNLIKELY(!func))
+    return reportError(DebugUtils::errored(kErrorInvalidState)); // There is no current function to assign the argument to.
+
+  func->setArg(argIndex, valueIndex, reg); // Forwards to FuncNode::setArg(), which only asserts on `argIndex`.
+  return kErrorOk;
+}
+#endif
+
+//! \}
+
+ASMJIT_END_NAMESPACE
+
+#endif // !ASMJIT_NO_COMPILER
+#endif // ASMJIT_CORE_COMPILER_H_INCLUDED
diff --git a/lib/lepton/asmjit/core/compilerdefs.h b/lib/lepton/asmjit/core/compilerdefs.h
new file mode 100644
index 0000000000..1870e688bc
--- /dev/null
+++ b/lib/lepton/asmjit/core/compilerdefs.h
@@ -0,0 +1,173 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_COMPILERDEFS_H_INCLUDED
+#define ASMJIT_CORE_COMPILERDEFS_H_INCLUDED
+
+#include "../core/api-config.h"
+#include "../core/operand.h"
+#include "../core/type.h"
+#include "../core/zonestring.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+class RAWorkReg;
+
+//! \addtogroup asmjit_compiler
+//! \{
+
+//! Virtual register data, managed by \ref BaseCompiler.
+class VirtReg {
+public:
+  ASMJIT_NONCOPYABLE(VirtReg)
+
+  //! \name Members
+  //! \{
+
+  //! Virtual register signature.
+  OperandSignature _signature {};
+  //! Virtual register id.
+  uint32_t _id = 0;
+  //! Virtual register size in bytes (can be smaller than the register size described by `_signature`).
+  uint32_t _virtSize = 0;
+  //! Virtual register alignment (for spilling), stored in 8 bits.
+  uint8_t _alignment = 0;
+  //! Type-id (`TypeId::kVoid` until the constructor assigns one).
+  TypeId _typeId = TypeId::kVoid;
+  //! Virtual register weight for alloc/spill decisions.
+  uint8_t _weight = 1;
+  //! True if this is a fixed register, never reallocated.
+  uint8_t _isFixed : 1;
+  //! True if the virtual register is only used as a stack (never accessed as register).
+  uint8_t _isStack : 1;
+  //! True if this virtual register has assigned stack offset (can be only valid after register allocation pass).
+  uint8_t _hasStackSlot : 1;
+  uint8_t _reservedBits : 5; // Unused bits that pad the three flags above to a full byte.
+
+  //! Stack offset assigned by the register allocator relative to stack pointer (can be negative as well).
+  int32_t _stackOffset = 0;
+
+  //! Reserved for future use (padding).
+  uint32_t _reservedU32 = 0;
+
+  //! Virtual register name (user provided or automatically generated).
+  ZoneString<16> _name {};
+
+  // The following members are used exclusively by RAPass. They are initialized when the VirtReg is created to
+  // null pointers and then changed during RAPass execution. RAPass sets them back to NULL before it returns.
+
+  //! Reference to `RAWorkReg`, used during register allocation.
+  RAWorkReg* _workReg = nullptr;
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  inline VirtReg(OperandSignature signature, uint32_t id, uint32_t virtSize, uint32_t alignment, TypeId typeId) noexcept
+    : _signature(signature),
+      _id(id),
+      _virtSize(virtSize),
+      _alignment(uint8_t(alignment)), // Narrowed to 8 bits.
+      _typeId(typeId),
+      _isFixed(false), // The bit-fields have no default member initializers, so they are cleared here.
+      _isStack(false),
+      _hasStackSlot(false),
+      _reservedBits(0),
+      _stackOffset(0),
+      _reservedU32(0) {} // `_weight`, `_name`, and `_workReg` keep their default member initializers.
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns the virtual register id.
+  inline uint32_t id() const noexcept { return _id; }
+
+  //! Returns the virtual register name.
+  inline const char* name() const noexcept { return _name.data(); }
+  //! Returns the size of the virtual register name.
+  inline uint32_t nameSize() const noexcept { return _name.size(); }
+
+  //! Returns a register signature of this virtual register.
+  inline OperandSignature signature() const noexcept { return _signature; }
+  //! Returns a virtual register type (maps to the physical register type as well).
+  inline RegType type() const noexcept { return _signature.regType(); }
+  //! Returns a virtual register group (maps to the physical register group as well).
+  inline RegGroup group() const noexcept { return _signature.regGroup(); }
+
+  //! Returns a real size of the register this virtual register maps to.
+  //!
+  //! For example if this is a 128-bit SIMD register used for a scalar single precision floating point value then
+  //! its virtSize would be 4, however, the `regSize` would still say 16 (128-bits), because it's the smallest size
+  //! of that register type.
+  inline uint32_t regSize() const noexcept { return _signature.size(); }
+
+  //! Returns the virtual register size.
+  //!
+  //! The virtual register size describes how many bytes the virtual register needs to store its content. It can be
+  //! smaller than the physical register size, see `regSize()`.
+  inline uint32_t virtSize() const noexcept { return _virtSize; }
+
+  //! Returns the virtual register alignment.
+  inline uint32_t alignment() const noexcept { return _alignment; }
+
+  //! Returns the virtual register type id.
+  inline TypeId typeId() const noexcept { return _typeId; }
+
+  //! Returns the virtual register weight - the register allocator can use it as explicit hint for alloc/spill
+  //! decisions.
+  inline uint32_t weight() const noexcept { return _weight; }
+  //! Sets the virtual register weight (0 to 255) - the register allocator can use it as explicit hint for
+  //! alloc/spill decisions and initial bin-packing. Only the low 8 bits of `weight` are stored.
+  inline void setWeight(uint32_t weight) noexcept { _weight = uint8_t(weight); }
+
+  //! Returns whether the virtual register is always allocated to a fixed physical register (and never reallocated).
+  //!
+  //! \note This is only used for special purposes and it's mostly internal.
+  inline bool isFixed() const noexcept { return bool(_isFixed); }
+
+  //! Tests whether the virtual register is in fact a stack that only uses the virtual register id.
+  //!
+  //! \note It's an error if a stack is accessed as a register.
+  inline bool isStack() const noexcept { return bool(_isStack); }
+
+  //! Tests whether this virtual register (or stack) has assigned a stack offset.
+  //!
+  //! If this is a virtual register that was never allocated on stack, it would return false, otherwise if
+  //! it's a virtual register that was spilled or explicitly allocated stack, the return value would be true.
+  inline bool hasStackSlot() const noexcept { return bool(_hasStackSlot); }
+
+  //! Assigns a stack offset of this virtual register to `stackOffset` and sets `_hasStackSlot` to true.
+  inline void assignStackSlot(int32_t stackOffset) noexcept {
+    _hasStackSlot = 1;
+    _stackOffset = stackOffset;
+  }
+
+  //! Returns a stack offset associated with a virtual register or explicit stack allocation.
+  //!
+  //! \note Always verify that the stack offset has been assigned by calling \ref hasStackSlot(). The return
+  //! value will be zero when the stack offset was not assigned.
+  inline int32_t stackOffset() const noexcept { return _stackOffset; }
+
+  //! Tests whether the virtual register has an associated `RAWorkReg` at the moment.
+  inline bool hasWorkReg() const noexcept { return _workReg != nullptr; }
+  //! Returns an associated RAWorkReg with this virtual register (only valid during register allocation).
+  inline RAWorkReg* workReg() const noexcept { return _workReg; }
+  //! Associates a RAWorkReg with this virtual register (used by register allocator).
+  inline void setWorkReg(RAWorkReg* workReg) noexcept { _workReg = workReg; }
+  //! Reset the RAWorkReg association (used by register allocator).
+  inline void resetWorkReg() noexcept { _workReg = nullptr; }
+
+  //! \}
+};
+
+//! \}
+
+ASMJIT_END_NAMESPACE
+
+#endif // ASMJIT_CORE_COMPILERDEFS_H_INCLUDED
+
diff --git a/lib/lepton/asmjit/core/constpool.cpp b/lib/lepton/asmjit/core/constpool.cpp
new file mode 100644
index 0000000000..ad5fe4f2fc
--- /dev/null
+++ b/lib/lepton/asmjit/core/constpool.cpp
@@ -0,0 +1,363 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#include "../core/constpool.h"
+#include "../core/support.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+// ConstPool - Construction & Destruction
+// ======================================
+
+ConstPool::ConstPool(Zone* zone) noexcept { reset(zone); } // Starts as an empty pool bound to `zone`.
+ConstPool::~ConstPool() noexcept {} // Empty body - nodes and gaps come from `_zone` and are not released here.
+
+// ConstPool - Reset
+// =================
+
+void ConstPool::reset(Zone* zone) noexcept { // Rebinds the pool to `zone` and clears all bookkeeping.
+  _zone = zone;
+
+  size_t dataSize = 1; // Tree `i` stores constants of `1 << i` bytes.
+  for (size_t i = 0; i < ASMJIT_ARRAY_SIZE(_tree); i++) {
+    _tree[i].reset();
+    _tree[i].setDataSize(dataSize);
+    _gaps[i] = nullptr;
+    dataSize <<= 1;
+  }
+
+  _gapPool = nullptr; // Drop the list of recycled gap records as well.
+  _size = 0;
+  _alignment = 0;
+  _minItemSize = 0;
+}
+
+// ConstPool - Operations
+// ======================
+
+static inline ConstPool::Gap* ConstPool_allocGap(ConstPool* self) noexcept {
+  ConstPool::Gap* gap = self->_gapPool; // Prefer a record recycled by `ConstPool_freeGap()`.
+  if (!gap)
+    return self->_zone->allocT<ConstPool::Gap>(); // The caller (`ConstPool_addGap()`) checks the result for null.
+
+  self->_gapPool = gap->_next; // Pop the recycled record from the list.
+  return gap;
+}
+
+static inline void ConstPool_freeGap(ConstPool* self, ConstPool::Gap* gap) noexcept {
+  gap->_next = self->_gapPool; // Push the record onto the recycle list read by `ConstPool_allocGap()`.
+  self->_gapPool = gap;
+}
+
+static void ConstPool_addGap(ConstPool* self, size_t offset, size_t size) noexcept { // Tracks [offset, offset + size).
+  ASMJIT_ASSERT(size > 0);
+
+  while (size > 0) { // Split the range into the largest aligned power-of-two chunks (32 bytes at most).
+    size_t gapIndex;
+    size_t gapSize;
+
+    if (size >= 32 && Support::isAligned<size_t>(offset, 32)) {
+      gapIndex = ConstPool::kIndex32;
+      gapSize = 32;
+    }
+    else if (size >= 16 && Support::isAligned<size_t>(offset, 16)) {
+      gapIndex = ConstPool::kIndex16;
+      gapSize = 16;
+    }
+    else if (size >= 8 && Support::isAligned<size_t>(offset, 8)) {
+      gapIndex = ConstPool::kIndex8;
+      gapSize = 8;
+    }
+    else if (size >= 4 && Support::isAligned<size_t>(offset, 4)) {
+      gapIndex = ConstPool::kIndex4;
+      gapSize = 4;
+    }
+    else if (size >= 2 && Support::isAligned<size_t>(offset, 2)) {
+      gapIndex = ConstPool::kIndex2;
+      gapSize = 2;
+    }
+    else {
+      gapIndex = ConstPool::kIndex1;
+      gapSize = 1;
+    }
+
+    // We don't have to check for errors here, if this failed nothing really happened (just the gap won't be
+    // visible) and it will fail again at place where the same check would generate `kErrorOutOfMemory` error.
+    ConstPool::Gap* gap = ConstPool_allocGap(self);
+    if (!gap)
+      return; // The remainder of the range is simply left untracked.
+
+    gap->_next = self->_gaps[gapIndex]; // Push onto the list of gaps of this size.
+    self->_gaps[gapIndex] = gap;
+
+    gap->_offset = offset;
+    gap->_size = gapSize;
+
+    offset += gapSize; // Continue with whatever is left behind this chunk.
+    size -= gapSize;
+  }
+}
+
+Error ConstPool::add(const void* data, size_t size, size_t& dstOffset) noexcept {
+  size_t treeIndex;
+
+  // Only power-of-two sizes from 1 to 64 bytes are supported - each size has its own tree.
+  if (size == 64)
+    treeIndex = kIndex64;
+  else if (size == 32)
+    treeIndex = kIndex32;
+  else if (size == 16)
+    treeIndex = kIndex16;
+  else if (size == 8)
+    treeIndex = kIndex8;
+  else if (size == 4)
+    treeIndex = kIndex4;
+  else if (size == 2)
+    treeIndex = kIndex2;
+  else if (size == 1)
+    treeIndex = kIndex1;
+  else
+    return DebugUtils::errored(kErrorInvalidArgument);
+
+  // Reuse the slot of an identical constant (or shared sub-constant) if one has been added before.
+  ConstPool::Node* node = _tree[treeIndex].get(data);
+  if (node) {
+    dstOffset = node->_offset;
+    return kErrorOk;
+  }
+
+  // Before incrementing the current offset try if there is a gap that can be used for the requested data. Gaps of
+  // the requested size are tried first, then larger ones; only the first gap found is consumed.
+  size_t offset = ~size_t(0);
+
+  for (size_t gapIndex = treeIndex; gapIndex != kIndexCount - 1; gapIndex++) {
+    ConstPool::Gap* gap = _gaps[gapIndex];
+    if (!gap)
+      continue;
+
+    size_t gapOffset = gap->_offset;
+    size_t gapSize = gap->_size;
+
+    // Unlink the gap and recycle its record.
+    _gaps[gapIndex] = gap->_next;
+    ConstPool_freeGap(this, gap);
+
+    offset = gapOffset;
+    ASMJIT_ASSERT(Support::isAligned<size_t>(offset, size));
+
+    // The constant takes the head of the gap; hand the unused tail (if any) back to the gap lists.
+    if (gapSize > size)
+      ConstPool_addGap(this, gapOffset + size, gapSize - size);
+    break;
+  }
+
+  if (offset == ~size_t(0)) {
+    // Get how many bytes have to be skipped so the address is aligned accordingly to the 'size'.
+    size_t diff = Support::alignUpDiff<size_t>(_size, size);
+
+    if (diff != 0) {
+      ConstPool_addGap(this, _size, diff);
+      _size += diff;
+    }
+
+    offset = _size;
+    _size += size;
+  }
+
+  // Add the initial node to the right index.
+  node = ConstPool::Tree::_newNode(_zone, data, size, offset, false);
+  if (ASMJIT_UNLIKELY(!node))
+    return DebugUtils::errored(kErrorOutOfMemory);
+
+  _tree[treeIndex].insert(node);
+  _alignment = Support::max<size_t>(_alignment, size);
+  _minItemSize = _minItemSize == 0 ? size : Support::min(_minItemSize, size);
+
+  dstOffset = offset;
+
+  // Now create a bunch of shared constants that are based on the data pattern. We stop at size 4,
+  // it probably doesn't make sense to split constants down to 1 byte.
+  size_t pCount = 1;
+  size_t smallerSize = size;
+
+  while (smallerSize > 4) {
+    pCount <<= 1;
+    smallerSize >>= 1;
+
+    ASMJIT_ASSERT(treeIndex != 0);
+    treeIndex--;
+
+    const uint8_t* pData = static_cast<const uint8_t*>(data);
+    for (size_t i = 0; i < pCount; i++, pData += smallerSize) {
+      if (_tree[treeIndex].get(pData))
+        continue;
+
+      // Shared slots are an optimization only - the constant itself is already stored, so stop splitting if the
+      // zone is exhausted instead of inserting a null node.
+      node = ConstPool::Tree::_newNode(_zone, pData, smallerSize, offset + (i * smallerSize), true);
+      if (ASMJIT_UNLIKELY(!node))
+        return kErrorOk;
+      _tree[treeIndex].insert(node);
+    }
+  }
+
+  return kErrorOk;
+}
+
+// ConstPool - Fill
+// ================
+
+struct ConstPoolFill { // Tree visitor that copies node payloads into the buffer at `_dst`.
+  inline ConstPoolFill(uint8_t* dst, size_t dataSize) noexcept :
+    _dst(dst),
+    _dataSize(dataSize) {}
+
+  inline void operator()(const ConstPool::Node* node) noexcept {
+    if (!node->_shared) // Shared nodes alias bytes of a larger constant, which is copied through its own node.
+      memcpy(_dst + node->_offset, node->data(), _dataSize);
+  }
+
+  uint8_t* _dst; // Start of the destination buffer; node offsets are relative to it.
+  size_t _dataSize; // Payload size of the nodes being visited; updated by `ConstPool::fill()` per tree.
+};
+
+void ConstPool::fill(void* dst) const noexcept { // NOTE: `dst` has to provide room for at least `size()` bytes.
+  // Clears possible gaps, asmjit should never emit garbage to the output.
+  memset(dst, 0, _size);
+
+  ConstPoolFill filler(static_cast<uint8_t*>(dst), 1); // Starts with the 1-byte tree.
+  for (size_t i = 0; i < ASMJIT_ARRAY_SIZE(_tree); i++) {
+    _tree[i].forEach(filler);
+    filler._dataSize <<= 1; // The next tree holds constants twice as large.
+  }
+}
+
+// ConstPool - Tests
+// =================
+
+#if defined(ASMJIT_TEST)
+UNIT(const_pool) { // Covers deduplication, sub-constant sharing, gap reuse, reset, and alignment tracking.
+  Zone zone(32384 - Zone::kBlockOverhead);
+  ConstPool pool(&zone);
+
+  uint32_t i;
+  uint32_t kCount = BrokenAPI::hasArg("--quick") ? 1000 : 1000000;
+
+  INFO("Adding %u constants to the pool", kCount);
+  {
+    size_t prevOffset;
+    size_t curOffset;
+    uint64_t c = 0x0101010101010101u;
+
+    EXPECT(pool.add(&c, 8, prevOffset) == kErrorOk);
+    EXPECT(prevOffset == 0);
+
+    for (i = 1; i < kCount; i++) {
+      c++;
+      EXPECT(pool.add(&c, 8, curOffset) == kErrorOk);
+      EXPECT(prevOffset + 8 == curOffset);
+      EXPECT(pool.size() == (i + 1) * 8);
+      prevOffset = curOffset;
+    }
+
+    EXPECT(pool.alignment() == 8);
+  }
+
+  INFO("Retrieving %u constants from the pool", kCount);
+  {
+    uint64_t c = 0x0101010101010101u;
+
+    for (i = 0; i < kCount; i++) {
+      size_t offset;
+      EXPECT(pool.add(&c, 8, offset) == kErrorOk);
+      EXPECT(offset == i * 8);
+      c++;
+    }
+  }
+
+  INFO("Checking if the constants were split into 4-byte patterns");
+  {
+    uint32_t c = 0x01010101;
+    for (i = 0; i < kCount; i++) {
+      size_t offset;
+      EXPECT(pool.add(&c, 4, offset) == kErrorOk);
+      EXPECT(offset == i * 8);
+      c++;
+    }
+  }
+
+  INFO("Adding 2 byte constant to misalign the current offset");
+  {
+    uint16_t c = 0xFFFF;
+    size_t offset;
+
+    EXPECT(pool.add(&c, 2, offset) == kErrorOk);
+    EXPECT(offset == kCount * 8);
+    EXPECT(pool.alignment() == 8);
+  }
+
+  INFO("Adding 8 byte constant to check if pool gets aligned again");
+  {
+    uint64_t c = 0xFFFFFFFFFFFFFFFFu;
+    size_t offset;
+
+    EXPECT(pool.add(&c, 8, offset) == kErrorOk);
+    EXPECT(offset == kCount * 8 + 8);
+  }
+
+  INFO("Adding 2 byte constant to verify the gap is filled");
+  {
+    uint16_t c = 0xFFFE;
+    size_t offset;
+
+    EXPECT(pool.add(&c, 2, offset) == kErrorOk);
+    EXPECT(offset == kCount * 8 + 2);
+    EXPECT(pool.alignment() == 8);
+  }
+
+  INFO("Checking reset functionality");
+  {
+    pool.reset(&zone);
+    zone.reset();
+
+    EXPECT(pool.size() == 0);
+    EXPECT(pool.alignment() == 0);
+  }
+
+  INFO("Checking pool alignment when combined constants are added");
+  {
+    uint8_t bytes[32] = { 0 };
+    size_t offset;
+
+    EXPECT(pool.add(bytes, 1, offset) == kErrorOk); // Checked like everywhere else so `offset` is known to be set.
+    EXPECT(pool.size() == 1);
+    EXPECT(pool.alignment() == 1);
+    EXPECT(offset == 0);
+
+    EXPECT(pool.add(bytes, 2, offset) == kErrorOk);
+    EXPECT(pool.size() == 4);
+    EXPECT(pool.alignment() == 2);
+    EXPECT(offset == 2);
+
+    EXPECT(pool.add(bytes, 4, offset) == kErrorOk);
+    EXPECT(pool.size() == 8);
+    EXPECT(pool.alignment() == 4);
+    EXPECT(offset == 4);
+
+    EXPECT(pool.add(bytes, 4, offset) == kErrorOk);
+    EXPECT(pool.size() == 8);
+    EXPECT(pool.alignment() == 4);
+    EXPECT(offset == 4);
+
+    EXPECT(pool.add(bytes, 32, offset) == kErrorOk);
+    EXPECT(pool.size() == 64);
+    EXPECT(pool.alignment() == 32);
+    EXPECT(offset == 32);
+  }
+}
+#endif
+
+ASMJIT_END_NAMESPACE
diff --git a/lib/lepton/asmjit/core/constpool.h b/lib/lepton/asmjit/core/constpool.h
new file mode 100644
index 0000000000..32b84b1065
--- /dev/null
+++ b/lib/lepton/asmjit/core/constpool.h
@@ -0,0 +1,250 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_CONSTPOOL_H_INCLUDED
+#define ASMJIT_CORE_CONSTPOOL_H_INCLUDED
+
+#include "../core/support.h"
+#include "../core/zone.h"
+#include "../core/zonetree.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \addtogroup asmjit_utilities
+//! \{
+
+//! Constant pool scope - selects where a constant gets embedded.
+enum class ConstPoolScope : uint32_t {
+  //! Local constant, always embedded right after the current function.
+  kLocal = 0,
+  //! Global constant, embedded at the end of the currently compiled code.
+  kGlobal = 1,
+
+  //! Maximum value of `ConstPoolScope` (same value as `kGlobal`).
+  kMaxValue = kGlobal
+};
+
+//! Constant pool - deduplicates constants of 1 to 64 bytes and assigns each one an offset within the pool.
+class ConstPool {
+public:
+  ASMJIT_NONCOPYABLE(ConstPool)
+
+  //! \cond INTERNAL
+
+  //! Index of a given size in const-pool table (index `i` corresponds to constants of `1 << i` bytes).
+  enum Index : uint32_t {
+    kIndex1 = 0,
+    kIndex2 = 1,
+    kIndex4 = 2,
+    kIndex8 = 3,
+    kIndex16 = 4,
+    kIndex32 = 5,
+    kIndex64 = 6,
+    kIndexCount = 7
+  };
+
+  //! Zone-allocated const-pool gap created by two differently aligned constants.
+  struct Gap {
+    //! Pointer to the next gap
+    Gap* _next;
+    //! Offset of the gap.
+    size_t _offset;
+    //! Remaining bytes of the gap (basically a gap size).
+    size_t _size;
+  };
+
+  //! Zone-allocated const-pool node.
+  class Node : public ZoneTreeNodeT<Node> {
+  public:
+    ASMJIT_NONCOPYABLE(Node)
+
+    //! Set when the node aliases a part of a larger constant instead of owning its own bytes in the pool.
+    uint32_t _shared : 1;
+    //! Data offset from the beginning of the pool (kept as a 32-bit value).
+    uint32_t _offset;
+
+    inline Node(size_t offset, bool shared) noexcept
+      : ZoneTreeNodeT<Node>(),
+        _shared(shared),
+        _offset(uint32_t(offset)) {}
+
+    inline void* data() const noexcept { // The payload lives right behind the node header, see `Tree::_newNode()`.
+      return static_cast<void*>(const_cast<ConstPool::Node*>(this) + 1);
+    }
+  };
+
+  //! Data comparer used internally (byte-wise `memcmp` of `_dataSize` bytes).
+  class Compare {
+  public:
+    size_t _dataSize;
+
+    inline Compare(size_t dataSize) noexcept
+      : _dataSize(dataSize) {}
+
+    inline int operator()(const Node& a, const Node& b) const noexcept {
+      return ::memcmp(a.data(), b.data(), _dataSize);
+    }
+
+    inline int operator()(const Node& a, const void* data) const noexcept {
+      return ::memcmp(a.data(), data, _dataSize);
+    }
+  };
+
+  //! Zone-allocated const-pool tree.
+  struct Tree {
+    //! RB tree.
+    ZoneTree<Node> _tree;
+    //! Size of the tree (number of nodes).
+    size_t _size;
+    //! Size of the data.
+    size_t _dataSize;
+
+    inline explicit Tree(size_t dataSize = 0) noexcept
+      : _tree(),
+        _size(0),
+        _dataSize(dataSize) {}
+
+    inline void reset() noexcept {
+      _tree.reset();
+      _size = 0;
+    }
+
+    inline bool empty() const noexcept { return _size == 0; }
+    inline size_t size() const noexcept { return _size; }
+
+    inline void setDataSize(size_t dataSize) noexcept {
+      ASMJIT_ASSERT(empty()); // Changing the compared size of a populated tree is not allowed.
+      _dataSize = dataSize;
+    }
+
+    inline Node* get(const void* data) noexcept {
+      Compare cmp(_dataSize);
+      return _tree.get(data, cmp);
+    }
+
+    inline void insert(Node* node) noexcept {
+      Compare cmp(_dataSize);
+      _tree.insert(node, cmp);
+      _size++;
+    }
+
+    template<typename Visitor>
+    inline void forEach(Visitor& visitor) const noexcept { // Iterative in-order walk calling `visitor(node)`.
+      Node* node = _tree.root();
+      if (!node) return;
+
+      Node* stack[Globals::kMaxTreeHeight]; // Explicit stack of parents that still have to be visited.
+      size_t top = 0;
+
+      for (;;) {
+        Node* left = node->left();
+        if (left != nullptr) { // Descend to the left-most node first, remembering the path.
+          ASMJIT_ASSERT(top != Globals::kMaxTreeHeight);
+          stack[top++] = node;
+
+          node = left;
+          continue;
+        }
+
+        for (;;) {
+          visitor(node);
+          node = node->right();
+
+          if (node != nullptr) // A right subtree exists - walk it starting from its left-most node.
+            break;
+
+          if (top == 0)
+            return;
+
+          node = stack[--top]; // Left subtree is done - go back to the remembered parent.
+        }
+      }
+    }
+
+    static inline Node* _newNode(Zone* zone, const void* data, size_t size, size_t offset, bool shared) noexcept {
+      Node* node = zone->allocT<Node>(sizeof(Node) + size); // Node header followed by `size` payload bytes.
+      if (ASMJIT_UNLIKELY(!node)) return nullptr;
+
+      node = new(node) Node(offset, shared);
+      memcpy(node->data(), data, size);
+      return node;
+    }
+  };
+
+  //! \endcond
+
+  //! \name Members
+  //! \{
+
+  //! Zone allocator.
+  Zone* _zone;
+  //! Tree per size.
+  Tree _tree[kIndexCount];
+  //! Gaps per size.
+  Gap* _gaps[kIndexCount];
+  //! Gaps pool
+  Gap* _gapPool;
+
+  //! Size of the pool (in bytes).
+  size_t _size;
+  //! Required pool alignment.
+  size_t _alignment;
+  //! Minimum item size in the pool.
+  size_t _minItemSize;
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  ASMJIT_API ConstPool(Zone* zone) noexcept;
+  ASMJIT_API ~ConstPool() noexcept;
+
+  ASMJIT_API void reset(Zone* zone) noexcept;
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Tests whether the constant-pool is empty.
+  inline bool empty() const noexcept { return _size == 0; }
+  //! Returns the size of the constant-pool in bytes.
+  inline size_t size() const noexcept { return _size; }
+  //! Returns the alignment required by the pool.
+  inline size_t alignment() const noexcept { return _alignment; }
+  //! Returns the minimum size of all items added to the constant pool.
+  inline size_t minItemSize() const noexcept { return _minItemSize; }
+
+  //! \}
+
+  //! \name Utilities
+  //! \{
+
+  //! Adds a constant to the constant pool.
+  //!
+  //! The constant must have known size, which is 1, 2, 4, 8, 16, 32 or 64 bytes. The constant is added to the pool
+  //! only if it doesn't exist yet, otherwise the offset of the cached value is returned.
+  //!
+  //! AsmJit is able to subdivide added constants, so for example if you add 8-byte constant 0x1122334455667788 it
+  //! will create the following slots:
+  //!
+  //!   8-byte: 0x1122334455667788
+  //!   4-byte: 0x11223344, 0x55667788
+  //!
+  //! The reason is that when combining MMX/SSE/AVX code some patterns are used frequently. However, AsmJit is not
+  //! able to reallocate a constant that has been already added. For example if you try to add 4-byte constant and
+  //! then 8-byte constant having the same 4-byte pattern as the previous one, two independent slots will be used.
+  ASMJIT_API Error add(const void* data, size_t size, size_t& dstOffset) noexcept;
+
+  //! Fills the destination with the content of this constant pool.
+  ASMJIT_API void fill(void* dst) const noexcept;
+};
+
+//! \}
+
+ASMJIT_END_NAMESPACE
+
+#endif // ASMJIT_CORE_CONSTPOOL_H_INCLUDED
diff --git a/lib/lepton/asmjit/core/cpuinfo.cpp b/lib/lepton/asmjit/core/cpuinfo.cpp
new file mode 100644
index 0000000000..7bf7407f00
--- /dev/null
+++ b/lib/lepton/asmjit/core/cpuinfo.cpp
@@ -0,0 +1,1162 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#include "../core/cpuinfo.h"
+#include "../core/support.h"
+
+#if !defined(_WIN32)
+  #include <errno.h>
+  #include <sys/utsname.h>
+  #include <unistd.h>
+#endif
+
+// Required by `getauxval()` on Linux.
+#if defined(__linux__)
+  #include <sys/auxv.h>
+#endif
+
+// Required to detect CPU and features on Apple platforms.
+#if defined(__APPLE__)
+  #include <mach/machine.h>
+  #include <sys/types.h>
+  #include <sys/sysctl.h>
+#endif
+
+// Required by `__cpuidex()` and `_xgetbv()`.
+#if defined(_MSC_VER)
+  #include <intrin.h>
+#endif
+
+ASMJIT_BEGIN_NAMESPACE
+
+// CpuInfo - Detect - HW-Thread Count
+// ==================================
+
+#if defined(_WIN32)
+static inline uint32_t detectHWThreadCount() noexcept {
+  SYSTEM_INFO info;
+  ::GetSystemInfo(&info);
+  return info.dwNumberOfProcessors; // Processor count as reported by `GetSystemInfo()`.
+}
+#elif defined(_SC_NPROCESSORS_ONLN)
+static inline uint32_t detectHWThreadCount() noexcept {
+  long res = ::sysconf(_SC_NPROCESSORS_ONLN);
+  return res <= 0 ? uint32_t(1) : uint32_t(res); // Error or non-positive answer - assume a single thread.
+}
+#else
+static inline uint32_t detectHWThreadCount() noexcept {
+  return 1; // No query is available in this configuration - assume a single thread.
+}
+#endif
+
+// CpuInfo - Detect - X86
+// ======================
+
+#if ASMJIT_ARCH_X86
+
+struct cpuid_t { uint32_t eax, ebx, ecx, edx; }; // Register values produced by `cpuidQuery()`.
+struct xgetbv_t { uint32_t eax, edx; }; // Low (eax) and high (edx) halves produced by `xgetbvQuery()`.
+
+// Executes `cpuid` with EAX=`inEax` and ECX=`inEcx` and stores the resulting EAX, EBX, ECX, and EDX in `out`.
+static inline void cpuidQuery(cpuid_t* out, uint32_t inEax, uint32_t inEcx = 0) noexcept {
+#if defined(_MSC_VER)
+  __cpuidex(reinterpret_cast<int*>(out), inEax, inEcx);
+#elif defined(__GNUC__) && ASMJIT_ARCH_X86 == 32
+  __asm__ __volatile__( // EBX is parked in EDI across `cpuid` and swapped back; its result is read from EDI.
+    "mov %%ebx, %%edi\n"
+    "cpuid\n"
+    "xchg %%edi, %%ebx\n" : "=a"(out->eax), "=D"(out->ebx), "=c"(out->ecx), "=d"(out->edx) : "a"(inEax), "c"(inEcx));
+#elif defined(__GNUC__) && ASMJIT_ARCH_X86 == 64
+  __asm__ __volatile__( // Same scheme as the 32-bit variant, using RBX and RDI.
+    "mov %%rbx, %%rdi\n"
+    "cpuid\n"
+    "xchg %%rdi, %%rbx\n" : "=a"(out->eax), "=D"(out->ebx), "=c"(out->ecx), "=d"(out->edx) : "a"(inEax), "c"(inEcx));
+#else
+  #error "[asmjit] x86::cpuidQuery() - Unsupported compiler."
+#endif
+}
+
+// Executes `xgetbv` for the extended control register selected by `inEcx` and stores EDX:EAX in `out`.
+static inline void xgetbvQuery(xgetbv_t* out, uint32_t inEcx) noexcept {
+#if defined(_MSC_VER)
+  uint64_t value = _xgetbv(inEcx);
+  out->eax = uint32_t(value & 0xFFFFFFFFu);
+  out->edx = uint32_t(value >> 32);
+#elif defined(__GNUC__)
+  uint32_t outEax;
+  uint32_t outEdx;
+
+  // Emitted as raw bytes (0F 01 D0 encodes `xgetbv`), presumably for assemblers lacking the mnemonic; replaces:
+  //   __asm__ __volatile__("xgetbv" : "=a"(outEax), "=d"(outEdx) : "c"(inEcx));
+  __asm__ __volatile__(".byte 0x0F, 0x01, 0xD0" : "=a"(outEax), "=d"(outEdx) : "c"(inEcx));
+
+  out->eax = outEax;
+  out->edx = outEdx;
+#else
+  out->eax = 0; // Unsupported compiler - report all-zero register content.
+  out->edx = 0;
+#endif
+}
+
+// Maps the 12-byte `cpuid` vendor string (passed as three dwords) to a short normalized name in `cpu._vendor.str`.
+static inline void simplifyCpuVendor(CpuInfo& cpu, uint32_t d0, uint32_t d1, uint32_t d2) noexcept {
+  struct Vendor {
+    char normalized[8];
+    union { char text[12]; uint32_t d[3]; }; // The same 12 bytes viewed as text and as three dwords.
+  };
+
+  static const Vendor table[] = {
+    { { 'A', 'M', 'D'                     }, {{ 'A', 'u', 't', 'h', 'e', 'n', 't', 'i', 'c', 'A', 'M', 'D' }} },
+    { { 'I', 'N', 'T', 'E', 'L'           }, {{ 'G', 'e', 'n', 'u', 'i', 'n', 'e', 'I', 'n', 't', 'e', 'l' }} },
+    { { 'V', 'I', 'A'                     }, {{ 'C', 'e', 'n', 't', 'a', 'u', 'r', 'H', 'a', 'u', 'l', 's' }} },
+    { { 'V', 'I', 'A'                     }, {{ 'V', 'I', 'A',  0 , 'V', 'I', 'A',  0 , 'V', 'I', 'A',  0  }} },
+    { { 'U', 'N', 'K', 'N', 'O', 'W', 'N' }, {{ 0                                                          }} }
+  };
+
+  uint32_t i;
+  for (i = 0; i < ASMJIT_ARRAY_SIZE(table) - 1; i++) // The last entry is the fallback, so it is never compared.
+    if (table[i].d[0] == d0 && table[i].d[1] == d1 && table[i].d[2] == d2)
+      break;
+  memcpy(cpu._vendor.str, table[i].normalized, 8); // `i` is the match, or the "UNKNOWN" entry if none matched.
+}
+
+static ASMJIT_FAVOR_SIZE void simplifyCpuBrand(char* s) noexcept { // Compacts the brand string `s` in place.
+  char* d = s; // Write cursor; it never gets ahead of the read cursor `s`.
+
+  char c = s[0];
+  char prev = 0; // The last character that was kept.
+
+  // Used to always clear the current character to ensure that the result
+  // doesn't contain garbage after a new null terminator is placed at the end.
+  s[0] = '\0';
+
+  for (;;) {
+    if (!c)
+      break;
+
+    if (!(c == ' ' && (prev == '@' || s[1] == ' ' || s[1] == '@'))) { // Drop a space before ' '/'@' or after '@'.
+      *d++ = c;
+      prev = c;
+    }
+
+    c = *++s; // Fetch the next character before its slot is cleared.
+    s[0] = '\0';
+  }
+
+  d[0] = '\0';
+}
+
+static ASMJIT_FAVOR_SIZE void detectX86Cpu(CpuInfo& cpu) noexcept {
+  using Support::bitTest;
+
+  cpuid_t regs;
+  xgetbv_t xcr0 { 0, 0 };
+  CpuFeatures::X86& features = cpu.features().x86();
+
+  cpu._wasDetected = true;
+  cpu._maxLogicalProcessors = 1;
+
+  // We are gonna execute CPUID, which was introduced by I486, so it's the requirement.
+  features.add(CpuFeatures::X86::kI486);
+
+  // CPUID EAX=0
+  // -----------
+
+  // Get vendor string/id.
+  cpuidQuery(&regs, 0x0);
+
+  uint32_t maxId = regs.eax;
+  uint32_t maxSubLeafId_0x7 = 0;
+
+  simplifyCpuVendor(cpu, regs.ebx, regs.edx, regs.ecx);
+
+  // CPUID EAX=1
+  // -----------
+
+  if (maxId >= 0x1) {
+    // Get feature flags in ECX/EDX and family/model in EAX.
+    cpuidQuery(&regs, 0x1);
+
+    // Fill family and model fields.
+    uint32_t modelId  = (regs.eax >> 4) & 0x0F;
+    uint32_t familyId = (regs.eax >> 8) & 0x0F;
+
+    // Use extended family and model fields.
+    if (familyId == 0x06u || familyId == 0x0Fu)
+      modelId += (((regs.eax >> 16) & 0x0Fu) << 4);
+
+    if (familyId == 0x0Fu)
+      familyId += ((regs.eax >> 20) & 0xFFu);
+
+    cpu._modelId              = modelId;
+    cpu._familyId             = familyId;
+    cpu._brandId              = ((regs.ebx      ) & 0xFF);
+    cpu._processorType        = ((regs.eax >> 12) & 0x03);
+    cpu._maxLogicalProcessors = ((regs.ebx >> 16) & 0xFF);
+    cpu._stepping             = ((regs.eax      ) & 0x0F);
+    cpu._cacheLineSize        = ((regs.ebx >>  8) & 0xFF) * 8;
+
+    features.addIf(bitTest(regs.ecx,  0), CpuFeatures::X86::kSSE3);
+    features.addIf(bitTest(regs.ecx,  1), CpuFeatures::X86::kPCLMULQDQ);
+    features.addIf(bitTest(regs.ecx,  3), CpuFeatures::X86::kMONITOR);
+    features.addIf(bitTest(regs.ecx,  5), CpuFeatures::X86::kVMX);
+    features.addIf(bitTest(regs.ecx,  6), CpuFeatures::X86::kSMX);
+    features.addIf(bitTest(regs.ecx,  9), CpuFeatures::X86::kSSSE3);
+    features.addIf(bitTest(regs.ecx, 13), CpuFeatures::X86::kCMPXCHG16B);
+    features.addIf(bitTest(regs.ecx, 19), CpuFeatures::X86::kSSE4_1);
+    features.addIf(bitTest(regs.ecx, 20), CpuFeatures::X86::kSSE4_2);
+    features.addIf(bitTest(regs.ecx, 22), CpuFeatures::X86::kMOVBE);
+    features.addIf(bitTest(regs.ecx, 23), CpuFeatures::X86::kPOPCNT);
+    features.addIf(bitTest(regs.ecx, 25), CpuFeatures::X86::kAESNI);
+    features.addIf(bitTest(regs.ecx, 26), CpuFeatures::X86::kXSAVE);
+    features.addIf(bitTest(regs.ecx, 27), CpuFeatures::X86::kOSXSAVE);
+    features.addIf(bitTest(regs.ecx, 30), CpuFeatures::X86::kRDRAND);
+    features.addIf(bitTest(regs.edx,  0), CpuFeatures::X86::kFPU);
+    features.addIf(bitTest(regs.edx,  4), CpuFeatures::X86::kRDTSC);
+    features.addIf(bitTest(regs.edx,  5), CpuFeatures::X86::kMSR);
+    features.addIf(bitTest(regs.edx,  8), CpuFeatures::X86::kCMPXCHG8B);
+    features.addIf(bitTest(regs.edx, 15), CpuFeatures::X86::kCMOV);
+    features.addIf(bitTest(regs.edx, 19), CpuFeatures::X86::kCLFLUSH);
+    features.addIf(bitTest(regs.edx, 23), CpuFeatures::X86::kMMX);
+    features.addIf(bitTest(regs.edx, 24), CpuFeatures::X86::kFXSR);
+    features.addIf(bitTest(regs.edx, 25), CpuFeatures::X86::kSSE);
+    features.addIf(bitTest(regs.edx, 25), CpuFeatures::X86::kSSE, CpuFeatures::X86::kSSE2);
+    features.addIf(bitTest(regs.edx, 28), CpuFeatures::X86::kMT);
+
+    // Get the content of XCR0 if supported by the CPU and enabled by the OS.
+    if (features.hasXSAVE() && features.hasOSXSAVE()) {
+      xgetbvQuery(&xcr0, 0);
+    }
+
+    // Detect AVX+.
+    if (bitTest(regs.ecx, 28)) {
+      // - XCR0[2:1] == 11b
+      //   XMM & YMM states need to be enabled by OS.
+      if ((xcr0.eax & 0x00000006u) == 0x00000006u) {
+        features.add(CpuFeatures::X86::kAVX);
+        features.addIf(bitTest(regs.ecx, 12), CpuFeatures::X86::kFMA);
+        features.addIf(bitTest(regs.ecx, 29), CpuFeatures::X86::kF16C);
+      }
+    }
+  }
+
+  constexpr uint32_t kXCR0_AMX_Bits = 0x3u << 17;
+  bool amxEnabledByOS = (xcr0.eax & kXCR0_AMX_Bits) == kXCR0_AMX_Bits;
+
+#if defined(__APPLE__)
+  // Apple platform provides on-demand AVX512 support. When an AVX512 instruction is used the first time it results
+  // in #UD, which would cause the thread being promoted to use AVX512 support by the OS in addition to enabling the
+  // necessary bits in XCR0 register.
+  bool avx512EnabledByOS = true;
+#else
+  // - XCR0[2:1] ==  11b - XMM/YMM states need to be enabled by OS.
+  // - XCR0[7:5] == 111b - Upper 256-bit of ZMM0-ZMM15 and ZMM16-ZMM31 need to be enabled by OS.
+  constexpr uint32_t kXCR0_AVX512_Bits = (0x3u << 1) | (0x7u << 5);
+  bool avx512EnabledByOS = (xcr0.eax & kXCR0_AVX512_Bits) == kXCR0_AVX512_Bits;
+#endif
+
+  // CPUID EAX=7 ECX=0
+  // -----------------
+
+  // Detect new features if the processor supports CPUID-07.
+  bool maybeMPX = false;
+
+  if (maxId >= 0x7) {
+    cpuidQuery(&regs, 0x7);
+
+    maybeMPX = bitTest(regs.ebx, 14);
+    maxSubLeafId_0x7 = regs.eax;
+
+    features.addIf(bitTest(regs.ebx,  0), CpuFeatures::X86::kFSGSBASE);
+    features.addIf(bitTest(regs.ebx,  3), CpuFeatures::X86::kBMI);
+    features.addIf(bitTest(regs.ebx,  4), CpuFeatures::X86::kHLE);
+    features.addIf(bitTest(regs.ebx,  7), CpuFeatures::X86::kSMEP);
+    features.addIf(bitTest(regs.ebx,  8), CpuFeatures::X86::kBMI2);
+    features.addIf(bitTest(regs.ebx,  9), CpuFeatures::X86::kERMS);
+    features.addIf(bitTest(regs.ebx, 11), CpuFeatures::X86::kRTM);
+    features.addIf(bitTest(regs.ebx, 18), CpuFeatures::X86::kRDSEED);
+    features.addIf(bitTest(regs.ebx, 19), CpuFeatures::X86::kADX);
+    features.addIf(bitTest(regs.ebx, 20), CpuFeatures::X86::kSMAP);
+    features.addIf(bitTest(regs.ebx, 23), CpuFeatures::X86::kCLFLUSHOPT);
+    features.addIf(bitTest(regs.ebx, 24), CpuFeatures::X86::kCLWB);
+    features.addIf(bitTest(regs.ebx, 29), CpuFeatures::X86::kSHA);
+    features.addIf(bitTest(regs.ecx,  0), CpuFeatures::X86::kPREFETCHWT1);
+    features.addIf(bitTest(regs.ecx,  4), CpuFeatures::X86::kOSPKE);
+    features.addIf(bitTest(regs.ecx,  5), CpuFeatures::X86::kWAITPKG);
+    features.addIf(bitTest(regs.ecx,  7), CpuFeatures::X86::kCET_SS);
+    features.addIf(bitTest(regs.ecx,  8), CpuFeatures::X86::kGFNI);
+    features.addIf(bitTest(regs.ecx,  9), CpuFeatures::X86::kVAES);
+    features.addIf(bitTest(regs.ecx, 10), CpuFeatures::X86::kVPCLMULQDQ);
+    features.addIf(bitTest(regs.ecx, 22), CpuFeatures::X86::kRDPID);
+    features.addIf(bitTest(regs.ecx, 25), CpuFeatures::X86::kCLDEMOTE);
+    features.addIf(bitTest(regs.ecx, 27), CpuFeatures::X86::kMOVDIRI);
+    features.addIf(bitTest(regs.ecx, 28), CpuFeatures::X86::kMOVDIR64B);
+    features.addIf(bitTest(regs.ecx, 29), CpuFeatures::X86::kENQCMD);
+    features.addIf(bitTest(regs.edx,  5), CpuFeatures::X86::kUINTR);
+    features.addIf(bitTest(regs.edx, 14), CpuFeatures::X86::kSERIALIZE);
+    features.addIf(bitTest(regs.edx, 16), CpuFeatures::X86::kTSXLDTRK);
+    features.addIf(bitTest(regs.edx, 18), CpuFeatures::X86::kPCONFIG);
+    features.addIf(bitTest(regs.edx, 20), CpuFeatures::X86::kCET_IBT);
+
+    // Detect 'TSX' - Requires at least one of `HLE` and `RTM` features.
+    if (features.hasHLE() || features.hasRTM())
+      features.add(CpuFeatures::X86::kTSX);
+
+    // Detect 'AVX2' - Requires AVX as well.
+    if (bitTest(regs.ebx, 5) && features.hasAVX())
+      features.add(CpuFeatures::X86::kAVX2);
+
+    // Detect 'AVX512'.
+    if (avx512EnabledByOS && bitTest(regs.ebx, 16)) {
+      features.add(CpuFeatures::X86::kAVX512_F);
+
+      features.addIf(bitTest(regs.ebx, 17), CpuFeatures::X86::kAVX512_DQ);
+      features.addIf(bitTest(regs.ebx, 21), CpuFeatures::X86::kAVX512_IFMA);
+      features.addIf(bitTest(regs.ebx, 26), CpuFeatures::X86::kAVX512_PFI);
+      features.addIf(bitTest(regs.ebx, 27), CpuFeatures::X86::kAVX512_ERI);
+      features.addIf(bitTest(regs.ebx, 28), CpuFeatures::X86::kAVX512_CDI);
+      features.addIf(bitTest(regs.ebx, 30), CpuFeatures::X86::kAVX512_BW);
+      features.addIf(bitTest(regs.ebx, 31), CpuFeatures::X86::kAVX512_VL);
+      features.addIf(bitTest(regs.ecx,  1), CpuFeatures::X86::kAVX512_VBMI);
+      features.addIf(bitTest(regs.ecx,  6), CpuFeatures::X86::kAVX512_VBMI2);
+      features.addIf(bitTest(regs.ecx, 11), CpuFeatures::X86::kAVX512_VNNI);
+      features.addIf(bitTest(regs.ecx, 12), CpuFeatures::X86::kAVX512_BITALG);
+      features.addIf(bitTest(regs.ecx, 14), CpuFeatures::X86::kAVX512_VPOPCNTDQ);
+      features.addIf(bitTest(regs.edx,  2), CpuFeatures::X86::kAVX512_4VNNIW);
+      features.addIf(bitTest(regs.edx,  3), CpuFeatures::X86::kAVX512_4FMAPS);
+      features.addIf(bitTest(regs.edx,  8), CpuFeatures::X86::kAVX512_VP2INTERSECT);
+      features.addIf(bitTest(regs.edx, 23), CpuFeatures::X86::kAVX512_FP16);
+    }
+
+    // Detect 'AMX'.
+    if (amxEnabledByOS) {
+      features.addIf(bitTest(regs.edx, 22), CpuFeatures::X86::kAMX_BF16);
+      features.addIf(bitTest(regs.edx, 24), CpuFeatures::X86::kAMX_TILE);
+      features.addIf(bitTest(regs.edx, 25), CpuFeatures::X86::kAMX_INT8);
+    }
+  }
+
+  // CPUID EAX=7 ECX=1
+  // -----------------
+
+  if (features.hasAVX512_F() && maxSubLeafId_0x7 >= 1) {
+    cpuidQuery(&regs, 0x7, 1);
+
+    features.addIf(bitTest(regs.eax,  3), CpuFeatures::X86::kAVX_VNNI);
+    features.addIf(bitTest(regs.eax,  5), CpuFeatures::X86::kAVX512_BF16);
+    features.addIf(bitTest(regs.eax, 22), CpuFeatures::X86::kHRESET);
+  }
+
+  // CPUID EAX=13 ECX=0
+  // ------------------
+
+  if (maxId >= 0xD) {
+    cpuidQuery(&regs, 0xD, 0);
+
+    // Both CPUID result and XCR0 has to be enabled to have support for MPX.
+    if (((regs.eax & xcr0.eax) & 0x00000018u) == 0x00000018u && maybeMPX)
+      features.add(CpuFeatures::X86::kMPX);
+
+    cpuidQuery(&regs, 0xD, 1);
+
+    features.addIf(bitTest(regs.eax, 0), CpuFeatures::X86::kXSAVEOPT);
+    features.addIf(bitTest(regs.eax, 1), CpuFeatures::X86::kXSAVEC);
+    features.addIf(bitTest(regs.eax, 3), CpuFeatures::X86::kXSAVES);
+  }
+
+  // CPUID EAX=14 ECX=0
+  // ------------------
+
+  if (maxId >= 0xE) {
+    cpuidQuery(&regs, 0xE, 0);
+
+    features.addIf(bitTest(regs.ebx, 4), CpuFeatures::X86::kPTWRITE);
+  }
+
+  // CPUID EAX=0x80000000...maxId
+  // ----------------------------
+
+  maxId = 0x80000000u;
+  uint32_t i = maxId;
+
+  // The highest EAX that we understand.
+  constexpr uint32_t kHighestProcessedEAX = 0x8000001Fu;
+
+  // Several CPUID calls are required to get the whole brand string. It's easier
+  // to copy one DWORD at a time instead of copying the string byte by byte.
+  uint32_t* brand = cpu._brand.u32;
+  do {
+    cpuidQuery(&regs, i);
+    switch (i) {
+      case 0x80000000u:
+        maxId = Support::min<uint32_t>(regs.eax, kHighestProcessedEAX);
+        break;
+
+      case 0x80000001u:
+        features.addIf(bitTest(regs.ecx,  0), CpuFeatures::X86::kLAHFSAHF);
+        features.addIf(bitTest(regs.ecx,  2), CpuFeatures::X86::kSVM);
+        features.addIf(bitTest(regs.ecx,  5), CpuFeatures::X86::kLZCNT);
+        features.addIf(bitTest(regs.ecx,  6), CpuFeatures::X86::kSSE4A);
+        features.addIf(bitTest(regs.ecx,  7), CpuFeatures::X86::kMSSE);
+        features.addIf(bitTest(regs.ecx,  8), CpuFeatures::X86::kPREFETCHW);
+        features.addIf(bitTest(regs.ecx, 12), CpuFeatures::X86::kSKINIT);
+        features.addIf(bitTest(regs.ecx, 15), CpuFeatures::X86::kLWP);
+        features.addIf(bitTest(regs.ecx, 21), CpuFeatures::X86::kTBM);
+        features.addIf(bitTest(regs.ecx, 29), CpuFeatures::X86::kMONITORX);
+        features.addIf(bitTest(regs.edx, 20), CpuFeatures::X86::kNX);
+        features.addIf(bitTest(regs.edx, 21), CpuFeatures::X86::kFXSROPT);
+        features.addIf(bitTest(regs.edx, 22), CpuFeatures::X86::kMMX2);
+        features.addIf(bitTest(regs.edx, 27), CpuFeatures::X86::kRDTSCP);
+        features.addIf(bitTest(regs.edx, 29), CpuFeatures::X86::kPREFETCHW);
+        features.addIf(bitTest(regs.edx, 30), CpuFeatures::X86::k3DNOW2, CpuFeatures::X86::kMMX2);
+        features.addIf(bitTest(regs.edx, 31), CpuFeatures::X86::kPREFETCHW);
+
+        if (features.hasAVX()) {
+          features.addIf(bitTest(regs.ecx, 11), CpuFeatures::X86::kXOP);
+          features.addIf(bitTest(regs.ecx, 16), CpuFeatures::X86::kFMA4);
+        }
+
+        // This feature seems to be only supported by AMD.
+        if (cpu.isVendor("AMD")) {
+          features.addIf(bitTest(regs.ecx,  4), CpuFeatures::X86::kALTMOVCR8);
+        }
+        break;
+
+      case 0x80000002u:
+      case 0x80000003u:
+      case 0x80000004u:
+        *brand++ = regs.eax;
+        *brand++ = regs.ebx;
+        *brand++ = regs.ecx;
+        *brand++ = regs.edx;
+
+        // Go directly to the next one we are interested in.
+        if (i == 0x80000004u)
+          i = 0x80000008u - 1;
+        break;
+
+      case 0x80000008u:
+        features.addIf(bitTest(regs.ebx,  0), CpuFeatures::X86::kCLZERO);
+        features.addIf(bitTest(regs.ebx,  0), CpuFeatures::X86::kRDPRU);
+        features.addIf(bitTest(regs.ebx,  8), CpuFeatures::X86::kMCOMMIT);
+        features.addIf(bitTest(regs.ebx,  9), CpuFeatures::X86::kWBNOINVD);
+
+        // Go directly to the next one we are interested in.
+        i = 0x8000001Fu - 1;
+        break;
+
+      case 0x8000001Fu:
+        features.addIf(bitTest(regs.eax,  4), CpuFeatures::X86::kSNP);
+        break;
+    }
+  } while (++i <= maxId);
+
+  // Simplify CPU brand string a bit by removing some unnecessary spaces.
+  simplifyCpuBrand(cpu._brand.str);
+}
+
+#endif // ASMJIT_ARCH_X86
+
+// CpuInfo - Detect - ARM
+// ======================
+
+// The most relevant and accurate information can be found here:
+//   https://github.com/llvm-project/llvm/blob/master/lib/Target/AArch64/AArch64.td
+//   https://github.com/apple/llvm-project/blob/apple/main/llvm/lib/Target/AArch64/AArch64.td (Apple fork)
+//
+// Other resources:
+//   https://en.wikipedia.org/wiki/AArch64
+//   https://en.wikipedia.org/wiki/Apple_silicon#List_of_Apple_processors
+//   https://developer.arm.com/architectures/learn-the-architecture/understanding-the-armv8-x-extensions/single-page
+
+#if ASMJIT_ARCH_ARM
+
+static inline void populateBaseARMFeatures(CpuInfo& cpu) noexcept { // Adds features implied by the build target.
+#if ASMJIT_ARCH_ARM == 32
+  // 32-bit ARM: no baseline flags at the moment, `cpu` is only passed to `DebugUtils::unused()`.
+  DebugUtils::unused(cpu);
+#else
+  // AArch64 is based on ARMv8-A and later, so the older architecture versions are reported as well.
+  cpu.addFeature(CpuFeatures::ARM::kARMv6);
+  cpu.addFeature(CpuFeatures::ARM::kARMv7);
+  cpu.addFeature(CpuFeatures::ARM::kARMv8a);
+
+  // AArch64 comes with these features by default.
+  cpu.addFeature(CpuFeatures::ARM::kVFPv2);
+  cpu.addFeature(CpuFeatures::ARM::kVFPv3);
+  cpu.addFeature(CpuFeatures::ARM::kVFPv4);
+  cpu.addFeature(CpuFeatures::ARM::kASIMD);
+  cpu.addFeature(CpuFeatures::ARM::kIDIVA);
+#endif // ASMJIT_ARCH_ARM == 32
+}
+
+// Detects ARM features by inspecting macros predefined by the compiler. This means that AsmJit will report
+// features that were forced at compile time, which the target CPU is therefore expected to provide. It also means
+// that when no other means of detecting CPU features is available, the features reported by AsmJit will at least
+// never be fewer than the features of the target the code was compiled for.
+ASMJIT_MAYBE_UNUSED
+static ASMJIT_FAVOR_SIZE void detectARMFeaturesViaCompilerFlags(CpuInfo& cpu) noexcept {
+  DebugUtils::unused(cpu); // Presumably avoids an unused-parameter warning when none of the macros below is defined.
+
+#if ASMJIT_ARCH_ARM == 32
+
+  // 32-bit ARM targets have no baseline at the moment, so even the architecture version comes from macros.
+# if defined(__ARM_ARCH_7A__)
+  cpu.addFeature(CpuFeatures::ARM::kARMv7);
+# endif
+# if defined(__ARM_ARCH_8A__)
+  cpu.addFeature(CpuFeatures::ARM::kARMv8a);
+# endif
+
+# if defined(__TARGET_ARCH_THUMB)
+  cpu.addFeature(CpuFeatures::ARM::kTHUMB);
+# if __TARGET_ARCH_THUMB >= 4
+  cpu.addFeature(CpuFeatures::ARM::kTHUMBv2);
+# endif
+# endif
+
+# if defined(__ARM_FEATURE_FMA)
+  cpu.addFeature(CpuFeatures::ARM::kVFPv3);
+  cpu.addFeature(CpuFeatures::ARM::kVFPv4);
+# endif
+
+# if defined(__ARM_NEON)
+  cpu.addFeature(CpuFeatures::ARM::kASIMD);
+# endif
+
+# if defined(__ARM_FEATURE_IDIV) && defined(__TARGET_ARCH_THUMB)
+  cpu.addFeature(CpuFeatures::ARM::kIDIVT);
+#endif
+# if defined(__ARM_FEATURE_IDIV) && !defined(__TARGET_ARCH_THUMB)
+  cpu.addFeature(CpuFeatures::ARM::kIDIVA);
+# endif
+
+#endif // ASMJIT_ARCH_ARM == 32
+
+#if defined(__ARM_ARCH_8_1A__)
+  cpu.addFeature(CpuFeatures::ARM::kARMv8_1a);
+#endif
+#if defined(__ARM_ARCH_8_2A__)
+  cpu.addFeature(CpuFeatures::ARM::kARMv8_2a);
+#endif
+#if defined(__ARM_ARCH_8_3A__)
+  cpu.addFeature(CpuFeatures::ARM::kARMv8_3a);
+#endif
+#if defined(__ARM_ARCH_8_4A__)
+  cpu.addFeature(CpuFeatures::ARM::kARMv8_4a);
+#endif
+#if defined(__ARM_ARCH_8_5A__)
+  cpu.addFeature(CpuFeatures::ARM::kARMv8_5a);
+#endif
+#if defined(__ARM_ARCH_8_6A__)
+  cpu.addFeature(CpuFeatures::ARM::kARMv8_6a);
+#endif
+#if defined(__ARM_ARCH_8_7A__)
+  cpu.addFeature(CpuFeatures::ARM::kARMv8_7a);
+#endif
+
+#if defined(__ARM_FEATURE_AES)
+  cpu.addFeature(CpuFeatures::ARM::kAES);
+#endif
+
+#if defined(__ARM_FEATURE_BF16_SCALAR_ARITHMETIC) && defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC)
+  cpu.addFeature(CpuFeatures::ARM::kBF16);
+#endif
+
+#if defined(__ARM_FEATURE_CRC32)
+  cpu.addFeature(CpuFeatures::ARM::kCRC32);
+#endif
+
+#if defined(__ARM_FEATURE_CRYPTO)
+  cpu.addFeature(CpuFeatures::ARM::kAES,
+                 CpuFeatures::ARM::kSHA1,
+                 CpuFeatures::ARM::kSHA2);
+#endif
+
+#if defined(__ARM_FEATURE_DOTPROD)
+  cpu.addFeature(CpuFeatures::ARM::kDOTPROD);
+#endif
+
+#if defined(__ARM_FEATURE_FP16FML) || defined(__ARM_FEATURE_FP16_FML)
+  cpu.addFeature(CpuFeatures::ARM::kFP16FML);
+#endif
+
+#if defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC)
+  cpu.addFeature(CpuFeatures::ARM::kFP16FULL);
+#endif
+
+#if defined(__ARM_FEATURE_FRINT)
+  cpu.addFeature(CpuFeatures::ARM::kFRINT);
+#endif
+
+#if defined(__ARM_FEATURE_JCVT)
+  cpu.addFeature(CpuFeatures::ARM::kFJCVTZS);
+#endif
+
+#if defined(__ARM_FEATURE_MATMUL_INT8)
+  cpu.addFeature(CpuFeatures::ARM::kI8MM);
+#endif
+
+#if defined(__ARM_FEATURE_ATOMICS)
+  cpu.addFeature(CpuFeatures::ARM::kLSE);
+#endif
+
+#if defined(__ARM_FEATURE_MEMORY_TAGGING)
+  cpu.addFeature(CpuFeatures::ARM::kMTE);
+#endif
+
+#if defined(__ARM_FEATURE_QRDMX)
+  cpu.addFeature(CpuFeatures::ARM::kRDM);
+#endif
+
+#if defined(__ARM_FEATURE_RNG)
+  cpu.addFeature(CpuFeatures::ARM::kRNG);
+#endif
+
+#if defined(__ARM_FEATURE_SHA2)
+  cpu.addFeature(CpuFeatures::ARM::kSHA2);
+#endif
+
+#if defined(__ARM_FEATURE_SHA3)
+  cpu.addFeature(CpuFeatures::ARM::kSHA3);
+#endif
+
+#if defined(__ARM_FEATURE_SHA512)
+  cpu.addFeature(CpuFeatures::ARM::kSHA512);
+#endif
+
+#if defined(__ARM_FEATURE_SM3)
+  cpu.addFeature(CpuFeatures::ARM::kSM3);
+#endif
+
+#if defined(__ARM_FEATURE_SM4)
+  cpu.addFeature(CpuFeatures::ARM::kSM4);
+#endif
+
+#if defined(__ARM_FEATURE_SVE) || defined(__ARM_FEATURE_SVE_VECTOR_OPERATORS)
+  cpu.addFeature(CpuFeatures::ARM::kSVE);
+#endif
+
+#if defined(__ARM_FEATURE_SVE_MATMUL_INT8)
+  cpu.addFeature(CpuFeatures::ARM::kSVE_I8MM);
+#endif
+
+#if defined(__ARM_FEATURE_SVE_MATMUL_FP32)
+  cpu.addFeature(CpuFeatures::ARM::kSVE_F32MM);
+#endif
+
+#if defined(__ARM_FEATURE_SVE_MATMUL_FP64)
+  cpu.addFeature(CpuFeatures::ARM::kSVE_F64MM);
+#endif
+
+#if defined(__ARM_FEATURE_SVE2)
+  cpu.addFeature(CpuFeatures::ARM::kSVE2);
+#endif
+
+#if defined(__ARM_FEATURE_SVE2_AES)
+  cpu.addFeature(CpuFeatures::ARM::kSVE2_AES);
+#endif
+
+#if defined(__ARM_FEATURE_SVE2_BITPERM)
+  cpu.addFeature(CpuFeatures::ARM::kSVE2_BITPERM);
+#endif
+
+#if defined(__ARM_FEATURE_SVE2_SHA3)
+  cpu.addFeature(CpuFeatures::ARM::kSVE2_SHA3);
+#endif
+
+#if defined(__ARM_FEATURE_SVE2_SM4)
+  cpu.addFeature(CpuFeatures::ARM::kSVE2_SM4);
+#endif
+
+#if defined(__ARM_FEATURE_TME)
+  cpu.addFeature(CpuFeatures::ARM::kTME);
+#endif
+}
+
+ASMJIT_MAYBE_UNUSED // Expands ARMv8.x version flags in `cpu` into the older versions and features each one implies.
+static ASMJIT_FAVOR_SIZE void expandARMFeaturesByVersion(CpuInfo& cpu) noexcept {
+  CpuFeatures::ARM& features = cpu.features().arm();
+
+  if (features.hasARMv8_7a()) { // Checks go from newest to oldest so each added version is expanded by a later check.
+    features.add(CpuFeatures::ARM::kARMv8_6a);
+  }
+
+  if (features.hasARMv8_6a()) {
+    features.add(CpuFeatures::ARM::kARMv8_5a,
+                 CpuFeatures::ARM::kBF16);
+
+    if (features.hasSVE()) // SVE_I8MM is only added when SVE itself is already present.
+      features.add(CpuFeatures::ARM::kSVE_I8MM);
+  }
+
+  if (features.hasARMv8_5a()) {
+    features.add(CpuFeatures::ARM::kARMv8_4a,
+                 CpuFeatures::ARM::kALTNZCV,
+                 CpuFeatures::ARM::kBTI,
+                 CpuFeatures::ARM::kFRINT,
+                 CpuFeatures::ARM::kSB,
+                 CpuFeatures::ARM::kSSBS);
+  }
+
+  if (features.hasARMv8_4a()) {
+    features.add(CpuFeatures::ARM::kARMv8_3a,
+                 CpuFeatures::ARM::kDIT,
+                 CpuFeatures::ARM::kDOTPROD,
+                 CpuFeatures::ARM::kFLAGM,
+                 CpuFeatures::ARM::kPMU,
+                 CpuFeatures::ARM::kRCPC_IMMO);
+  }
+
+  if (features.hasARMv8_3a()) {
+    features.add(CpuFeatures::ARM::kARMv8_2a,
+                 CpuFeatures::ARM::kFCMA,
+                 CpuFeatures::ARM::kFJCVTZS);
+  }
+
+  if (features.hasARMv8_2a()) {
+    features.add(CpuFeatures::ARM::kARMv8_1a);
+  }
+
+  if (features.hasARMv8_1a()) {
+    features.add(CpuFeatures::ARM::kARMv8a,
+                 CpuFeatures::ARM::kCRC32,
+                 CpuFeatures::ARM::kLSE,
+                 CpuFeatures::ARM::kRDM);
+  }
+
+  if (features.hasARMv8a()) { // End of the chain: ARMv8-A adds ARMv7 plus the VFP, ASIMD, and IDIVA features below.
+    features.add(CpuFeatures::ARM::kARMv7,
+                 CpuFeatures::ARM::kVFPv2,
+                 CpuFeatures::ARM::kVFPv3,
+                 CpuFeatures::ARM::kVFPv4,
+                 CpuFeatures::ARM::kVFP_D32,
+                 CpuFeatures::ARM::kASIMD,
+                 CpuFeatures::ARM::kIDIVA);
+  }
+}
+
+// CpuInfo - Detect - ARM [Windows]
+// ================================
+
+#if defined(_WIN32)
+struct WinPFPMapping { // Pairs an AsmJit feature id with the Windows processor-feature id that reports it.
+  uint8_t featureId; // Feature id passed to `CpuInfo::addFeature()` by `detectPFPFeatures()`.
+  uint8_t pfpFeatureId; // Id passed to `IsProcessorFeaturePresent()` by `detectPFPFeatures()`.
+};
+
+static ASMJIT_FAVOR_SIZE void detectPFPFeatures(CpuInfo& cpu, const WinPFPMapping* mapping, size_t size) noexcept {
+  for (size_t i = 0; i < size; i++) // Walks all `size` entries of `mapping`.
+    if (::IsProcessorFeaturePresent(mapping[i].pfpFeatureId)) // Adds the feature only when Windows reports it.
+      cpu.addFeature(mapping[i].featureId);
+}
+
+//! Detects ARM CPU features on Windows.
+//!
+//! Starts from the baseline features and extends them by probing `IsProcessorFeaturePresent()` via `mapping`.
+static ASMJIT_FAVOR_SIZE void detectARMCpu(CpuInfo& cpu) noexcept {
+  cpu._wasDetected = true;
+  populateBaseARMFeatures(cpu);
+
+  CpuFeatures::ARM& features = cpu.features().arm();
+
+  // Win32 for ARM requires ARMv7 with DSP extensions, VFPv3, and uses THUMBv2 by default.
+#if ASMJIT_ARCH_ARM == 32
+  features.add(CpuFeatures::ARM::kTHUMB);
+  features.add(CpuFeatures::ARM::kTHUMBv2);
+  features.add(CpuFeatures::ARM::kARMv6);
+  features.add(CpuFeatures::ARM::kARMv7);
+  features.add(CpuFeatures::ARM::kEDSP);
+  features.add(CpuFeatures::ARM::kVFPv2);
+  features.add(CpuFeatures::ARM::kVFPv3);
+#endif
+
+  // Windows for ARM requires ASIMD.
+  features.add(CpuFeatures::ARM::kASIMD);
+
+  // Detect additional CPU features by calling `IsProcessorFeaturePresent()`; the first four entries are 32-bit only.
+  static const WinPFPMapping mapping[] = {
+#if ASMJIT_ARCH_ARM == 32
+    { uint8_t(CpuFeatures::ARM::kVFP_D32)  , 18 }, // PF_ARM_VFP_32_REGISTERS_AVAILABLE
+    { uint8_t(CpuFeatures::ARM::kIDIVT)    , 24 }, // PF_ARM_DIVIDE_INSTRUCTION_AVAILABLE
+    { uint8_t(CpuFeatures::ARM::kVFPv4)    , 27 }, // PF_ARM_FMAC_INSTRUCTIONS_AVAILABLE
+    { uint8_t(CpuFeatures::ARM::kARMv8a)   , 29 }, // PF_ARM_V8_INSTRUCTIONS_AVAILABLE
+#endif
+    { uint8_t(CpuFeatures::ARM::kAES)      , 30 }, // PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE
+    { uint8_t(CpuFeatures::ARM::kCRC32)    , 31 }, // PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE
+    { uint8_t(CpuFeatures::ARM::kLSE)      , 34 }  // PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE
+
+  };
+  detectPFPFeatures(cpu, mapping, ASMJIT_ARRAY_SIZE(mapping));
+
+  // Windows provides several instructions under a single flag (the crypto flag is mapped to AES above):
+  if (features.hasAES()) {
+    features.add(CpuFeatures::ARM::kSHA1,
+                 CpuFeatures::ARM::kSHA2);
+  }
+
+  expandARMFeaturesByVersion(cpu); // Adds everything implied by the detected architecture version flags.
+}
+
+// CpuInfo - Detect - ARM [Linux]
+// ==============================
+
+#elif defined(__linux__)
+
+struct LinuxHWCapMapping { // Pairs an AsmJit feature id with the bit index that reports it in a hwcap mask.
+  uint8_t featureId; // Feature id added by `detectHWCaps()` when the bit is set.
+  uint8_t hwCapBit; // Index of the bit tested in the mask returned by `getauxval()`.
+};
+
+static ASMJIT_FAVOR_SIZE void detectHWCaps(CpuInfo& cpu, unsigned long type, const LinuxHWCapMapping* mapping, size_t size) noexcept {
+  unsigned long mask = getauxval(type); // `type` is an auxiliary vector key; callers pass AT_HWCAP or AT_HWCAP2.
+  for (size_t i = 0; i < size; i++) // Adds every feature from `mapping` whose bit is set in `mask`.
+    cpu.features().addIf(Support::bitTest(mask, mapping[i].hwCapBit), mapping[i].featureId);
+}
+
+#if ASMJIT_ARCH_ARM == 32
+
+// `AT_HWCAP` provides ARMv7 (and less) related flags - 32-bit ARM table of { feature id, hwcap bit index } pairs.
+static const LinuxHWCapMapping hwCapMapping[] = {
+  { uint8_t(CpuFeatures::ARM::kVFPv2)       , 6  }, // HWCAP_VFP
+  { uint8_t(CpuFeatures::ARM::kEDSP)        , 7  }, // HWCAP_EDSP
+  { uint8_t(CpuFeatures::ARM::kASIMD)       , 12 }, // HWCAP_NEON
+  { uint8_t(CpuFeatures::ARM::kVFPv3)       , 13 }, // HWCAP_VFPv3
+  { uint8_t(CpuFeatures::ARM::kVFPv4)       , 16 }, // HWCAP_VFPv4
+  { uint8_t(CpuFeatures::ARM::kIDIVA)       , 17 }, // HWCAP_IDIVA
+  { uint8_t(CpuFeatures::ARM::kIDIVT)       , 18 }, // HWCAP_IDIVT
+  { uint8_t(CpuFeatures::ARM::kVFP_D32)     , 19 }  // HWCAP_VFPD32
+};
+
+// `AT_HWCAP2` provides ARMv8+ related flags - 32-bit ARM table of { feature id, hwcap2 bit index } pairs.
+static const LinuxHWCapMapping hwCap2Mapping[] = {
+  { uint8_t(CpuFeatures::ARM::kAES)         , 0  }, // HWCAP2_AES
+  { uint8_t(CpuFeatures::ARM::kPMULL)       , 1  }, // HWCAP2_PMULL
+  { uint8_t(CpuFeatures::ARM::kSHA1)        , 2  }, // HWCAP2_SHA1
+  { uint8_t(CpuFeatures::ARM::kSHA2)        , 3  }, // HWCAP2_SHA2
+  { uint8_t(CpuFeatures::ARM::kCRC32)       , 4  }  // HWCAP2_CRC32
+};
+
+static ASMJIT_FAVOR_SIZE void detectARMCpu(CpuInfo& cpu) noexcept { // Linux, 32-bit ARM: detection via hwcaps.
+  cpu._wasDetected = true;
+
+  populateBaseARMFeatures(cpu);
+
+  CpuFeatures::ARM& features = cpu.features().arm();
+
+  detectHWCaps(cpu, AT_HWCAP, hwCapMapping, ASMJIT_ARRAY_SIZE(hwCapMapping));
+  detectHWCaps(cpu, AT_HWCAP2, hwCap2Mapping, ASMJIT_ARRAY_SIZE(hwCap2Mapping));
+
+  // VFPv3 implies VFPv2.
+  if (features.hasVFPv3())
+    features.add(CpuFeatures::ARM::kVFPv2);
+
+  // VFPv2 implies ARMv6.
+  if (features.hasVFPv2())
+    features.add(CpuFeatures::ARM::kARMv6);
+
+  // ARMv7 provides VFPv3|ASIMD, so the presence of either one implies ARMv7.
+  if (features.hasVFPv3() || features.hasASIMD())
+    features.add(CpuFeatures::ARM::kARMv7);
+
+  // ARMv8 provides AES, CRC32, PMULL, SHA1, and SHA2, so the presence of any of them implies ARMv8-A.
+  if (features.hasAES() || features.hasCRC32() || features.hasPMULL() || features.hasSHA1() || features.hasSHA2())
+    features.add(CpuFeatures::ARM::kARMv8a);
+}
+
+#else
+
+// `AT_HWCAP` provides ARMv8+ related flags (AArch64 table; commented-out entries have no feature id assigned here).
+static const LinuxHWCapMapping hwCapMapping[] = {
+  /*
+  { uint8_t(CpuFeatures::ARM::k)            , 0  }, // HWCAP_FP
+  */
+  { uint8_t(CpuFeatures::ARM::kASIMD)       , 1  }, // HWCAP_ASIMD
+  /*
+  { uint8_t(CpuFeatures::ARM::k)            , 2  }, // HWCAP_EVTSTRM
+  */
+  { uint8_t(CpuFeatures::ARM::kAES)         , 3  }, // HWCAP_AES
+  { uint8_t(CpuFeatures::ARM::kPMULL)       , 4  }, // HWCAP_PMULL
+  { uint8_t(CpuFeatures::ARM::kSHA1)        , 5  }, // HWCAP_SHA1
+  { uint8_t(CpuFeatures::ARM::kSHA2)        , 6  }, // HWCAP_SHA2
+  { uint8_t(CpuFeatures::ARM::kCRC32)       , 7  }, // HWCAP_CRC32
+  { uint8_t(CpuFeatures::ARM::kLSE)         , 8  }, // HWCAP_ATOMICS
+  { uint8_t(CpuFeatures::ARM::kFP16CONV)    , 9  }, // HWCAP_FPHP
+  { uint8_t(CpuFeatures::ARM::kFP16FULL)    , 10 }, // HWCAP_ASIMDHP
+  { uint8_t(CpuFeatures::ARM::kCPUID)       , 11 }, // HWCAP_CPUID
+  { uint8_t(CpuFeatures::ARM::kRDM)         , 12 }, // HWCAP_ASIMDRDM
+  { uint8_t(CpuFeatures::ARM::kFJCVTZS)     , 13 }, // HWCAP_JSCVT
+  { uint8_t(CpuFeatures::ARM::kFCMA)        , 14 }, // HWCAP_FCMA
+  /*
+  { uint8_t(CpuFeatures::ARM::k)            , 15 }, // HWCAP_LRCPC
+  { uint8_t(CpuFeatures::ARM::k)            , 16 }, // HWCAP_DCPOP
+  */
+  { uint8_t(CpuFeatures::ARM::kSHA3)        , 17 }, // HWCAP_SHA3
+  { uint8_t(CpuFeatures::ARM::kSM3)         , 18 }, // HWCAP_SM3
+  { uint8_t(CpuFeatures::ARM::kSM4)         , 19 }, // HWCAP_SM4
+  { uint8_t(CpuFeatures::ARM::kDOTPROD)     , 20 }, // HWCAP_ASIMDDP
+  { uint8_t(CpuFeatures::ARM::kSHA512)      , 21 }, // HWCAP_SHA512
+  { uint8_t(CpuFeatures::ARM::kSVE)         , 22 }, // HWCAP_SVE
+  { uint8_t(CpuFeatures::ARM::kFP16FML)     , 23 }, // HWCAP_ASIMDFHM
+  { uint8_t(CpuFeatures::ARM::kDIT)         , 24 }, // HWCAP_DIT
+  /*
+  { uint8_t(CpuFeatures::ARM::k)            , 25 }, // HWCAP_USCAT
+  { uint8_t(CpuFeatures::ARM::k)            , 26 }, // HWCAP_ILRCPC
+  */
+  { uint8_t(CpuFeatures::ARM::kFLAGM)       , 27 }, // HWCAP_FLAGM
+  { uint8_t(CpuFeatures::ARM::kSSBS)        , 28 }, // HWCAP_SSBS
+  { uint8_t(CpuFeatures::ARM::kSB)          , 29 }  // HWCAP_SB
+  /*
+  { uint8_t(CpuFeatures::ARM::k)            , 30 }, // HWCAP_PACA
+  { uint8_t(CpuFeatures::ARM::k)            , 31 }  // HWCAP_PACG
+  */
+};
+
+// `AT_HWCAP2` provides ARMv8+ related flags (AArch64 table; commented-out entries have no feature id assigned here).
+static const LinuxHWCapMapping hwCapMapping2[] = {
+  /*
+  { uint8_t(CpuFeatures::ARM::k)            , 0  }, // HWCAP2_DCPODP
+  */
+  { uint8_t(CpuFeatures::ARM::kSVE2)        , 1  }, // HWCAP2_SVE2
+  { uint8_t(CpuFeatures::ARM::kSVE2_AES)    , 2  }, // HWCAP2_SVEAES
+  { uint8_t(CpuFeatures::ARM::kSVE_PMULL)   , 3  }, // HWCAP2_SVEPMULL
+  { uint8_t(CpuFeatures::ARM::kSVE2_BITPERM), 4  }, // HWCAP2_SVEBITPERM
+  { uint8_t(CpuFeatures::ARM::kSVE2_SHA3)   , 5  }, // HWCAP2_SVESHA3
+  { uint8_t(CpuFeatures::ARM::kSVE2_SM4)    , 6  }, // HWCAP2_SVESM4
+  { uint8_t(CpuFeatures::ARM::kALTNZCV)     , 7  }, // HWCAP2_FLAGM2
+  { uint8_t(CpuFeatures::ARM::kFRINT)       , 8  }, // HWCAP2_FRINT
+  { uint8_t(CpuFeatures::ARM::kSVE_I8MM)    , 9  }, // HWCAP2_SVEI8MM
+  { uint8_t(CpuFeatures::ARM::kSVE_F32MM)   , 10 }, // HWCAP2_SVEF32MM
+  { uint8_t(CpuFeatures::ARM::kSVE_F64MM)   , 11 }, // HWCAP2_SVEF64MM
+  { uint8_t(CpuFeatures::ARM::kSVE_BF16)    , 12 }, // HWCAP2_SVEBF16
+  { uint8_t(CpuFeatures::ARM::kI8MM)        , 13 }, // HWCAP2_I8MM
+  { uint8_t(CpuFeatures::ARM::kBF16)        , 14 }, // HWCAP2_BF16
+  { uint8_t(CpuFeatures::ARM::kDGH)         , 15 }, // HWCAP2_DGH
+  { uint8_t(CpuFeatures::ARM::kRNG)         , 16 }, // HWCAP2_RNG
+  { uint8_t(CpuFeatures::ARM::kBTI)         , 17 }, // HWCAP2_BTI
+  { uint8_t(CpuFeatures::ARM::kMTE)         , 18 }  // HWCAP2_MTE
+};
+
+static ASMJIT_FAVOR_SIZE void detectARMCpu(CpuInfo& cpu) noexcept { // Linux, AArch64: detection via hwcaps.
+  cpu._wasDetected = true;
+  populateBaseARMFeatures(cpu); // Baseline first; the hwcap tables below only add features on top of it.
+
+  detectHWCaps(cpu, AT_HWCAP, hwCapMapping, ASMJIT_ARRAY_SIZE(hwCapMapping));
+  detectHWCaps(cpu, AT_HWCAP2, hwCapMapping2, ASMJIT_ARRAY_SIZE(hwCapMapping2));
+}
+
+#endif
+
+// CpuInfo - Detect - ARM [Apple]
+// ==============================
+
+#elif defined(__APPLE__)
+
+namespace AppleHWId { // CPU family ids compared against the value of the `hw.cpufamily` sysctl.
+  enum CpuFamily : uint32_t {
+    // Generic ARM.
+    kCpuFamily_ARM_9              = 0xE73283AEu,
+    kCpuFamily_ARM_11             = 0x8FF620D8u,
+    kCpuFamily_ARM_12             = 0xBD1B0AE9u,
+    kCpuFamily_ARM_13             = 0x0CC90E64u,
+    kCpuFamily_ARM_14             = 0x96077EF1u,
+    kCpuFamily_ARM_15             = 0xA8511BCAu,
+
+    // Apple design.
+    kCpuFamily_SWIFT              = 0x1E2D6381u,
+    kCpuFamily_CYCLONE            = 0x37A09642u,
+    kCpuFamily_TYPHOON            = 0x2C91A47Eu,
+    kCpuFamily_TWISTER            = 0x92FB37C8u,
+    kCpuFamily_HURRICANE          = 0x67CEEE93u,
+    kCpuFamily_MONSOON_MISTRAL    = 0xE81E7EF6u,
+    kCpuFamily_VORTEX_TEMPEST     = 0x07D34B9Fu,
+    kCpuFamily_LIGHTNING_THUNDER  = 0x462504D2u,
+    kCpuFamily_FIRESTORM_ICESTORM = 0x1B588BB3u
+  };
+};
+
+static ASMJIT_FAVOR_SIZE uint32_t queryARMCpuFamilyId() noexcept { // Returns the `hw.cpufamily` value, 0 on failure.
+  uint32_t result = 0;
+  size_t size = sizeof(result); // In/out size of the buffer passed to `sysctlbyname()`.
+
+  int res = sysctlbyname("hw.cpufamily", &result, &size, nullptr, 0);
+  if (res != 0) // Non-zero `res` is treated as a failed query.
+    return 0;
+  else
+    return result;
+}
+
+static ASMJIT_FAVOR_SIZE void detectARMCpu(CpuInfo& cpu) noexcept { // Apple: detection via the CPU family id.
+  cpu._wasDetected = true; // Reset to false below when the family id is not recognized.
+  populateBaseARMFeatures(cpu);
+
+  uint32_t cpuFamilyId = queryARMCpuFamilyId();
+  CpuFeatures::ARM& features = cpu.features().arm();
+
+  switch (cpuFamilyId) {
+    case AppleHWId::kCpuFamily_ARM_9:
+    case AppleHWId::kCpuFamily_ARM_11:
+    case AppleHWId::kCpuFamily_ARM_12:
+      break;
+
+    // ARM Cortex A8.
+    case AppleHWId::kCpuFamily_ARM_13:
+      break;
+
+    // ARM Cortex A9.
+    case AppleHWId::kCpuFamily_ARM_14:
+      break;
+
+    // ARM Cortex A7 - ARMv7k.
+    case AppleHWId::kCpuFamily_ARM_15:
+      features.add(CpuFeatures::ARM::kARMv7);
+      break;
+
+    // Apple A6/A6X - ARMv7s.
+    case AppleHWId::kCpuFamily_SWIFT:
+      features.add(CpuFeatures::ARM::kARMv7);
+      break;
+
+    // Apple A7 - ARMv8.0-A.
+    case AppleHWId::kCpuFamily_CYCLONE:
+      features.add(CpuFeatures::ARM::kARMv8a,
+                   CpuFeatures::ARM::kAES,
+                   CpuFeatures::ARM::kSHA1,
+                   CpuFeatures::ARM::kSHA2);
+      break;
+
+    // Apple A8 - ARMv8.0-A.
+    case AppleHWId::kCpuFamily_TYPHOON:
+      features.add(CpuFeatures::ARM::kARMv8a,
+                   CpuFeatures::ARM::kAES,
+                   CpuFeatures::ARM::kSHA1,
+                   CpuFeatures::ARM::kSHA2);
+      break;
+
+    // Apple A9 - ARMv8.0-A.
+    case AppleHWId::kCpuFamily_TWISTER:
+      features.add(CpuFeatures::ARM::kARMv8a,
+                   CpuFeatures::ARM::kAES,
+                   CpuFeatures::ARM::kSHA1,
+                   CpuFeatures::ARM::kSHA2);
+      break;
+
+    // Apple A10 - ARMv8.1-A.
+    case AppleHWId::kCpuFamily_HURRICANE:
+      features.add(CpuFeatures::ARM::kARMv8_1a,
+                   CpuFeatures::ARM::kAES,
+                   CpuFeatures::ARM::kRDM,
+                   CpuFeatures::ARM::kSHA1,
+                   CpuFeatures::ARM::kSHA2);
+
+      break;
+
+    // Apple A11 - ARMv8.2-A.
+    case AppleHWId::kCpuFamily_MONSOON_MISTRAL:
+      features.add(CpuFeatures::ARM::kARMv8_2a,
+                   CpuFeatures::ARM::kAES,
+                   CpuFeatures::ARM::kFP16FULL,
+                   CpuFeatures::ARM::kSHA1,
+                   CpuFeatures::ARM::kSHA2);
+      break;
+
+    // Apple A12 - ARMv8.3-A.
+    case AppleHWId::kCpuFamily_VORTEX_TEMPEST:
+      features.add(CpuFeatures::ARM::kARMv8_3a,
+                   CpuFeatures::ARM::kAES,
+                   CpuFeatures::ARM::kFP16FULL,
+                   CpuFeatures::ARM::kSHA1,
+                   CpuFeatures::ARM::kSHA2);
+      break;
+
+    // Apple A13 - ARMv8.4-A.
+    case AppleHWId::kCpuFamily_LIGHTNING_THUNDER:
+      features.add(CpuFeatures::ARM::kARMv8_4a,
+                   CpuFeatures::ARM::kAES,
+                   CpuFeatures::ARM::kFP16FML,
+                   CpuFeatures::ARM::kFP16FULL,
+                   CpuFeatures::ARM::kSHA1,
+                   CpuFeatures::ARM::kSHA2,
+                   CpuFeatures::ARM::kSHA3,
+                   CpuFeatures::ARM::kSHA512);
+      break;
+
+    // Apple A14/M1 - ARMv8.5-A. NOTE(review): registers kARMv8_4a plus individual features - confirm intended.
+    case AppleHWId::kCpuFamily_FIRESTORM_ICESTORM:
+      features.add(CpuFeatures::ARM::kARMv8_4a,
+                   CpuFeatures::ARM::kAES,
+                   CpuFeatures::ARM::kALTNZCV,
+                   CpuFeatures::ARM::kFP16FML,
+                   CpuFeatures::ARM::kFP16FULL,
+                   CpuFeatures::ARM::kFRINT,
+                   CpuFeatures::ARM::kSB,
+                   CpuFeatures::ARM::kSHA1,
+                   CpuFeatures::ARM::kSHA2,
+                   CpuFeatures::ARM::kSHA3,
+                   CpuFeatures::ARM::kSHA512,
+                   CpuFeatures::ARM::kSSBS);
+      break;
+
+    default: // Unrecognized family id, including 0 returned by a failed query: only baseline features remain.
+      cpu._wasDetected = false;
+      break;
+  }
+
+  expandARMFeaturesByVersion(cpu); // Adds everything implied by the architecture version selected above.
+}
+
+// CpuInfo - Detect - ARM [Unknown]
+// ================================
+
+#else
+
+#if ASMJIT_ARCH_ARM == 64
+  #pragma message("[asmjit] Disabling runtime CPU detection - unsupported OS/CPU combination (Unknown OS with AArch64 CPU)")
+#else
+  #pragma message("[asmjit] Disabling runtime CPU detection - unsupported OS/CPU combination (Unknown OS with ARM CPU)")
+#endif
+
+static ASMJIT_FAVOR_SIZE void detectARMCpu(CpuInfo& cpu) noexcept { // Unknown OS: `_wasDetected` is not set here.
+  populateBaseARMFeatures(cpu);
+  detectARMFeaturesViaCompilerFlags(cpu); // Compile-time macros are the only source of features on this path.
+  expandARMFeaturesByVersion(cpu);
+}
+#endif
+
+#endif
+
+// CpuInfo - Detect - Host
+// =======================
+
+static uint32_t cpuInfoInitialized; // Set to 1 by `CpuInfo::host()` after `cpuInfoGlobal` has been assigned.
+static CpuInfo cpuInfoGlobal(Globals::NoInit); // Cached host information returned by `CpuInfo::host()`.
+
+const CpuInfo& CpuInfo::host() noexcept {
+  // Detects the host CPU on first use; no locking is done. This should never cause a problem as the resulting
+  // information should always be the same. In the worst case we would just overwrite it non-atomically.
+  if (!cpuInfoInitialized) {
+    CpuInfo cpuInfoLocal; // Filled locally first, then copied to the global in a single assignment below.
+
+    cpuInfoLocal._arch = Arch::kHost;
+    cpuInfoLocal._subArch = SubArch::kHost;
+
+#if ASMJIT_ARCH_X86
+    detectX86Cpu(cpuInfoLocal);
+#elif ASMJIT_ARCH_ARM
+    detectARMCpu(cpuInfoLocal);
+#else
+    #pragma message("[asmjit] Disabling runtime CPU detection - unsupported OS/CPU combination (Unknown CPU)")
+#endif
+
+    cpuInfoLocal._hwThreadCount = detectHWThreadCount();
+    cpuInfoGlobal = cpuInfoLocal;
+    cpuInfoInitialized = 1; // Written last, after the global copy has been assigned.
+  }
+
+  return cpuInfoGlobal;
+}
+
+ASMJIT_END_NAMESPACE
diff --git a/lib/lepton/asmjit/core/cpuinfo.h b/lib/lepton/asmjit/core/cpuinfo.h
new file mode 100644
index 0000000000..4af5c3a82f
--- /dev/null
+++ b/lib/lepton/asmjit/core/cpuinfo.h
@@ -0,0 +1,813 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_CPUINFO_H_INCLUDED
+#define ASMJIT_CORE_CPUINFO_H_INCLUDED
+
+#include "../core/archtraits.h"
+#include "../core/environment.h"
+#include "../core/globals.h"
+#include "../core/string.h"
+#include "../core/support.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \addtogroup asmjit_core
+//! \{
+
+//! CPU features information.
+//!
+//! Each feature is represented by a single bit in an embedded bit array.
+class CpuFeatures {
+public:
+  //! A word that is used to represent feature bits.
+  typedef Support::BitWord BitWord;
+  //! Iterator that can iterate all CPU features set.
+  typedef Support::BitVectorIterator<BitWord> Iterator;
+
+  //! \name Constants
+  //! \{
+
+  //! \cond INTERNAL
+  enum : uint32_t {
+    kMaxFeatures = 256,
+    kNumBitWords = kMaxFeatures / Support::kBitWordSizeInBits
+  };
+  //! \endcond
+
+  //! \}
+
+  //! \name Data
+  //! \{
+
+  //! CPU features data.
+  struct Data {
+    //! \name Members
+    //! \{
+
+    //! Data bits.
+    Support::Array<BitWord, kNumBitWords> _bits;
+
+    //! \}
+
+    //! \name Overloaded Operators
+    //! \{
+
+    inline bool operator==(const Data& other) const noexcept { return  eq(other); } // Usable on const objects.
+    inline bool operator!=(const Data& other) const noexcept { return !eq(other); } // Usable on const objects.
+
+    //! \}
+
+    //! \name Accessors
+    //! \{
+
+    //! Returns true if there are no features set.
+    inline bool empty() const noexcept { return _bits.aggregate<Support::Or>(0) == 0; }
+
+    //! Returns all features as array of bitwords (see \ref Support::BitWord).
+    inline BitWord* bits() noexcept { return _bits.data(); }
+    //! Returns all features as array of bitwords (const).
+    inline const BitWord* bits() const noexcept { return _bits.data(); }
+
+    //! Returns the number of BitWords returned by \ref bits().
+    inline size_t bitWordCount() const noexcept { return kNumBitWords; }
+
+    //! Returns \ref Support::BitVectorIterator, that can be used to iterate over all features efficiently.
+    inline Iterator iterator() const noexcept { return Iterator(_bits.data(), kNumBitWords); }
+
+    //! Tests whether the feature `featureId` is present.
+    template<typename FeatureId>
+    ASMJIT_FORCE_INLINE bool has(const FeatureId& featureId) const noexcept {
+      ASMJIT_ASSERT(uint32_t(featureId) < kMaxFeatures);
+
+      uint32_t idx = uint32_t(featureId) / Support::kBitWordSizeInBits;
+      uint32_t bit = uint32_t(featureId) % Support::kBitWordSizeInBits;
+
+      return bool((_bits[idx] >> bit) & 0x1);
+    }
+
+    //! Tests whether all features as defined by `other` are present.
+    ASMJIT_FORCE_INLINE bool hasAll(const Data& other) const noexcept {
+      for (uint32_t i = 0; i < kNumBitWords; i++)
+        if ((_bits[i] & other._bits[i]) != other._bits[i])
+          return false;
+      return true;
+    }
+
+    //! \}
+
+    //! \name Manipulation
+    //! \{
+
+    inline void reset() noexcept { _bits.fill(0); }
+
+    //! Adds the given CPU `featureId` to the list of features.
+    template<typename FeatureId>
+    ASMJIT_FORCE_INLINE void add(const FeatureId& featureId) noexcept {
+      ASMJIT_ASSERT(uint32_t(featureId) < kMaxFeatures);
+
+      uint32_t idx = uint32_t(featureId) / Support::kBitWordSizeInBits;
+      uint32_t bit = uint32_t(featureId) % Support::kBitWordSizeInBits;
+
+      _bits[idx] |= BitWord(1) << bit;
+    }
+
+    template<typename FeatureId, typename... Args>
+    ASMJIT_FORCE_INLINE void add(const FeatureId& featureId, Args&&... otherFeatureIds) noexcept {
+      add(featureId);
+      add(std::forward<Args>(otherFeatureIds)...);
+    }
+
+    template<typename FeatureId>
+    ASMJIT_FORCE_INLINE void addIf(bool condition, const FeatureId& featureId) noexcept {
+      ASMJIT_ASSERT(uint32_t(featureId) < kMaxFeatures);
+
+      uint32_t idx = uint32_t(featureId) / Support::kBitWordSizeInBits;
+      uint32_t bit = uint32_t(featureId) % Support::kBitWordSizeInBits;
+
+      _bits[idx] |= BitWord(condition) << bit;
+    }
+
+    template<typename FeatureId, typename... Args>
+    ASMJIT_FORCE_INLINE void addIf(bool condition, const FeatureId& featureId, Args&&... otherFeatureIds) noexcept {
+      addIf(condition, featureId);
+      addIf(condition, std::forward<Args>(otherFeatureIds)...);
+    }
+
+    //! Removes the given CPU `featureId` from the list of features.
+    template<typename FeatureId>
+    ASMJIT_FORCE_INLINE void remove(const FeatureId& featureId) noexcept {
+      ASMJIT_ASSERT(uint32_t(featureId) < kMaxFeatures);
+
+      uint32_t idx = uint32_t(featureId) / Support::kBitWordSizeInBits;
+      uint32_t bit = uint32_t(featureId) % Support::kBitWordSizeInBits;
+
+      _bits[idx] &= ~(BitWord(1) << bit);
+    }
+
+    template<typename FeatureId, typename... Args>
+    ASMJIT_FORCE_INLINE void remove(const FeatureId& featureId, Args&&... otherFeatureIds) noexcept {
+      remove(featureId);
+      remove(std::forward<Args>(otherFeatureIds)...);
+    }
+
+    //! Tests whether this CPU features data matches `other`.
+    ASMJIT_FORCE_INLINE bool eq(const Data& other) const noexcept { return _bits == other._bits; }
+
+    //! \}
+
+  };
+
+  //! X86 specific features data.
+  struct X86 : public Data {
+    //! X86 CPU feature identifiers.
+    enum Id : uint8_t {
+      // @EnumValuesBegin{"enum": "CpuFeatures::X86"}@
+      kNone,                     //!< No feature (never set, used internally).
+
+      kMT,                       //!< CPU has multi-threading capabilities.
+      kNX,                       //!< CPU has Not-Execute-Bit aka DEP (data-execution prevention).
+      k3DNOW,                    //!< CPU has 3DNOW            (3DNOW base instructions) [AMD].
+      k3DNOW2,                   //!< CPU has 3DNOW2           (enhanced 3DNOW) [AMD].
+      kADX,                      //!< CPU has ADX              (multi-precision add-carry instruction extensions).
+      kAESNI,                    //!< CPU has AESNI            (AES encode/decode instructions).
+      kALTMOVCR8,                //!< CPU has LOCK MOV R<->CR0 (supports `MOV R<->CR8` via `LOCK MOV R<->CR0` in 32-bit mode) [AMD].
+      kAMX_BF16,                 //!< CPU has AMX_BF16         (advanced matrix extensions - BF16 instructions).
+      kAMX_INT8,                 //!< CPU has AMX_INT8         (advanced matrix extensions - INT8 instructions).
+      kAMX_TILE,                 //!< CPU has AMX_TILE         (advanced matrix extensions).
+      kAVX,                      //!< CPU has AVX              (advanced vector extensions).
+      kAVX2,                     //!< CPU has AVX2             (advanced vector extensions 2).
+      kAVX512_4FMAPS,            //!< CPU has AVX512_4FMAPS    (FMA packed single).
+      kAVX512_4VNNIW,            //!< CPU has AVX512_4VNNIW    (vector NN instructions word variable precision).
+      kAVX512_BF16,              //!< CPU has AVX512_BF16      (BFLOAT16 support instruction).
+      kAVX512_BITALG,            //!< CPU has AVX512_BITALG    (VPOPCNT[B|W], VPSHUFBITQMB).
+      kAVX512_BW,                //!< CPU has AVX512_BW        (packed BYTE|WORD).
+      kAVX512_CDI,               //!< CPU has AVX512_CDI       (conflict detection).
+      kAVX512_DQ,                //!< CPU has AVX512_DQ        (packed DWORD|QWORD).
+      kAVX512_ERI,               //!< CPU has AVX512_ERI       (exponential and reciprocal).
+      kAVX512_F,                 //!< CPU has AVX512_F         (AVX512 foundation).
+      kAVX512_FP16,              //!< CPU has AVX512_FP16      (FP16 extensions).
+      kAVX512_IFMA,              //!< CPU has AVX512_IFMA      (integer fused-multiply-add using 52-bit precision).
+      kAVX512_PFI,               //!< CPU has AVX512_PFI       (prefetch instructions).
+      kAVX512_VBMI,              //!< CPU has AVX512_VBMI      (vector byte manipulation).
+      kAVX512_VBMI2,             //!< CPU has AVX512_VBMI2     (vector byte manipulation 2).
+      kAVX512_VL,                //!< CPU has AVX512_VL        (vector length extensions).
+      kAVX512_VNNI,              //!< CPU has AVX512_VNNI      (vector neural network instructions).
+      kAVX512_VP2INTERSECT,      //!< CPU has AVX512_VP2INTERSECT
+      kAVX512_VPOPCNTDQ,         //!< CPU has AVX512_VPOPCNTDQ (VPOPCNT[D|Q] instructions).
+      kAVX_VNNI,                 //!< CPU has AVX_VNNI         (VEX encoding of vpdpbusd/vpdpbusds/vpdpwssd/vpdpwssds).
+      kBMI,                      //!< CPU has BMI              (bit manipulation instructions #1).
+      kBMI2,                     //!< CPU has BMI2             (bit manipulation instructions #2).
+      kCET_IBT,                  //!< CPU has CET-IBT          (indirect branch tracking).
+      kCET_SS,                   //!< CPU has CET-SS.
+      kCLDEMOTE,                 //!< CPU has CLDEMOTE         (cache line demote).
+      kCLFLUSH,                  //!< CPU has CLFLUSH          (Cache Line flush).
+      kCLFLUSHOPT,               //!< CPU has CLFLUSHOPT       (Cache Line flush - optimized).
+      kCLWB,                     //!< CPU has CLWB.
+      kCLZERO,                   //!< CPU has CLZERO.
+      kCMOV,                     //!< CPU has CMOV             (CMOV and FCMOV instructions).
+      kCMPXCHG16B,               //!< CPU has CMPXCHG16B       (compare-exchange 16 bytes) [X86_64].
+      kCMPXCHG8B,                //!< CPU has CMPXCHG8B        (compare-exchange 8 bytes).
+      kENCLV,                    //!< CPU has ENCLV.
+      kENQCMD,                   //!< CPU has ENQCMD           (enqueue stores).
+      kERMS,                     //!< CPU has ERMS             (enhanced REP MOVSB/STOSB).
+      kF16C,                     //!< CPU has F16C.
+      kFMA,                      //!< CPU has FMA              (fused-multiply-add 3 operand form).
+      kFMA4,                     //!< CPU has FMA4             (fused-multiply-add 4 operand form).
+      kFPU,                      //!< CPU has FPU              (FPU support).
+      kFSGSBASE,                 //!< CPU has FSGSBASE.
+      kFXSR,                     //!< CPU has FXSR             (FXSAVE/FXRSTOR instructions).
+      kFXSROPT,                  //!< CPU has FXSROPT          (FXSAVE/FXRSTOR is optimized).
+      kGEODE,                    //!< CPU has GEODE extensions (3DNOW additions).
+      kGFNI,                     //!< CPU has GFNI             (Galois field instructions).
+      kHLE,                      //!< CPU has HLE.
+      kHRESET,                   //!< CPU has HRESET.
+      kI486,                     //!< CPU has I486 features    (I486+ support).
+      kLAHFSAHF,                 //!< CPU has LAHF/SAHF        (LAHF/SAHF in 64-bit mode) [X86_64].
+      kLWP,                      //!< CPU has LWP              (lightweight profiling) [AMD].
+      kLZCNT,                    //!< CPU has LZCNT            (LZCNT instruction).
+      kMCOMMIT,                  //!< CPU has MCOMMIT          (MCOMMIT instruction).
+      kMMX,                      //!< CPU has MMX              (MMX base instructions).
+      kMMX2,                     //!< CPU has MMX2             (MMX extensions or MMX2).
+      kMONITOR,                  //!< CPU has MONITOR          (MONITOR/MWAIT instructions).
+      kMONITORX,                 //!< CPU has MONITORX         (MONITORX/MWAITX instructions).
+      kMOVBE,                    //!< CPU has MOVBE            (move with byte-order swap).
+      kMOVDIR64B,                //!< CPU has MOVDIR64B        (move 64 bytes as direct store).
+      kMOVDIRI,                  //!< CPU has MOVDIRI          (move dword/qword as direct store).
+      kMPX,                      //!< CPU has MPX              (memory protection extensions).
+      kMSR,                      //!< CPU has MSR              (RDMSR/WRMSR instructions).
+      kMSSE,                     //!< CPU has MSSE             (misaligned SSE support).
+      kOSXSAVE,                  //!< CPU has OSXSAVE          (XSAVE enabled by OS).
+      kOSPKE,                    //!< CPU has OSPKE            (PKE enabled by OS).
+      kPCLMULQDQ,                //!< CPU has PCLMULQDQ        (packed carry-less multiplication).
+      kPCONFIG,                  //!< CPU has PCONFIG          (PCONFIG instruction).
+      kPOPCNT,                   //!< CPU has POPCNT           (POPCNT instruction).
+      kPREFETCHW,                //!< CPU has PREFETCHW.
+      kPREFETCHWT1,              //!< CPU has PREFETCHWT1.
+      kPTWRITE,                  //!< CPU has PTWRITE.
+      kRDPID,                    //!< CPU has RDPID.
+      kRDPRU,                    //!< CPU has RDPRU.
+      kRDRAND,                   //!< CPU has RDRAND.
+      kRDSEED,                   //!< CPU has RDSEED.
+      kRDTSC,                    //!< CPU has RDTSC.
+      kRDTSCP,                   //!< CPU has RDTSCP.
+      kRTM,                      //!< CPU has RTM.
+      kSERIALIZE,                //!< CPU has SERIALIZE.
+      kSHA,                      //!< CPU has SHA              (SHA-1 and SHA-256 instructions).
+      kSKINIT,                   //!< CPU has SKINIT           (SKINIT/STGI instructions) [AMD].
+      kSMAP,                     //!< CPU has SMAP             (supervisor-mode access prevention).
+      kSMEP,                     //!< CPU has SMEP             (supervisor-mode execution prevention).
+      kSMX,                      //!< CPU has SMX              (safer mode extensions).
+      kSNP,                      //!< CPU has SNP.
+      kSSE,                      //!< CPU has SSE.
+      kSSE2,                     //!< CPU has SSE2.
+      kSSE3,                     //!< CPU has SSE3.
+      kSSE4_1,                   //!< CPU has SSE4.1.
+      kSSE4_2,                   //!< CPU has SSE4.2.
+      kSSE4A,                    //!< CPU has SSE4A [AMD].
+      kSSSE3,                    //!< CPU has SSSE3.
+      kSVM,                      //!< CPU has SVM              (virtualization) [AMD].
+      kTBM,                      //!< CPU has TBM              (trailing bit manipulation) [AMD].
+      kTSX,                      //!< CPU has TSX.
+      kTSXLDTRK,                 //!< CPU has TSXLDTRK.
+      kUINTR,                    //!< CPU has UINTR            (user interrupts).
+      kVAES,                     //!< CPU has VAES             (vector AES 256|512 bit support).
+      kVMX,                      //!< CPU has VMX              (virtualization) [INTEL].
+      kVPCLMULQDQ,               //!< CPU has VPCLMULQDQ       (vector PCLMULQDQ 256|512-bit support).
+      kWAITPKG,                  //!< CPU has WAITPKG          (UMONITOR, UMWAIT, TPAUSE).
+      kWBNOINVD,                 //!< CPU has WBNOINVD.
+      kXOP,                      //!< CPU has XOP              (XOP instructions) [AMD].
+      kXSAVE,                    //!< CPU has XSAVE.
+      kXSAVEC,                   //!< CPU has XSAVEC.
+      kXSAVEOPT,                 //!< CPU has XSAVEOPT.
+      kXSAVES,                   //!< CPU has XSAVES.
+      // @EnumValuesEnd@
+
+      kMaxValue = kXSAVES
+    };
+
+    #define ASMJIT_X86_FEATURE(FEATURE) \
+      inline bool has##FEATURE() const noexcept { return has(X86::k##FEATURE); }
+
+    ASMJIT_X86_FEATURE(MT)
+    ASMJIT_X86_FEATURE(NX)
+    ASMJIT_X86_FEATURE(3DNOW)
+    ASMJIT_X86_FEATURE(3DNOW2)
+    ASMJIT_X86_FEATURE(ADX)
+    ASMJIT_X86_FEATURE(AESNI)
+    ASMJIT_X86_FEATURE(ALTMOVCR8)
+    ASMJIT_X86_FEATURE(AMX_BF16)
+    ASMJIT_X86_FEATURE(AMX_INT8)
+    ASMJIT_X86_FEATURE(AMX_TILE)
+    ASMJIT_X86_FEATURE(AVX)
+    ASMJIT_X86_FEATURE(AVX2)
+    ASMJIT_X86_FEATURE(AVX512_4FMAPS)
+    ASMJIT_X86_FEATURE(AVX512_4VNNIW)
+    ASMJIT_X86_FEATURE(AVX512_BF16)
+    ASMJIT_X86_FEATURE(AVX512_BITALG)
+    ASMJIT_X86_FEATURE(AVX512_BW)
+    ASMJIT_X86_FEATURE(AVX512_CDI)
+    ASMJIT_X86_FEATURE(AVX512_DQ)
+    ASMJIT_X86_FEATURE(AVX512_ERI)
+    ASMJIT_X86_FEATURE(AVX512_F)
+    ASMJIT_X86_FEATURE(AVX512_FP16)
+    ASMJIT_X86_FEATURE(AVX512_IFMA)
+    ASMJIT_X86_FEATURE(AVX512_PFI)
+    ASMJIT_X86_FEATURE(AVX512_VBMI)
+    ASMJIT_X86_FEATURE(AVX512_VBMI2)
+    ASMJIT_X86_FEATURE(AVX512_VL)
+    ASMJIT_X86_FEATURE(AVX512_VNNI)
+    ASMJIT_X86_FEATURE(AVX512_VP2INTERSECT)
+    ASMJIT_X86_FEATURE(AVX512_VPOPCNTDQ)
+    ASMJIT_X86_FEATURE(AVX_VNNI)
+    ASMJIT_X86_FEATURE(BMI)
+    ASMJIT_X86_FEATURE(BMI2)
+    ASMJIT_X86_FEATURE(CET_IBT)
+    ASMJIT_X86_FEATURE(CET_SS)
+    ASMJIT_X86_FEATURE(CLDEMOTE)
+    ASMJIT_X86_FEATURE(CLFLUSH)
+    ASMJIT_X86_FEATURE(CLFLUSHOPT)
+    ASMJIT_X86_FEATURE(CLWB)
+    ASMJIT_X86_FEATURE(CLZERO)
+    ASMJIT_X86_FEATURE(CMOV)
+    ASMJIT_X86_FEATURE(CMPXCHG16B)
+    ASMJIT_X86_FEATURE(CMPXCHG8B)
+    ASMJIT_X86_FEATURE(ENCLV)
+    ASMJIT_X86_FEATURE(ENQCMD)
+    ASMJIT_X86_FEATURE(ERMS)
+    ASMJIT_X86_FEATURE(F16C)
+    ASMJIT_X86_FEATURE(FMA)
+    ASMJIT_X86_FEATURE(FMA4)
+    ASMJIT_X86_FEATURE(FPU)
+    ASMJIT_X86_FEATURE(FSGSBASE)
+    ASMJIT_X86_FEATURE(FXSR)
+    ASMJIT_X86_FEATURE(FXSROPT)
+    ASMJIT_X86_FEATURE(GEODE)
+    ASMJIT_X86_FEATURE(GFNI)
+    ASMJIT_X86_FEATURE(HLE)
+    ASMJIT_X86_FEATURE(HRESET)
+    ASMJIT_X86_FEATURE(I486)
+    ASMJIT_X86_FEATURE(LAHFSAHF)
+    ASMJIT_X86_FEATURE(LWP)
+    ASMJIT_X86_FEATURE(LZCNT)
+    ASMJIT_X86_FEATURE(MCOMMIT)
+    ASMJIT_X86_FEATURE(MMX)
+    ASMJIT_X86_FEATURE(MMX2)
+    ASMJIT_X86_FEATURE(MONITOR)
+    ASMJIT_X86_FEATURE(MONITORX)
+    ASMJIT_X86_FEATURE(MOVBE)
+    ASMJIT_X86_FEATURE(MOVDIR64B)
+    ASMJIT_X86_FEATURE(MOVDIRI)
+    ASMJIT_X86_FEATURE(MPX)
+    ASMJIT_X86_FEATURE(MSR)
+    ASMJIT_X86_FEATURE(MSSE)
+    ASMJIT_X86_FEATURE(OSXSAVE)
+    ASMJIT_X86_FEATURE(OSPKE)
+    ASMJIT_X86_FEATURE(PCLMULQDQ)
+    ASMJIT_X86_FEATURE(PCONFIG)
+    ASMJIT_X86_FEATURE(POPCNT)
+    ASMJIT_X86_FEATURE(PREFETCHW)
+    ASMJIT_X86_FEATURE(PREFETCHWT1)
+    ASMJIT_X86_FEATURE(PTWRITE)
+    ASMJIT_X86_FEATURE(RDPID)
+    ASMJIT_X86_FEATURE(RDPRU)
+    ASMJIT_X86_FEATURE(RDRAND)
+    ASMJIT_X86_FEATURE(RDSEED)
+    ASMJIT_X86_FEATURE(RDTSC)
+    ASMJIT_X86_FEATURE(RDTSCP)
+    ASMJIT_X86_FEATURE(RTM)
+    ASMJIT_X86_FEATURE(SERIALIZE)
+    ASMJIT_X86_FEATURE(SHA)
+    ASMJIT_X86_FEATURE(SKINIT)
+    ASMJIT_X86_FEATURE(SMAP)
+    ASMJIT_X86_FEATURE(SMEP)
+    ASMJIT_X86_FEATURE(SMX)
+    ASMJIT_X86_FEATURE(SNP)
+    ASMJIT_X86_FEATURE(SSE)
+    ASMJIT_X86_FEATURE(SSE2)
+    ASMJIT_X86_FEATURE(SSE3)
+    ASMJIT_X86_FEATURE(SSE4_1)
+    ASMJIT_X86_FEATURE(SSE4_2)
+    ASMJIT_X86_FEATURE(SSE4A)
+    ASMJIT_X86_FEATURE(SSSE3)
+    ASMJIT_X86_FEATURE(SVM)
+    ASMJIT_X86_FEATURE(TBM)
+    ASMJIT_X86_FEATURE(TSX)
+    ASMJIT_X86_FEATURE(TSXLDTRK)
+    ASMJIT_X86_FEATURE(UINTR)
+    ASMJIT_X86_FEATURE(VAES)
+    ASMJIT_X86_FEATURE(VMX)
+    ASMJIT_X86_FEATURE(VPCLMULQDQ)
+    ASMJIT_X86_FEATURE(WAITPKG)
+    ASMJIT_X86_FEATURE(WBNOINVD)
+    ASMJIT_X86_FEATURE(XOP)
+    ASMJIT_X86_FEATURE(XSAVE)
+    ASMJIT_X86_FEATURE(XSAVEC)
+    ASMJIT_X86_FEATURE(XSAVEOPT)
+    ASMJIT_X86_FEATURE(XSAVES)
+
+    #undef ASMJIT_X86_FEATURE
+  };
+
+  //! ARM specific features data.
+  struct ARM : public Data {
+    //! ARM CPU feature identifiers.
+    enum Id : uint8_t {
+      // @EnumValuesBegin{"enum": "CpuFeatures::ARM"}@
+      kNone = 0,                 //!< No feature (never set, used internally).
+      kTHUMB,                    //!< THUMB v1 ISA.
+      kTHUMBv2,                  //!< THUMB v2 ISA.
+
+      kARMv6,                    //!< ARMv6 ISA.
+      kARMv7,                    //!< ARMv7 ISA.
+      kARMv8a,                   //!< ARMv8-A ISA.
+      kARMv8_1a,                 //!< ARMv8.1-A ISA.
+      kARMv8_2a,                 //!< ARMv8.2-A ISA.
+      kARMv8_3a,                 //!< ARMv8.3-A ISA.
+      kARMv8_4a,                 //!< ARMv8.4-A ISA.
+      kARMv8_5a,                 //!< ARMv8.5-A ISA.
+      kARMv8_6a,                 //!< ARMv8.6-A ISA.
+      kARMv8_7a,                 //!< ARMv8.7-A ISA.
+
+      kVFPv2,                    //!< CPU has VFPv2 instruction set.
+      kVFPv3,                    //!< CPU has VFPv3 instruction set.
+      kVFPv4,                    //!< CPU has VFPv4 instruction set.
+      kVFP_D32,                  //!< CPU has 32 VFP-D (64-bit) registers.
+
+      kAES,                      //!< CPU has AES (AArch64 only).
+      kALTNZCV,                  //!< CPU has ALTNZCV (AArch64 only).
+      kASIMD,                    //!< CPU has Advanced SIMD (NEON on ARM/THUMB).
+      kBF16,                     //!< CPU has BF16 (AArch64 only).
+      kBTI,                      //!< CPU has BTI (branch target identification).
+      kCPUID,                    //!< CPU has accessible CPUID register (ID_AA64ZFR0_EL1).
+      kCRC32,                    //!< CPU has CRC32.
+      kDGH,                      //!< CPU has DGH (AArch64 only).
+      kDIT,                      //!< CPU has data independent timing instructions (DIT).
+      kDOTPROD,                  //!< CPU has DOTPROD (SDOT/UDOT).
+      kEDSP,                     //!< CPU has EDSP (ARM/THUMB only).
+      kFCMA,                     //!< CPU has FCMA (FCADD/FCMLA).
+      kFJCVTZS,                  //!< CPU has FJCVTZS (AArch64 only).
+      kFLAGM,                    //!< CPU has FLAGM (AArch64 only).
+      kFP16CONV,                 //!< CPU has FP16 (half-float) conversion.
+      kFP16FML,                  //!< CPU has FMLAL{2}/FMLSL{2}
+      kFP16FULL,                 //!< CPU has full support for FP16.
+      kFRINT,                    //!< CPU has FRINT[32|64][X|Z] (AArch64 only).
+      kI8MM,                     //!< CPU has I8MM (AArch64 only).
+      kIDIVA,                    //!< CPU has hardware SDIV and UDIV (ARM mode).
+      kIDIVT,                    //!< CPU has hardware SDIV and UDIV (THUMB mode).
+      kLSE,                      //!< CPU has large system extensions (LSE) (AArch64 only).
+      kMTE,                      //!< CPU has MTE (AArch64 only).
+      kRCPC_IMMO,                //!< CPU has RCPC_IMMO (AArch64 only).
+      kRDM,                      //!< CPU has RDM (AArch64 only).
+      kPMU,                      //!< CPU has PMU (AArch64 only).
+      kPMULL,                    //!< CPU has PMULL (AArch64 only).
+      kRNG,                      //!< CPU has random number generation (RNG).
+      kSB,                       //!< CPU has speculative barrier SB (AArch64 only).
+      kSHA1,                     //!< CPU has SHA1.
+      kSHA2,                     //!< CPU has SHA2.
+      kSHA3,                     //!< CPU has SHA3.
+      kSHA512,                   //!< CPU has SHA512.
+      kSM3,                      //!< CPU has SM3.
+      kSM4,                      //!< CPU has SM4.
+      kSSBS,                     //!< CPU has SSBS.
+      kSVE,                      //!< CPU has SVE (AArch64 only).
+      kSVE_BF16,                 //!< CPU has SVE-BF16 (AArch64 only).
+      kSVE_F32MM,                //!< CPU has SVE-F32MM (AArch64 only).
+      kSVE_F64MM,                //!< CPU has SVE-F64MM (AArch64 only).
+      kSVE_I8MM,                 //!< CPU has SVE-I8MM (AArch64 only).
+      kSVE_PMULL,                //!< CPU has SVE-PMULL (AArch64 only).
+      kSVE2,                     //!< CPU has SVE2 (AArch64 only).
+      kSVE2_AES,                 //!< CPU has SVE2-AES (AArch64 only).
+      kSVE2_BITPERM,             //!< CPU has SVE2-BITPERM (AArch64 only).
+      kSVE2_SHA3,                //!< CPU has SVE2-SHA3 (AArch64 only).
+      kSVE2_SM4,                 //!< CPU has SVE2-SM4 (AArch64 only).
+      kTME,                      //!< CPU has transactional memory extensions (TME).
+      // @EnumValuesEnd@
+
+      kMaxValue = kTME
+    };
+
+    #define ASMJIT_ARM_FEATURE(FEATURE) \
+      inline bool has##FEATURE() const noexcept { return has(ARM::k##FEATURE); }
+
+    ASMJIT_ARM_FEATURE(THUMB)
+    ASMJIT_ARM_FEATURE(THUMBv2)
+
+    ASMJIT_ARM_FEATURE(ARMv6)
+    ASMJIT_ARM_FEATURE(ARMv7)
+    ASMJIT_ARM_FEATURE(ARMv8a)
+    ASMJIT_ARM_FEATURE(ARMv8_1a)
+    ASMJIT_ARM_FEATURE(ARMv8_2a)
+    ASMJIT_ARM_FEATURE(ARMv8_3a)
+    ASMJIT_ARM_FEATURE(ARMv8_4a)
+    ASMJIT_ARM_FEATURE(ARMv8_5a)
+    ASMJIT_ARM_FEATURE(ARMv8_6a)
+    ASMJIT_ARM_FEATURE(ARMv8_7a)
+
+    ASMJIT_ARM_FEATURE(VFPv2)
+    ASMJIT_ARM_FEATURE(VFPv3)
+    ASMJIT_ARM_FEATURE(VFPv4)
+    ASMJIT_ARM_FEATURE(VFP_D32)
+
+    ASMJIT_ARM_FEATURE(AES)
+    ASMJIT_ARM_FEATURE(ALTNZCV)
+    ASMJIT_ARM_FEATURE(ASIMD)
+    ASMJIT_ARM_FEATURE(BF16)
+    ASMJIT_ARM_FEATURE(BTI)
+    ASMJIT_ARM_FEATURE(CPUID)
+    ASMJIT_ARM_FEATURE(CRC32)
+    ASMJIT_ARM_FEATURE(DGH)
+    ASMJIT_ARM_FEATURE(DIT)
+    ASMJIT_ARM_FEATURE(DOTPROD)
+    ASMJIT_ARM_FEATURE(EDSP)
+    ASMJIT_ARM_FEATURE(FCMA)
+    ASMJIT_ARM_FEATURE(FLAGM)
+    ASMJIT_ARM_FEATURE(FP16CONV)
+    ASMJIT_ARM_FEATURE(FP16FML)
+    ASMJIT_ARM_FEATURE(FP16FULL)
+    ASMJIT_ARM_FEATURE(FRINT)
+    ASMJIT_ARM_FEATURE(IDIVA)
+    ASMJIT_ARM_FEATURE(IDIVT)
+    ASMJIT_ARM_FEATURE(LSE)
+    ASMJIT_ARM_FEATURE(MTE)
+    ASMJIT_ARM_FEATURE(FJCVTZS)
+    ASMJIT_ARM_FEATURE(I8MM)
+    ASMJIT_ARM_FEATURE(RCPC_IMMO)
+    ASMJIT_ARM_FEATURE(RDM)
+    ASMJIT_ARM_FEATURE(PMU)
+    ASMJIT_ARM_FEATURE(PMULL)
+    ASMJIT_ARM_FEATURE(RNG)
+    ASMJIT_ARM_FEATURE(SB)
+    ASMJIT_ARM_FEATURE(SHA1)
+    ASMJIT_ARM_FEATURE(SHA2)
+    ASMJIT_ARM_FEATURE(SHA3)
+    ASMJIT_ARM_FEATURE(SHA512)
+    ASMJIT_ARM_FEATURE(SM3)
+    ASMJIT_ARM_FEATURE(SM4)
+    ASMJIT_ARM_FEATURE(SSBS)
+    ASMJIT_ARM_FEATURE(SVE)
+    ASMJIT_ARM_FEATURE(SVE_BF16)
+    ASMJIT_ARM_FEATURE(SVE_F32MM)
+    ASMJIT_ARM_FEATURE(SVE_F64MM)
+    ASMJIT_ARM_FEATURE(SVE_I8MM)
+    ASMJIT_ARM_FEATURE(SVE_PMULL)
+    ASMJIT_ARM_FEATURE(SVE2)
+    ASMJIT_ARM_FEATURE(SVE2_AES)
+    ASMJIT_ARM_FEATURE(SVE2_BITPERM)
+    ASMJIT_ARM_FEATURE(SVE2_SHA3)
+    ASMJIT_ARM_FEATURE(SVE2_SM4)
+    ASMJIT_ARM_FEATURE(TME)
+
+    #undef ASMJIT_ARM_FEATURE
+  };
+
+  static_assert(uint32_t(X86::kMaxValue) < kMaxFeatures, "The number of X86 CPU features cannot exceed CpuFeatures::kMaxFeatures");
+  static_assert(uint32_t(ARM::kMaxValue) < kMaxFeatures, "The number of ARM CPU features cannot exceed CpuFeatures::kMaxFeatures");
+
+  //! \}
+
+  //! \name Members
+  //! \{
+
+  Data _data {};
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  inline CpuFeatures() noexcept {}
+  inline CpuFeatures(const CpuFeatures& other) noexcept = default;
+  inline explicit CpuFeatures(Globals::NoInit_) noexcept {}
+
+  //! \}
+
+  //! \name Overloaded Operators
+  //! \{
+
+  inline CpuFeatures& operator=(const CpuFeatures& other) noexcept = default;
+
+  inline bool operator==(const CpuFeatures& other) const noexcept { return  eq(other); } // Usable on const objects.
+  inline bool operator!=(const CpuFeatures& other) const noexcept { return !eq(other); } // Usable on const objects.
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns true if there are no features set.
+  inline bool empty() const noexcept { return _data.empty(); }
+
+  //! Casts this base class into a derived type `T`.
+  template<typename T = Data>
+  inline T& data() noexcept { return static_cast<T&>(_data); }
+
+  //! Casts this base class into a derived type `T` (const).
+  template<typename T = Data>
+  inline const T& data() const noexcept { return static_cast<const T&>(_data); }
+
+  //! Returns CpuFeatures::Data as \ref CpuFeatures::X86.
+  inline X86& x86() noexcept { return data<X86>(); }
+  //! Returns CpuFeatures::Data as \ref CpuFeatures::X86 (const).
+  inline const X86& x86() const noexcept { return data<X86>(); }
+
+  //! Returns CpuFeatures::Data as \ref CpuFeatures::ARM.
+  inline ARM& arm() noexcept { return data<ARM>(); }
+  //! Returns CpuFeatures::Data as \ref CpuFeatures::ARM (const).
+  inline const ARM& arm() const noexcept { return data<ARM>(); }
+
+  //! Returns all features as array of bitwords (see \ref Support::BitWord).
+  inline BitWord* bits() noexcept { return _data.bits(); }
+  //! Returns all features as array of bitwords (const).
+  inline const BitWord* bits() const noexcept { return _data.bits(); }
+  //! Returns the number of BitWords returned by \ref bits().
+  inline size_t bitWordCount() const noexcept { return _data.bitWordCount(); }
+
+  //! Returns \ref Support::BitVectorIterator, that can be used to iterate over all features efficiently.
+  inline Iterator iterator() const noexcept { return _data.iterator(); }
+
+  //! Tests whether the feature `featureId` is present.
+  template<typename FeatureId>
+  inline bool has(const FeatureId& featureId) const noexcept { return _data.has(featureId); }
+
+  //! Tests whether all features as defined by `other` are present.
+  inline bool hasAll(const CpuFeatures& other) const noexcept { return _data.hasAll(other._data); }
+
+  //! \}
+
+  //! \name Manipulation
+  //! \{
+
+  inline void reset() noexcept { _data.reset(); }
+
+  //! Adds the given CPU `featureId` to the list of features.
+  template<typename... Args>
+  inline void add(Args&&... args) noexcept { return _data.add(std::forward<Args>(args)...); }
+
+  //! Adds the given CPU `featureId` to the list of features if `condition` is true.
+  template<typename... Args>
+  inline void addIf(bool condition, Args&&... args) noexcept { return _data.addIf(condition, std::forward<Args>(args)...); }
+
+  //! Removes the given CPU `featureId` from the list of features.
+  template<typename... Args>
+  inline void remove(Args&&... args) noexcept { return _data.remove(std::forward<Args>(args)...); }
+
+  //! Tests whether this CPU features matches `other`.
+  inline bool eq(const CpuFeatures& other) const noexcept { return _data.eq(other._data); }
+
+  //! \}
+};
+
+//! CPU information.
+class CpuInfo {
+public:
+  //! \name Members
+  //! \{
+
+  //! Architecture.
+  Arch _arch;
+  //! Sub-architecture.
+  SubArch _subArch;
+  //! True if the CPU was detected, false if the detection failed or it's not available.
+  bool _wasDetected;
+  //! Reserved for future use.
+  uint8_t _reserved;
+  //! CPU family ID.
+  uint32_t _familyId;
+  //! CPU model ID.
+  uint32_t _modelId;
+  //! CPU brand ID.
+  uint32_t _brandId;
+  //! CPU stepping.
+  uint32_t _stepping;
+  //! Processor type.
+  uint32_t _processorType;
+  //! Maximum number of addressable IDs for logical processors.
+  uint32_t _maxLogicalProcessors;
+  //! Cache line size (in bytes).
+  uint32_t _cacheLineSize;
+  //! Number of hardware threads.
+  uint32_t _hwThreadCount;
+
+  //! CPU vendor string.
+  FixedString<16> _vendor;
+  //! CPU brand string.
+  FixedString<64> _brand;
+  //! CPU features.
+  CpuFeatures _features;
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  inline CpuInfo() noexcept { reset(); }
+  inline CpuInfo(const CpuInfo& other) noexcept = default;
+
+  inline explicit CpuInfo(Globals::NoInit_) noexcept
+    : _features(Globals::NoInit) {} // Unlike the default constructor, this one does not call reset().
+
+  //! Returns the host CPU information.
+  ASMJIT_API static const CpuInfo& host() noexcept;
+
+  //! Initializes CpuInfo architecture and sub-architecture members to `arch` and `subArch`, respectively.
+  inline void initArch(Arch arch, SubArch subArch = SubArch::kUnknown) noexcept {
+    _arch = arch;
+    _subArch = subArch;
+  }
+
+  inline void reset() noexcept { memset(this, 0, sizeof(*this)); }
+
+  //! \}
+
+  //! \name Overloaded Operators
+  //! \{
+
+  inline CpuInfo& operator=(const CpuInfo& other) noexcept = default;
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns the CPU architecture this information relates to.
+  inline Arch arch() const noexcept { return _arch; }
+
+  //! Returns the CPU sub-architecture this information relates to.
+  inline SubArch subArch() const noexcept { return _subArch; }
+
+  //! Returns whether the CPU was detected successfully.
+  //!
+  //! If the returned value is false it means that AsmJit either failed to detect the CPU or it doesn't have
+  //! implementation targeting the host architecture and operating system.
+  inline bool wasDetected() const noexcept { return _wasDetected; }
+
+  //! Returns the CPU family ID.
+  //!
+  //! Family identifier matches the FamilyId read by using CPUID on X86 architecture.
+  inline uint32_t familyId() const noexcept { return _familyId; }
+
+  //! Returns the CPU model ID.
+  //!
+  //! Model identifier matches the ModelId read by using CPUID on X86 architecture.
+  inline uint32_t modelId() const noexcept { return _modelId; }
+
+  //! Returns the CPU brand ID.
+  //!
+  //! Brand identifier matches the BrandId read by using CPUID on X86 architecture.
+  inline uint32_t brandId() const noexcept { return _brandId; }
+
+  //! Returns the CPU stepping.
+  //!
+  //! Stepping matches the Stepping information read by using CPUID on X86 architecture.
+  inline uint32_t stepping() const noexcept { return _stepping; }
+
+  //! Returns the processor type.
+  //!
+  //! Processor type matches the ProcessorType read by using CPUID on X86 architecture.
+  inline uint32_t processorType() const noexcept { return _processorType; }
+
+  //! Returns the maximum number of logical processors.
+  inline uint32_t maxLogicalProcessors() const noexcept { return _maxLogicalProcessors; }
+
+  //! Returns the cache line size (in bytes).
+  inline uint32_t cacheLineSize() const noexcept { return _cacheLineSize; }
+
+  //! Returns number of hardware threads available.
+  inline uint32_t hwThreadCount() const noexcept { return _hwThreadCount; }
+
+  //! Returns a CPU vendor string.
+  inline const char* vendor() const noexcept { return _vendor.str; }
+  //! Tests whether the CPU vendor string is equal to `s`.
+  inline bool isVendor(const char* s) const noexcept { return _vendor.eq(s); }
+
+  //! Returns a CPU brand string.
+  inline const char* brand() const noexcept { return _brand.str; }
+
+  //! Returns CPU features.
+  inline CpuFeatures& features() noexcept { return _features; }
+  //! Returns CPU features (const).
+  inline const CpuFeatures& features() const noexcept { return _features; }
+
+  //! Tests whether the CPU has the given `featureId`.
+  template<typename FeatureId>
+  inline bool hasFeature(const FeatureId& featureId) const noexcept { return _features.has(featureId); }
+
+  //! Adds the given CPU `featureId` to the list of features.
+  template<typename... Args>
+  inline void addFeature(Args&&... args) noexcept { return _features.add(std::forward<Args>(args)...); }
+
+  //! Removes the given CPU `featureId` from the list of features.
+  template<typename... Args>
+  inline void removeFeature(Args&&... args) noexcept { return _features.remove(std::forward<Args>(args)...); }
+
+  //! \}
+};
+
+//! \}
+
+ASMJIT_END_NAMESPACE
+
+#endif // ASMJIT_CORE_CPUINFO_H_INCLUDED
diff --git a/lib/lepton/asmjit/core/emithelper.cpp b/lib/lepton/asmjit/core/emithelper.cpp
new file mode 100644
index 0000000000..bcdf098f48
--- /dev/null
+++ b/lib/lepton/asmjit/core/emithelper.cpp
@@ -0,0 +1,323 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#include "../core/archtraits.h"
+#include "../core/emithelper_p.h"
+#include "../core/formatter.h"
+#include "../core/funcargscontext_p.h"
+#include "../core/radefs_p.h"
+
+// Can be used for debugging...
+// #define ASMJIT_DUMP_ARGS_ASSIGNMENT
+
+ASMJIT_BEGIN_NAMESPACE
+
+// BaseEmitHelper - Formatting
+// ===========================
+
+#ifdef ASMJIT_DUMP_ARGS_ASSIGNMENT
+static void dumpFuncValue(String& sb, Arch arch, const FuncValue& value) noexcept {
+  Formatter::formatTypeId(sb, value.typeId());
+  sb.append('@');
+
+  if (value.isIndirect())
+    sb.append('[');
+
+  if (value.isReg())
+    Formatter::formatRegister(sb, 0, nullptr, arch, value.regType(), value.regId());
+  else if (value.isStack())
+    sb.appendFormat("[%d]", value.stackOffset());
+  else
+    sb.append("<none>");
+
+  if (value.isIndirect())
+    sb.append(']');
+}
+
+static void dumpAssignment(String& sb, const FuncArgsContext& ctx) noexcept {
+  typedef FuncArgsContext::Var Var;
+
+  Arch arch = ctx.arch();
+  uint32_t varCount = ctx.varCount();
+
+  for (uint32_t i = 0; i < varCount; i++) {
+    const Var& var = ctx.var(i);
+    const FuncValue& dst = var.out;
+    const FuncValue& cur = var.cur;
+
+    sb.appendFormat("Var%u: ", i);
+    dumpFuncValue(sb, arch, dst);
+    sb.append(" <- ");
+    dumpFuncValue(sb, arch, cur);
+
+    if (var.isDone())
+      sb.append(" {Done}");
+
+    sb.append('\n');
+  }
+}
+#endif
+
+// BaseEmitHelper - EmitArgsAssignment
+// ===================================
+
+ASMJIT_FAVOR_SIZE Error BaseEmitHelper::emitArgsAssignment(const FuncFrame& frame, const FuncArgsAssignment& args) {
+  typedef FuncArgsContext::Var Var;
+  typedef FuncArgsContext::WorkData WorkData;
+
+  enum WorkFlags : uint32_t {
+    kWorkNone      = 0x00,
+    kWorkDidSome   = 0x01,
+    kWorkPending   = 0x02,
+    kWorkPostponed = 0x04
+  };
+
+  Arch arch = frame.arch();
+  const ArchTraits& archTraits = ArchTraits::byArch(arch);
+
+  RAConstraints constraints;
+  FuncArgsContext ctx;
+
+  ASMJIT_PROPAGATE(constraints.init(arch));
+  ASMJIT_PROPAGATE(ctx.initWorkData(frame, args, &constraints));
+
+#ifdef ASMJIT_DUMP_ARGS_ASSIGNMENT
+  {
+    String sb;
+    dumpAssignment(sb, ctx);
+    printf("%s\n", sb.data());
+  }
+#endif
+
+  auto& workData = ctx._workData;
+  uint32_t varCount = ctx._varCount;
+  uint32_t saVarId = ctx._saVarId;
+
+  BaseReg sp = BaseReg(_emitter->_gpSignature, archTraits.spRegId());
+  BaseReg sa = sp;
+
+  if (frame.hasDynamicAlignment()) {
+    if (frame.hasPreservedFP())
+      sa.setId(archTraits.fpRegId());
+    else
+      sa.setId(saVarId < varCount ? ctx._vars[saVarId].cur.regId() : frame.saRegId());
+  }
+
+  // Register to stack and stack to stack moves must be first as now we have
+  // the biggest chance of having as many as possible unassigned registers.
+
+  if (ctx._stackDstMask) {
+    // Base address of all arguments passed by stack.
+    BaseMem baseArgPtr(sa, int32_t(frame.saOffset(sa.id())));
+    BaseMem baseStackPtr(sp, 0);
+
+    for (uint32_t varId = 0; varId < varCount; varId++) {
+      Var& var = ctx._vars[varId];
+
+      if (!var.out.isStack())
+        continue;
+
+      FuncValue& cur = var.cur;
+      FuncValue& out = var.out;
+
+      ASMJIT_ASSERT(cur.isReg() || cur.isStack());
+      BaseReg reg;
+
+      BaseMem dstStackPtr = baseStackPtr.cloneAdjusted(out.stackOffset());
+      BaseMem srcStackPtr = baseArgPtr.cloneAdjusted(cur.stackOffset());
+
+      if (cur.isIndirect()) {
+        if (cur.isStack()) {
+          // TODO: Indirect stack.
+          return DebugUtils::errored(kErrorInvalidAssignment);
+        }
+        else {
+          srcStackPtr.setBaseId(cur.regId());
+        }
+      }
+
+      if (cur.isReg() && !cur.isIndirect()) {
+        WorkData& wd = workData[archTraits.regTypeToGroup(cur.regType())];
+        uint32_t regId = cur.regId();
+
+        reg.setSignatureAndId(archTraits.regTypeToSignature(cur.regType()), regId);
+        wd.unassign(varId, regId);
+      }
+      else {
+        // Stack to reg move - tricky: since we move stack to stack, we can decide which register to use. In general
+        // we follow the rule that IntToInt moves will use GP regs with the possibility to sign or zero extend,
+        // and all other moves will use either GP or VEC regs depending on the size of the move.
+        OperandSignature signature = getSuitableRegForMemToMemMove(arch, out.typeId(), cur.typeId());
+        if (ASMJIT_UNLIKELY(!signature.isValid()))
+          return DebugUtils::errored(kErrorInvalidState);
+
+        WorkData& wd = workData[signature.regGroup()];
+        RegMask availableRegs = wd.availableRegs();
+        if (ASMJIT_UNLIKELY(!availableRegs))
+          return DebugUtils::errored(kErrorInvalidState);
+
+        uint32_t availableId = Support::ctz(availableRegs);
+        reg.setSignatureAndId(signature, availableId);
+
+        ASMJIT_PROPAGATE(emitArgMove(reg, out.typeId(), srcStackPtr, cur.typeId()));
+      }
+
+      if (cur.isIndirect() && cur.isReg())
+        workData[RegGroup::kGp].unassign(varId, cur.regId());
+
+      // Register to stack move.
+      ASMJIT_PROPAGATE(emitRegMove(dstStackPtr, reg, cur.typeId()));
+      var.markDone();
+    }
+  }
+
+  // Shuffle all registers that are currently assigned according to the target assignment.
+
+  uint32_t workFlags = kWorkNone;
+  for (;;) {
+    for (uint32_t varId = 0; varId < varCount; varId++) {
+      Var& var = ctx._vars[varId];
+      if (var.isDone() || !var.cur.isReg())
+        continue;
+
+      FuncValue& cur = var.cur;
+      FuncValue& out = var.out;
+
+      RegGroup curGroup = archTraits.regTypeToGroup(cur.regType());
+      RegGroup outGroup = archTraits.regTypeToGroup(out.regType());
+
+      uint32_t curId = cur.regId();
+      uint32_t outId = out.regId();
+
+      if (curGroup != outGroup) {
+        // TODO: Conversion is not supported.
+        return DebugUtils::errored(kErrorInvalidAssignment);
+      }
+      else {
+        WorkData& wd = workData[outGroup];
+        if (!wd.isAssigned(outId)) {
+EmitMove:
+          ASMJIT_PROPAGATE(
+            emitArgMove(
+              BaseReg(archTraits.regTypeToSignature(out.regType()), outId), out.typeId(),
+              BaseReg(archTraits.regTypeToSignature(cur.regType()), curId), cur.typeId()));
+
+          wd.reassign(varId, outId, curId);
+          cur.initReg(out.regType(), outId, out.typeId());
+
+          if (outId == out.regId())
+            var.markDone();
+          workFlags |= kWorkDidSome | kWorkPending;
+        }
+        else {
+          uint32_t altId = wd._physToVarId[outId];
+          Var& altVar = ctx._vars[altId];
+
+          if (!altVar.out.isInitialized() || (altVar.out.isReg() && altVar.out.regId() == curId)) {
+            // Only few architectures provide swap operations, and only for few register groups.
+            if (archTraits.hasInstRegSwap(curGroup)) {
+              RegType highestType = Support::max(cur.regType(), altVar.cur.regType());
+              if (Support::isBetween(highestType, RegType::kGp8Lo, RegType::kGp16))
+                highestType = RegType::kGp32;
+
+              OperandSignature signature = archTraits.regTypeToSignature(highestType);
+              ASMJIT_PROPAGATE(
+                emitRegSwap(BaseReg(signature, outId), BaseReg(signature, curId)));
+
+              wd.swap(varId, curId, altId, outId);
+              cur.setRegId(outId);
+              var.markDone();
+              altVar.cur.setRegId(curId);
+
+              if (altVar.out.isInitialized())
+                altVar.markDone();
+              workFlags |= kWorkDidSome;
+            }
+            else {
+              // If there is a scratch register it can be used to perform the swap.
+              RegMask availableRegs = wd.availableRegs();
+              if (availableRegs) {
+                RegMask inOutRegs = wd.dstRegs();
+                if (availableRegs & ~inOutRegs)
+                  availableRegs &= ~inOutRegs;
+                outId = Support::ctz(availableRegs);
+                goto EmitMove;
+              }
+              else {
+                workFlags |= kWorkPending;
+              }
+            }
+          }
+          else {
+            workFlags |= kWorkPending;
+          }
+        }
+      }
+    }
+
+    if (!(workFlags & kWorkPending))
+      break;
+
+    // If we did nothing twice it means that something is really broken.
+    if ((workFlags & (kWorkDidSome | kWorkPostponed)) == kWorkPostponed)
+      return DebugUtils::errored(kErrorInvalidState);
+
+    workFlags = (workFlags & kWorkDidSome) ? kWorkNone : kWorkPostponed;
+  }
+
+  // Load arguments passed by stack into registers. This is pretty simple and
+  // it never requires multiple iterations like the previous phase.
+
+  if (ctx._hasStackSrc) {
+    uint32_t iterCount = 1;
+    if (frame.hasDynamicAlignment() && !frame.hasPreservedFP())
+      sa.setId(saVarId < varCount ? ctx._vars[saVarId].cur.regId() : frame.saRegId());
+
+    // Base address of all arguments passed by stack.
+    BaseMem baseArgPtr(sa, int32_t(frame.saOffset(sa.id())));
+
+    for (uint32_t iter = 0; iter < iterCount; iter++) {
+      for (uint32_t varId = 0; varId < varCount; varId++) {
+        Var& var = ctx._vars[varId];
+        if (var.isDone())
+          continue;
+
+        if (var.cur.isStack()) {
+          ASMJIT_ASSERT(var.out.isReg());
+
+          uint32_t outId = var.out.regId();
+          RegType outType = var.out.regType();
+
+          RegGroup group = archTraits.regTypeToGroup(outType);
+          WorkData& wd = workData[group];
+
+          if (outId == sa.id() && group == RegGroup::kGp) {
+            // This register will be processed last as we still need `saRegId`.
+            if (iterCount == 1) {
+              iterCount++;
+              continue;
+            }
+            wd.unassign(wd._physToVarId[outId], outId);
+          }
+
+          BaseReg dstReg = BaseReg(archTraits.regTypeToSignature(outType), outId);
+          BaseMem srcMem = baseArgPtr.cloneAdjusted(var.cur.stackOffset());
+
+          ASMJIT_PROPAGATE(emitArgMove(
+            dstReg, var.out.typeId(),
+            srcMem, var.cur.typeId()));
+
+          wd.assign(varId, outId);
+          var.cur.initReg(outType, outId, var.cur.typeId(), FuncValue::kFlagIsDone);
+        }
+      }
+    }
+  }
+
+  return kErrorOk;
+}
+
+ASMJIT_END_NAMESPACE
diff --git a/lib/lepton/asmjit/core/emithelper_p.h b/lib/lepton/asmjit/core/emithelper_p.h
new file mode 100644
index 0000000000..0333959e14
--- /dev/null
+++ b/lib/lepton/asmjit/core/emithelper_p.h
@@ -0,0 +1,58 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_EMITHELPER_P_H_INCLUDED
+#define ASMJIT_CORE_EMITHELPER_P_H_INCLUDED
+
+#include "../core/emitter.h"
+#include "../core/operand.h"
+#include "../core/type.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \cond INTERNAL
+//! \addtogroup asmjit_core
+//! \{
+
+//! Helper class that provides utilities for each supported architecture: backends implement the pure virtual move/swap primitives below, which `emitArgsAssignment()` uses.
+class BaseEmitHelper {
+public:
+  BaseEmitter* _emitter; //!< Emitter this helper works with; `nullptr` unless passed to the constructor or `setEmitter()`.
+
+  inline explicit BaseEmitHelper(BaseEmitter* emitter = nullptr) noexcept
+    : _emitter(emitter) {}
+
+  inline BaseEmitter* emitter() const noexcept { return _emitter; } //!< Returns the associated emitter (may be `nullptr`).
+  inline void setEmitter(BaseEmitter* emitter) noexcept { _emitter = emitter; } //!< Replaces the associated emitter.
+
+  //! Emits a pure move operation between two registers of the same type or between a register and its home
+  //! slot. This function does not handle register conversion.
+  virtual Error emitRegMove(
+    const Operand_& dst_,
+    const Operand_& src_, TypeId typeId, const char* comment = nullptr) = 0;
+
+  //! Emits a swap between two registers.
+  virtual Error emitRegSwap(
+    const BaseReg& a,
+    const BaseReg& b, const char* comment = nullptr) = 0;
+
+  //! Emits a move from a function argument (either register or stack) to a register.
+  //!
+  //! This function can handle the necessary conversion from one argument to another, and from one register type
+  //! to another, if it's possible. Any conversion that requires a third register of a different group
+  //! (for example conversion from K to MMX on X86/X64) will fail.
+  virtual Error emitArgMove(
+    const BaseReg& dst_, TypeId dstTypeId,
+    const Operand_& src_, TypeId srcTypeId, const char* comment = nullptr) = 0;
+
+  Error emitArgsAssignment(const FuncFrame& frame, const FuncArgsAssignment& args); //!< Emits the moves/swaps that place the arguments described by `args` for `frame`, using the virtuals above.
+};
+
+//! \}
+//! \endcond
+
+ASMJIT_END_NAMESPACE
+
+#endif // ASMJIT_CORE_EMITHELPER_P_H_INCLUDED
diff --git a/lib/lepton/asmjit/core/emitter.cpp b/lib/lepton/asmjit/core/emitter.cpp
new file mode 100644
index 0000000000..92d67a7e5b
--- /dev/null
+++ b/lib/lepton/asmjit/core/emitter.cpp
@@ -0,0 +1,333 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#include "../core/emitterutils_p.h"
+#include "../core/errorhandler.h"
+#include "../core/logger.h"
+#include "../core/support.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+// BaseEmitter - Construction & Destruction
+// ========================================
+
+BaseEmitter::BaseEmitter(EmitterType emitterType) noexcept
+  : _emitterType(emitterType) {}
+
+BaseEmitter::~BaseEmitter() noexcept {
+  if (_code) {
+    _addEmitterFlags(EmitterFlags::kDestroyed);
+    _code->detach(this);
+  }
+}
+
+// BaseEmitter - Finalize
+// ======================
+
+Error BaseEmitter::finalize() {
+  // Does nothing by default, overridden by `BaseBuilder` and `BaseCompiler`.
+  return kErrorOk;
+}
+
+// BaseEmitter - Internals
+// =======================
+
+static constexpr EmitterFlags kEmitterPreservedFlags = EmitterFlags::kOwnLogger | EmitterFlags::kOwnErrorHandler;
+
+static ASMJIT_NOINLINE void BaseEmitter_updateForcedOptions(BaseEmitter* self) noexcept {
+  bool emitComments = false;
+  bool hasDiagnosticOptions = false;
+
+  if (self->emitterType() == EmitterType::kAssembler) {
+    // Assembler: Don't emit comments if logger is not attached.
+    emitComments = self->_code != nullptr && self->_logger != nullptr;
+    hasDiagnosticOptions = self->hasDiagnosticOption(DiagnosticOptions::kValidateAssembler);
+  }
+  else {
+    // Builder/Compiler: Always emit comments, we cannot assume they won't be used.
+    emitComments = self->_code != nullptr;
+    hasDiagnosticOptions = self->hasDiagnosticOption(DiagnosticOptions::kValidateIntermediate);
+  }
+
+  if (emitComments)
+    self->_addEmitterFlags(EmitterFlags::kLogComments);
+  else
+    self->_clearEmitterFlags(EmitterFlags::kLogComments);
+
+  // The reserved option tells emitter (Assembler/Builder/Compiler) that there may be either a border
+  // case (CodeHolder not attached, for example) or that logging or validation is required.
+  if (self->_code == nullptr || self->_logger || hasDiagnosticOptions)
+    self->_forcedInstOptions |= InstOptions::kReserved;
+  else
+    self->_forcedInstOptions &= ~InstOptions::kReserved;
+}
+
+// BaseEmitter - Diagnostic Options
+// ================================
+
+void BaseEmitter::addDiagnosticOptions(DiagnosticOptions options) noexcept {
+  _diagnosticOptions |= options;
+  BaseEmitter_updateForcedOptions(this);
+}
+
+void BaseEmitter::clearDiagnosticOptions(DiagnosticOptions options) noexcept {
+  _diagnosticOptions &= ~options;
+  BaseEmitter_updateForcedOptions(this);
+}
+
+// BaseEmitter - Logging
+// =====================
+
+void BaseEmitter::setLogger(Logger* logger) noexcept {
+#ifndef ASMJIT_NO_LOGGING
+  if (logger) {
+    _logger = logger;
+    _addEmitterFlags(EmitterFlags::kOwnLogger);
+  }
+  else {
+    _logger = nullptr;
+    _clearEmitterFlags(EmitterFlags::kOwnLogger);
+    if (_code)
+      _logger = _code->logger();
+  }
+  BaseEmitter_updateForcedOptions(this);
+#else
+  DebugUtils::unused(logger);
+#endif
+}
+
+// BaseEmitter - Error Handling
+// ============================
+
+void BaseEmitter::setErrorHandler(ErrorHandler* errorHandler) noexcept {
+  if (errorHandler) {
+    _errorHandler = errorHandler;
+    _addEmitterFlags(EmitterFlags::kOwnErrorHandler);
+  }
+  else {
+    _errorHandler = nullptr;
+    _clearEmitterFlags(EmitterFlags::kOwnErrorHandler);
+    if (_code)
+      _errorHandler = _code->errorHandler();
+  }
+}
+
+Error BaseEmitter::reportError(Error err, const char* message) {
+  ErrorHandler* eh = _errorHandler;
+  if (eh) {
+    if (!message)
+      message = DebugUtils::errorAsString(err);
+    eh->handleError(err, message, this);
+  }
+  return err;
+}
+
+// BaseEmitter - Labels
+// ====================
+
+Label BaseEmitter::labelByName(const char* name, size_t nameSize, uint32_t parentId) noexcept {
+  return Label(_code ? _code->labelIdByName(name, nameSize, parentId) : Globals::kInvalidId);
+}
+
+bool BaseEmitter::isLabelValid(uint32_t labelId) const noexcept {
+  return _code && labelId < _code->labelCount();
+}
+
+// BaseEmitter - Emit (Low-Level)
+// ==============================
+
+using EmitterUtils::noExt;
+
+Error BaseEmitter::_emitI(InstId instId) {
+  return _emit(instId, noExt[0], noExt[1], noExt[2], noExt);
+}
+
+Error BaseEmitter::_emitI(InstId instId, const Operand_& o0) {
+  return _emit(instId, o0, noExt[1], noExt[2], noExt);
+}
+
+Error BaseEmitter::_emitI(InstId instId, const Operand_& o0, const Operand_& o1) {
+  return _emit(instId, o0, o1, noExt[2], noExt);
+}
+
+Error BaseEmitter::_emitI(InstId instId, const Operand_& o0, const Operand_& o1, const Operand_& o2) {
+  return _emit(instId, o0, o1, o2, noExt);
+}
+
+Error BaseEmitter::_emitI(InstId instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3) {
+  Operand_ opExt[3] = { o3 };
+  return _emit(instId, o0, o1, o2, opExt);
+}
+
+Error BaseEmitter::_emitI(InstId instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3, const Operand_& o4) {
+  Operand_ opExt[3] = { o3, o4 };
+  return _emit(instId, o0, o1, o2, opExt);
+}
+
+Error BaseEmitter::_emitI(InstId instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3, const Operand_& o4, const Operand_& o5) {
+  Operand_ opExt[3] = { o3, o4, o5 };
+  return _emit(instId, o0, o1, o2, opExt);
+}
+
+Error BaseEmitter::_emitOpArray(InstId instId, const Operand_* operands, size_t opCount) {
+  const Operand_* op = operands;
+  Operand_ opExt[3];
+
+  switch (opCount) {
+    case 0:
+      return _emit(instId, noExt[0], noExt[1], noExt[2], noExt);
+
+    case 1:
+      return _emit(instId, op[0], noExt[1], noExt[2], noExt);
+
+    case 2:
+      return _emit(instId, op[0], op[1], noExt[2], noExt);
+
+    case 3:
+      return _emit(instId, op[0], op[1], op[2], noExt);
+
+    case 4:
+      opExt[0] = op[3];
+      opExt[1].reset();
+      opExt[2].reset();
+      return _emit(instId, op[0], op[1], op[2], opExt);
+
+    case 5:
+      opExt[0] = op[3];
+      opExt[1] = op[4];
+      opExt[2].reset();
+      return _emit(instId, op[0], op[1], op[2], opExt);
+
+    case 6:
+      return _emit(instId, op[0], op[1], op[2], op + 3);
+
+    default:
+      return DebugUtils::errored(kErrorInvalidArgument);
+  }
+}
+
+// BaseEmitter - Emit Utilities
+// ============================
+
+Error BaseEmitter::emitProlog(const FuncFrame& frame) {
+  if (ASMJIT_UNLIKELY(!_code))
+    return DebugUtils::errored(kErrorNotInitialized);
+
+  return _funcs.emitProlog(this, frame);
+}
+
+Error BaseEmitter::emitEpilog(const FuncFrame& frame) {
+  if (ASMJIT_UNLIKELY(!_code))
+    return DebugUtils::errored(kErrorNotInitialized);
+
+  return _funcs.emitEpilog(this, frame);
+}
+
+Error BaseEmitter::emitArgsAssignment(const FuncFrame& frame, const FuncArgsAssignment& args) {
+  if (ASMJIT_UNLIKELY(!_code))
+    return DebugUtils::errored(kErrorNotInitialized);
+
+  return _funcs.emitArgsAssignment(this, frame, args);
+}
+
+// BaseEmitter - Comment
+// =====================
+
+Error BaseEmitter::commentf(const char* fmt, ...) {
+  if (!hasEmitterFlag(EmitterFlags::kLogComments)) {
+    if (!hasEmitterFlag(EmitterFlags::kAttached))
+      return reportError(DebugUtils::errored(kErrorNotInitialized));
+    return kErrorOk;
+  }
+
+#ifndef ASMJIT_NO_LOGGING
+  StringTmp<1024> sb;
+
+  va_list ap;
+  va_start(ap, fmt);
+  Error err = sb.appendVFormat(fmt, ap);
+  va_end(ap);
+
+  ASMJIT_PROPAGATE(err);
+  return comment(sb.data(), sb.size());
+#else
+  DebugUtils::unused(fmt);
+  return kErrorOk;
+#endif
+}
+
+Error BaseEmitter::commentv(const char* fmt, va_list ap) {
+  if (!hasEmitterFlag(EmitterFlags::kLogComments)) {
+    if (!hasEmitterFlag(EmitterFlags::kAttached))
+      return reportError(DebugUtils::errored(kErrorNotInitialized));
+    return kErrorOk;
+  }
+
+#ifndef ASMJIT_NO_LOGGING
+  StringTmp<1024> sb;
+  Error err = sb.appendVFormat(fmt, ap);
+
+  ASMJIT_PROPAGATE(err);
+  return comment(sb.data(), sb.size());
+#else
+  DebugUtils::unused(fmt, ap);
+  return kErrorOk;
+#endif
+}
+
+// BaseEmitter - Events
+// ====================
+
+Error BaseEmitter::onAttach(CodeHolder* code) noexcept {
+  _code = code;
+  _environment = code->environment();
+  _addEmitterFlags(EmitterFlags::kAttached);
+
+  const ArchTraits& archTraits = ArchTraits::byArch(code->arch());
+  RegType nativeRegType = Environment::is32Bit(code->arch()) ? RegType::kGp32 : RegType::kGp64;
+  _gpSignature = archTraits.regTypeToSignature(nativeRegType);
+
+  onSettingsUpdated();
+  return kErrorOk;
+}
+
+Error BaseEmitter::onDetach(CodeHolder* code) noexcept {
+  DebugUtils::unused(code);
+
+  if (!hasOwnLogger())
+    _logger = nullptr;
+
+  if (!hasOwnErrorHandler())
+    _errorHandler = nullptr;
+
+  _clearEmitterFlags(~kEmitterPreservedFlags);
+  _forcedInstOptions = InstOptions::kReserved;
+  _privateData = 0;
+
+  _environment.reset();
+  _gpSignature.reset();
+
+  _instOptions = InstOptions::kNone;
+  _extraReg.reset();
+  _inlineComment = nullptr;
+
+  return kErrorOk;
+}
+
+void BaseEmitter::onSettingsUpdated() noexcept {
+  // Only called when attached to CodeHolder by CodeHolder.
+  ASMJIT_ASSERT(_code != nullptr);
+
+  if (!hasOwnLogger())
+    _logger = _code->logger();
+
+  if (!hasOwnErrorHandler())
+    _errorHandler = _code->errorHandler();
+
+  BaseEmitter_updateForcedOptions(this);
+}
+
+ASMJIT_END_NAMESPACE
diff --git a/lib/lepton/asmjit/core/emitter.h b/lib/lepton/asmjit/core/emitter.h
new file mode 100644
index 0000000000..b8afd6b8e0
--- /dev/null
+++ b/lib/lepton/asmjit/core/emitter.h
@@ -0,0 +1,741 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_EMITTER_H_INCLUDED
+#define ASMJIT_CORE_EMITTER_H_INCLUDED
+
+#include "../core/archtraits.h"
+#include "../core/codeholder.h"
+#include "../core/formatter.h"
+#include "../core/inst.h"
+#include "../core/operand.h"
+#include "../core/type.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \addtogroup asmjit_core
+//! \{
+
+class ConstPool;
+class FuncFrame;
+class FuncArgsAssignment;
+
+//! Align mode, used by \ref BaseEmitter::align().
+enum class AlignMode : uint8_t {
+  //! Align executable code.
+  kCode = 0,
+  //! Align non-executable code.
+  kData = 1,
+  //! Align by a sequence of zeros.
+  kZero = 2,
+
+  //! Maximum value of `AlignMode`.
+  kMaxValue = kZero
+};
+
+//! Emitter type used by \ref BaseEmitter.
+enum class EmitterType : uint8_t {
+  //! Unknown or uninitialized.
+  kNone = 0,
+  //! Emitter inherits from \ref BaseAssembler.
+  kAssembler = 1,
+  //! Emitter inherits from \ref BaseBuilder.
+  kBuilder = 2,
+  //! Emitter inherits from \ref BaseCompiler.
+  kCompiler = 3,
+
+  //! Maximum value of `EmitterType`.
+  kMaxValue = kCompiler
+};
+
+//! Emitter flags, used by \ref BaseEmitter.
+enum class EmitterFlags : uint8_t {
+  //! No flags.
+  kNone = 0u,
+  //! Emitter is attached to CodeHolder.
+  kAttached = 0x01u,
+  //! The emitter must emit comments.
+  kLogComments = 0x08u,
+  //! The emitter has its own \ref Logger (not propagated from \ref CodeHolder).
+  kOwnLogger = 0x10u,
+  //! The emitter has its own \ref ErrorHandler (not propagated from \ref CodeHolder).
+  kOwnErrorHandler = 0x20u,
+  //! The emitter was finalized.
+  kFinalized = 0x40u,
+  //! The emitter was destroyed.
+  //!
+  //! This flag is used for a very short time when an emitter is being destroyed by
+  //! CodeHolder.
+  kDestroyed = 0x80u
+};
+ASMJIT_DEFINE_ENUM_FLAGS(EmitterFlags)
+
+//! Encoding options.
+enum class EncodingOptions : uint32_t {
+  //! No encoding options.
+  kNone = 0,
+
+  //! Emit instructions that are optimized for size, if possible.
+  //!
+  //! Default: false.
+  //!
+  //! X86 Specific
+  //! ------------
+  //!
+  //! When this option is set the assembler will try to rewrite instructions, if possible, into operation-equivalent
+  //! instructions that take fewer bytes by taking advantage of implicit zero extension. For example instructions
+  //! like `mov r64, imm` and `and r64, imm` can be translated to `mov r32, imm` and `and r32, imm` when the
+  //! immediate constant is less than `2^31`.
+  kOptimizeForSize = 0x00000001u,
+
+  //! Emit optimized code-alignment sequences.
+  //!
+  //! Default: false.
+  //!
+  //! X86 Specific
+  //! ------------
+  //!
+  //! Default align sequence used by X86 architecture is one-byte (0x90) opcode that is often shown by disassemblers
+  //! as NOP. However there are more optimized align sequences for 2-11 bytes that may execute faster on certain CPUs.
+  //! If this feature is enabled AsmJit will generate specialized sequences for alignment between 2 to 11 bytes.
+  kOptimizedAlign = 0x00000002u,
+
+  //! Emit jump-prediction hints.
+  //!
+  //! Default: false.
+  //!
+  //! X86 Specific
+  //! ------------
+  //!
+  //! Jump prediction is usually based on the direction of the jump. If the jump is backward it is usually predicted as
+  //! taken; and if the jump is forward it is usually predicted as not-taken. The reason is that loops generally use
+  //! backward jumps and conditions usually use forward jumps. However this behavior can be overridden by using
+  //! instruction prefixes. If this option is enabled these hints will be emitted.
+  //!
+  //! This feature is disabled by default, because the only processor that used to take into consideration prediction
+  //! hints was P4. Newer processors implement heuristics for branch prediction and ignore static hints. This means
+  //! that this feature can be only used for annotation purposes.
+  kPredictedJumps = 0x00000010u
+};
+ASMJIT_DEFINE_ENUM_FLAGS(EncodingOptions)
+
+//! Diagnostic options are used to tell emitters and their passes to perform diagnostics when emitting or processing
+//! user code. These options control validation and extra diagnostics that can be performed by higher level emitters.
+//!
+//! Instruction Validation
+//! ----------------------
+//!
+//! \ref BaseAssembler implementations perform by default only basic checks that are necessary to identify all
+//! variations of an instruction so the correct encoding can be selected. This is fine for production-ready code
+//! as the assembler doesn't have to perform checks that would slow it down. However, sometimes these checks are
+//! beneficial especially when the project that uses AsmJit is in a development phase, in which mistakes happen
+//! often. To make the experience of using AsmJit seamless it offers validation features that can be controlled
+//! by \ref DiagnosticOptions.
+//!
+//! Compiler Diagnostics
+//! --------------------
+//!
+//! Diagnostic options work with \ref BaseCompiler passes (precisely with its register allocation pass). These options
+//! can be used to enable logging of all operations that the Compiler does.
+enum class DiagnosticOptions : uint32_t {
+  //! No validation options.
+  kNone = 0,
+
+  //! Perform strict validation in \ref BaseAssembler::emit() implementations.
+  //!
+  //! This flag ensures that each instruction is checked before it's encoded into a binary representation. This flag
+  //! is only relevant for \ref BaseAssembler implementations, but can be set in any other emitter type, in that case
+  //! if that emitter needs to create an assembler on its own, for the purpose of \ref BaseEmitter::finalize() it
+  //! would propagate this flag to such assembler so all instructions passed to it are explicitly validated.
+  //!
+  //! Default: false.
+  kValidateAssembler = 0x00000001u,
+
+  //! Perform strict validation in \ref BaseBuilder::emit() and \ref BaseCompiler::emit() implementations.
+  //!
+  //! This flag ensures that each instruction is checked before an \ref InstNode representing the instruction is
+  //! created by \ref BaseBuilder or \ref BaseCompiler. This option could be more useful than \ref kValidateAssembler
+  //! in cases in which there is an invalid instruction passed to an assembler, which was invalid much earlier, most
+  //! likely when such instruction was passed to Builder/Compiler.
+  //!
+  //! This is a separate option that was introduced, because it's possible to manipulate the instruction stream
+  //! emitted by \ref BaseBuilder and \ref BaseCompiler - this means that it's allowed to emit invalid instructions
+  //! (for example with missing operands) that will be fixed later before finalizing it.
+  //!
+  //! Default: false.
+  kValidateIntermediate = 0x00000002u,
+
+  //! Annotate all nodes processed by register allocator (Compiler/RA).
+  //!
+  //! \note Annotations don't need debug options, however, some debug options like `kRADebugLiveness` may influence
+  //! their output (for example the mentioned option would add liveness information to per-instruction annotation).
+  kRAAnnotate = 0x00000080u,
+
+  //! Debug CFG generation and other related algorithms / operations (Compiler/RA).
+  kRADebugCFG = 0x00000100u,
+
+  //! Debug liveness analysis (Compiler/RA).
+  kRADebugLiveness = 0x00000200u,
+
+  //! Debug register allocation assignment (Compiler/RA).
+  kRADebugAssignment = 0x00000400u,
+
+  //! Debug the removal of code that is part of unreachable blocks.
+  kRADebugUnreachable = 0x00000800u,
+
+  //! Enable all debug options (Compiler/RA).
+  kRADebugAll = 0x0000FF00u,
+};
+ASMJIT_DEFINE_ENUM_FLAGS(DiagnosticOptions)
+
+//! Provides a base foundation for emitting code - specialized by \ref BaseAssembler and \ref BaseBuilder.
+class ASMJIT_VIRTAPI BaseEmitter {
+public:
+  ASMJIT_BASE_CLASS(BaseEmitter)
+
+  //! \name Members
+  //! \{
+
+  //! See \ref EmitterType.
+  EmitterType _emitterType = EmitterType::kNone;
+  //! See \ref EmitterFlags.
+  EmitterFlags _emitterFlags = EmitterFlags::kNone;
+  //! Validation flags in case validation is used.
+  //!
+  //! \note Validation flags are specific to the emitter and they are setup at construction time and then never
+  //! changed.
+  ValidationFlags _validationFlags = ValidationFlags::kNone;
+  //! Validation options.
+  DiagnosticOptions _diagnosticOptions = DiagnosticOptions::kNone;
+
+  //! All supported architectures in a bit-mask, where LSB is the bit with a zero index.
+  uint64_t _archMask = 0;
+
+  //! Encoding options.
+  EncodingOptions _encodingOptions = EncodingOptions::kNone;
+
+  //! Forced instruction options, combined with \ref _instOptions by \ref emit().
+  InstOptions _forcedInstOptions = InstOptions::kReserved;
+  //! Internal private data used freely by any emitter.
+  uint32_t _privateData = 0;
+
+  //! CodeHolder the emitter is attached to.
+  CodeHolder* _code = nullptr;
+  //! Attached \ref Logger.
+  Logger* _logger = nullptr;
+  //! Attached \ref ErrorHandler.
+  ErrorHandler* _errorHandler = nullptr;
+
+  //! Describes the target environment, matches \ref CodeHolder::environment().
+  Environment _environment {};
+  //! Native GP register signature and signature related information.
+  OperandSignature _gpSignature {};
+
+  //! Next instruction options (affects the next instruction).
+  InstOptions _instOptions = InstOptions::kNone;
+  //! Extra register (op-mask {k} on AVX-512) (affects the next instruction).
+  RegOnly _extraReg {};
+  //! Inline comment of the next instruction (affects the next instruction).
+  const char* _inlineComment = nullptr;
+
+  //! Function callbacks used by emitter implementation.
+  //!
+  //! These are typically shared between Assembler/Builder/Compiler of a single backend.
+  struct Funcs {
+    typedef Error (ASMJIT_CDECL* EmitProlog)(BaseEmitter* emitter, const FuncFrame& frame);
+    typedef Error (ASMJIT_CDECL* EmitEpilog)(BaseEmitter* emitter, const FuncFrame& frame);
+    typedef Error (ASMJIT_CDECL* EmitArgsAssignment)(BaseEmitter* emitter, const FuncFrame& frame, const FuncArgsAssignment& args);
+
+    typedef Error (ASMJIT_CDECL* FormatInstruction)(
+      String& sb, FormatFlags formatFlags, const BaseEmitter* emitter, Arch arch,
+      const BaseInst& inst, const Operand_* operands, size_t opCount) ASMJIT_NOEXCEPT_TYPE;
+
+    typedef Error (ASMJIT_CDECL* ValidateFunc)(Arch arch, const BaseInst& inst, const Operand_* operands, size_t opCount, ValidationFlags validationFlags) ASMJIT_NOEXCEPT_TYPE;
+
+    //! Emit prolog implementation.
+    EmitProlog emitProlog;
+    //! Emit epilog implementation.
+    EmitEpilog emitEpilog;
+    //! Emit arguments assignment implementation.
+    EmitArgsAssignment emitArgsAssignment;
+    //! Instruction formatter implementation.
+    FormatInstruction formatInstruction;
+    //! Instruction validation implementation.
+    ValidateFunc validate;
+
+    //! Resets all functions to nullptr.
+    //!
+    //! \note Every callback is cleared, including `formatInstruction`, so no stale pointer survives a reset.
+    inline void reset() noexcept {
+      emitProlog = nullptr;
+      emitEpilog = nullptr;
+      emitArgsAssignment = nullptr;
+      formatInstruction = nullptr;
+      validate = nullptr;
+    }
+  };
+
+  Funcs _funcs {};
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  ASMJIT_API explicit BaseEmitter(EmitterType emitterType) noexcept;
+  ASMJIT_API virtual ~BaseEmitter() noexcept;
+
+  //! \}
+
+  //! \name Cast
+  //! \{
+
+  template<typename T>
+  inline T* as() noexcept { return reinterpret_cast<T*>(this); }
+
+  template<typename T>
+  inline const T* as() const noexcept { return reinterpret_cast<const T*>(this); }
+
+  //! \}
+
+  //! \name Emitter Type & Flags
+  //! \{
+
+  //! Returns the type of this emitter, see `EmitterType`.
+  inline EmitterType emitterType() const noexcept { return _emitterType; }
+  //! Returns emitter flags, see \ref EmitterFlags.
+  inline EmitterFlags emitterFlags() const noexcept { return _emitterFlags; }
+
+  //! Tests whether the emitter inherits from `BaseAssembler`.
+  inline bool isAssembler() const noexcept { return _emitterType == EmitterType::kAssembler; }
+  //! Tests whether the emitter inherits from `BaseBuilder`.
+  //!
+  //! \note Both Builder and Compiler emitters would return `true`.
+  inline bool isBuilder() const noexcept { return uint32_t(_emitterType) >= uint32_t(EmitterType::kBuilder); }
+  //! Tests whether the emitter inherits from `BaseCompiler`.
+  inline bool isCompiler() const noexcept { return _emitterType == EmitterType::kCompiler; }
+
+  //! Tests whether the emitter has the given `flag` enabled.
+  inline bool hasEmitterFlag(EmitterFlags flag) const noexcept { return Support::test(_emitterFlags, flag); }
+  //! Tests whether the emitter is finalized.
+  inline bool isFinalized() const noexcept { return hasEmitterFlag(EmitterFlags::kFinalized); }
+  //! Tests whether the emitter is destroyed (only used during destruction).
+  inline bool isDestroyed() const noexcept { return hasEmitterFlag(EmitterFlags::kDestroyed); }
+
+  inline void _addEmitterFlags(EmitterFlags flags) noexcept { _emitterFlags |= flags; }
+  inline void _clearEmitterFlags(EmitterFlags flags) noexcept { _emitterFlags &= ~flags; } //!< Clears the given emitter `flags`.
+
+  //! \}
+
+  //! \name Target Information
+  //! \{
+
+  //! Returns the CodeHolder this emitter is attached to.
+  inline CodeHolder* code() const noexcept { return _code; }
+
+  //! Returns the target environment.
+  //!
+  //! The returned \ref Environment reference matches \ref CodeHolder::environment().
+  inline const Environment& environment() const noexcept { return _environment; }
+
+  //! Tests whether the target architecture is 32-bit.
+  inline bool is32Bit() const noexcept { return environment().is32Bit(); }
+  //! Tests whether the target architecture is 64-bit.
+  inline bool is64Bit() const noexcept { return environment().is64Bit(); }
+
+  //! Returns the target architecture type.
+  inline Arch arch() const noexcept { return environment().arch(); }
+  //! Returns the target architecture sub-type.
+  inline SubArch subArch() const noexcept { return environment().subArch(); }
+
+  //! Returns the target architecture's GP register size (4 or 8 bytes).
+  inline uint32_t registerSize() const noexcept { return environment().registerSize(); }
+
+  //! \}
+
+  //! \name Initialization & Finalization
+  //! \{
+
+  //! Tests whether the emitter is initialized (i.e. attached to \ref CodeHolder).
+  inline bool isInitialized() const noexcept { return _code != nullptr; }
+
+  //! Finalizes this emitter.
+  //!
+  //! Materializes the content of the emitter by serializing it to the attached \ref CodeHolder through an architecture
+  //! specific \ref BaseAssembler. This function won't do anything if the emitter inherits from \ref BaseAssembler as
+  //! assemblers emit directly to a \ref CodeBuffer held by \ref CodeHolder. However, if this is an emitter that
+  //! inherits from \ref BaseBuilder or \ref BaseCompiler then these emitters need the materialization phase as they
+  //! store their content in a representation not visible to \ref CodeHolder.
+  ASMJIT_API virtual Error finalize();
+
+  //! \}
+
+  //! \name Logging
+  //! \{
+
+  //! Tests whether the emitter has a logger.
+  inline bool hasLogger() const noexcept { return _logger != nullptr; }
+
+  //! Tests whether the emitter has its own logger.
+  //!
+  //! Own logger means that it overrides the possible logger that may be used by \ref CodeHolder this emitter is
+  //! attached to.
+  inline bool hasOwnLogger() const noexcept { return hasEmitterFlag(EmitterFlags::kOwnLogger); }
+
+  //! Returns the logger this emitter uses.
+  //!
+  //! The returned logger is either the emitter's own logger or the logger used by the \ref CodeHolder this emitter
+  //! is attached to.
+  inline Logger* logger() const noexcept { return _logger; }
+
+  //! Sets or resets the logger of the emitter.
+  //!
+  //! If the `logger` argument is non-null then the logger will be considered emitter's own logger, see \ref
+  //! hasOwnLogger() for more details. If the given `logger` is null then the emitter will automatically use logger
+  //! that is attached to the \ref CodeHolder this emitter is attached to.
+  ASMJIT_API void setLogger(Logger* logger) noexcept;
+
+  //! Resets the logger of this emitter.
+  //!
+  //! The emitter will bail to using a logger attached to \ref CodeHolder this emitter is attached to, or no logger
+  //! at all if \ref CodeHolder doesn't have one.
+  inline void resetLogger() noexcept { return setLogger(nullptr); }
+
+  //! \}
+
+  //! \name Error Handling
+  //! \{
+
+  //! Tests whether the emitter has an error handler attached.
+  inline bool hasErrorHandler() const noexcept { return _errorHandler != nullptr; }
+
+  //! Tests whether the emitter has its own error handler.
+  //!
+  //! Own error handler means that it overrides the possible error handler that may be used by \ref CodeHolder this
+  //! emitter is attached to.
+  inline bool hasOwnErrorHandler() const noexcept { return hasEmitterFlag(EmitterFlags::kOwnErrorHandler); }
+
+  //! Returns the error handler this emitter uses.
+  //!
+  //! The returned error handler is either the emitter's own error handler or the error handler used by
+  //! the \ref CodeHolder this emitter is attached to.
+  inline ErrorHandler* errorHandler() const noexcept { return _errorHandler; }
+
+  //! Sets or resets the error handler of the emitter.
+  ASMJIT_API void setErrorHandler(ErrorHandler* errorHandler) noexcept;
+
+  //! Resets the error handler.
+  inline void resetErrorHandler() noexcept { setErrorHandler(nullptr); }
+
+  //! Handles the given error in the following way:
+  //!   1. If the emitter has \ref ErrorHandler attached, it calls its \ref ErrorHandler::handleError() member function
+  //!      first, and then returns the error. The `handleError()` function may throw.
+  //!   2. if the emitter doesn't have \ref ErrorHandler, the error is simply returned.
+  ASMJIT_API Error reportError(Error err, const char* message = nullptr);
+
+  //! \}
+
+  //! \name Encoding Options
+  //! \{
+
+  //! Returns encoding options.
+  inline EncodingOptions encodingOptions() const noexcept { return _encodingOptions; }
+  //! Tests whether the encoding `option` is set.
+  inline bool hasEncodingOption(EncodingOptions option) const noexcept { return Support::test(_encodingOptions, option); }
+
+  //! Enables the given encoding `options`.
+  inline void addEncodingOptions(EncodingOptions options) noexcept { _encodingOptions |= options; }
+  //! Disables the given encoding `options`.
+  inline void clearEncodingOptions(EncodingOptions options) noexcept { _encodingOptions &= ~options; }
+
+  //! \}
+
+  //! \name Diagnostic Options
+  //! \{
+
+  //! Returns the emitter's diagnostic options.
+  inline DiagnosticOptions diagnosticOptions() const noexcept { return _diagnosticOptions; }
+
+  //! Tests whether the given `option` is present in the emitter's diagnostic options.
+  inline bool hasDiagnosticOption(DiagnosticOptions option) const noexcept { return Support::test(_diagnosticOptions, option); }
+
+  //! Activates the given diagnostic `options`.
+  //!
+  //! This function is used to activate explicit validation options that will be then used by all emitter
+  //! implementations. There are in general two possibilities:
+  //!
+  //!   - Architecture specific assembler is used. In this case a \ref DiagnosticOptions::kValidateAssembler can be
+  //!     used to turn on explicit validation that will be used before an instruction is emitted. This means that
+  //!     internally an extra step will be performed to make sure that the instruction is correct. This is needed,
+  //!     because by default assemblers prefer speed over strictness.
+  //!
+  //!     This option should be used in debug builds as it's pretty expensive.
+  //!
+  //!   - Architecture specific builder or compiler is used. In this case the user can turn on
+  //!     \ref DiagnosticOptions::kValidateIntermediate option that adds explicit validation step before the Builder
+  //!     or Compiler creates an \ref InstNode to represent an emitted instruction. Error will be returned if the
+  //!     instruction is ill-formed. In addition, also \ref DiagnosticOptions::kValidateAssembler can be used, which
+  //!     would not be consumed by Builder / Compiler directly, but it would be propagated to an architecture specific
+  //!     \ref BaseAssembler implementation it creates during \ref BaseEmitter::finalize().
+  ASMJIT_API void addDiagnosticOptions(DiagnosticOptions options) noexcept;
+
+  //! Deactivates the given diagnostic `options`.
+  //!
+  //! See \ref addDiagnosticOptions() and \ref DiagnosticOptions for more details.
+  ASMJIT_API void clearDiagnosticOptions(DiagnosticOptions options) noexcept;
+
+  //! \}
+
+  //! \name Instruction Options
+  //! \{
+
+  //! Returns forced instruction options.
+  //!
+  //! Forced instruction options are merged with next instruction options before the instruction is encoded. These
+  //! options have some bits reserved that are used by error handling, logging, and instruction validation purposes.
+  //! Other options are globals that affect each instruction.
+  inline InstOptions forcedInstOptions() const noexcept { return _forcedInstOptions; }
+
+  //! Returns options of the next instruction.
+  inline InstOptions instOptions() const noexcept { return _instOptions; }
+  //! Sets options of the next instruction.
+  inline void setInstOptions(InstOptions options) noexcept { _instOptions = options; }
+  //! Adds options of the next instruction.
+  inline void addInstOptions(InstOptions options) noexcept { _instOptions |= options; }
+  //! Resets options of the next instruction.
+  inline void resetInstOptions() noexcept { _instOptions = InstOptions::kNone; }
+
+  //! Tests whether the extra register operand is valid.
+  inline bool hasExtraReg() const noexcept { return _extraReg.isReg(); }
+  //! Returns an extra operand that will be used by the next instruction (architecture specific).
+  inline const RegOnly& extraReg() const noexcept { return _extraReg; }
+  //! Sets an extra operand that will be used by the next instruction (architecture specific).
+  inline void setExtraReg(const BaseReg& reg) noexcept { _extraReg.init(reg); }
+  //! Sets an extra operand that will be used by the next instruction (architecture specific).
+  inline void setExtraReg(const RegOnly& reg) noexcept { _extraReg.init(reg); }
+  //! Resets an extra operand that will be used by the next instruction (architecture specific).
+  inline void resetExtraReg() noexcept { _extraReg.reset(); }
+
+  //! Returns comment/annotation of the next instruction.
+  inline const char* inlineComment() const noexcept { return _inlineComment; }
+  //! Sets comment/annotation of the next instruction.
+  //!
+  //! \note This string is set back to null by `_emit()`, but until that it has to remain valid as the Emitter is not
+  //! required to make a copy of it (and it would be slow to do that for each instruction).
+  inline void setInlineComment(const char* s) noexcept { _inlineComment = s; }
+  //! Resets the comment/annotation to nullptr.
+  inline void resetInlineComment() noexcept { _inlineComment = nullptr; }
+
+  //! \}
+
+  //! \name Sections
+  //! \{
+
+  virtual Error section(Section* section) = 0;
+
+  //! \}
+
+  //! \name Labels
+  //! \{
+
+  //! Creates a new label.
+  virtual Label newLabel() = 0;
+  //! Creates a new named label.
+  virtual Label newNamedLabel(const char* name, size_t nameSize = SIZE_MAX, LabelType type = LabelType::kGlobal, uint32_t parentId = Globals::kInvalidId) = 0;
+
+  //! Creates a new anonymous label with a name, which can only be used for debugging purposes.
+  inline Label newAnonymousLabel(const char* name, size_t nameSize = SIZE_MAX) { return newNamedLabel(name, nameSize, LabelType::kAnonymous); }
+  //! Creates a new external label.
+  inline Label newExternalLabel(const char* name, size_t nameSize = SIZE_MAX) { return newNamedLabel(name, nameSize, LabelType::kExternal); }
+
+  //! Returns `Label` by `name`.
+  //!
+  //! Returns invalid Label in case that the name is invalid or label was not found.
+  //!
+  //! \note This function doesn't trigger ErrorHandler in case the name is invalid or no such label exist. You must
+  //! always check the validity of the `Label` returned.
+  ASMJIT_API Label labelByName(const char* name, size_t nameSize = SIZE_MAX, uint32_t parentId = Globals::kInvalidId) noexcept;
+
+  //! Binds the `label` to the current position of the current section.
+  //!
+  //! \note Attempt to bind the same label multiple times will return an error.
+  virtual Error bind(const Label& label) = 0;
+
+  //! Tests whether the label `id` is valid (i.e. registered).
+  ASMJIT_API bool isLabelValid(uint32_t labelId) const noexcept;
+  //! Tests whether the `label` is valid (i.e. registered).
+  inline bool isLabelValid(const Label& label) const noexcept { return isLabelValid(label.id()); }
+
+  //! \}
+
+  //! \name Emit
+  //! \{
+
+  // NOTE: These `emit()` helpers are designed to address a code-bloat generated by C++ compilers to call a function
+  // having many arguments. Each parameter to `_emit()` requires some code to pass it, which means that if we default
+  // to 5 arguments in `_emit()` and instId the C++ compiler would have to generate a virtual function call having 5
+  // parameters and additional `this` argument, which is quite a lot. Since by default most instructions have 2 to 3
+  // operands it's better to introduce helpers that pass from 0 to 6 operands that help to reduce the size of emit(...)
+  // function call.
+
+  //! Emits an instruction (internal).
+  ASMJIT_API Error _emitI(InstId instId);
+  //! \overload
+  ASMJIT_API Error _emitI(InstId instId, const Operand_& o0);
+  //! \overload
+  ASMJIT_API Error _emitI(InstId instId, const Operand_& o0, const Operand_& o1);
+  //! \overload
+  ASMJIT_API Error _emitI(InstId instId, const Operand_& o0, const Operand_& o1, const Operand_& o2);
+  //! \overload
+  ASMJIT_API Error _emitI(InstId instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3);
+  //! \overload
+  ASMJIT_API Error _emitI(InstId instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3, const Operand_& o4);
+  //! \overload
+  ASMJIT_API Error _emitI(InstId instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3, const Operand_& o4, const Operand_& o5);
+
+  //! Emits an instruction `instId` with the given `operands`.
+  template<typename... Args>
+  ASMJIT_FORCE_INLINE Error emit(InstId instId, Args&&... operands) {
+    return _emitI(instId, Support::ForwardOp<Args>::forward(operands)...);
+  }
+
+  ASMJIT_FORCE_INLINE Error emitOpArray(InstId instId, const Operand_* operands, size_t opCount) {
+    return _emitOpArray(instId, operands, opCount);
+  }
+
+  ASMJIT_FORCE_INLINE Error emitInst(const BaseInst& inst, const Operand_* operands, size_t opCount) {
+    setInstOptions(inst.options()); // Options and extra register of `inst` become the next-instruction state.
+    setExtraReg(inst.extraReg());
+    return _emitOpArray(inst.id(), operands, opCount); // Emitted through the operand-array path.
+  }
+
+  //! \cond INTERNAL
+  //! Emits an instruction - all 6 operands must be defined.
+  virtual Error _emit(InstId instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_* oExt) = 0;
+  //! Emits instruction having operands stored in array.
+  ASMJIT_API virtual Error _emitOpArray(InstId instId, const Operand_* operands, size_t opCount);
+  //! \endcond
+
+  //! \}
+
+  //! \name Emit Utilities
+  //! \{
+
+  ASMJIT_API Error emitProlog(const FuncFrame& frame);
+  ASMJIT_API Error emitEpilog(const FuncFrame& frame);
+  ASMJIT_API Error emitArgsAssignment(const FuncFrame& frame, const FuncArgsAssignment& args);
+
+  //! \}
+
+  //! \name Align
+  //! \{
+
+  //! Aligns the current CodeBuffer position to the `alignment` specified.
+  //!
+  //! The sequence that is used to fill the gap between the aligned location and the current location depends on the
+  //! align `mode`, see \ref AlignMode. The `alignment` argument specifies alignment in bytes, so for example when
+  //! it's `32` it means that the code buffer will be aligned to `32` bytes.
+  virtual Error align(AlignMode alignMode, uint32_t alignment) = 0;
+
+  //! \}
+
+  //! \name Embed
+  //! \{
+
+  //! Embeds raw data into the \ref CodeBuffer.
+  virtual Error embed(const void* data, size_t dataSize) = 0;
+
+  //! Embeds a typed data array.
+  //!
+  //! This is the most flexible function for embedding data as it allows to:
+  //!
+  //!   - Assign a `typeId` to the data, so the emitter knows the type of items stored in `data`. Binary data should
+  //!     use \ref TypeId::kUInt8.
+  //!
+  //!   - Repeat the given data `repeatCount` times, so the data can be used as a fill pattern for example, or as a
+  //!     pattern used by SIMD instructions.
+  virtual Error embedDataArray(TypeId typeId, const void* data, size_t itemCount, size_t repeatCount = 1) = 0;
+
+  //! Embeds int8_t `value` repeated by `repeatCount`.
+  inline Error embedInt8(int8_t value, size_t repeatCount = 1) { return embedDataArray(TypeId::kInt8, &value, 1, repeatCount); }
+  //! Embeds uint8_t `value` repeated by `repeatCount`.
+  inline Error embedUInt8(uint8_t value, size_t repeatCount = 1) { return embedDataArray(TypeId::kUInt8, &value, 1, repeatCount); }
+  //! Embeds int16_t `value` repeated by `repeatCount`.
+  inline Error embedInt16(int16_t value, size_t repeatCount = 1) { return embedDataArray(TypeId::kInt16, &value, 1, repeatCount); }
+  //! Embeds uint16_t `value` repeated by `repeatCount`.
+  inline Error embedUInt16(uint16_t value, size_t repeatCount = 1) { return embedDataArray(TypeId::kUInt16, &value, 1, repeatCount); }
+  //! Embeds int32_t `value` repeated by `repeatCount`.
+  inline Error embedInt32(int32_t value, size_t repeatCount = 1) { return embedDataArray(TypeId::kInt32, &value, 1, repeatCount); }
+  //! Embeds uint32_t `value` repeated by `repeatCount`.
+  inline Error embedUInt32(uint32_t value, size_t repeatCount = 1) { return embedDataArray(TypeId::kUInt32, &value, 1, repeatCount); }
+  //! Embeds int64_t `value` repeated by `repeatCount`.
+  inline Error embedInt64(int64_t value, size_t repeatCount = 1) { return embedDataArray(TypeId::kInt64, &value, 1, repeatCount); }
+  //! Embeds uint64_t `value` repeated by `repeatCount`.
+  inline Error embedUInt64(uint64_t value, size_t repeatCount = 1) { return embedDataArray(TypeId::kUInt64, &value, 1, repeatCount); }
+  //! Embeds a floating point `value` repeated by `repeatCount`.
+  inline Error embedFloat(float value, size_t repeatCount = 1) { return embedDataArray(TypeId(TypeUtils::TypeIdOfT<float>::kTypeId), &value, 1, repeatCount); }
+  //! Embeds a floating point `value` repeated by `repeatCount`.
+  inline Error embedDouble(double value, size_t repeatCount = 1) { return embedDataArray(TypeId(TypeUtils::TypeIdOfT<double>::kTypeId), &value, 1, repeatCount); }
+
+  //! Embeds a constant pool at the current offset by performing the following:
+  //!   1. Aligns by using AlignMode::kData to the minimum `pool` alignment.
+  //!   2. Binds the ConstPool label so it's bound to an aligned location.
+  //!   3. Emits ConstPool content.
+  virtual Error embedConstPool(const Label& label, const ConstPool& pool) = 0;
+
+  //! Embeds an absolute `label` address as data.
+  //!
+  //! The `dataSize` is an optional argument that can be used to specify the size of the address data. If it's zero
+  //! (default) the address size is deduced from the target architecture (either 4 or 8 bytes).
+  virtual Error embedLabel(const Label& label, size_t dataSize = 0) = 0;
+
+  //! Embeds a delta (distance) between the `label` and `base` calculating it as `label - base`. This function was
+  //! designed to make it easier to embed lookup tables where each index is a relative distance of two labels.
+  virtual Error embedLabelDelta(const Label& label, const Label& base, size_t dataSize = 0) = 0;
+
+  //! \}
+
+  //! \name Comment
+  //! \{
+
+  //! Emits a comment stored in `data` with an optional `size` parameter.
+  virtual Error comment(const char* data, size_t size = SIZE_MAX) = 0;
+
+  //! Emits a formatted comment specified by `fmt` and variable number of arguments.
+  ASMJIT_API Error commentf(const char* fmt, ...);
+  //! Emits a formatted comment specified by `fmt` and `ap`.
+  ASMJIT_API Error commentv(const char* fmt, va_list ap);
+
+  //! \}
+
+  //! \name Events
+  //! \{
+
+  //! Called after the emitter was attached to `CodeHolder`.
+  virtual Error onAttach(CodeHolder* ASMJIT_NONNULL(code)) noexcept = 0;
+  //! Called after the emitter was detached from `CodeHolder`.
+  virtual Error onDetach(CodeHolder* ASMJIT_NONNULL(code)) noexcept = 0;
+
+  //! Called when \ref CodeHolder has updated an important setting, which involves the following:
+  //!
+  //!   - \ref Logger has been changed (\ref CodeHolder::setLogger() has been called).
+  //!
+  //!   - \ref ErrorHandler has been changed (\ref CodeHolder::setErrorHandler() has been called).
+  //!
+  //! This function ensures that the settings are properly propagated from \ref CodeHolder to the emitter.
+  //!
+  //! \note This function is virtual and can be overridden, however, if you do so, always call \ref
+  //! BaseEmitter::onSettingsUpdated() within your own implementation to ensure that the emitter is
+  //! in a consistent state.
+  ASMJIT_API virtual void onSettingsUpdated() noexcept;
+
+  //! \}
+};
+
+//! \}
+
+ASMJIT_END_NAMESPACE
+
+#endif // ASMJIT_CORE_EMITTER_H_INCLUDED
diff --git a/lib/lepton/asmjit/core/emitterutils.cpp b/lib/lepton/asmjit/core/emitterutils.cpp
new file mode 100644
index 0000000000..f36a1b3774
--- /dev/null
+++ b/lib/lepton/asmjit/core/emitterutils.cpp
@@ -0,0 +1,129 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#include "../core/assembler.h"
+#include "../core/emitterutils_p.h"
+#include "../core/formatter_p.h"
+#include "../core/logger.h"
+#include "../core/support.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+namespace EmitterUtils {
+
+#ifndef ASMJIT_NO_LOGGING
+
+Error finishFormattedLine(String& sb, const FormatOptions& formatOptions, const uint8_t* binData, size_t binSize, size_t offsetSize, size_t immSize, const char* comment) noexcept {
+  // Appends the trailing columns of a log line - machine code and/or comment - and terminates it with '\n'.
+  ASMJIT_ASSERT(binSize >= offsetSize);
+
+  // `binSize == SIZE_MAX` means there is no machine-code column at all; zero means the column exists, but is
+  // empty (it's still emitted in front of a comment).
+  const bool hasBinColumn = binSize != SIZE_MAX;
+  const size_t commentSize = comment ? Support::strLen(comment, Globals::kMaxCommentSize) : 0;
+
+  if ((hasBinColumn && binSize != 0) || commentSize != 0) {
+    size_t padding = Formatter::paddingFromOptions(formatOptions, FormatPaddingGroup::kRegularLine);
+
+    if (hasBinColumn) {
+      // Bytes are written as hex, except the `offsetSize` bytes in front of the immediate, which become dots.
+      ASMJIT_PROPAGATE(sb.padEnd(padding));
+      ASMJIT_PROPAGATE(sb.append(';'));
+      ASMJIT_PROPAGATE(sb.append(' '));
+      ASMJIT_PROPAGATE(sb.appendHex(binData, binSize - offsetSize - immSize));
+      ASMJIT_PROPAGATE(sb.appendChars('.', offsetSize * 2));
+      ASMJIT_PROPAGATE(sb.appendHex(binData + binSize - immSize, immSize));
+    }
+
+    if (commentSize != 0) {
+      // A comment that follows the machine-code column is padded further and separated by '|' instead of ';'.
+      if (hasBinColumn)
+        padding += Formatter::paddingFromOptions(formatOptions, FormatPaddingGroup::kMachineCode);
+
+      ASMJIT_PROPAGATE(sb.padEnd(padding));
+      ASMJIT_PROPAGATE(sb.append(hasBinColumn ? '|' : ';'));
+      ASMJIT_PROPAGATE(sb.append(' '));
+      ASMJIT_PROPAGATE(sb.append(comment, commentSize));
+    }
+  }
+
+  return sb.append('\n');
+}
+
+void logLabelBound(BaseAssembler* self, const Label& label) noexcept {
+  Logger* logger = self->logger();
+  ASMJIT_ASSERT(logger != nullptr);
+  // Logs "<indentation><label>:"; a label has no bytes, so its machine-code column is empty (0) or omitted (SIZE_MAX).
+  StringTmp<512> sb;
+  size_t binSize = logger->hasFlag(FormatFlags::kMachineCode) ? size_t(0) : SIZE_MAX;
+  sb.appendChars(' ', logger->indentation(FormatIndentationGroup::kLabel));
+  Formatter::formatLabel(sb, logger->flags(), self, label.id());
+  sb.append(':');
+  finishFormattedLine(sb, logger->options(), nullptr, binSize, 0, 0, self->inlineComment());
+  logger->log(sb);
+}
+
+void logInstructionEmitted(
+  BaseAssembler* self,
+  InstId instId,
+  InstOptions options,
+  const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_* opExt,
+  uint32_t relSize, uint32_t immSize, uint8_t* afterCursor) {
+
+  Logger* logger = self->logger();
+  ASMJIT_ASSERT(logger != nullptr);
+  const FormatFlags formatFlags = logger->flags();
+
+  // The bytes of the instruction are taken as the range [bufferPtr(), afterCursor).
+  const uint8_t* instBegin = self->bufferPtr();
+  const size_t instSize = size_t(intptr_t(afterCursor - instBegin));
+
+  // Gather all operand slots into a single array, which is what the formatter callback expects.
+  Operand_ operands[Globals::kMaxOpCount];
+  opArrayFromEmitArgs(operands, o0, o1, o2, opExt);
+  StringTmp<256> line;
+  line.appendChars(' ', logger->indentation(FormatIndentationGroup::kCode));
+  self->_funcs.formatInstruction(line, formatFlags, self, self->arch(), BaseInst(instId, options, self->extraReg()), operands, Globals::kMaxOpCount);
+
+  if (!Support::test(formatFlags, FormatFlags::kMachineCode))
+    finishFormattedLine(line, logger->options(), nullptr, SIZE_MAX, 0, 0, self->inlineComment());
+  else
+    finishFormattedLine(line, logger->options(), instBegin, instSize, relSize, immSize, self->inlineComment());
+  logger->log(line);
+}
+
+Error logInstructionFailed(
+  BaseAssembler* self,
+  Error err,
+  InstId instId,
+  InstOptions options,
+  const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_* opExt) {
+
+  Operand_ operands[Globals::kMaxOpCount];
+  opArrayFromEmitArgs(operands, o0, o1, o2, opExt);
+  // Message layout: "<error description>: <formatted instruction>[ ; <inline comment>]".
+  StringTmp<256> message;
+  message.append(DebugUtils::errorAsString(err));
+  message.append(": ");
+  self->_funcs.formatInstruction(message, FormatFlags::kNone, self, self->arch(), BaseInst(instId, options, self->extraReg()), operands, Globals::kMaxOpCount);
+  const char* comment = self->inlineComment();
+  if (comment) {
+    message.append(" ; ");
+    message.append(comment);
+  }
+
+  // Per-instruction state is cleared before the error is reported (and possibly passed to an error handler).
+  self->resetInstOptions();
+  self->resetExtraReg();
+  self->resetInlineComment();
+  return self->reportError(err, message.data());
+}
+
+#endif
+
+} // {EmitterUtils}
+
+ASMJIT_END_NAMESPACE
diff --git a/lib/lepton/asmjit/core/emitterutils_p.h b/lib/lepton/asmjit/core/emitterutils_p.h
new file mode 100644
index 0000000000..b7610e7026
--- /dev/null
+++ b/lib/lepton/asmjit/core/emitterutils_p.h
@@ -0,0 +1,89 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_EMITTERUTILS_P_H_INCLUDED
+#define ASMJIT_CORE_EMITTERUTILS_P_H_INCLUDED
+
+#include "../core/emitter.h"
+#include "../core/operand.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+class BaseAssembler;
+class FormatOptions;
+
+//! \cond INTERNAL
+//! \addtogroup asmjit_core
+//! \{
+
+//! Utilities used by various emitters, mostly Assembler implementations.
+namespace EmitterUtils {
+
+//! Three default-constructed operands, presumably a stand-in for `opExt` when no extended operands are passed.
+
+static constexpr Operand noExt[3];
+
+enum kOpIndex : uint32_t {
+  kOp3 = 0, //!< Index in `opExt` of the operand copied to slot 3 by `opArrayFromEmitArgs()`.
+  kOp4 = 1, //!< Index in `opExt` of the operand copied to slot 4 by `opArrayFromEmitArgs()`.
+  kOp5 = 2  //!< Index in `opExt` of the operand copied to slot 5 by `opArrayFromEmitArgs()`.
+};
+
+static ASMJIT_FORCE_INLINE uint32_t opCountFromEmitArgs(const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_* opExt) noexcept {
+  // With `opExt[kOp3]` unused the count is the position of the last used operand among `o0..o2` (zero if none
+  // is used); otherwise it's 4, 5, or 6 - `opExt[kOp5]` is only counted when `opExt[kOp4]` is used as well.
+  if (!opExt[kOp3].isNone()) {
+    if (opExt[kOp4].isNone())
+      return 4;
+    return opExt[kOp5].isNone() ? 5u : 6u;
+  }
+
+  if (!o2.isNone())
+    return 3;
+
+  if (!o1.isNone())
+    return 2;
+
+  return o0.isNone() ? 0u : 1u;
+}
+
+static ASMJIT_FORCE_INLINE void opArrayFromEmitArgs(Operand_ dst[Globals::kMaxOpCount], const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_* opExt) noexcept {
+  // Slots 0..2 receive the directly passed operands, slots 3..5 the three operands stored in `opExt`.
+  dst[0].copyFrom(o0);
+  dst[1].copyFrom(o1);
+  dst[2].copyFrom(o2);
+  for (uint32_t extIndex = kOp3; extIndex <= kOp5; extIndex++)
+    dst[3 + extIndex].copyFrom(opExt[extIndex]);
+}
+
+#ifndef ASMJIT_NO_LOGGING
+Error finishFormattedLine(String& sb, const FormatOptions& formatOptions, const uint8_t* binData, size_t binSize, size_t offsetSize, size_t immSize, const char* comment) noexcept;
+
+void logLabelBound(BaseAssembler* self, const Label& label) noexcept;
+
+void logInstructionEmitted(
+  BaseAssembler* self,
+  InstId instId,
+  InstOptions options,
+  const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_* opExt,
+  uint32_t relSize, uint32_t immSize, uint8_t* afterCursor);
+
+Error logInstructionFailed(
+  BaseAssembler* self,
+  Error err,
+  InstId instId,
+  InstOptions options,
+  const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_* opExt);
+#endif
+
+}
+
+//! \}
+//! \endcond
+
+ASMJIT_END_NAMESPACE
+
+#endif // ASMJIT_CORE_EMITTERUTILS_P_H_INCLUDED
+
diff --git a/lib/lepton/asmjit/core/environment.cpp b/lib/lepton/asmjit/core/environment.cpp
new file mode 100644
index 0000000000..9a694af610
--- /dev/null
+++ b/lib/lepton/asmjit/core/environment.cpp
@@ -0,0 +1,46 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#include "../core/environment.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+// X86 Target
+// ----------
+//
+//   - 32-bit - Linux, OSX, BSD, and apparently also Haiku guarantee 16-byte
+//              stack alignment. Other operating systems are assumed to have
+//              4-byte alignment by default for safety reasons.
+//   - 64-bit - stack must be aligned to 16 bytes.
+//
+// ARM Target
+// ----------
+//
+//   - 32-bit - Stack must be aligned to 8 bytes.
+//   - 64-bit - Stack must be aligned to 16 bytes (hardware requirement).
+uint32_t Environment::stackAlignment() const noexcept {
+  // Every 64-bit target is assumed to use a 16-byte aligned stack.
+  if (is64Bit())
+    return 16u;
+
+  // 32-bit mode: the following platforms use 16-byte alignment, and this
+  // check takes precedence over the architecture-based one below.
+  const bool platformAligns16 = isPlatformLinux() ||
+                                isPlatformBSD()   ||
+                                isPlatformApple() ||
+                                isPlatformHaiku();
+  if (platformAligns16)
+    return 16u;
+
+  // ARM family in 32-bit mode on any other platform.
+  if (isFamilyARM())
+    return 8u;
+
+  // Bail to 4-byte alignment if we don't know.
+  return 4u;
+}
+
+ASMJIT_END_NAMESPACE
diff --git a/lib/lepton/asmjit/core/environment.h b/lib/lepton/asmjit/core/environment.h
new file mode 100644
index 0000000000..7e328a97c8
--- /dev/null
+++ b/lib/lepton/asmjit/core/environment.h
@@ -0,0 +1,508 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_ENVIRONMENT_H_INCLUDED
+#define ASMJIT_CORE_ENVIRONMENT_H_INCLUDED
+
+#include "../core/archtraits.h"
+
+#if defined(__APPLE__)
+  #include <TargetConditionals.h>
+#endif
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \addtogroup asmjit_core
+//! \{
+
+//! Vendor.
+//!
+//! \note AsmJit doesn't use vendor information at the moment. It's provided for future use, if required.
+enum class Vendor : uint8_t {
+  //! Unknown or uninitialized platform vendor.
+  kUnknown = 0,
+
+  //! Maximum value of `Vendor`.
+  kMaxValue = kUnknown,
+
+  //! Platform vendor detected at compile-time.
+  kHost =
+#if defined(_DOXYGEN)
+    DETECTED_AT_COMPILE_TIME
+#else
+    kUnknown
+#endif
+};
+
+//! Platform - runtime environment or operating system.
+enum class Platform : uint8_t {
+  //! Unknown or uninitialized platform.
+  kUnknown = 0,
+
+  //! Windows OS.
+  kWindows,
+
+  //! Other platform that is not Windows, most likely POSIX based.
+  kOther,
+
+  //! Linux OS.
+  kLinux,
+  //! GNU/Hurd OS.
+  kHurd,
+
+  //! FreeBSD OS.
+  kFreeBSD,
+  //! OpenBSD OS.
+  kOpenBSD,
+  //! NetBSD OS.
+  kNetBSD,
+  //! DragonFly BSD OS.
+  kDragonFlyBSD,
+
+  //! Haiku OS.
+  kHaiku,
+
+  //! Apple OSX.
+  kOSX,
+  //! Apple iOS.
+  kIOS,
+  //! Apple TVOS.
+  kTVOS,
+  //! Apple WatchOS.
+  kWatchOS,
+
+  //! Emscripten platform.
+  kEmscripten,
+
+  //! Maximum value of `Platform`.
+  kMaxValue = kEmscripten,
+
+  //! Platform detected at compile-time (platform of the host).
+  kHost =
+#if defined(_DOXYGEN)
+    DETECTED_AT_COMPILE_TIME
+#elif defined(__EMSCRIPTEN__)
+    kEmscripten
+#elif defined(_WIN32)
+    kWindows
+#elif defined(__linux__)
+    kLinux
+#elif defined(__gnu_hurd__)
+    kHurd
+#elif defined(__FreeBSD__)
+    kFreeBSD
+#elif defined(__OpenBSD__)
+    kOpenBSD
+#elif defined(__NetBSD__)
+    kNetBSD
+#elif defined(__DragonFly__)
+    kDragonFlyBSD
+#elif defined(__HAIKU__)
+    kHaiku
+#elif defined(__APPLE__) && TARGET_OS_OSX
+    kOSX
+#elif defined(__APPLE__) && TARGET_OS_TV
+    kTVOS
+#elif defined(__APPLE__) && TARGET_OS_WATCH
+    kWatchOS
+#elif defined(__APPLE__) && TARGET_OS_IPHONE
+    kIOS
+#else
+    kOther
+#endif
+};
+
+//! Platform ABI (application binary interface).
+enum class PlatformABI : uint8_t {
+  //! Unknown or uninitialized environment.
+  kUnknown = 0,
+  //! Microsoft ABI.
+  kMSVC,
+  //! GNU ABI.
+  kGNU,
+  //! Android Environment / ABI.
+  kAndroid,
+  //! Cygwin ABI.
+  kCygwin,
+
+  //! Maximum value of `PlatformABI`.
+  kMaxValue,
+
+  //! Host ABI detected at compile-time.
+  kHost =
+#if defined(_DOXYGEN)
+    DETECTED_AT_COMPILE_TIME
+#elif defined(_MSC_VER)
+    kMSVC
+#elif defined(__CYGWIN__)
+    kCygwin
+#elif defined(__MINGW32__) || defined(__GLIBC__)
+    kGNU
+#elif defined(__ANDROID__)
+    kAndroid
+#else
+    kUnknown
+#endif
+};
+
+//! Object format.
+//!
+//! \note AsmJit doesn't really use anything except \ref ObjectFormat::kUnknown and \ref ObjectFormat::kJIT at
+//! the moment. Object file formats are provided for future extensibility and a possibility to generate object
+//! files at some point.
+enum class ObjectFormat : uint8_t {
+  //! Unknown or uninitialized object format.
+  kUnknown = 0,
+
+  //! JIT code generation object, most likely \ref JitRuntime or a custom
+  //! \ref Target implementation.
+  kJIT,
+
+  //! Executable and linkable format (ELF).
+  kELF,
+  //! Common object file format.
+  kCOFF,
+  //! Extended COFF object format.
+  kXCOFF,
+  //! Mach object file format.
+  kMachO,
+
+  //! Maximum value of `ObjectFormat`.
+  kMaxValue
+};
+
+//! Represents an environment, which is usually related to a \ref Target.
+//!
+//! Environment usually has an 'arch-subarch-vendor-os-abi' format, which is sometimes called "Triple" (historically
+//! it had only 3 parts) or "Tuple", which is a convention used by Debian Linux.
+//!
+//! AsmJit doesn't support all possible combinations or architectures and ABIs, however, it models the environment
+//! similarly to other compilers for future extensibility.
+class Environment {
+public:
+  //! \name Members
+  //! \{
+
+  //! Architecture.
+  Arch _arch;
+  //! Sub-architecture type.
+  SubArch _subArch;
+  //! Vendor type.
+  Vendor _vendor;
+  //! Platform.
+  Platform _platform;
+  //! Platform ABI.
+  PlatformABI _platformABI;
+  //! Object format.
+  ObjectFormat _objectFormat;
+  //! Reserved for future use, must be zero.
+  uint8_t _reserved[2];
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  inline Environment() noexcept :
+    _arch(Arch::kUnknown),
+    _subArch(SubArch::kUnknown),
+    _vendor(Vendor::kUnknown),
+    _platform(Platform::kUnknown),
+    _platformABI(PlatformABI::kUnknown),
+    _objectFormat(ObjectFormat::kUnknown),
+    _reserved { 0, 0 } {}
+
+  inline explicit Environment(
+    Arch arch,
+    SubArch subArch = SubArch::kUnknown,
+    Vendor vendor = Vendor::kUnknown,
+    Platform platform = Platform::kUnknown,
+    PlatformABI abi = PlatformABI::kUnknown,
+    ObjectFormat objectFormat = ObjectFormat::kUnknown) noexcept {
+
+    init(arch, subArch, vendor, platform, abi, objectFormat);
+  }
+
+  inline Environment(const Environment& other) noexcept = default;
+
+  //! Returns the host environment constructed from preprocessor macros defined by the compiler.
+  //!
+  //! The returned environment should precisely match the target host architecture, sub-architecture, platform,
+  //! and ABI.
+  static inline Environment host() noexcept {
+    return Environment(Arch::kHost, SubArch::kHost, Vendor::kHost, Platform::kHost, PlatformABI::kHost, ObjectFormat::kUnknown);
+  }
+
+  //! \}
+
+  //! \name Overloaded Operators
+  //! \{
+
+  inline Environment& operator=(const Environment& other) noexcept = default;
+
+  inline bool operator==(const Environment& other) const noexcept { return  equals(other); }
+  inline bool operator!=(const Environment& other) const noexcept { return !equals(other); }
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Tests whether the environment is not set up.
+  //!
+  //! Returns true if all members are zero, and thus unknown.
+  inline bool empty() const noexcept {
+    // Unfortunately compilers won't optimize this if the fields are checked one by one...
+    return _packed() == 0;
+  }
+
+  //! Tests whether the environment is initialized, which means it must have
+  //! a valid architecture.
+  inline bool isInitialized() const noexcept {
+    return _arch != Arch::kUnknown;
+  }
+
+  inline uint64_t _packed() const noexcept {
+    uint64_t x;
+    memcpy(&x, this, 8);
+    return x;
+  }
+
+  //! Resets all members of the environment to zero / unknown.
+  inline void reset() noexcept {
+    _arch = Arch::kUnknown;
+    _subArch = SubArch::kUnknown;
+    _vendor = Vendor::kUnknown;
+    _platform = Platform::kUnknown;
+    _platformABI = PlatformABI::kUnknown;
+    _objectFormat = ObjectFormat::kUnknown;
+    _reserved[0] = 0;
+    _reserved[1] = 0;
+  }
+
+  inline bool equals(const Environment& other) const noexcept {
+    return _packed() == other._packed();
+  }
+
+  //! Returns the architecture.
+  inline Arch arch() const noexcept { return _arch; }
+  //! Returns the sub-architecture.
+  inline SubArch subArch() const noexcept { return _subArch; }
+  //! Returns vendor.
+  inline Vendor vendor() const noexcept { return _vendor; }
+  //! Returns target's platform or operating system.
+  inline Platform platform() const noexcept { return _platform; }
+  //! Returns target's ABI.
+  inline PlatformABI platformABI() const noexcept { return _platformABI; }
+  //! Returns target's object format.
+  inline ObjectFormat objectFormat() const noexcept { return _objectFormat; }
+
+  inline void init(
+    Arch arch,
+    SubArch subArch = SubArch::kUnknown,
+    Vendor vendor = Vendor::kUnknown,
+    Platform platform = Platform::kUnknown,
+    PlatformABI platformABI = PlatformABI::kUnknown,
+    ObjectFormat objectFormat = ObjectFormat::kUnknown) noexcept {
+
+    _arch = arch;
+    _subArch = subArch;
+    _vendor = vendor;
+    _platform = platform;
+    _platformABI = platformABI;
+    _objectFormat = objectFormat;
+    _reserved[0] = 0;
+    _reserved[1] = 0;
+  }
+
+  inline bool isArchX86() const noexcept { return _arch == Arch::kX86; }
+  inline bool isArchX64() const noexcept { return _arch == Arch::kX64; }
+  inline bool isArchARM() const noexcept { return isArchARM(_arch); }
+  inline bool isArchThumb() const noexcept { return isArchThumb(_arch); }
+  inline bool isArchAArch64() const noexcept { return isArchAArch64(_arch); }
+  inline bool isArchMIPS32() const noexcept { return isArchMIPS32(_arch); }
+  inline bool isArchMIPS64() const noexcept { return isArchMIPS64(_arch); }
+  inline bool isArchRISCV32() const noexcept { return _arch == Arch::kRISCV32; }
+  inline bool isArchRISCV64() const noexcept { return _arch == Arch::kRISCV64; }
+
+  //! Tests whether the architecture is 32-bit.
+  inline bool is32Bit() const noexcept { return is32Bit(_arch); }
+  //! Tests whether the architecture is 64-bit.
+  inline bool is64Bit() const noexcept { return is64Bit(_arch); }
+
+  //! Tests whether the architecture is little endian.
+  inline bool isLittleEndian() const noexcept { return isLittleEndian(_arch); }
+  //! Tests whether the architecture is big endian.
+  inline bool isBigEndian() const noexcept { return isBigEndian(_arch); }
+
+  //! Tests whether this architecture is of X86 family.
+  inline bool isFamilyX86() const noexcept { return isFamilyX86(_arch); }
+  //! Tests whether this architecture family is ARM, THUMB, or AArch64.
+  inline bool isFamilyARM() const noexcept { return isFamilyARM(_arch); }
+  //! Tests whether this architecture family is AArch32 (ARM or THUMB).
+  inline bool isFamilyAArch32() const noexcept { return isFamilyAArch32(_arch); }
+  //! Tests whether this architecture family is AArch64.
+  inline bool isFamilyAArch64() const noexcept { return isFamilyAArch64(_arch); }
+  //! Tests whether this architecture family is MIPS32 or MIPS64.
+  inline bool isFamilyMIPS() const noexcept { return isFamilyMIPS(_arch); }
+  //! Tests whether this architecture family is RISC-V (both 32-bit and 64-bit).
+  inline bool isFamilyRISCV() const noexcept { return isFamilyRISCV(_arch); }
+
+  //! Tests whether the environment platform is Windows.
+  inline bool isPlatformWindows() const noexcept { return _platform == Platform::kWindows; }
+
+  //! Tests whether the environment platform is Linux.
+  inline bool isPlatformLinux() const noexcept { return _platform == Platform::kLinux; }
+
+  //! Tests whether the environment platform is Hurd.
+  inline bool isPlatformHurd() const noexcept { return _platform == Platform::kHurd; }
+
+  //! Tests whether the environment platform is Haiku.
+  inline bool isPlatformHaiku() const noexcept { return _platform == Platform::kHaiku; }
+
+  //! Tests whether the environment platform is any BSD.
+  inline bool isPlatformBSD() const noexcept {
+    return _platform == Platform::kFreeBSD ||
+           _platform == Platform::kOpenBSD ||
+           _platform == Platform::kNetBSD ||
+           _platform == Platform::kDragonFlyBSD;
+  }
+
+  //! Tests whether the environment platform is any Apple platform (OSX, iOS, TVOS, WatchOS).
+  inline bool isPlatformApple() const noexcept {
+    return _platform == Platform::kOSX ||
+           _platform == Platform::kIOS ||
+           _platform == Platform::kTVOS ||
+           _platform == Platform::kWatchOS;
+  }
+
+  //! Tests whether the ABI is MSVC.
+  inline bool isMSVC() const noexcept { return _platformABI == PlatformABI::kMSVC; }
+  //! Tests whether the ABI is GNU.
+  inline bool isGNU() const noexcept { return _platformABI == PlatformABI::kGNU; }
+
+  //! Returns a calculated stack alignment for this environment.
+  ASMJIT_API uint32_t stackAlignment() const noexcept;
+
+  //! Returns a native register size of this architecture.
+  uint32_t registerSize() const noexcept { return registerSizeFromArch(_arch); }
+
+  //! Sets the architecture to `arch`.
+  inline void setArch(Arch arch) noexcept { _arch = arch; }
+  //! Sets the sub-architecture to `subArch`.
+  inline void setSubArch(SubArch subArch) noexcept { _subArch = subArch; }
+  //! Sets the vendor to `vendor`.
+  inline void setVendor(Vendor vendor) noexcept { _vendor = vendor; }
+  //! Sets the platform to `platform`.
+  inline void setPlatform(Platform platform) noexcept { _platform = platform; }
+  //! Sets the ABI to `platformABI`.
+  inline void setPlatformABI(PlatformABI platformABI) noexcept { _platformABI = platformABI; }
+  //! Sets the object format to `objectFormat`.
+  inline void setObjectFormat(ObjectFormat objectFormat) noexcept { _objectFormat = objectFormat; }
+
+  //! \}
+
+  //! \name Static Utilities
+  //! \{
+
+  static inline bool isDefinedArch(Arch arch) noexcept {
+    return uint32_t(arch) <= uint32_t(Arch::kMaxValue);
+  }
+
+  static inline bool isValidArch(Arch arch) noexcept {
+    return arch != Arch::kUnknown && uint32_t(arch) <= uint32_t(Arch::kMaxValue);
+  }
+
+  //! Tests whether the given architecture `arch` is 32-bit.
+  static inline bool is32Bit(Arch arch) noexcept {
+    return (uint32_t(arch) & uint32_t(Arch::k32BitMask)) == uint32_t(Arch::k32BitMask);
+  }
+
+  //! Tests whether the given architecture `arch` is 64-bit.
+  static inline bool is64Bit(Arch arch) noexcept {
+    return (uint32_t(arch) & uint32_t(Arch::k32BitMask)) == 0;
+  }
+
+  //! Tests whether the given architecture `arch` is little endian.
+  static inline bool isLittleEndian(Arch arch) noexcept {
+    return uint32_t(arch) < uint32_t(Arch::kBigEndian);
+  }
+
+  //! Tests whether the given architecture `arch` is big endian.
+  static inline bool isBigEndian(Arch arch) noexcept {
+    return uint32_t(arch) >= uint32_t(Arch::kBigEndian);
+  }
+
+  //! Tests whether the given architecture is Thumb or Thumb_BE.
+  static inline bool isArchThumb(Arch arch) noexcept {
+    return arch == Arch::kThumb || arch == Arch::kThumb_BE;
+  }
+
+  //! Tests whether the given architecture is ARM or ARM_BE.
+  static inline bool isArchARM(Arch arch) noexcept {
+    return arch == Arch::kARM || arch == Arch::kARM_BE;
+  }
+
+  //! Tests whether the given architecture is AArch64 or AArch64_BE.
+  static inline bool isArchAArch64(Arch arch) noexcept {
+    return arch == Arch::kAArch64 || arch == Arch::kAArch64_BE;
+  }
+
+  //! Tests whether the given architecture is MIPS32_LE or MIPS32_BE.
+  static inline bool isArchMIPS32(Arch arch) noexcept {
+    return arch == Arch::kMIPS32_LE || arch == Arch::kMIPS32_BE;
+  }
+
+  //! Tests whether the given architecture is MIPS64_LE or MIPS64_BE.
+  static inline bool isArchMIPS64(Arch arch) noexcept {
+    return arch == Arch::kMIPS64_LE || arch == Arch::kMIPS64_BE;
+  }
+
+  //! Tests whether the given architecture family is X86 or X64.
+  static inline bool isFamilyX86(Arch arch) noexcept {
+    return arch == Arch::kX86 || arch == Arch::kX64;
+  }
+
+  //! Tests whether the given architecture family is ARM, THUMB, or AArch64.
+  static inline bool isFamilyARM(Arch arch) noexcept {
+    return isArchARM(arch) || isArchAArch64(arch) || isArchThumb(arch);
+  }
+
+  //! Tests whether the given architecture family is AArch32 (ARM or THUMB).
+  static inline bool isFamilyAArch32(Arch arch) noexcept {
+    return isArchARM(arch) || isArchThumb(arch);
+  }
+
+  //! Tests whether the given architecture family is AArch64.
+  static inline bool isFamilyAArch64(Arch arch) noexcept {
+    return isArchAArch64(arch);
+  }
+
+  //! Tests whether the given architecture family is MIPS32 or MIPS64.
+  static inline bool isFamilyMIPS(Arch arch) noexcept {
+    return isArchMIPS32(arch) || isArchMIPS64(arch);
+  }
+
+  //! Tests whether the given architecture family is RISC-V (both 32-bit and 64-bit).
+  static inline bool isFamilyRISCV(Arch arch) noexcept {
+    return arch == Arch::kRISCV32 || arch == Arch::kRISCV64;
+  }
+
+  //! Returns a native general purpose register size from the given architecture.
+  static inline uint32_t registerSizeFromArch(Arch arch) noexcept {
+    return is32Bit(arch) ? 4u : 8u;
+  }
+
+  //! \}
+};
+
+static_assert(sizeof(Environment) == 8,
+              "Environment must occupy exactly 8 bytes.");
+
+//! \}
+
+ASMJIT_END_NAMESPACE
+
+#endif // ASMJIT_CORE_ENVIRONMENT_H_INCLUDED
diff --git a/lib/lepton/asmjit/core/errorhandler.cpp b/lib/lepton/asmjit/core/errorhandler.cpp
new file mode 100644
index 0000000000..5a7dac5235
--- /dev/null
+++ b/lib/lepton/asmjit/core/errorhandler.cpp
@@ -0,0 +1,14 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#include "../core/errorhandler.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+ErrorHandler::ErrorHandler() noexcept {}
+ErrorHandler::~ErrorHandler() noexcept {}
+
+ASMJIT_END_NAMESPACE
diff --git a/lib/lepton/asmjit/core/errorhandler.h b/lib/lepton/asmjit/core/errorhandler.h
new file mode 100644
index 0000000000..5151d43304
--- /dev/null
+++ b/lib/lepton/asmjit/core/errorhandler.h
@@ -0,0 +1,228 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_ERRORHANDLER_H_INCLUDED
+#define ASMJIT_CORE_ERRORHANDLER_H_INCLUDED
+
+#include "../core/globals.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \addtogroup asmjit_error_handling
+//! \{
+
+class BaseEmitter;
+
+//! Error handler can be used to override the default behavior of error handling.
+//!
+//! It's available to all classes that inherit `BaseEmitter`. Override \ref ErrorHandler::handleError() to implement
+//! your own error handler.
+//!
+//! The following use-cases are supported:
+//!
+//!   - Record the error and continue code generation. This is the simplest approach that can be used to at least log
+//!     possible errors.
+//!   - Throw an exception. AsmJit doesn't use exceptions and is completely exception-safe, but it's perfectly legal
+//!     to throw an exception from the error handler.
+//!   - Use plain old C's `setjmp()` and `longjmp()`. Asmjit always puts Assembler, Builder and Compiler to
+//!     a consistent state before calling \ref handleError(), so `longjmp()` can be used without issues to cancel the
+//!     code generation if an error occurred. This method can be used if exception handling in your project is turned
+//!     off and you still want some comfort. In most cases it should be safe as AsmJit uses \ref Zone memory and the
+//!     ownership of memory it allocates always ends with the instance that allocated it. If using this approach please
+//!     never jump outside the life-time of \ref CodeHolder and \ref BaseEmitter.
+//!
+//! \ref ErrorHandler can be attached to \ref CodeHolder or \ref BaseEmitter, which has a priority. The example below
+//! uses error handler that just prints the error, but lets AsmJit continue:
+//!
+//! ```
+//! // Error Handling #1 - Logging and returning Error.
+//! #include <asmjit/x86.h>
+//! #include <stdio.h>
+//!
+//! using namespace asmjit;
+//!
+//! // Error handler that just prints the error and lets AsmJit ignore it.
+//! class SimpleErrorHandler : public ErrorHandler {
+//! public:
+//!   Error err;
+//!
+//!   inline SimpleErrorHandler() : err(kErrorOk) {}
+//!
+//!   void handleError(Error err, const char* message, BaseEmitter* origin) override {
+//!     this->err = err;
+//!     fprintf(stderr, "ERROR: %s\n", message);
+//!   }
+//! };
+//!
+//! int main() {
+//!   JitRuntime rt;
+//!   SimpleErrorHandler eh;
+//!
+//!   CodeHolder code;
+//!   code.init(rt.environment());
+//!   code.setErrorHandler(&eh);
+//!
+//!   // Try to emit instruction that doesn't exist.
+//!   x86::Assembler a(&code);
+//!   a.emit(x86::Inst::kIdMov, x86::xmm0, x86::xmm1);
+//!
+//!   if (eh.err) {
+//!     // Assembler failed!
+//!     return 1;
+//!   }
+//!
+//!   return 0;
+//! }
+//! ```
+//!
+//! If error happens during instruction emitting / encoding the assembler behaves transactionally - the output buffer
+//! won't advance if encoding failed, thus either a fully encoded instruction or nothing is emitted. The error handling
+//! shown above is useful, but it's still not the best way of dealing with errors in AsmJit. The following example
+//! shows how to use exception handling to handle errors in a more C++ way:
+//!
+//! ```
+//! // Error Handling #2 - Throwing an exception.
+//! #include <asmjit/x86.h>
+//! #include <exception>
+//! #include <string>
+//! #include <stdio.h>
+//!
+//! using namespace asmjit;
+//!
+//! // Error handler that throws a user-defined `AsmJitException`.
+//! class AsmJitException : public std::exception {
+//! public:
+//!   Error err;
+//!   std::string message;
+//!
+//!   AsmJitException(Error err, const char* message) noexcept
+//!     : err(err),
+//!       message(message) {}
+//!
+//!   const char* what() const noexcept override { return message.c_str(); }
+//! };
+//!
+//! class ThrowableErrorHandler : public ErrorHandler {
+//! public:
+//!   // Throw is possible, functions that use ErrorHandler are never 'noexcept'.
+//!   void handleError(Error err, const char* message, BaseEmitter* origin) override {
+//!     throw AsmJitException(err, message);
+//!   }
+//! };
+//!
+//! int main() {
+//!   JitRuntime rt;
+//!   ThrowableErrorHandler eh;
+//!
+//!   CodeHolder code;
+//!   code.init(rt.environment());
+//!   code.setErrorHandler(&eh);
+//!
+//!   x86::Assembler a(&code);
+//!
+//!   // Try to emit instruction that doesn't exist.
+//!   try {
+//!     a.emit(x86::Inst::kIdMov, x86::xmm0, x86::xmm1);
+//!   }
+//!   catch (const AsmJitException& ex) {
+//!     printf("EXCEPTION THROWN: %s\n", ex.what());
+//!     return 1;
+//!   }
+//!
+//!   return 0;
+//! }
+//! ```
+//!
+//! If C++ exceptions are not what you like or your project turns them off completely, there is still a way of reducing
+//! the error handling to a minimum by using a standard setjmp/longjmp approach. AsmJit is exception-safe and cleans
+//! up everything before calling the ErrorHandler, so any approach is safe. You can simply jump from the error handler
+//! without causing any side-effects or memory leaks. The following example demonstrates how it could be done:
+//!
+//! ```
+//! // Error Handling #3 - Using setjmp/longjmp if exceptions are not allowed.
+//! #include <asmjit/x86.h>
+//! #include <setjmp.h>
+//! #include <stdio.h>
+//!
+//! class LongJmpErrorHandler : public asmjit::ErrorHandler {
+//! public:
+//!   inline LongJmpErrorHandler() : err(asmjit::kErrorOk) {}
+//!
+//!   void handleError(asmjit::Error err, const char* message, asmjit::BaseEmitter* origin) override {
+//!     this->err = err;
+//!     longjmp(state, 1);
+//!   }
+//!
+//!   jmp_buf state;
+//!   asmjit::Error err;
+//! };
+//!
+//! int main(int argc, char* argv[]) {
+//!   using namespace asmjit;
+//!
+//!   JitRuntime rt;
+//!   LongJmpErrorHandler eh;
+//!
+//!   CodeHolder code;
+//!   code.init(rt.environment());
+//!   code.setErrorHandler(&eh);
+//!
+//!   x86::Assembler a(&code);
+//!
+//!   if (!setjmp(eh.state)) {
+//!     // Try to emit instruction that doesn't exist.
+//!     a.emit(x86::Inst::kIdMov, x86::xmm0, x86::xmm1);
+//!   }
+//!   else {
+//!     Error err = eh.err;
+//!     printf("ASMJIT ERROR: 0x%08X [%s]\n", err, DebugUtils::errorAsString(err));
+//!   }
+//!
+//!   return 0;
+//! }
+//! ```
+class ASMJIT_VIRTAPI ErrorHandler {
+public:
+  ASMJIT_BASE_CLASS(ErrorHandler)
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Creates a new `ErrorHandler` instance.
+  ASMJIT_API ErrorHandler() noexcept;
+  //! Destroys the `ErrorHandler` instance.
+  ASMJIT_API virtual ~ErrorHandler() noexcept;
+
+  //! \}
+
+  //! \name Interface
+  //! \{
+
+  //! Error handler (must be reimplemented).
+  //!
+  //! Error handler is called after an error happened and before it's propagated to the caller. There are multiple
+  //! ways how the error handler can be used:
+  //!
+  //! 1. User-based error handling without throwing an exception or using C's `longjmp()`. This is for users that don't
+  //!     use exceptions and want customized error handling.
+  //!
+  //! 2. Throwing an exception. AsmJit doesn't use exceptions and is completely exception-safe, but you can throw
+  //!     exception from your error handler if this way is the preferred way of handling errors in your project.
+  //!
+  //! 3. Using plain old C's `setjmp()` and `longjmp()`. Asmjit always puts `BaseEmitter` to a consistent state before
+  //!    calling `handleError()`, so `longjmp()` can be used without any issues to cancel the code generation if an
+  //!    error occurred. There is no difference between exceptions and `longjmp()` from AsmJit's perspective, however,
+  //!    never jump outside of `CodeHolder` and `BaseEmitter` scope as you would leak memory.
+  virtual void handleError(Error err, const char* message, BaseEmitter* origin) = 0;
+
+  //! \}
+};
+
+//! \}
+
+ASMJIT_END_NAMESPACE
+
+#endif // ASMJIT_CORE_ERRORHANDLER_H_INCLUDED
+
diff --git a/lib/lepton/asmjit/core/formatter.cpp b/lib/lepton/asmjit/core/formatter.cpp
new file mode 100644
index 0000000000..1a9a386e3f
--- /dev/null
+++ b/lib/lepton/asmjit/core/formatter.cpp
@@ -0,0 +1,584 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#ifndef ASMJIT_NO_LOGGING
+
+#include "../core/archtraits.h"
+#include "../core/builder.h"
+#include "../core/codeholder.h"
+#include "../core/compiler.h"
+#include "../core/emitter.h"
+#include "../core/formatter_p.h"
+#include "../core/string.h"
+#include "../core/support.h"
+#include "../core/type.h"
+
+#if !defined(ASMJIT_NO_X86)
+  #include "../x86/x86formatter_p.h"
+#endif
+
+#if !defined(ASMJIT_NO_AARCH64)
+  #include "../arm/a64formatter_p.h"
+#endif
+
+ASMJIT_BEGIN_NAMESPACE
+
+#if defined(ASMJIT_NO_COMPILER)
+class VirtReg;
+#endif
+
+namespace Formatter {
+
+static const char wordNameTable[][8] = {
+  "db",
+  "dw",
+  "dd",
+  "dq",
+  "byte",
+  "half",
+  "word",
+  "hword",
+  "dword",
+  "qword",
+  "xword",
+  "short",
+  "long",
+  "quad"
+};
+
+
+Error formatTypeId(String& sb, TypeId typeId) noexcept {
+  if (typeId == TypeId::kVoid)
+    return sb.append("void");
+
+  if (!TypeUtils::isValid(typeId))
+    return sb.append("unknown");
+
+  const char* typeName = "unknown";
+  uint32_t typeSize = TypeUtils::sizeOf(typeId);
+  TypeId scalarType = TypeUtils::scalarOf(typeId);
+
+  switch (scalarType) {
+    case TypeId::kIntPtr : typeName = "intptr" ; break;
+    case TypeId::kUIntPtr: typeName = "uintptr"; break;
+    case TypeId::kInt8   : typeName = "int8"   ; break;
+    case TypeId::kUInt8  : typeName = "uint8"  ; break;
+    case TypeId::kInt16  : typeName = "int16"  ; break;
+    case TypeId::kUInt16 : typeName = "uint16" ; break;
+    case TypeId::kInt32  : typeName = "int32"  ; break;
+    case TypeId::kUInt32 : typeName = "uint32" ; break;
+    case TypeId::kInt64  : typeName = "int64"  ; break;
+    case TypeId::kUInt64 : typeName = "uint64" ; break;
+    case TypeId::kFloat32: typeName = "float32"; break;
+    case TypeId::kFloat64: typeName = "float64"; break;
+    case TypeId::kFloat80: typeName = "float80"; break;
+    case TypeId::kMask8  : typeName = "mask8"  ; break;
+    case TypeId::kMask16 : typeName = "mask16" ; break;
+    case TypeId::kMask32 : typeName = "mask32" ; break;
+    case TypeId::kMask64 : typeName = "mask64" ; break;
+    case TypeId::kMmx32  : typeName = "mmx32"  ; break;
+    case TypeId::kMmx64  : typeName = "mmx64"  ; break;
+
+    default:
+      typeName = "unknown";
+      break;
+  }
+
+  uint32_t baseSize = TypeUtils::sizeOf(scalarType);
+  if (typeSize > baseSize) {
+    uint32_t count = typeSize / baseSize;
+    return sb.appendFormat("%sx%u", typeName, unsigned(count));
+  }
+  else {
+    return sb.append(typeName);
+  }
+}
+
+Error formatFeature(
+  String& sb,
+  Arch arch,
+  uint32_t featureId) noexcept {
+
+#if !defined(ASMJIT_NO_X86)
+  if (Environment::isFamilyX86(arch))
+    return x86::FormatterInternal::formatFeature(sb, featureId);
+#endif
+
+#if !defined(ASMJIT_NO_AARCH32) && !defined(ASMJIT_NO_AARCH64)
+  if (Environment::isFamilyARM(arch))
+    return arm::FormatterInternal::formatFeature(sb, featureId);
+#endif
+
+  return kErrorInvalidArch;
+}
+
+Error formatLabel(
+  String& sb,
+  FormatFlags formatFlags,
+  const BaseEmitter* emitter,
+  uint32_t labelId) noexcept {
+  // Appends the textual form of label `labelId` to `sb`; `emitter` and `emitter->code()` are dereferenced unchecked.
+  DebugUtils::unused(formatFlags);
+
+  const LabelEntry* le = emitter->code()->labelEntry(labelId);
+  if (ASMJIT_UNLIKELY(!le))
+    return sb.appendFormat("<InvalidLabel:%u>", labelId);
+
+  if (le->hasName()) {
+    if (le->hasParent()) {  // A named label with a parent is formatted as "<parent>.<name>".
+      uint32_t parentId = le->parentId();
+      const LabelEntry* pe = emitter->code()->labelEntry(parentId);
+
+      if (ASMJIT_UNLIKELY(!pe))  // The parent lookup failed, so report the parent's id, not the (valid) label's id.
+        ASMJIT_PROPAGATE(sb.appendFormat("<InvalidLabel:%u>", parentId));
+      else if (ASMJIT_UNLIKELY(!pe->hasName()))
+        ASMJIT_PROPAGATE(sb.appendFormat("L%u", parentId));
+      else
+        ASMJIT_PROPAGATE(sb.append(pe->name()));
+
+      ASMJIT_PROPAGATE(sb.append('.'));
+    }
+
+    if (le->type() == LabelType::kAnonymous)  // Anonymous labels get an "L<id>@" prefix before their name.
+      ASMJIT_PROPAGATE(sb.appendFormat("L%u@", labelId));
+    return sb.append(le->name());
+  }
+  else {
+    return sb.appendFormat("L%u", labelId);  // Unnamed labels are formatted as "L<id>".
+  }
+}
+
+Error formatRegister(
+  String& sb,
+  FormatFlags formatFlags,
+  const BaseEmitter* emitter,
+  Arch arch,
+  RegType regType,
+  uint32_t regId) noexcept {
+
+#if !defined(ASMJIT_NO_X86)
+  if (Environment::isFamilyX86(arch))
+    return x86::FormatterInternal::formatRegister(sb, formatFlags, emitter, arch, regType, regId);
+#endif
+
+#if !defined(ASMJIT_NO_AARCH64)
+  if (Environment::isFamilyAArch64(arch))
+    return a64::FormatterInternal::formatRegister(sb, formatFlags, emitter, arch, regType, regId);
+#endif
+
+  return kErrorInvalidArch;
+}
+
+Error formatOperand(
+  String& sb,
+  FormatFlags formatFlags,
+  const BaseEmitter* emitter,
+  Arch arch,
+  const Operand_& op) noexcept {
+  // Forwards to the compiled-in architecture-specific operand formatter selected by the family of `arch`.
+#if !defined(ASMJIT_NO_X86)
+  if (Environment::isFamilyX86(arch))
+    return x86::FormatterInternal::formatOperand(sb, formatFlags, emitter, arch, op);
+#endif
+
+#if !defined(ASMJIT_NO_AARCH64)
+  if (Environment::isFamilyAArch64(arch))
+    return a64::FormatterInternal::formatOperand(sb, formatFlags, emitter, arch, op);
+#endif
+  // No compiled-in backend matched `arch`.
+  return kErrorInvalidArch;
+}
+
+ASMJIT_API Error formatDataType(
+  String& sb,
+  FormatFlags formatFlags,
+  Arch arch,
+  TypeId typeId) noexcept
+{
+  DebugUtils::unused(formatFlags);
+  // Appends the `wordNameTable` entry that ArchTraits of `arch` selects for the byte size of `typeId`.
+  if (ASMJIT_UNLIKELY(uint32_t(arch) > uint32_t(Arch::kMaxValue)))
+    return DebugUtils::errored(kErrorInvalidArch);
+  // Only type sizes of 1 to 8 bytes are accepted here.
+  uint32_t typeSize = TypeUtils::sizeOf(typeId);
+  if (typeSize == 0 || typeSize > 8)
+    return DebugUtils::errored(kErrorInvalidState);
+  // NOTE(review): ctz() equals log2 only for power-of-two sizes - confirm sizeOf() never yields 3, 5, 6 or 7.
+  uint32_t typeSizeLog2 = Support::ctz(typeSize);
+  return sb.append(wordNameTable[size_t(ArchTraits::byArch(arch).typeNameIdByIndex(typeSizeLog2))]);
+}
+
+static Error formatDataHelper(String& sb, const char* typeName, uint32_t typeSize, const uint8_t* data, size_t itemCount) noexcept {
+  // Appends ".<typeName> " followed by `itemCount` comma-separated base-16 values of `typeSize` bytes read from `data`.
+  ASMJIT_PROPAGATE(sb.append('.'));
+  ASMJIT_PROPAGATE(sb.append(typeName));
+  ASMJIT_PROPAGATE(sb.append(' '));
+
+  for (size_t i = 0; i < itemCount; i++) {
+    uint64_t v = 0;
+    if (i != 0)
+      ASMJIT_PROPAGATE(sb.append(", ", 2));
+    // Only 1, 2, 4 and 8 byte items are decoded; any other `typeSize` leaves `v` at zero.
+    switch (typeSize) {
+      case 1: v = data[0]; break;
+      case 2: v = Support::readU16u(data); break;
+      case 4: v = Support::readU32u(data); break;
+      case 8: v = Support::readU64u(data); break;
+    }
+
+    ASMJIT_PROPAGATE(sb.appendUInt(v, 16, typeSize * 2, StringFormatFlags::kAlternate));
+    data += typeSize;
+  }
+
+  return kErrorOk;
+}
+
+Error formatData(
+  String& sb,
+  FormatFlags formatFlags,
+  Arch arch,
+  TypeId typeId, const void* data, size_t itemCount, size_t repeatCount) noexcept
+{
+  DebugUtils::unused(formatFlags);
+  // Appends `data` as an optional ".repeat N " prefix followed by a typed list of values (see formatDataHelper()).
+  if (ASMJIT_UNLIKELY(!Environment::isDefinedArch(arch)))
+    return DebugUtils::errored(kErrorInvalidArch);
+
+  uint32_t typeSize = TypeUtils::sizeOf(typeId);
+  if (typeSize == 0)
+    return DebugUtils::errored(kErrorInvalidState);
+  // Items whose size is not a power of two are emitted as individual bytes.
+  if (!Support::isPowerOf2(typeSize)) {
+    itemCount *= typeSize;
+    typeSize = 1;
+  }
+  // Items wider than 8 bytes are split in halves until each piece fits into 8 bytes.
+  while (typeSize > 8u) {
+    typeSize >>= 1;
+    itemCount <<= 1;
+  }
+
+  uint32_t typeSizeLog2 = Support::ctz(typeSize);
+  const char* wordName = wordNameTable[size_t(ArchTraits::byArch(arch).typeNameIdByIndex(typeSizeLog2))];
+  // The repeat prefix is only written when the data is repeated more than once.
+  if (repeatCount > 1)
+    ASMJIT_PROPAGATE(sb.appendFormat(".repeat %zu ", repeatCount));
+
+  return formatDataHelper(sb, wordName, typeSize, static_cast<const uint8_t*>(data), itemCount);
+}
+
+Error formatInstruction(
+  String& sb,
+  FormatFlags formatFlags,
+  const BaseEmitter* emitter,
+  Arch arch,
+  const BaseInst& inst, const Operand_* operands, size_t opCount) noexcept {
+  // Forwards to the compiled-in architecture-specific instruction formatter selected by the family of `arch`.
+#if !defined(ASMJIT_NO_X86)
+  if (Environment::isFamilyX86(arch))
+    return x86::FormatterInternal::formatInstruction(sb, formatFlags, emitter, arch, inst, operands, opCount);
+#endif
+  // Only AArch64 is routed to the a64 backend, consistently with formatRegister() and formatOperand().
+#if !defined(ASMJIT_NO_AARCH64)
+  if (Environment::isFamilyAArch64(arch))
+    return a64::FormatterInternal::formatInstruction(sb, formatFlags, emitter, arch, inst, operands, opCount);
+#endif
+  // No compiled-in backend matched `arch`.
+  return kErrorInvalidArch;
+}
+
+#ifndef ASMJIT_NO_BUILDER
+
+#ifndef ASMJIT_NO_COMPILER
+static Error formatFuncValue(String& sb, FormatFlags formatFlags, const BaseEmitter* emitter, FuncValue value) noexcept {
+  TypeId typeId = value.typeId();
+  ASMJIT_PROPAGATE(formatTypeId(sb, typeId));
+  // Assigned values get an "@<reg>" and/or "@[<stackOffset>]" suffix, wrapped in "[...]" when indirect.
+  if (value.isAssigned()) {
+    ASMJIT_PROPAGATE(sb.append('@'));
+
+    if (value.isIndirect())
+      ASMJIT_PROPAGATE(sb.append('['));
+
+    // NOTE: It should be either reg or stack, but never both. We
+    // use two IFs on purpose so if the FuncValue is both it would
+    // show in logs.
+    if (value.isReg()) {
+      ASMJIT_PROPAGATE(formatRegister(sb, formatFlags, emitter, emitter->arch(), value.regType(), value.regId()));
+    }
+
+    if (value.isStack()) {
+      ASMJIT_PROPAGATE(sb.appendFormat("[%d]", int(value.stackOffset())));
+    }
+
+    if (value.isIndirect())
+      ASMJIT_PROPAGATE(sb.append(']'));
+  }
+
+  return kErrorOk;
+}
+
+static Error formatFuncValuePack(
+  String& sb,
+  FormatFlags formatFlags,
+  const BaseCompiler* cc,
+  const FuncValuePack& pack,
+  const RegOnly* vRegs) noexcept {
+  // Appends "void" for an empty pack, otherwise its values separated by ", " (in "[...]" if more than one).
+  size_t count = pack.count();
+  if (!count)
+    return sb.append("void");
+
+  if (count > 1)
+    ASMJIT_PROPAGATE(sb.append('['));
+
+  for (uint32_t valueIndex = 0; valueIndex < count; valueIndex++) {
+    const FuncValue& value = pack[valueIndex];
+    if (!value)
+      break;
+
+    if (valueIndex)
+      ASMJIT_PROPAGATE(sb.append(", "));
+
+    ASMJIT_PROPAGATE(formatFuncValue(sb, formatFlags, cc, value));
+
+    if (vRegs) {
+      const VirtReg* virtReg = nullptr;
+      static const char nullReg[] = "<none>";
+
+      if (vRegs[valueIndex].isReg() && cc->isVirtIdValid(vRegs[valueIndex].id()))
+        virtReg = cc->virtRegById(vRegs[valueIndex].id());
+
+      ASMJIT_PROPAGATE(sb.appendFormat(" %s", virtReg ? virtReg->name() : nullReg));
+    }
+  }
+
+  if (count > 1)
+    ASMJIT_PROPAGATE(sb.append(']'));
+
+  return kErrorOk;
+}
+
+static Error formatFuncRets(
+  String& sb,
+  FormatFlags formatFlags,
+  const BaseCompiler* cc,
+  const FuncDetail& fd) noexcept {
+  // Formats the return value pack of `fd`; no virtual registers are passed, so none are printed.
+  return formatFuncValuePack(sb, formatFlags, cc, fd.retPack(), nullptr);
+}
+
+static Error formatFuncArgs(
+  String& sb,
+  FormatFlags formatFlags,
+  const BaseCompiler* cc,
+  const FuncDetail& fd,
+  const FuncNode::ArgPack* argPacks) noexcept {
+  // Appends "void" when `fd` has no arguments, otherwise every argument pack separated by ", ".
+  uint32_t argCount = fd.argCount();
+  if (!argCount)
+    return sb.append("void");
+  // NOTE(review): `argPacks` is indexed unconditionally - it presumably must hold `argCount` entries; confirm callers.
+  for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) {
+    if (argIndex)
+      ASMJIT_PROPAGATE(sb.append(", "));
+
+    ASMJIT_PROPAGATE(formatFuncValuePack(sb, formatFlags, cc, fd.argPack(argIndex), argPacks[argIndex]._data));
+  }
+
+  return kErrorOk;
+}
+#endif
+
+Error formatNode(
+  String& sb,
+  const FormatOptions& formatOptions,
+  const BaseBuilder* builder,
+  const BaseNode* node) noexcept {
+  // Appends a one-line textual form of `node`, optionally prefixed by its position and followed by its inline comment.
+  if (node->hasPosition() && formatOptions.hasFlag(FormatFlags::kPositions))
+    ASMJIT_PROPAGATE(sb.appendFormat("<%05u> ", node->position()));
+
+  size_t startLineIndex = sb.size();
+
+  switch (node->type()) {
+    case NodeType::kInst:
+    case NodeType::kJump: {
+      const InstNode* instNode = node->as<InstNode>();
+      ASMJIT_PROPAGATE(builder->_funcs.formatInstruction(sb, formatOptions.flags(), builder,
+        builder->arch(),
+        instNode->baseInst(), instNode->operands(), instNode->opCount()));
+      break;
+    }
+
+    case NodeType::kSection: {
+      const SectionNode* sectionNode = node->as<SectionNode>();
+      if (builder->_code->isSectionValid(sectionNode->id())) {
+        const Section* section = builder->_code->sectionById(sectionNode->id());
+        ASMJIT_PROPAGATE(sb.appendFormat(".section %s", section->name()));
+      }
+      break;
+    }
+
+    case NodeType::kLabel: {
+      const LabelNode* labelNode = node->as<LabelNode>();
+      ASMJIT_PROPAGATE(formatLabel(sb, formatOptions.flags(), builder, labelNode->labelId()));
+      ASMJIT_PROPAGATE(sb.append(":"));
+      break;
+    }
+
+    case NodeType::kAlign: {
+      const AlignNode* alignNode = node->as<AlignNode>();
+      ASMJIT_PROPAGATE(sb.appendFormat(".align %u (%s)",
+        alignNode->alignment(),
+        alignNode->alignMode() == AlignMode::kCode ? "code" : "data"));
+      break;
+    }
+
+    case NodeType::kEmbedData: {
+      const EmbedDataNode* embedNode = node->as<EmbedDataNode>();
+      ASMJIT_PROPAGATE(sb.append('.'));
+      ASMJIT_PROPAGATE(formatDataType(sb, formatOptions.flags(), builder->arch(), embedNode->typeId()));
+      ASMJIT_PROPAGATE(sb.appendFormat(" {Count=%zu Repeat=%zu TotalSize=%zu}", embedNode->itemCount(), embedNode->repeatCount(), embedNode->dataSize()));
+      break;
+    }
+
+    case NodeType::kEmbedLabel: {
+      const EmbedLabelNode* embedNode = node->as<EmbedLabelNode>();
+      ASMJIT_PROPAGATE(sb.append(".label "));
+      ASMJIT_PROPAGATE(formatLabel(sb, formatOptions.flags(), builder, embedNode->labelId()));
+      break;
+    }
+
+    case NodeType::kEmbedLabelDelta: {
+      const EmbedLabelDeltaNode* embedNode = node->as<EmbedLabelDeltaNode>();
+      ASMJIT_PROPAGATE(sb.append(".label ("));
+      ASMJIT_PROPAGATE(formatLabel(sb, formatOptions.flags(), builder, embedNode->labelId()));
+      ASMJIT_PROPAGATE(sb.append(" - "));
+      ASMJIT_PROPAGATE(formatLabel(sb, formatOptions.flags(), builder, embedNode->baseLabelId()));
+      ASMJIT_PROPAGATE(sb.append(")"));
+      break;
+    }
+
+    case NodeType::kConstPool: {
+      const ConstPoolNode* constPoolNode = node->as<ConstPoolNode>();
+      ASMJIT_PROPAGATE(sb.appendFormat("[ConstPool Size=%zu Alignment=%zu]", constPoolNode->size(), constPoolNode->alignment()));
+      break;
+    }
+
+    case NodeType::kComment: {
+      // The text of a CommentNode is its inline comment - return here so it's not appended a second time below.
+      const CommentNode* commentNode = node->as<CommentNode>();
+      return sb.appendFormat("; %s", commentNode->inlineComment());
+    }
+
+    case NodeType::kSentinel: {
+      const SentinelNode* sentinelNode = node->as<SentinelNode>();
+      const char* sentinelName = nullptr;
+
+      switch (sentinelNode->sentinelType()) {
+        case SentinelType::kFuncEnd:
+          sentinelName = "[FuncEnd]";
+          break;
+
+        default:
+          sentinelName = "[Sentinel]";
+          break;
+      }
+
+      ASMJIT_PROPAGATE(sb.append(sentinelName));
+      break;
+    }
+
+#ifndef ASMJIT_NO_COMPILER
+    case NodeType::kFunc: {
+      const FuncNode* funcNode = node->as<FuncNode>();
+
+      if (builder->isCompiler()) {
+        ASMJIT_PROPAGATE(formatLabel(sb, formatOptions.flags(), builder, funcNode->labelId()));
+        ASMJIT_PROPAGATE(sb.append(": "));
+
+        ASMJIT_PROPAGATE(formatFuncRets(sb, formatOptions.flags(), static_cast<const BaseCompiler*>(builder), funcNode->detail()));
+        ASMJIT_PROPAGATE(sb.append(" Func("));
+        ASMJIT_PROPAGATE(formatFuncArgs(sb, formatOptions.flags(), static_cast<const BaseCompiler*>(builder), funcNode->detail(), funcNode->argPacks()));
+        ASMJIT_PROPAGATE(sb.append(")"));
+      }
+      break;
+    }
+
+    case NodeType::kFuncRet: {
+      const FuncRetNode* retNode = node->as<FuncRetNode>();
+      ASMJIT_PROPAGATE(sb.append("[FuncRet]"));
+
+      for (uint32_t i = 0; i < 2; i++) {
+        const Operand_& op = retNode->_opArray[i];
+        if (!op.isNone()) {
+          ASMJIT_PROPAGATE(sb.append(i == 0 ? " " : ", "));
+          ASMJIT_PROPAGATE(formatOperand(sb, formatOptions.flags(), builder, builder->arch(), op));
+        }
+      }
+      break;
+    }
+
+    case NodeType::kInvoke: {
+      const InvokeNode* invokeNode = node->as<InvokeNode>();
+      ASMJIT_PROPAGATE(builder->_funcs.formatInstruction(sb, formatOptions.flags(), builder,
+        builder->arch(),
+        invokeNode->baseInst(), invokeNode->operands(), invokeNode->opCount()));
+      break;
+    }
+#endif
+
+    default: {
+      // A scoped enum is not promoted when passed through `...`, so convert it explicitly to match `%u`.
+      ASMJIT_PROPAGATE(sb.appendFormat("[UserNode:%u]", uint32_t(node->type())));
+    }
+  }
+
+  if (node->hasInlineComment()) {
+    size_t requiredPadding = paddingFromOptions(formatOptions, FormatPaddingGroup::kRegularLine);
+    size_t currentPadding = sb.size() - startLineIndex;
+
+    if (currentPadding < requiredPadding)
+      ASMJIT_PROPAGATE(sb.appendChars(' ', requiredPadding - currentPadding));
+
+    ASMJIT_PROPAGATE(sb.append("; "));
+    ASMJIT_PROPAGATE(sb.append(node->inlineComment()));
+  }
+
+  return kErrorOk;
+}
+
+Error formatNodeList(
+  String& sb,
+  const FormatOptions& formatOptions,
+  const BaseBuilder* builder) noexcept {
+  // Formats every node of `builder`, from its first node until `next()` yields nullptr.
+  return formatNodeList(sb, formatOptions, builder, builder->firstNode(), nullptr);
+}
+
+Error formatNodeList(
+  String& sb,
+  const FormatOptions& formatOptions,
+  const BaseBuilder* builder,
+  const BaseNode* begin,
+  const BaseNode* end) noexcept {
+  // Walks from `begin` up to (not including) `end` and appends each formatted node followed by '\n'.
+  const BaseNode* node = begin;
+  while (node != end) {
+    ASMJIT_PROPAGATE(formatNode(sb, formatOptions, builder, node));
+    ASMJIT_PROPAGATE(sb.append('\n'));
+    node = node->next();
+  }
+  return kErrorOk;
+}
+#endif
+
+} // {Formatter}
+
+ASMJIT_END_NAMESPACE
+
+#endif
diff --git a/lib/lepton/asmjit/core/formatter.h b/lib/lepton/asmjit/core/formatter.h
new file mode 100644
index 0000000000..d7a4b93476
--- /dev/null
+++ b/lib/lepton/asmjit/core/formatter.h
@@ -0,0 +1,247 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_FORMATTER_H_INCLUDED
+#define ASMJIT_CORE_FORMATTER_H_INCLUDED
+
+#include "../core/globals.h"
+#include "../core/inst.h"
+#include "../core/string.h"
+#include "../core/support.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \addtogroup asmjit_logging
+//! \{
+
+class BaseBuilder;
+class BaseEmitter;
+class BaseNode;
+struct Operand_;
+
+//! Format flags used by \ref Logger and \ref FormatOptions.
+enum class FormatFlags : uint32_t {
+  //! No formatting flags.
+  kNone = 0u,
+
+  //! Show also binary form of each logged instruction (Assembler).
+  kMachineCode = 0x00000001u,
+  //! Show a text explanation of some immediate values.
+  kExplainImms = 0x00000002u,
+  //! Use hexadecimal notation of immediate values.
+  kHexImms = 0x00000004u,
+  //! Use hexadecimal notation of addresses and offsets in addresses.
+  kHexOffsets = 0x00000008u,
+  //! Show casts between virtual register types (Compiler output).
+  kRegCasts = 0x00000010u,
+  //! Show positions associated with nodes (Compiler output).
+  kPositions = 0x00000020u
+};
+ASMJIT_DEFINE_ENUM_FLAGS(FormatFlags)
+
+//! Format indentation group, used by \ref FormatOptions.
+enum class FormatIndentationGroup : uint32_t {
+  //! Indentation used for instructions and directives.
+  kCode = 0u,
+  //! Indentation used for labels and function nodes.
+  kLabel = 1u,
+  //! Indentation used for comments (not inline comments).
+  kComment = 2u,
+
+  //! \cond INTERNAL
+  //! Reserved for future use.
+  kReserved = 3u,
+  //! \endcond
+
+  //! Maximum value of `FormatIndentationGroup`.
+  kMaxValue = kReserved
+};
+
+//! Format padding group, used by \ref FormatOptions.
+enum class FormatPaddingGroup : uint32_t {
+  //! Describes padding of a regular line, which can represent instruction, data, or assembler directives.
+  kRegularLine = 0,
+  //! Describes padding of machine code dump that is visible next to the instruction, if enabled.
+  kMachineCode = 1,
+
+  //! Maximum value of `FormatPaddingGroup`.
+  kMaxValue = kMachineCode
+};
+
+//! Formatting options used by \ref Logger and \ref Formatter.
+class FormatOptions {
+public:
+  //! \name Members
+  //! \{
+
+  //! Format flags.
+  FormatFlags _flags = FormatFlags::kNone;
+  //! Indentations for each indentation group.
+  Support::Array<uint8_t, uint32_t(FormatIndentationGroup::kMaxValue) + 1> _indentation {};
+  //! Paddings for each padding group.
+  Support::Array<uint16_t, uint32_t(FormatPaddingGroup::kMaxValue) + 1> _padding {};
+
+  //! \}
+
+  //! \name Reset
+  //! \{
+
+  //! Resets FormatOptions to its default initialized state.
+  inline void reset() noexcept {
+    _flags = FormatFlags::kNone;
+    _indentation.fill(uint8_t(0));
+    _padding.fill(uint16_t(0));
+  }
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns format flags.
+  inline FormatFlags flags() const noexcept { return _flags; }
+  //! Tests whether the given `flag` is set in format flags.
+  inline bool hasFlag(FormatFlags flag) const noexcept { return Support::test(_flags, flag); }
+
+  //! Resets all format flags to `flags`.
+  inline void setFlags(FormatFlags flags) noexcept { _flags = flags; }
+  //! Adds `flags` to format flags.
+  inline void addFlags(FormatFlags flags) noexcept { _flags |= flags; }
+  //! Removes `flags` from format flags.
+  inline void clearFlags(FormatFlags flags) noexcept { _flags &= ~flags; }
+
+  //! Returns indentation for the given indentation `group`.
+  inline uint8_t indentation(FormatIndentationGroup group) const noexcept { return _indentation[group]; }
+  //! Sets indentation for the given indentation `group` (`n` is narrowed to `uint8_t`).
+  inline void setIndentation(FormatIndentationGroup group, uint32_t n) noexcept { _indentation[group] = uint8_t(n); }
+  //! Resets indentation for the given indentation `group` to zero.
+  inline void resetIndentation(FormatIndentationGroup group) noexcept { _indentation[group] = uint8_t(0); }
+
+  //! Returns padding for the given padding `group`.
+  inline size_t padding(FormatPaddingGroup group) const noexcept { return _padding[group]; }
+  //! Sets padding for the given padding `group` (`n` is narrowed to `uint16_t`).
+  inline void setPadding(FormatPaddingGroup group, size_t n) noexcept { _padding[group] = uint16_t(n); }
+  //! Resets padding for the given padding `group` to zero, which means that a default padding will be used
+  //! based on the target architecture properties.
+  inline void resetPadding(FormatPaddingGroup group) noexcept { _padding[group] = uint16_t(0); }
+
+  //! \}
+};
+
+//! Provides formatting functionality to format operands, instructions, and nodes.
+namespace Formatter {
+
+#ifndef ASMJIT_NO_LOGGING
+
+//! Appends a formatted `typeId` to the output string `sb`.
+ASMJIT_API Error formatTypeId(
+  String& sb,
+  TypeId typeId) noexcept;
+
+//! Appends a formatted `featureId` to the output string `sb`.
+//!
+//! See \ref CpuFeatures.
+ASMJIT_API Error formatFeature(
+  String& sb,
+  Arch arch,
+  uint32_t featureId) noexcept;
+
+//! Appends a formatted register to the output string `sb`.
+//!
+//! \note Emitter is optional, but it's required to format virtual registers, which won't be formatted properly
+//! if the `emitter` is not provided.
+ASMJIT_API Error formatRegister(
+  String& sb,
+  FormatFlags formatFlags,
+  const BaseEmitter* emitter,
+  Arch arch,
+  RegType regType,
+  uint32_t regId) noexcept;
+
+//! Appends a formatted label to the output string `sb`.
+//!
+//! \note Emitter is optional, but it's required to format named labels properly, otherwise the label is
+//! formatted as if it were an anonymous label.
+ASMJIT_API Error formatLabel(
+  String& sb,
+  FormatFlags formatFlags,
+  const BaseEmitter* emitter,
+  uint32_t labelId) noexcept;
+
+//! Appends a formatted operand to the output string `sb`.
+//!
+//! \note Emitter is optional, but it's required to format named labels and virtual registers. See
+//! \ref formatRegister() and \ref formatLabel() for more details.
+ASMJIT_API Error formatOperand(
+  String& sb,
+  FormatFlags formatFlags,
+  const BaseEmitter* emitter,
+  Arch arch,
+  const Operand_& op) noexcept;
+
+//! Appends a formatted data-type to the output string `sb`.
+ASMJIT_API Error formatDataType(
+  String& sb,
+  FormatFlags formatFlags,
+  Arch arch,
+  TypeId typeId) noexcept;
+
+//! Appends a formatted data to the output string `sb`.
+ASMJIT_API Error formatData(
+  String& sb,
+  FormatFlags formatFlags,
+  Arch arch,
+  TypeId typeId, const void* data, size_t itemCount, size_t repeatCount = 1) noexcept;
+
+//! Appends a formatted instruction to the output string `sb`.
+//!
+//! \note Emitter is optional, but it's required to format named labels and virtual registers. See
+//! \ref formatRegister() and \ref formatLabel() for more details.
+ASMJIT_API Error formatInstruction(
+  String& sb,
+  FormatFlags formatFlags,
+  const BaseEmitter* emitter,
+  Arch arch,
+  const BaseInst& inst, const Operand_* operands, size_t opCount) noexcept;
+
+#ifndef ASMJIT_NO_BUILDER
+//! Appends a formatted node to the output string `sb`.
+//!
+//! The `node` must belong to the provided `builder`.
+ASMJIT_API Error formatNode(
+  String& sb,
+  const FormatOptions& formatOptions,
+  const BaseBuilder* builder,
+  const BaseNode* node) noexcept;
+
+//! Appends formatted nodes to the output string `sb`.
+//!
+//! All nodes that are part of the given `builder` will be appended.
+ASMJIT_API Error formatNodeList(
+  String& sb,
+  const FormatOptions& formatOptions,
+  const BaseBuilder* builder) noexcept;
+
+//! Appends formatted nodes to the output string `sb`.
+//!
+//! This function works the same as \ref formatNode(), but appends more nodes to the output string,
+//! separating each node with a newline '\n' character.
+ASMJIT_API Error formatNodeList(
+  String& sb,
+  const FormatOptions& formatOptions,
+  const BaseBuilder* builder,
+  const BaseNode* begin,
+  const BaseNode* end) noexcept;
+#endif
+
+#endif
+
+} // {Formatter}
+
+//! \}
+
+ASMJIT_END_NAMESPACE
+
+#endif // ASMJIT_CORE_FORMATTER_H_INCLUDED
diff --git a/lib/lepton/asmjit/core/formatter_p.h b/lib/lepton/asmjit/core/formatter_p.h
new file mode 100644
index 0000000000..6070fd74f2
--- /dev/null
+++ b/lib/lepton/asmjit/core/formatter_p.h
@@ -0,0 +1,34 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_FORMATTER_P_H_INCLUDED
+#define ASMJIT_CORE_FORMATTER_P_H_INCLUDED
+
+#include "../core/formatter.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \cond INTERNAL
+//! \addtogroup asmjit_logging
+//! \{
+
+namespace Formatter {
+
+static ASMJIT_FORCE_INLINE size_t paddingFromOptions(const FormatOptions& formatOptions, FormatPaddingGroup group) noexcept {
+  static constexpr uint16_t _defaultPaddingTable[uint32_t(FormatPaddingGroup::kMaxValue) + 1] = { 44, 26 };
+  static_assert(uint32_t(FormatPaddingGroup::kMaxValue) + 1 == 2, "If a new group is defined it must be added here");
+  // A padding of zero in `formatOptions` selects the built-in default of the given `group`.
+  size_t padding = formatOptions.padding(group);
+  return padding ? padding : size_t(_defaultPaddingTable[uint32_t(group)]);
+}
+
+} // {Formatter}
+
+//! \}
+//! \endcond
+
+ASMJIT_END_NAMESPACE
+
+#endif // ASMJIT_CORE_FORMATTER_P_H_INCLUDED
diff --git a/lib/lepton/asmjit/core/func.cpp b/lib/lepton/asmjit/core/func.cpp
new file mode 100644
index 0000000000..04dc2aaf20
--- /dev/null
+++ b/lib/lepton/asmjit/core/func.cpp
@@ -0,0 +1,286 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#include "../core/archtraits.h"
+#include "../core/func.h"
+#include "../core/operand.h"
+#include "../core/type.h"
+#include "../core/funcargscontext_p.h"
+
+#if !defined(ASMJIT_NO_X86)
+  #include "../x86/x86func_p.h"
+#endif
+
+#if !defined(ASMJIT_NO_AARCH64)
+  #include "../arm/a64func_p.h"
+#endif
+
+ASMJIT_BEGIN_NAMESPACE
+
+// CallConv - Init & Reset
+// =======================
+
+ASMJIT_FAVOR_SIZE Error CallConv::init(CallConvId ccId, const Environment& environment) noexcept {
+  reset();
+  // After the reset, the architecture backend matching `environment` fills in the calling convention.
+#if !defined(ASMJIT_NO_X86)
+  if (environment.isFamilyX86())
+    return x86::FuncInternal::initCallConv(*this, ccId, environment);
+#endif
+
+#if !defined(ASMJIT_NO_AARCH64)
+  if (environment.isFamilyAArch64())
+    return a64::FuncInternal::initCallConv(*this, ccId, environment);
+#endif
+  // No compiled-in backend handles the architecture of `environment`.
+  return DebugUtils::errored(kErrorInvalidArgument);
+}
+
+// FuncDetail - Init / Reset
+// =========================
+
+ASMJIT_FAVOR_SIZE Error FuncDetail::init(const FuncSignature& signature, const Environment& environment) noexcept {
+  CallConvId ccId = signature.callConvId();
+  uint32_t argCount = signature.argCount();
+  // Signatures with more than `Globals::kMaxFuncArgs` arguments are rejected.
+  if (ASMJIT_UNLIKELY(argCount > Globals::kMaxFuncArgs))
+    return DebugUtils::errored(kErrorInvalidArgument);
+
+  CallConv& cc = _callConv;
+  ASMJIT_PROPAGATE(cc.init(ccId, environment));
+
+  uint32_t registerSize = Environment::registerSizeFromArch(cc.arch());
+  uint32_t deabstractDelta = TypeUtils::deabstractDeltaOfSize(registerSize);
+  // Only the first value of each argument pack is initialized here, using the deabstracted signature type.
+  const TypeId* signatureArgs = signature.args();
+  for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) {
+    FuncValuePack& argPack = _args[argIndex];
+    argPack[0].initTypeId(TypeUtils::deabstract(signatureArgs[argIndex], deabstractDelta));
+  }
+
+  _argCount = uint8_t(argCount);
+  _vaIndex = uint8_t(signature.vaIndex());
+
+  TypeId ret = signature.ret();
+  if (ret != TypeId::kVoid)
+    _rets[0].initTypeId(TypeUtils::deabstract(ret, deabstractDelta));
+  // The remaining initialization is delegated to the architecture backend matching `environment`.
+#if !defined(ASMJIT_NO_X86)
+  if (environment.isFamilyX86())
+    return x86::FuncInternal::initFuncDetail(*this, signature, registerSize);
+#endif
+
+#if !defined(ASMJIT_NO_AARCH64)
+  if (environment.isFamilyAArch64())
+    return a64::FuncInternal::initFuncDetail(*this, signature, registerSize);
+#endif
+
+  // We should never get here: if `cc.init()` succeeded then there has to be an implementation for the current
+  // architecture. However, stay safe.
+  return DebugUtils::errored(kErrorInvalidArgument);
+}
+
+// FuncFrame - Init
+// ================
+
+ASMJIT_FAVOR_SIZE Error FuncFrame::init(const FuncDetail& func) noexcept {
+  Arch arch = func.callConv().arch();
+  if (!Environment::isValidArch(arch))
+    return DebugUtils::errored(kErrorInvalidArch);
+
+  const ArchTraits& archTraits = ArchTraits::byArch(arch);
+
+  // Initializing FuncFrame means making a copy of some properties of `func`. Properties like `_localStackSize` will
+  // be set by the user before the frame is finalized.
+  reset();
+
+  _arch = arch;
+  _spRegId = uint8_t(archTraits.spRegId());
+  _saRegId = uint8_t(BaseReg::kIdBad);
+
+  uint32_t naturalStackAlignment = func.callConv().naturalStackAlignment();
+  uint32_t minDynamicAlignment = Support::max<uint32_t>(naturalStackAlignment, 16);
+  // Keeps the minimum dynamic alignment strictly greater than the natural one (and never below 16).
+  if (minDynamicAlignment == naturalStackAlignment)
+    minDynamicAlignment <<= 1;
+
+  _naturalStackAlignment = uint8_t(naturalStackAlignment);
+  _minDynamicAlignment = uint8_t(minDynamicAlignment);
+  _redZoneSize = uint8_t(func.redZoneSize());
+  _spillZoneSize = uint8_t(func.spillZoneSize());
+  _finalStackAlignment = uint8_t(_naturalStackAlignment);
+  // The size of stack arguments is only recorded when the callee is the one that pops them.
+  if (func.hasFlag(CallConvFlags::kCalleePopsStack)) {
+    _calleeStackCleanup = uint16_t(func.argStackSize());
+  }
+
+  // Initial masks of dirty and preserved registers.
+  for (RegGroup group : RegGroupVirtValues{}) {
+    _dirtyRegs[group] = func.usedRegs(group);
+    _preservedRegs[group] = func.preservedRegs(group);
+  }
+
+  // Exclude stack pointer - this register is never included in saved GP regs.
+  _preservedRegs[RegGroup::kGp] &= ~Support::bitMask(archTraits.spRegId());
+
+  // The size and alignment of save/restore area of registers for each virtual register group
+  _saveRestoreRegSize = func.callConv()._saveRestoreRegSize;
+  _saveRestoreAlignment = func.callConv()._saveRestoreAlignment;
+
+  return kErrorOk;
+}
+
+// FuncFrame - Finalize
+// ====================
+
+ASMJIT_FAVOR_SIZE Error FuncFrame::finalize() noexcept {
+  if (!Environment::isValidArch(arch()))
+    return DebugUtils::errored(kErrorInvalidArch);
+  // Computes the final frame layout (save-area sizes, offsets, stack adjustment) from the values stored in the frame.
+  const ArchTraits& archTraits = ArchTraits::byArch(arch());
+
+  uint32_t registerSize = _saveRestoreRegSize[RegGroup::kGp];
+  uint32_t vectorSize = _saveRestoreRegSize[RegGroup::kVec];
+  uint32_t returnAddressSize = archTraits.hasLinkReg() ? 0u : registerSize;
+
+  // The final stack alignment must be updated accordingly to call and local stack alignments.
+  uint32_t stackAlignment = _finalStackAlignment;
+  ASMJIT_ASSERT(stackAlignment == Support::max(_naturalStackAlignment,
+                                               _callStackAlignment,
+                                               _localStackAlignment));
+
+  bool hasFP = hasPreservedFP();
+  bool hasDA = hasDynamicAlignment();
+
+  uint32_t kSp = archTraits.spRegId();
+  uint32_t kFp = archTraits.fpRegId();
+  uint32_t kLr = archTraits.linkRegId();
+
+  // Make frame pointer dirty if the function uses it.
+  if (hasFP) {
+    _dirtyRegs[RegGroup::kGp] |= Support::bitMask(kFp);
+
+    // Currently required by ARM, if this works differently across architectures we would have to generalize most
+    // likely in CallConv.
+    if (kLr != BaseReg::kIdBad)
+      _dirtyRegs[RegGroup::kGp] |= Support::bitMask(kLr);
+  }
+
+  // These two are identical if the function doesn't align its stack dynamically.
+  uint32_t saRegId = _saRegId;
+  if (saRegId == BaseReg::kIdBad)
+    saRegId = kSp;
+
+  // Fix stack arguments base-register from SP to FP in case it was not picked before and the function performs
+  // dynamic stack alignment.
+  if (hasDA && saRegId == kSp)
+    saRegId = kFp;
+
+  // Mark as dirty any register but SP if used as SA pointer.
+  if (saRegId != kSp)
+    _dirtyRegs[RegGroup::kGp] |= Support::bitMask(saRegId);
+
+  _spRegId = uint8_t(kSp);
+  _saRegId = uint8_t(saRegId);
+
+  // Setup stack size used to save preserved registers.
+  uint32_t saveRestoreSizes[2] {};           // [0] = groups saved by push/pop, [1] = groups saved by other means.
+  for (RegGroup group : RegGroupVirtValues{})
+    saveRestoreSizes[size_t(!archTraits.hasInstPushPop(group))]
+      += Support::alignUp(Support::popcnt(savedRegs(group)) * saveRestoreRegSize(group), saveRestoreAlignment(group));
+
+  _pushPopSaveSize  = uint16_t(saveRestoreSizes[0]);
+  _extraRegSaveSize = uint16_t(saveRestoreSizes[1]);
+
+  uint32_t v = 0;                            // The beginning of the stack frame relative to SP after prolog.
+  v += callStackSize();                      // Count 'callStackSize'      <- This is used to call functions.
+  v  = Support::alignUp(v, stackAlignment);  // Align to function's stack alignment.
+
+  _localStackOffset = v;                     // Store 'localStackOffset'   <- Function's local stack starts here.
+  v += localStackSize();                     // Count 'localStackSize'     <- Function's local stack ends here.
+
+  // If the function's stack must be aligned, calculate the alignment necessary to store vector registers, and set
+  // `FuncAttributes::kAlignedVecSR` to inform PEI that it can use instructions that perform aligned stores/loads.
+  if (stackAlignment >= vectorSize && _extraRegSaveSize) {
+    addAttributes(FuncAttributes::kAlignedVecSR);
+    v = Support::alignUp(v, vectorSize);     // Align 'extraRegSaveOffset'.
+  }
+
+  _extraRegSaveOffset = v;                   // Store 'extraRegSaveOffset' <- Non-GP save/restore starts here.
+  v += _extraRegSaveSize;                    // Count 'extraRegSaveSize'   <- Non-GP save/restore ends here.
+
+  // Calculate if dynamic alignment (DA) slot (stored as offset relative to SP) is required and its offset.
+  if (hasDA && !hasFP) {
+    _daOffset = v;                           // Store 'daOffset'           <- DA pointer would be stored here.
+    v += registerSize;                       // Count 'daOffset'.
+  }
+  else {
+    _daOffset = FuncFrame::kTagInvalidOffset;
+  }
+
+  // Link Register
+  // -------------
+  //
+  // The stack is aligned after the function call as the return address is stored in a link register. Some
+  // architectures may require to always have aligned stack after PUSH/POP operation, which is represented
+  // by ArchTraits::stackAlignmentConstraint().
+  //
+  // No Link Register (X86/X64)
+  // --------------------------
+  //
+  // The return address should be stored after GP save/restore regs. It has the same size as `registerSize`
+  // (basically the native register/pointer size). We don't adjust it now as `v` now contains the exact size
+  // that the function requires to adjust (call frame + stack frame, vec stack size). The stack (if we consider
+  // this size) is misaligned now, as it's always aligned before the function call - when `call()` is executed
+  // it pushes the current EIP|RIP onto the stack, and misaligns it by 12 or 8 bytes (depending on the
+  // architecture). So count number of bytes needed to align it up to the function's CallFrame (the beginning).
+  if (v || hasFuncCalls() || !returnAddressSize)
+    v += Support::alignUpDiff(v + pushPopSaveSize() + returnAddressSize, stackAlignment);
+
+  _pushPopSaveOffset = v;                    // Store 'pushPopSaveOffset'  <- Function's push/pop save/restore starts here.
+  _stackAdjustment = v;                      // Store 'stackAdjustment'    <- SA used by 'add SP, SA' and 'sub SP, SA'.
+  v += _pushPopSaveSize;                     // Count 'pushPopSaveSize'    <- Function's push/pop save/restore ends here.
+  _finalStackSize = v;                       // Store 'finalStackSize'     <- Final stack used by the function.
+
+  if (!archTraits.hasLinkReg())
+    v += registerSize;                       // Count 'ReturnAddress'      <- As CALL pushes onto stack.
+
+  // If the function performs dynamic stack alignment then the stack-adjustment must be aligned.
+  if (hasDA)
+    _stackAdjustment = Support::alignUp(_stackAdjustment, stackAlignment);
+
+  // Calculate where the function arguments start relative to SP.
+  _saOffsetFromSP = hasDA ? FuncFrame::kTagInvalidOffset : v;
+
+  // Calculate where the function arguments start relative to FP or user-provided register.
+  _saOffsetFromSA = hasFP ? returnAddressSize + registerSize      // Return address + frame pointer.
+                          : returnAddressSize + _pushPopSaveSize; // Return address + all push/pop regs.
+
+  return kErrorOk;
+}
+
+// FuncArgsAssignment - UpdateFuncFrame
+// ====================================
+
+ASMJIT_FAVOR_SIZE Error FuncArgsAssignment::updateFuncFrame(FuncFrame& frame) const noexcept {
+  Arch arch = frame.arch();
+  const FuncDetail* func = funcDetail();
+  // An assignment without an attached FuncDetail cannot update the frame.
+  if (!func)
+    return DebugUtils::errored(kErrorInvalidState);
+
+  RAConstraints constraints;
+  ASMJIT_PROPAGATE(constraints.init(arch));
+  // The context is initialized from this assignment and then applies its marking passes to `frame`.
+  FuncArgsContext ctx;
+  ASMJIT_PROPAGATE(ctx.initWorkData(frame, *this, &constraints));
+  ASMJIT_PROPAGATE(ctx.markDstRegsDirty(frame));
+  ASMJIT_PROPAGATE(ctx.markScratchRegs(frame));
+  ASMJIT_PROPAGATE(ctx.markStackArgsReg(frame));
+  return kErrorOk;
+}
+
+ASMJIT_END_NAMESPACE
diff --git a/lib/lepton/asmjit/core/func.h b/lib/lepton/asmjit/core/func.h
new file mode 100644
index 0000000000..78a41f98f6
--- /dev/null
+++ b/lib/lepton/asmjit/core/func.h
@@ -0,0 +1,1445 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_FUNC_H_INCLUDED
+#define ASMJIT_CORE_FUNC_H_INCLUDED
+
+#include "../core/archtraits.h"
+#include "../core/environment.h"
+#include "../core/operand.h"
+#include "../core/type.h"
+#include "../core/support.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \addtogroup asmjit_function
+//! \{
+
+//! Calling convention id.
+//!
+//! Calling conventions can be divided into the following groups:
+//!
+//!   - Universal - calling conventions are applicable to any target. They will be converted to a target dependent
+//!     calling convention at runtime by \ref CallConv::init() with some help from \ref Environment. The purpose of
+//!     these calling conventions is to make using functions less target dependent and closer to C and C++.
+//!
+//!   - Target specific - calling conventions that are used by a particular architecture and ABI. For example
+//!     Windows 64-bit calling convention and AMD64 SystemV calling convention.
+enum class CallConvId : uint8_t {
+  //! None or invalid (can't be used).
+  kNone = 0,
+
+  // Universal Calling Conventions
+  // -----------------------------
+
+  //! Standard function call or explicit `__cdecl` where it can be specified.
+  //!
+  //! This is a universal calling convention, which is used to initialize specific calling conventions based on
+  //! architecture, platform, and its ABI.
+  kCDecl = 1,
+
+  //! `__stdcall` on targets that support this calling convention (X86).
+  //!
+  //! \note This calling convention is only supported on 32-bit X86. If used on environment that doesn't support
+  //! this calling convention it will be replaced by \ref CallConvId::kCDecl.
+  kStdCall = 2,
+
+  //! `__fastcall` on targets that support this calling convention (X86).
+  //!
+  //! \note This calling convention is only supported on 32-bit X86. If used on environment that doesn't support
+  //! this calling convention it will be replaced by \ref CallConvId::kCDecl.
+  kFastCall = 3,
+
+  //! `__vectorcall` on targets that support this calling convention (X86/X64).
+  //!
+  //! \note This calling convention is only supported on 32-bit and 64-bit X86 architecture on Windows platform.
+  //! If used on environment that doesn't support this calling convention it will be replaced by \ref CallConvId::kCDecl.
+  kVectorCall = 4,
+
+  //! `__thiscall` on targets that support this calling convention (X86).
+  //!
+  //! \note This calling convention is only supported on 32-bit X86 Windows platform. If used on environment that
+  //! doesn't support this calling convention it will be replaced by \ref CallConvId::kCDecl.
+  kThisCall = 5,
+
+  //! `__attribute__((regparm(1)))` convention (GCC and Clang).
+  kRegParm1 = 6,
+  //! `__attribute__((regparm(2)))` convention (GCC and Clang).
+  kRegParm2 = 7,
+  //! `__attribute__((regparm(3)))` convention (GCC and Clang).
+  kRegParm3 = 8,
+
+  //! Soft-float calling convention (ARM).
+  //!
+  //! Floating point arguments are passed via general purpose registers.
+  kSoftFloat = 9,
+
+  //! Hard-float calling convention (ARM).
+  //!
+  //! Floating point arguments are passed via SIMD registers.
+  kHardFloat = 10,
+
+  //! AsmJit specific calling convention designed for calling functions inside a multimedia code that don't use many
+  //! registers internally, but are long enough to be called and not inlined. These functions are usually used to
+  //! calculate trigonometric functions, logarithms, etc...
+  kLightCall2 = 16,
+  kLightCall3 = 17,
+  kLightCall4 = 18,
+
+  // ABI-Specific Calling Conventions
+  // --------------------------------
+
+  //! X64 System-V calling convention.
+  kX64SystemV = 32,
+  //! X64 Windows calling convention.
+  kX64Windows = 33,
+
+  //! Maximum value of `CallConvId`.
+  kMaxValue = kX64Windows,
+
+  // Host Calling Conventions
+  // ------------------------
+
+  //! Host calling convention detected at compile-time.
+  kHost =
+#if defined(_DOXYGEN)
+    DETECTED_AT_COMPILE_TIME
+#elif ASMJIT_ARCH_ARM == 32 && defined(__SOFTFP__)
+    kSoftFloat
+#elif ASMJIT_ARCH_ARM == 32 && !defined(__SOFTFP__)
+    kHardFloat
+#else
+    kCDecl
+#endif
+};
+
+//! Strategy used by calling conventions to assign registers to function arguments.
+//!
+//! Calling convention strategy describes how AsmJit should convert function arguments used by \ref FuncSignature
+//! into register identifiers and stack offsets. The \ref CallConvStrategy::kDefault strategy assigns registers
+//! and then stack whereas \ref CallConvStrategy::kX64Windows strategy does register shadowing as defined by WIN64
+//! calling convention, which is only used by 64-bit Windows.
+enum class CallConvStrategy : uint8_t {
+  //! Default register assignment strategy (registers are assigned first, then stack).
+  kDefault = 0,
+  //! Windows 64-bit ABI register assignment strategy (register shadowing as defined by the WIN64 convention).
+  kX64Windows = 1,
+  //! Windows 64-bit __vectorcall register assignment strategy.
+  kX64VectorCall = 2,
+
+  //! Maximum value of `CallConvStrategy`.
+  kMaxValue = kX64VectorCall
+};
+
+//! Calling convention flags (every enumerator occupies a distinct bit, so flags can be combined).
+enum class CallConvFlags : uint32_t {
+  //! No flags.
+  kNone = 0,
+  //! Callee is responsible for cleaning up the stack.
+  kCalleePopsStack = 0x0001u,
+  //! Pass vector arguments indirectly (as a pointer).
+  kIndirectVecArgs = 0x0002u,
+  //! Pass F32 and F64 arguments via VEC128 register.
+  kPassFloatsByVec = 0x0004u,
+  //! Pass MMX and vector arguments via stack if the function has variable arguments.
+  kPassVecByStackIfVA = 0x0008u,
+  //! MMX registers are passed and returned via GP registers.
+  kPassMmxByGp = 0x0010u,
+  //! MMX registers are passed and returned via XMM registers.
+  kPassMmxByXmm = 0x0020u,
+  //! Calling convention can be used with variable arguments.
+  kVarArgCompatible = 0x0080u
+};
+ASMJIT_DEFINE_ENUM_FLAGS(CallConvFlags)
+
+//! Function calling convention.
+//!
+//! Function calling convention is a scheme that defines how function parameters are passed and how function
+//! returns its result. AsmJit defines a variety of architecture and OS specific calling conventions and also
+//! provides a compile time detection to make the code-generation easier.
+struct CallConv {
+  //! \name Constants
+  //! \{
+
+  enum : uint32_t {
+    //! Maximum number of register arguments per register group.
+    //!
+    //! \note This is not really AsmJit's limitation, it's just the number that makes sense considering all common
+    //! calling conventions. Usually even conventions that use registers to pass function arguments are limited to 8
+    //! or fewer arguments passed via registers per group.
+    kMaxRegArgsPerGroup = 16
+  };
+
+  //! \}
+
+  //! \name Members
+  //! \{
+
+  //! Target architecture.
+  Arch _arch;
+  //! Calling convention id.
+  CallConvId _id;
+  //! Register assignment strategy.
+  CallConvStrategy _strategy;
+
+  //! Red zone size (AMD64 == 128 bytes).
+  uint8_t _redZoneSize;
+  //! Spill zone size (WIN-X64 == 32 bytes).
+  uint8_t _spillZoneSize;
+  //! Natural stack alignment as defined by OS/ABI.
+  uint8_t _naturalStackAlignment;
+
+  //! Calling convention flags.
+  CallConvFlags _flags;
+
+  //! Size to save/restore per register group.
+  Support::Array<uint8_t, Globals::kNumVirtGroups> _saveRestoreRegSize;
+  //! Alignment of save/restore groups.
+  Support::Array<uint8_t, Globals::kNumVirtGroups> _saveRestoreAlignment;
+
+  //! Mask of all passed registers, per group.
+  Support::Array<RegMask, Globals::kNumVirtGroups> _passedRegs;
+  //! Mask of all preserved registers, per group.
+  Support::Array<RegMask, Globals::kNumVirtGroups> _preservedRegs;
+
+  //! Passed registers' order.
+  union RegOrder {
+    //! Passed registers, ordered.
+    uint8_t id[kMaxRegArgsPerGroup];
+    //! Packed IDs in `uint32_t` array.
+    uint32_t packed[(kMaxRegArgsPerGroup + 3) / 4];
+  };
+
+  //! Passed registers' order, per register group.
+  Support::Array<RegOrder, Globals::kNumVirtGroups> _passedOrder;
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Initializes this calling convention to the given `ccId` based on the `environment`.
+  //!
+  //! See \ref CallConvId and \ref Environment for more details.
+  ASMJIT_API Error init(CallConvId ccId, const Environment& environment) noexcept;
+
+  //! Resets this CallConv struct into a defined state.
+  //!
+  //! It's recommended to reset the \ref CallConv struct in case you would like to create a custom calling convention
+  //! as it prevents using uninitialized data (CallConv doesn't have a constructor that would initialize it,
+  //! it's just a struct).
+  inline void reset() noexcept {
+    memset(this, 0, sizeof(*this));
+    memset(_passedOrder.data(), 0xFF, sizeof(_passedOrder)); // 0xFF == no register, as in setPassedToNone().
+  }
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns the target architecture of this calling convention.
+  inline Arch arch() const noexcept { return _arch; }
+  //! Sets the target architecture of this calling convention.
+  inline void setArch(Arch arch) noexcept { _arch = arch; }
+
+  //! Returns the calling convention id.
+  inline CallConvId id() const noexcept { return _id; }
+  //! Sets the calling convention id.
+  inline void setId(CallConvId ccId) noexcept { _id = ccId; }
+
+  //! Returns the strategy used to assign registers to arguments.
+  inline CallConvStrategy strategy() const noexcept { return _strategy; }
+  //! Sets the strategy used to assign registers to arguments.
+  inline void setStrategy(CallConvStrategy ccStrategy) noexcept { _strategy = ccStrategy; }
+
+  //! Tests whether the calling convention has the given `flag` set.
+  inline bool hasFlag(CallConvFlags flag) const noexcept { return Support::test(_flags, flag); }
+  //! Returns the calling convention flags, see \ref CallConvFlags.
+  inline CallConvFlags flags() const noexcept { return _flags; }
+  //! Sets the calling convention flags (replaces the current value), see \ref CallConvFlags.
+  inline void setFlags(CallConvFlags flag) noexcept { _flags = flag; };
+  //! Adds the calling convention flags (ORs them into the current value), see \ref CallConvFlags.
+  inline void addFlags(CallConvFlags flags) noexcept { _flags |= flags; };
+
+  //! Tests whether this calling convention specifies 'RedZone'.
+  inline bool hasRedZone() const noexcept { return _redZoneSize != 0; }
+  //! Tests whether this calling convention specifies 'SpillZone'.
+  inline bool hasSpillZone() const noexcept { return _spillZoneSize != 0; }
+
+  //! Returns size of 'RedZone'.
+  inline uint32_t redZoneSize() const noexcept { return _redZoneSize; }
+  //! Returns size of 'SpillZone'.
+  inline uint32_t spillZoneSize() const noexcept { return _spillZoneSize; }
+
+  //! Sets size of 'RedZone'.
+  inline void setRedZoneSize(uint32_t size) noexcept { _redZoneSize = uint8_t(size); }
+  //! Sets size of 'SpillZone'.
+  inline void setSpillZoneSize(uint32_t size) noexcept { _spillZoneSize = uint8_t(size); }
+
+  //! Returns a natural stack alignment.
+  inline uint32_t naturalStackAlignment() const noexcept { return _naturalStackAlignment; }
+  //! Sets a natural stack alignment.
+  //!
+  //! This function can be used to override the default stack alignment in case that you know that its alignment is
+  //! different. For example it allows to implement custom calling conventions that guarantee higher stack alignment.
+  inline void setNaturalStackAlignment(uint32_t value) noexcept { _naturalStackAlignment = uint8_t(value); }
+
+  //! Returns the size of a register (or its part) to be saved and restored of the given `group`.
+  inline uint32_t saveRestoreRegSize(RegGroup group) const noexcept { return _saveRestoreRegSize[group]; }
+  //! Sets the size of a register (or its part) to be saved and restored of the given `group`.
+  inline void setSaveRestoreRegSize(RegGroup group, uint32_t size) noexcept { _saveRestoreRegSize[group] = uint8_t(size); }
+
+  //! Returns the alignment of a save-restore area of the given `group`.
+  inline uint32_t saveRestoreAlignment(RegGroup group) const noexcept { return _saveRestoreAlignment[group]; }
+  //! Sets the alignment of a save-restore area of the given `group`.
+  inline void setSaveRestoreAlignment(RegGroup group, uint32_t alignment) noexcept { _saveRestoreAlignment[group] = uint8_t(alignment); }
+
+  //! Returns the order of passed registers of the given `group`.
+  inline const uint8_t* passedOrder(RegGroup group) const noexcept {
+    ASMJIT_ASSERT(group <= RegGroup::kMaxVirt);
+    return _passedOrder[size_t(group)].id;
+  }
+
+  //! Returns the mask of passed registers of the given `group`.
+  inline RegMask passedRegs(RegGroup group) const noexcept {
+    ASMJIT_ASSERT(group <= RegGroup::kMaxVirt);
+    return _passedRegs[size_t(group)];
+  }
+  //! Stores four packed 32-bit words (each holding four 8-bit register ids) as the passed order of `group`.
+  inline void _setPassedPacked(RegGroup group, uint32_t p0, uint32_t p1, uint32_t p2, uint32_t p3) noexcept {
+    ASMJIT_ASSERT(group <= RegGroup::kMaxVirt);
+
+    _passedOrder[group].packed[0] = p0;
+    _passedOrder[group].packed[1] = p1;
+    _passedOrder[group].packed[2] = p2;
+    _passedOrder[group].packed[3] = p3;
+  }
+
+  //! Resets the order and mask of passed registers.
+  inline void setPassedToNone(RegGroup group) noexcept {
+    ASMJIT_ASSERT(group <= RegGroup::kMaxVirt);
+
+    _setPassedPacked(group, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu);
+    _passedRegs[size_t(group)] = 0u;
+  }
+
+  //! Sets the order and mask of passed registers (up to 8 register ids, `0xFF` means that the slot is unused).
+  inline void setPassedOrder(RegGroup group, uint32_t a0, uint32_t a1 = 0xFF, uint32_t a2 = 0xFF, uint32_t a3 = 0xFF, uint32_t a4 = 0xFF, uint32_t a5 = 0xFF, uint32_t a6 = 0xFF, uint32_t a7 = 0xFF) noexcept {
+    ASMJIT_ASSERT(group <= RegGroup::kMaxVirt);
+
+    // NOTE: This should always be called with all arguments known at compile time, so even if it looks scary it
+    // should be translated into few instructions.
+    _setPassedPacked(group, Support::bytepack32_4x8(a0, a1, a2, a3),
+                            Support::bytepack32_4x8(a4, a5, a6, a7),
+                            0xFFFFFFFFu,
+                            0xFFFFFFFFu);
+
+    _passedRegs[group] = (a0 != 0xFF ? 1u << a0 : 0u) |
+                         (a1 != 0xFF ? 1u << a1 : 0u) |
+                         (a2 != 0xFF ? 1u << a2 : 0u) |
+                         (a3 != 0xFF ? 1u << a3 : 0u) |
+                         (a4 != 0xFF ? 1u << a4 : 0u) |
+                         (a5 != 0xFF ? 1u << a5 : 0u) |
+                         (a6 != 0xFF ? 1u << a6 : 0u) |
+                         (a7 != 0xFF ? 1u << a7 : 0u) ;
+  }
+
+  //! Returns preserved register mask of the given `group`.
+  inline RegMask preservedRegs(RegGroup group) const noexcept {
+    ASMJIT_ASSERT(group <= RegGroup::kMaxVirt);
+    return _preservedRegs[group];
+  }
+
+  //! Sets preserved register mask of the given `group`.
+  inline void setPreservedRegs(RegGroup group, RegMask regs) noexcept {
+    ASMJIT_ASSERT(group <= RegGroup::kMaxVirt);
+    _preservedRegs[group] = regs;
+  }
+
+  //! \}
+};
+
+//! Function signature.
+//!
+//! Contains information about function return type, count of arguments and their TypeIds. Function signature is
+//! a low level structure which doesn't contain platform specific or calling convention specific information.
+struct FuncSignature {
+  //! \name Constants
+  //! \{
+
+  enum : uint8_t {
+    //! Doesn't have variable number of arguments (`...`).
+    kNoVarArgs = 0xFFu
+  };
+
+  //! \}
+
+  //! \name Members
+  //! \{
+
+  //! Calling convention id.
+  CallConvId _ccId;
+  //! Count of arguments.
+  uint8_t _argCount;
+  //! Index of a first VA or `kNoVarArgs`.
+  uint8_t _vaIndex;
+  //! Return value TypeId.
+  TypeId _ret;
+  //! Function arguments TypeIds.
+  const TypeId* _args;
+
+  //! \}
+
+  //! \name Initialization & Reset
+  //! \{
+
+  //! Initializes the function signature (only the `args` pointer is stored, the array itself is not copied).
+  inline void init(CallConvId ccId, uint32_t vaIndex, TypeId ret, const TypeId* args, uint32_t argCount) noexcept {
+    ASMJIT_ASSERT(argCount <= 0xFF);
+
+    _ccId = ccId;
+    _argCount = uint8_t(argCount);
+    _vaIndex = uint8_t(vaIndex);
+    _ret = ret;
+    _args = args;
+  }
+  //! Resets the signature by zero-filling the whole struct.
+  inline void reset() noexcept { memset(this, 0, sizeof(*this)); }
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns the calling convention.
+  inline CallConvId callConvId() const noexcept { return _ccId; }
+  //! Sets the calling convention to `ccId`.
+  inline void setCallConvId(CallConvId ccId) noexcept { _ccId = ccId; }
+
+  //! Tests whether the function has variable number of arguments (...).
+  inline bool hasVarArgs() const noexcept { return _vaIndex != kNoVarArgs; }
+  //! Returns the variable arguments (...) index, `kNoVarArgs` if none.
+  inline uint32_t vaIndex() const noexcept { return _vaIndex; }
+  //! Sets the variable arguments (...) index to `index`.
+  inline void setVaIndex(uint32_t index) noexcept { _vaIndex = uint8_t(index); }
+  //! Resets the variable arguments index (making it a non-va function).
+  inline void resetVaIndex() noexcept { _vaIndex = kNoVarArgs; }
+
+  //! Returns the number of function arguments.
+  inline uint32_t argCount() const noexcept { return _argCount; }
+  //! Tests whether the function has a return value (its return type is not `TypeId::kVoid`).
+  inline bool hasRet() const noexcept { return _ret != TypeId::kVoid; }
+  //! Returns the return value type.
+  inline TypeId ret() const noexcept { return _ret; }
+
+  //! Returns the type of the argument at index `i`.
+  inline TypeId arg(uint32_t i) const noexcept {
+    ASMJIT_ASSERT(i < _argCount);
+    return _args[i];
+  }
+  //! Returns the array of function arguments' types.
+  inline const TypeId* args() const noexcept { return _args; }
+
+  //! \}
+};
+
+template<typename... RET_ARGS> // RET_ARGS lists the return type first, followed by the argument types.
+class FuncSignatureT : public FuncSignature {
+public:
+  inline FuncSignatureT(CallConvId ccId = CallConvId::kHost, uint32_t vaIndex = kNoVarArgs) noexcept {
+    static constexpr TypeId ret_args[] = { (TypeId(TypeUtils::TypeIdOfT<RET_ARGS>::kTypeId))... }; // One TypeId per RET_ARGS entry.
+    init(ccId, vaIndex, ret_args[0], ret_args + 1, uint32_t(ASMJIT_ARRAY_SIZE(ret_args) - 1)); // [0] is the return type, the rest are arguments.
+  }
+};
+
+//! Function signature builder (a `FuncSignature` whose `_args` points to its own `_builderArgList` storage).
+class FuncSignatureBuilder : public FuncSignature {
+public:
+  TypeId _builderArgList[Globals::kMaxFuncArgs];
+
+  //! \name Initialization & Reset
+  //! \{
+
+  inline FuncSignatureBuilder(CallConvId ccId = CallConvId::kHost, uint32_t vaIndex = kNoVarArgs) noexcept {
+    init(ccId, vaIndex, TypeId::kVoid, _builderArgList, 0); // Starts with a void return type and no arguments.
+  }
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Sets the return type to `retType`.
+  inline void setRet(TypeId retType) noexcept { _ret = retType; }
+  //! Sets the return type based on `T`.
+  template<typename T>
+  inline void setRetT() noexcept { setRet(TypeId(TypeUtils::TypeIdOfT<T>::kTypeId)); }
+
+  //! Sets the argument at index `index` to `argType` (asserts that `index` is below the current argument count).
+  inline void setArg(uint32_t index, TypeId argType) noexcept {
+    ASMJIT_ASSERT(index < _argCount);
+    _builderArgList[index] = argType;
+  }
+  //! Sets the argument at index `index` to the type based on `T`.
+  template<typename T>
+  inline void setArgT(uint32_t index) noexcept { setArg(index, TypeId(TypeUtils::TypeIdOfT<T>::kTypeId)); }
+
+  //! Appends an argument of `type` to the function prototype (asserts there is room for one more argument).
+  inline void addArg(TypeId type) noexcept {
+    ASMJIT_ASSERT(_argCount < Globals::kMaxFuncArgs);
+    _builderArgList[_argCount++] = type;
+  }
+  //! Appends an argument of type based on `T` to the function prototype.
+  template<typename T>
+  inline void addArgT() noexcept { addArg(TypeId(TypeUtils::TypeIdOfT<T>::kTypeId)); }
+
+  //! \}
+};
+
+//! Argument or return value (or its part) as defined by `FuncSignature`, but with register or stack address
+//! (and other metadata) assigned.
+struct FuncValue {
+  //! \name Constants
+  //! \{
+
+  enum Bits : uint32_t {
+    kTypeIdShift      = 0,             //!< TypeId shift.
+    kTypeIdMask       = 0x000000FFu,   //!< TypeId mask.
+
+    kFlagIsReg        = 0x00000100u,   //!< Passed by register.
+    kFlagIsStack      = 0x00000200u,   //!< Passed by stack.
+    kFlagIsIndirect   = 0x00000400u,   //!< Passed indirectly by reference (internally a pointer).
+    kFlagIsDone       = 0x00000800u,   //!< Used internally by arguments allocator.
+
+    kStackOffsetShift = 12,            //!< Stack offset shift.
+    kStackOffsetMask  = 0xFFFFF000u,   //!< Stack offset mask (must occupy MSB bits, overlaps the RegId and RegType bits).
+
+    kRegIdShift       = 16,            //!< RegId shift.
+    kRegIdMask        = 0x00FF0000u,   //!< RegId mask.
+
+    kRegTypeShift     = 24,            //!< RegType shift.
+    kRegTypeMask      = 0xFF000000u    //!< RegType mask.
+  };
+
+  //! \}
+
+  //! \name Members
+  //! \{
+
+  uint32_t _data;
+
+  //! \}
+
+  //! \name Initialization & Reset
+  //!
+  //! These initialize the whole `FuncValue` to either register or stack. Useful when you know all of these
+  //! properties and just want to set it up.
+  //!
+  //! \{
+
+  //! Initializes the `typeId` of this `FuncValue`.
+  inline void initTypeId(TypeId typeId) noexcept {
+    _data = uint32_t(typeId) << kTypeIdShift;
+  }
+  //! Initializes this `FuncValue` as a register of the given `regType` and `regId`, with `typeId` and extra `flags`.
+  inline void initReg(RegType regType, uint32_t regId, TypeId typeId, uint32_t flags = 0) noexcept {
+    _data = (uint32_t(regType) << kRegTypeShift) | (regId << kRegIdShift) | (uint32_t(typeId) << kTypeIdShift) | kFlagIsReg | flags;
+  }
+  //! Initializes this `FuncValue` as a stack slot at the given `offset`, with `typeId`.
+  inline void initStack(int32_t offset, TypeId typeId) noexcept {
+    _data = (uint32_t(offset) << kStackOffsetShift) | (uint32_t(typeId) << kTypeIdShift) | kFlagIsStack;
+  }
+
+  //! Resets the value to its unassigned state.
+  inline void reset() noexcept { _data = 0; }
+
+  //! \}
+
+  //! \name Assign
+  //!
+  //! These initialize only part of `FuncValue`, useful when building `FuncValue` incrementally. The caller
+  //! should first init the type-id by calling `initTypeId` and then continue building either register or stack.
+  //!
+  //! \{
+
+  inline void assignRegData(RegType regType, uint32_t regId) noexcept {
+    ASMJIT_ASSERT((_data & (kRegTypeMask | kRegIdMask)) == 0); // The register fields must not be set yet.
+    _data |= (uint32_t(regType) << kRegTypeShift) | (regId << kRegIdShift) | kFlagIsReg;
+  }
+  //! Assigns the stack `offset` and marks the value as passed by stack (the offset bits must not be set yet).
+  inline void assignStackOffset(int32_t offset) noexcept {
+    ASMJIT_ASSERT((_data & kStackOffsetMask) == 0);
+    _data |= (uint32_t(offset) << kStackOffsetShift) | kFlagIsStack;
+  }
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns true if the value is initialized (explicit bool cast).
+  inline explicit operator bool() const noexcept { return _data != 0; }
+  //! Replaces the bits selected by `mask` with `value` (`value` must already be shifted into its position).
+  inline void _replaceValue(uint32_t mask, uint32_t value) noexcept { _data = (_data & ~mask) | value; }
+
+  //! Tests whether the `FuncValue` has a flag `flag` set.
+  inline bool hasFlag(uint32_t flag) const noexcept { return Support::test(_data, flag); }
+  //! Adds `flags` to `FuncValue`.
+  inline void addFlags(uint32_t flags) noexcept { _data |= flags; }
+  //! Clears `flags` of `FuncValue`.
+  inline void clearFlags(uint32_t flags) noexcept { _data &= ~flags; }
+
+  //! Tests whether the value is initialized (i.e. contains a valid data).
+  inline bool isInitialized() const noexcept { return _data != 0; }
+  //! Tests whether the argument is passed by register.
+  inline bool isReg() const noexcept { return hasFlag(kFlagIsReg); }
+  //! Tests whether the argument is passed by stack.
+  inline bool isStack() const noexcept { return hasFlag(kFlagIsStack); }
+  //! Tests whether the argument has been assigned (checks the `kFlagIsReg | kFlagIsStack` flags).
+  inline bool isAssigned() const noexcept { return hasFlag(kFlagIsReg | kFlagIsStack); }
+  //! Tests whether the argument is passed through a pointer (used by WIN64 to pass XMM|YMM|ZMM).
+  inline bool isIndirect() const noexcept { return hasFlag(kFlagIsIndirect); }
+
+  //! Tests whether the argument was already processed (used internally).
+  inline bool isDone() const noexcept { return hasFlag(kFlagIsDone); }
+
+  //! Returns a register type of the register used to pass function argument or return value.
+  inline RegType regType() const noexcept { return RegType((_data & kRegTypeMask) >> kRegTypeShift); }
+  //! Sets a register type of the register used to pass function argument or return value.
+  inline void setRegType(RegType regType) noexcept { _replaceValue(kRegTypeMask, uint32_t(regType) << kRegTypeShift); }
+
+  //! Returns a physical id of the register used to pass function argument or return value.
+  inline uint32_t regId() const noexcept { return (_data & kRegIdMask) >> kRegIdShift; }
+  //! Sets a physical id of the register used to pass function argument or return value.
+  inline void setRegId(uint32_t regId) noexcept { _replaceValue(kRegIdMask, regId << kRegIdShift); }
+
+  //! Returns a stack offset of this argument.
+  inline int32_t stackOffset() const noexcept { return int32_t(_data & kStackOffsetMask) >> kStackOffsetShift; }
+  //! Sets a stack offset of this argument.
+  inline void setStackOffset(int32_t offset) noexcept { _replaceValue(kStackOffsetMask, uint32_t(offset) << kStackOffsetShift); }
+
+  //! Tests whether the argument or return value has associated `TypeId`.
+  inline bool hasTypeId() const noexcept { return Support::test(_data, kTypeIdMask); }
+  //! Returns a TypeId of this argument or return value.
+  inline TypeId typeId() const noexcept { return TypeId((_data & kTypeIdMask) >> kTypeIdShift); }
+  //! Sets a TypeId of this argument or return value.
+  inline void setTypeId(TypeId typeId) noexcept { _replaceValue(kTypeIdMask, uint32_t(typeId) << kTypeIdShift); }
+
+  //! \}
+};
+
+//! Contains multiple `FuncValue` instances in an array so functions that use multiple registers for arguments or
+//! return values can represent all inputs and outputs.
+struct FuncValuePack {
+public:
+  //! \name Members
+  //! \{
+
+  //! Values of the pack.
+  FuncValue _values[Globals::kMaxValuePack];
+
+  //! \}
+
+  //! \name Initialization & Reset
+  //! \{
+
+  //! Resets all values in the pack.
+  inline void reset() noexcept {
+    for (size_t i = 0; i < Globals::kMaxValuePack; i++)
+      _values[i].reset();
+  }
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Calculates how many values are in the pack, checking for non-values from the end.
+  inline uint32_t count() const noexcept {
+    uint32_t n = Globals::kMaxValuePack;
+    while (n && !_values[n - 1])
+      n--;
+    return n;
+  }
+  //! Returns the array of values stored in the pack.
+  inline FuncValue* values() noexcept { return _values; }
+  inline const FuncValue* values() const noexcept { return _values; }
+  //! Resets the value at the given `index`.
+  inline void resetValue(size_t index) noexcept {
+    ASMJIT_ASSERT(index < Globals::kMaxValuePack);
+    _values[index].reset();
+  }
+  //! Tests whether the value at the given `index` is initialized (const, so it is usable on a const pack too).
+  inline bool hasValue(size_t index) const noexcept {
+    ASMJIT_ASSERT(index < Globals::kMaxValuePack);
+    return _values[index].isInitialized();
+  }
+  //! Initializes the value at `index` as the physical register `reg` with the given `typeId`.
+  inline void assignReg(size_t index, const BaseReg& reg, TypeId typeId = TypeId::kVoid) noexcept {
+    ASMJIT_ASSERT(index < Globals::kMaxValuePack);
+    ASMJIT_ASSERT(reg.isPhysReg());
+    _values[index].initReg(reg.type(), reg.id(), typeId);
+  }
+  //! Initializes the value at `index` as a register of `regType` and `regId` with the given `typeId`.
+  inline void assignReg(size_t index, RegType regType, uint32_t regId, TypeId typeId = TypeId::kVoid) noexcept {
+    ASMJIT_ASSERT(index < Globals::kMaxValuePack);
+    _values[index].initReg(regType, regId, typeId);
+  }
+  //! Initializes the value at `index` as a stack slot at `offset` with the given `typeId`.
+  inline void assignStack(size_t index, int32_t offset, TypeId typeId = TypeId::kVoid) noexcept {
+    ASMJIT_ASSERT(index < Globals::kMaxValuePack);
+    _values[index].initStack(offset, typeId);
+  }
+  //! Returns the value at the given `index`.
+  inline FuncValue& operator[](size_t index) {
+    ASMJIT_ASSERT(index < Globals::kMaxValuePack);
+    return _values[index];
+  }
+  //! Returns the value at the given `index` (const).
+  inline const FuncValue& operator[](size_t index) const {
+    ASMJIT_ASSERT(index < Globals::kMaxValuePack);
+    return _values[index];
+  }
+
+  //! \}
+};
+
+//! Attributes are designed in a way that all are initially false, and user or \ref FuncFrame finalizer adds
+//! them when necessary.
+enum class FuncAttributes : uint32_t {
+  //! No attributes.
+  kNoAttributes = 0,
+
+  //! Function has variable number of arguments.
+  kHasVarArgs = 0x00000001u,
+  //! Preserve frame pointer (don't omit FP).
+  kHasPreservedFP = 0x00000010u,
+  //! Function calls other functions (is not leaf).
+  kHasFuncCalls = 0x00000020u,
+  //! Function has aligned save/restore of vector registers.
+  kAlignedVecSR = 0x00000040u,
+  //! FuncFrame is finalized and can be used by prolog/epilog inserter (PEI).
+  kIsFinalized = 0x00000800u,
+
+  // X86 Specific Attributes
+  // -----------------------
+
+  //! Enables the use of AVX within the function's body, prolog, and epilog (X86).
+  //!
+  //! This flag instructs prolog and epilog emitter to use AVX instead of SSE for manipulating XMM registers.
+  kX86_AVXEnabled = 0x00010000u,
+
+  //! Enables the use of AVX-512 within the function's body, prolog, and epilog (X86).
+  //!
+  //! This flag instructs Compiler register allocator to use additional 16 registers introduced by AVX-512.
+  //! Additionally, if the function saves full width of ZMM registers (custom calling conventions only) then
+  //! the prolog/epilog inserter would use AVX-512 move instructions to emit the save and restore sequence.
+  kX86_AVX512Enabled = 0x00020000u,
+
+  //! This flag instructs the epilog writer to emit EMMS instruction before RET (X86).
+  kX86_MMXCleanup = 0x00040000u,
+
+  //! This flag instructs the epilog writer to emit VZEROUPPER instruction before RET (X86).
+  kX86_AVXCleanup = 0x00080000u
+};
+ASMJIT_DEFINE_ENUM_FLAGS(FuncAttributes)
+
+//! Function detail - \ref CallConv and expanded \ref FuncSignature.
+//!
+//! Function detail is architecture and OS dependent representation of a function. It contains a materialized
+//! calling convention and expanded function signature so all arguments have assigned either register type/id
+//! or stack address.
+class FuncDetail {
+public:
+  //! \name Constants
+  //! \{
+
+  enum : uint8_t {
+    //! Doesn't have variable number of arguments (`...`).
+    kNoVarArgs = 0xFFu
+  };
+
+  //! \}
+
+  //! \name Members
+  //! \{
+
+  //! Calling convention.
+  CallConv _callConv;
+  //! Number of function arguments.
+  uint8_t _argCount;
+  //! Variable arguments index of `kNoVarArgs`.
+  uint8_t _vaIndex;
+  //! Reserved for future use.
+  uint16_t _reserved;
+  //! Registers that contain arguments.
+  Support::Array<RegMask, Globals::kNumVirtGroups> _usedRegs;
+  //! Size of arguments passed by stack.
+  uint32_t _argStackSize;
+  //! Function return value(s).
+  FuncValuePack _rets;
+  //! Function arguments.
+  FuncValuePack _args[Globals::kMaxFuncArgs];
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  inline FuncDetail() noexcept { reset(); }
+  inline FuncDetail(const FuncDetail& other) noexcept = default;
+
+  //! Initializes this `FuncDetail` to the given signature.
+  ASMJIT_API Error init(const FuncSignature& signature, const Environment& environment) noexcept;
+  inline void reset() noexcept { memset(this, 0, sizeof(*this)); }
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns the function's calling convention, see `CallConv`.
+  inline const CallConv& callConv() const noexcept { return _callConv; }
+
+  //! Returns the associated calling convention flags, see `CallConv::Flags`.
+  inline CallConvFlags flags() const noexcept { return _callConv.flags(); }
+  //! Checks whether a CallConv `flag` is set, see `CallConv::Flags`.
+  inline bool hasFlag(CallConvFlags ccFlag) const noexcept { return _callConv.hasFlag(ccFlag); }
+
+  //! Tests whether the function has a return value.
+  inline bool hasRet() const noexcept { return bool(_rets[0]); }
+  //! Returns the number of function arguments.
+  inline uint32_t argCount() const noexcept { return _argCount; }
+
+  //! Returns function return values.
+  inline FuncValuePack& retPack() noexcept { return _rets; }
+  //! Returns function return values.
+  inline const FuncValuePack& retPack() const noexcept { return _rets; }
+
+  //! Returns a function return value associated with the given `valueIndex`.
+  inline FuncValue& ret(size_t valueIndex = 0) noexcept { return _rets[valueIndex]; }
+  //! Returns a function return value associated with the given `valueIndex` (const).
+  inline const FuncValue& ret(size_t valueIndex = 0) const noexcept { return _rets[valueIndex]; }
+
+  //! Returns function argument packs array.
+  inline FuncValuePack* argPacks() noexcept { return _args; }
+  //! Returns function argument packs array (const).
+  inline const FuncValuePack* argPacks() const noexcept { return _args; }
+
+  //! Returns function argument pack at the given `argIndex`.
+  inline FuncValuePack& argPack(size_t argIndex) noexcept {
+    ASMJIT_ASSERT(argIndex < Globals::kMaxFuncArgs);
+    return _args[argIndex];
+  }
+
+  //! Returns function argument pack at the given `argIndex` (const).
+  inline const FuncValuePack& argPack(size_t argIndex) const noexcept {
+    ASMJIT_ASSERT(argIndex < Globals::kMaxFuncArgs);
+    return _args[argIndex];
+  }
+
+  //! Returns an argument at `valueIndex` from the argument pack at the given `argIndex`.
+  inline FuncValue& arg(size_t argIndex, size_t valueIndex = 0) noexcept {
+    ASMJIT_ASSERT(argIndex < Globals::kMaxFuncArgs);
+    return _args[argIndex][valueIndex];
+  }
+
+  //! Returns an argument at `valueIndex` from the argument pack at the given `argIndex` (const).
+  inline const FuncValue& arg(size_t argIndex, size_t valueIndex = 0) const noexcept {
+    ASMJIT_ASSERT(argIndex < Globals::kMaxFuncArgs);
+    return _args[argIndex][valueIndex];
+  }
+
+  //! Resets an argument at the given `argIndex`.
+  //!
+  //! If the argument is a parameter pack (has multiple values) all values are reset.
+  inline void resetArg(size_t argIndex) noexcept {
+    ASMJIT_ASSERT(argIndex < Globals::kMaxFuncArgs);
+    _args[argIndex].reset();
+  }
+
+  //! Tests whether the function has variable arguments.
+  inline bool hasVarArgs() const noexcept { return _vaIndex != kNoVarArgs; }
+  //! Returns an index of a first variable argument.
+  inline uint32_t vaIndex() const noexcept { return _vaIndex; }
+
+  //! Tests whether the function passes one or more argument by stack.
+  inline bool hasStackArgs() const noexcept { return _argStackSize != 0; }
+  //! Returns stack size needed for function arguments passed on the stack.
+  inline uint32_t argStackSize() const noexcept { return _argStackSize; }
+
+  //! Returns red zone size.
+  inline uint32_t redZoneSize() const noexcept { return _callConv.redZoneSize(); }
+  //! Returns spill zone size.
+  inline uint32_t spillZoneSize() const noexcept { return _callConv.spillZoneSize(); }
+  //! Returns natural stack alignment.
+  inline uint32_t naturalStackAlignment() const noexcept { return _callConv.naturalStackAlignment(); }
+
+  //! Returns a mask of all passed registers of the given register `group`.
+  inline RegMask passedRegs(RegGroup group) const noexcept { return _callConv.passedRegs(group); }
+  //! Returns a mask of all preserved registers of the given register `group`.
+  inline RegMask preservedRegs(RegGroup group) const noexcept { return _callConv.preservedRegs(group); }
+
+  //! Returns a mask of all used registers of the given register `group`.
+  inline RegMask usedRegs(RegGroup group) const noexcept {
+    ASMJIT_ASSERT(group <= RegGroup::kMaxVirt);
+    return _usedRegs[size_t(group)];
+  }
+
+  //! Adds `regs` to the mask of used registers of the given register `group`.
+  inline void addUsedRegs(RegGroup group, RegMask regs) noexcept {
+    ASMJIT_ASSERT(group <= RegGroup::kMaxVirt);
+    _usedRegs[size_t(group)] |= regs;
+  }
+
+  //! \}
+};
+
+//! Function frame.
+//!
+//! Function frame is used directly by prolog and epilog insertion (PEI) utils. It provides information necessary to
+//! insert a proper and ABI conforming prolog and epilog. Function frame calculation is based on `CallConv` and
+//! other function attributes.
+//!
+//! SSE vs AVX vs AVX-512
+//! ---------------------
+//!
+//! Function frame provides a way to tell prolog/epilog inserter to use AVX instructions instead of SSE. Use
+//! `setAvxEnabled()` and `setAvx512Enabled()` to enable AVX and/or AVX-512, respectively. Enabling AVX-512
+//! is mostly for Compiler as it would use 32 SIMD registers instead of 16 when enabled.
+//!
+//! \note If your code uses AVX instructions and AVX is not enabled there would be a performance hit in case that
+//! some registers had to be saved/restored in function's prolog/epilog, respectively. Thus, it's recommended to
+//! always let the function frame know about the use of AVX.
+//!
+//! Function Frame Structure
+//! ------------------------
+//!
+//! Various properties can contribute to the size and structure of the function frame. The function frame in most
+//! cases won't use all of the properties illustrated (for example Spill Zone and Red Zone are never used together).
+//!
+//! ```
+//!   +-----------------------------+
+//!   | Arguments Passed by Stack   |
+//!   +-----------------------------+
+//!   | Spill Zone                  |
+//!   +-----------------------------+ <- Stack offset (args) starts from here.
+//!   | Return Address, if Pushed   |
+//!   +-----------------------------+ <- Stack pointer (SP) upon entry.
+//!   | Save/Restore Stack.         |
+//!   +-----------------------------+-----------------------------+
+//!   | Local Stack                 |                             |
+//!   +-----------------------------+          Final Stack        |
+//!   | Call Stack                  |                             |
+//!   +-----------------------------+-----------------------------+ <- SP after prolog.
+//!   | Red Zone                    |
+//!   +-----------------------------+
+//! ```
+class FuncFrame {
+public:
+  //! \name Constants
+  //! \{
+
+  enum : uint32_t {
+    //! Tag used to inform that some offset is invalid.
+    kTagInvalidOffset = 0xFFFFFFFFu
+  };
+
+  //! \}
+
+  //! \name Members
+  //! \{
+
+  //! Function attributes.
+  FuncAttributes _attributes;
+
+  //! Target architecture.
+  Arch _arch;
+  //! SP register ID (to access call stack and local stack).
+  uint8_t _spRegId;
+  //! SA register ID (to access stack arguments).
+  uint8_t _saRegId;
+
+  //! Red zone size (copied from CallConv).
+  uint8_t _redZoneSize;
+  //! Spill zone size (copied from CallConv).
+  uint8_t _spillZoneSize;
+  //! Natural stack alignment (copied from CallConv).
+  uint8_t _naturalStackAlignment;
+  //! Minimum stack alignment to turn on dynamic alignment.
+  uint8_t _minDynamicAlignment;
+
+  //! Call stack alignment.
+  uint8_t _callStackAlignment;
+  //! Local stack alignment.
+  uint8_t _localStackAlignment;
+  //! Final stack alignment.
+  uint8_t _finalStackAlignment;
+
+  //! Adjustment of the stack before returning (X86-STDCALL).
+  uint16_t _calleeStackCleanup;
+
+  //! Call stack size.
+  uint32_t _callStackSize;
+  //! Local stack size.
+  uint32_t _localStackSize;
+  //! Final stack size (sum of call stack and local stack).
+  uint32_t _finalStackSize;
+
+  //! Local stack offset (non-zero only if call stack is used).
+  uint32_t _localStackOffset;
+  //! Offset relative to SP that contains previous SP (before alignment).
+  uint32_t _daOffset;
+  //! Offset of the first stack argument relative to SP.
+  uint32_t _saOffsetFromSP;
+  //! Offset of the first stack argument relative to SA (_saRegId or FP).
+  uint32_t _saOffsetFromSA;
+
+  //! Local stack adjustment in prolog/epilog.
+  uint32_t _stackAdjustment;
+
+  //! Registers that are dirty.
+  Support::Array<RegMask, Globals::kNumVirtGroups> _dirtyRegs;
+  //! Registers that must be preserved (copied from CallConv).
+  Support::Array<RegMask, Globals::kNumVirtGroups> _preservedRegs;
+  //! Size to save/restore per register group.
+  Support::Array<uint8_t, Globals::kNumVirtGroups> _saveRestoreRegSize;
+  //! Alignment of save/restore area per register group.
+  Support::Array<uint8_t, Globals::kNumVirtGroups> _saveRestoreAlignment;
+
+  //! Stack size required to save registers with push/pop.
+  uint16_t _pushPopSaveSize;
+  //! Stack size required to save extra registers that cannot use push/pop.
+  uint16_t _extraRegSaveSize;
+  //! Offset where registers saved/restored via push/pop are stored
+  uint32_t _pushPopSaveOffset;
+  //! Offset where extra registers that cannot use push/pop are stored.
+  uint32_t _extraRegSaveOffset;
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  inline FuncFrame() noexcept { reset(); }
+  inline FuncFrame(const FuncFrame& other) noexcept = default;
+
+  ASMJIT_API Error init(const FuncDetail& func) noexcept;
+
+  inline void reset() noexcept {
+    memset(this, 0, sizeof(FuncFrame));
+    _spRegId = BaseReg::kIdBad;
+    _saRegId = BaseReg::kIdBad;
+    _daOffset = kTagInvalidOffset;
+  }
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns the target architecture of the function frame.
+  inline Arch arch() const noexcept { return _arch; }
+
+  //! Returns function frame attributes, see `Attributes`.
+  inline FuncAttributes attributes() const noexcept { return _attributes; }
+  //! Checks whether the FuncFrame contains an attribute `attr`.
+  inline bool hasAttribute(FuncAttributes attr) const noexcept { return Support::test(_attributes, attr); }
+  //! Adds attributes `attrs` to the FuncFrame.
+  inline void addAttributes(FuncAttributes attrs) noexcept { _attributes |= attrs; }
+  //! Clears attributes `attrs` from the FuncFrame.
+  inline void clearAttributes(FuncAttributes attrs) noexcept { _attributes &= ~attrs; }
+
+  //! Tests whether the function has variable number of arguments.
+  inline bool hasVarArgs() const noexcept { return hasAttribute(FuncAttributes::kHasVarArgs); }
+  //! Sets the variable arguments flag.
+  inline void setVarArgs() noexcept { addAttributes(FuncAttributes::kHasVarArgs); }
+  //! Resets variable arguments flag.
+  inline void resetVarArgs() noexcept { clearAttributes(FuncAttributes::kHasVarArgs); }
+
+  //! Tests whether the function preserves frame pointer (EBP|ESP on X86).
+  inline bool hasPreservedFP() const noexcept { return hasAttribute(FuncAttributes::kHasPreservedFP); }
+  //! Enables preserved frame pointer.
+  inline void setPreservedFP() noexcept { addAttributes(FuncAttributes::kHasPreservedFP); }
+  //! Disables preserved frame pointer.
+  inline void resetPreservedFP() noexcept { clearAttributes(FuncAttributes::kHasPreservedFP); }
+
+  //! Tests whether the function calls other functions.
+  inline bool hasFuncCalls() const noexcept { return hasAttribute(FuncAttributes::kHasFuncCalls); }
+  //! Adds the `FuncAttributes::kHasFuncCalls` attribute to the FuncFrame.
+  inline void setFuncCalls() noexcept { addAttributes(FuncAttributes::kHasFuncCalls); }
+  //! Clears the `FuncAttributes::kHasFuncCalls` attribute from the FuncFrame.
+  inline void resetFuncCalls() noexcept { clearAttributes(FuncAttributes::kHasFuncCalls); }
+
+  //! Tests whether the function has AVX enabled.
+  inline bool isAvxEnabled() const noexcept { return hasAttribute(FuncAttributes::kX86_AVXEnabled); }
+  //! Enables AVX use.
+  inline void setAvxEnabled() noexcept { addAttributes(FuncAttributes::kX86_AVXEnabled); }
+  //! Disables AVX use.
+  inline void resetAvxEnabled() noexcept { clearAttributes(FuncAttributes::kX86_AVXEnabled); }
+
+  //! Tests whether the function has AVX-512 enabled.
+  inline bool isAvx512Enabled() const noexcept { return hasAttribute(FuncAttributes::kX86_AVX512Enabled); }
+  //! Enables AVX-512 use.
+  inline void setAvx512Enabled() noexcept { addAttributes(FuncAttributes::kX86_AVX512Enabled); }
+  //! Disables AVX-512 use.
+  inline void resetAvx512Enabled() noexcept { clearAttributes(FuncAttributes::kX86_AVX512Enabled); }
+
+  //! Tests whether the function has MMX cleanup - 'emms' instruction in epilog.
+  inline bool hasMmxCleanup() const noexcept { return hasAttribute(FuncAttributes::kX86_MMXCleanup); }
+  //! Enables MMX cleanup.
+  inline void setMmxCleanup() noexcept { addAttributes(FuncAttributes::kX86_MMXCleanup); }
+  //! Disables MMX cleanup.
+  inline void resetMmxCleanup() noexcept { clearAttributes(FuncAttributes::kX86_MMXCleanup); }
+
+  //! Tests whether the function has AVX cleanup - 'vzeroupper' instruction in epilog.
+  inline bool hasAvxCleanup() const noexcept { return hasAttribute(FuncAttributes::kX86_AVXCleanup); }
+  //! Enables AVX cleanup.
+  inline void setAvxCleanup() noexcept { addAttributes(FuncAttributes::kX86_AVXCleanup); }
+  //! Disables AVX cleanup.
+  inline void resetAvxCleanup() noexcept { clearAttributes(FuncAttributes::kX86_AVXCleanup); }
+
+  //! Tests whether the function uses call stack.
+  inline bool hasCallStack() const noexcept { return _callStackSize != 0; }
+  //! Tests whether the function uses local stack.
+  inline bool hasLocalStack() const noexcept { return _localStackSize != 0; }
+  //! Tests whether vector registers can be saved and restored by using aligned reads and writes.
+  inline bool hasAlignedVecSR() const noexcept { return hasAttribute(FuncAttributes::kAlignedVecSR); }
+  //! Tests whether the function has to align stack dynamically.
+  inline bool hasDynamicAlignment() const noexcept { return _finalStackAlignment >= _minDynamicAlignment; }
+
+  //! Tests whether the calling convention specifies 'RedZone'.
+  inline bool hasRedZone() const noexcept { return _redZoneSize != 0; }
+  //! Tests whether the calling convention specifies 'SpillZone'.
+  inline bool hasSpillZone() const noexcept { return _spillZoneSize != 0; }
+
+  //! Returns the size of 'RedZone'.
+  inline uint32_t redZoneSize() const noexcept { return _redZoneSize; }
+  //! Returns the size of 'SpillZone'.
+  inline uint32_t spillZoneSize() const noexcept { return _spillZoneSize; }
+  //! Returns natural stack alignment (guaranteed stack alignment upon entry).
+  inline uint32_t naturalStackAlignment() const noexcept { return _naturalStackAlignment; }
+  //! Returns the minimum stack alignment required to turn on dynamic alignment.
+  inline uint32_t minDynamicAlignment() const noexcept { return _minDynamicAlignment; }
+
+  //! Tests whether the callee must adjust SP before returning (X86-STDCALL only)
+  inline bool hasCalleeStackCleanup() const noexcept { return _calleeStackCleanup != 0; }
+  //! Returns how many bytes of the stack the callee must adjust before returning (X86-STDCALL only).
+  inline uint32_t calleeStackCleanup() const noexcept { return _calleeStackCleanup; }
+
+  //! Returns call stack alignment.
+  inline uint32_t callStackAlignment() const noexcept { return _callStackAlignment; }
+  //! Returns local stack alignment.
+  inline uint32_t localStackAlignment() const noexcept { return _localStackAlignment; }
+  //! Returns final stack alignment (the maximum value of call, local, and natural stack alignments).
+  inline uint32_t finalStackAlignment() const noexcept { return _finalStackAlignment; }
+
+  //! Sets call stack alignment.
+  //!
+  //! \note This also updates the final stack alignment.
+  inline void setCallStackAlignment(uint32_t alignment) noexcept {
+    _callStackAlignment = uint8_t(alignment);
+    _finalStackAlignment = Support::max(_naturalStackAlignment, _callStackAlignment, _localStackAlignment);
+  }
+
+  //! Sets local stack alignment.
+  //!
+  //! \note This also updates the final stack alignment.
+  inline void setLocalStackAlignment(uint32_t value) noexcept {
+    _localStackAlignment = uint8_t(value);
+    _finalStackAlignment = Support::max(_naturalStackAlignment, _callStackAlignment, _localStackAlignment);
+  }
+
+  //! Combines call stack alignment with `alignment`, updating it to the greater value.
+  //!
+  //! \note This also updates the final stack alignment.
+  inline void updateCallStackAlignment(uint32_t alignment) noexcept {
+    _callStackAlignment = uint8_t(Support::max<uint32_t>(_callStackAlignment, alignment));
+    _finalStackAlignment = Support::max(_finalStackAlignment, _callStackAlignment);
+  }
+
+  //! Combines local stack alignment with `alignment`, updating it to the greater value.
+  //!
+  //! \note This also updates the final stack alignment.
+  inline void updateLocalStackAlignment(uint32_t alignment) noexcept {
+    _localStackAlignment = uint8_t(Support::max<uint32_t>(_localStackAlignment, alignment));
+    _finalStackAlignment = Support::max(_finalStackAlignment, _localStackAlignment);
+  }
+
+  //! Returns call stack size.
+  inline uint32_t callStackSize() const noexcept { return _callStackSize; }
+  //! Returns local stack size.
+  inline uint32_t localStackSize() const noexcept { return _localStackSize; }
+
+  //! Sets call stack size.
+  inline void setCallStackSize(uint32_t size) noexcept { _callStackSize = size; }
+  //! Sets local stack size.
+  inline void setLocalStackSize(uint32_t size) noexcept { _localStackSize = size; }
+
+  //! Combines call stack size with `size`, updating it to the greater value.
+  inline void updateCallStackSize(uint32_t size) noexcept { _callStackSize = Support::max(_callStackSize, size); }
+  //! Combines local stack size with `size`, updating it to the greater value.
+  inline void updateLocalStackSize(uint32_t size) noexcept { _localStackSize = Support::max(_localStackSize, size); }
+
+  //! Returns final stack size (only valid after the FuncFrame is finalized).
+  inline uint32_t finalStackSize() const noexcept { return _finalStackSize; }
+
+  //! Returns an offset to access the local stack (non-zero only if call stack is used).
+  inline uint32_t localStackOffset() const noexcept { return _localStackOffset; }
+
+  //! Tests whether the function prolog/epilog requires a memory slot for storing unaligned SP.
+  inline bool hasDAOffset() const noexcept { return _daOffset != kTagInvalidOffset; }
+  //! Returns a memory offset used to store DA (dynamic alignment) slot (relative to SP).
+  inline uint32_t daOffset() const noexcept { return _daOffset; }
+
+  inline uint32_t saOffset(uint32_t regId) const noexcept {
+    return regId == _spRegId ? saOffsetFromSP()
+                             : saOffsetFromSA();
+  }
+
+  inline uint32_t saOffsetFromSP() const noexcept { return _saOffsetFromSP; }
+  inline uint32_t saOffsetFromSA() const noexcept { return _saOffsetFromSA; }
+
+  //! Returns mask of registers of the given register `group` that are modified by the function. The engine would
+  //! then calculate which registers must be saved & restored by the function by using the data provided by the
+  //! calling convention.
+  inline RegMask dirtyRegs(RegGroup group) const noexcept {
+    ASMJIT_ASSERT(group <= RegGroup::kMaxVirt);
+    return _dirtyRegs[group];
+  }
+
+  //! Sets which registers (as a mask) are modified by the function.
+  //!
+  //! \remarks Please note that this will completely overwrite the existing register mask, use `addDirtyRegs()`
+  //! to modify the existing register mask.
+  inline void setDirtyRegs(RegGroup group, RegMask regs) noexcept {
+    ASMJIT_ASSERT(group <= RegGroup::kMaxVirt);
+    _dirtyRegs[group] = regs;
+  }
+
+  //! Adds which registers (as a mask) are modified by the function.
+  inline void addDirtyRegs(RegGroup group, RegMask regs) noexcept {
+    ASMJIT_ASSERT(group <= RegGroup::kMaxVirt);
+    _dirtyRegs[group] |= regs;
+  }
+
+  //! \overload
+  inline void addDirtyRegs(const BaseReg& reg) noexcept {
+    ASMJIT_ASSERT(reg.id() < Globals::kMaxPhysRegs);
+    addDirtyRegs(reg.group(), Support::bitMask(reg.id()));
+  }
+
+  //! \overload
+  template<typename... Args>
+  inline void addDirtyRegs(const BaseReg& reg, Args&&... args) noexcept {
+    addDirtyRegs(reg);
+    addDirtyRegs(std::forward<Args>(args)...);
+  }
+
+  inline void setAllDirty() noexcept {
+    for (size_t i = 0; i < ASMJIT_ARRAY_SIZE(_dirtyRegs); i++)
+      _dirtyRegs[i] = 0xFFFFFFFFu;
+  }
+
+  inline void setAllDirty(RegGroup group) noexcept {
+    ASMJIT_ASSERT(group <= RegGroup::kMaxVirt);
+    _dirtyRegs[group] = 0xFFFFFFFFu;
+  }
+
+  //! Returns a calculated mask of registers of the given `group` that will be saved and restored in the function's
+  //! prolog and epilog, respectively. The register mask is calculated from both `dirtyRegs` (provided by user) and
+  //! `preservedRegs` (provided by the calling convention).
+  inline RegMask savedRegs(RegGroup group) const noexcept {
+    ASMJIT_ASSERT(group <= RegGroup::kMaxVirt);
+    return _dirtyRegs[group] & _preservedRegs[group];
+  }
+
+  //! Returns the mask of preserved registers of the given register `group`.
+  //!
+  //! Preserved registers are those that must survive the function call unmodified. The function can only modify
+  //! preserved registers if they are saved and restored in function's prolog and epilog, respectively.
+  inline RegMask preservedRegs(RegGroup group) const noexcept {
+    ASMJIT_ASSERT(group <= RegGroup::kMaxVirt);
+    return _preservedRegs[group];
+  }
+
+  inline uint32_t saveRestoreRegSize(RegGroup group) const noexcept {
+    ASMJIT_ASSERT(group <= RegGroup::kMaxVirt);
+    return _saveRestoreRegSize[group];
+  }
+
+  inline uint32_t saveRestoreAlignment(RegGroup group) const noexcept {
+    ASMJIT_ASSERT(group <= RegGroup::kMaxVirt);
+    return _saveRestoreAlignment[group];
+  }
+
+  inline bool hasSARegId() const noexcept { return _saRegId != BaseReg::kIdBad; }
+  inline uint32_t saRegId() const noexcept { return _saRegId; }
+  inline void setSARegId(uint32_t regId) { _saRegId = uint8_t(regId); }
+  inline void resetSARegId() { setSARegId(BaseReg::kIdBad); }
+
+  //! Returns stack size required to save/restore registers via push/pop.
+  inline uint32_t pushPopSaveSize() const noexcept { return _pushPopSaveSize; }
+  //! Returns an offset to the stack where registers are saved via push/pop.
+  inline uint32_t pushPopSaveOffset() const noexcept { return _pushPopSaveOffset; }
+
+  //! Returns stack size required to save/restore extra registers that don't use push/pop.
+  //!
+  //! \note On X86 this covers all registers except GP registers, on other architectures it can be always
+  //! zero (for example AArch64 saves all registers via push/pop like instructions, so this would be zero).
+  inline uint32_t extraRegSaveSize() const noexcept { return _extraRegSaveSize; }
+  //! Returns an offset to the stack where extra registers are saved.
+  inline uint32_t extraRegSaveOffset() const noexcept { return _extraRegSaveOffset; }
+
+  //! Tests whether the functions contains stack adjustment.
+  inline bool hasStackAdjustment() const noexcept { return _stackAdjustment != 0; }
+  //! Returns function's stack adjustment used in function's prolog and epilog.
+  //!
+  //! If the returned value is zero it means that the stack is not adjusted. This can mean both that the stack
+  //! is not used and/or the stack is only adjusted by instructions that push/pop registers into/from stack.
+  inline uint32_t stackAdjustment() const noexcept { return _stackAdjustment; }
+
+  //! \}
+
+  //! \name Finalization
+  //! \{
+
+  ASMJIT_API Error finalize() noexcept;
+
+  //! \}
+};
+
+//! A helper class that can be used to assign a physical register for each function argument. Use with
+//! `BaseEmitter::emitArgsAssignment()`.
+class FuncArgsAssignment {
+public:
+  //! \name Members
+  //! \{
+
+  //! Function detail.
+  const FuncDetail* _funcDetail;
+  //! Register that can be used to access arguments passed by stack.
+  uint8_t _saRegId;
+  //! Reserved for future use.
+  uint8_t _reserved[3];
+  //! Mapping of each function argument.
+  FuncValuePack _argPacks[Globals::kMaxFuncArgs];
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  inline explicit FuncArgsAssignment(const FuncDetail* fd = nullptr) noexcept { reset(fd); }
+
+  inline FuncArgsAssignment(const FuncArgsAssignment& other) noexcept {
+    memcpy(this, &other, sizeof(*this));
+  }
+
+  inline void reset(const FuncDetail* fd = nullptr) noexcept {
+    _funcDetail = fd;
+    _saRegId = uint8_t(BaseReg::kIdBad);
+    memset(_reserved, 0, sizeof(_reserved));
+    memset(_argPacks, 0, sizeof(_argPacks));
+  }
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  inline const FuncDetail* funcDetail() const noexcept { return _funcDetail; }
+  inline void setFuncDetail(const FuncDetail* fd) noexcept { _funcDetail = fd; }
+
+  inline bool hasSARegId() const noexcept { return _saRegId != BaseReg::kIdBad; }
+  inline uint32_t saRegId() const noexcept { return _saRegId; }
+  inline void setSARegId(uint32_t regId) { _saRegId = uint8_t(regId); }
+  inline void resetSARegId() { _saRegId = uint8_t(BaseReg::kIdBad); }
+
+  inline FuncValue& arg(size_t argIndex, size_t valueIndex) noexcept {
+    ASMJIT_ASSERT(argIndex < ASMJIT_ARRAY_SIZE(_argPacks));
+    return _argPacks[argIndex][valueIndex];
+  }
+  inline const FuncValue& arg(size_t argIndex, size_t valueIndex) const noexcept {
+    ASMJIT_ASSERT(argIndex < ASMJIT_ARRAY_SIZE(_argPacks));
+    return _argPacks[argIndex][valueIndex];
+  }
+
+  inline bool isAssigned(size_t argIndex, size_t valueIndex) const noexcept {
+    ASMJIT_ASSERT(argIndex < ASMJIT_ARRAY_SIZE(_argPacks));
+    return _argPacks[argIndex][valueIndex].isAssigned();
+  }
+
+  inline void assignReg(size_t argIndex, const BaseReg& reg, TypeId typeId = TypeId::kVoid) noexcept {
+    ASMJIT_ASSERT(argIndex < ASMJIT_ARRAY_SIZE(_argPacks));
+    ASMJIT_ASSERT(reg.isPhysReg());
+    _argPacks[argIndex][0].initReg(reg.type(), reg.id(), typeId);
+  }
+
+  inline void assignReg(size_t argIndex, RegType regType, uint32_t regId, TypeId typeId = TypeId::kVoid) noexcept {
+    ASMJIT_ASSERT(argIndex < ASMJIT_ARRAY_SIZE(_argPacks));
+    _argPacks[argIndex][0].initReg(regType, regId, typeId);
+  }
+
+  inline void assignStack(size_t argIndex, int32_t offset, TypeId typeId = TypeId::kVoid) noexcept {
+    ASMJIT_ASSERT(argIndex < ASMJIT_ARRAY_SIZE(_argPacks));
+    _argPacks[argIndex][0].initStack(offset, typeId);
+  }
+
+  inline void assignRegInPack(size_t argIndex, size_t valueIndex, const BaseReg& reg, TypeId typeId = TypeId::kVoid) noexcept {
+    ASMJIT_ASSERT(argIndex < ASMJIT_ARRAY_SIZE(_argPacks));
+    ASMJIT_ASSERT(reg.isPhysReg());
+    _argPacks[argIndex][valueIndex].initReg(reg.type(), reg.id(), typeId);
+  }
+
+  inline void assignRegInPack(size_t argIndex, size_t valueIndex, RegType regType, uint32_t regId, TypeId typeId = TypeId::kVoid) noexcept {
+    ASMJIT_ASSERT(argIndex < ASMJIT_ARRAY_SIZE(_argPacks));
+    _argPacks[argIndex][valueIndex].initReg(regType, regId, typeId);
+  }
+
+  inline void assignStackInPack(size_t argIndex, size_t valueIndex, int32_t offset, TypeId typeId = TypeId::kVoid) noexcept {
+    ASMJIT_ASSERT(argIndex < ASMJIT_ARRAY_SIZE(_argPacks));
+    _argPacks[argIndex][valueIndex].initStack(offset, typeId);
+  }
+
+  // NOTE: All `assignAll()` methods are shortcuts to assign all arguments at once, however, since registers are
+  // passed all at once these initializers don't provide any way to pass TypeId and/or to keep any argument between
+  // the arguments passed unassigned.
+  inline void _assignAllInternal(size_t argIndex, const BaseReg& reg) noexcept {
+    assignReg(argIndex, reg);
+  }
+
+  template<typename... Args>
+  inline void _assignAllInternal(size_t argIndex, const BaseReg& reg, Args&&... args) noexcept {
+    assignReg(argIndex, reg);
+    _assignAllInternal(argIndex + 1, std::forward<Args>(args)...);
+  }
+
+  template<typename... Args>
+  inline void assignAll(Args&&... args) noexcept {
+    _assignAllInternal(0, std::forward<Args>(args)...);
+  }
+
+  //! \}
+
+  //! \name Utilities
+  //! \{
+
+  //! Update `FuncFrame` based on function's arguments assignment.
+  //!
+  //! \note You MUST call this in order to use `BaseEmitter::emitArgsAssignment()`, otherwise the FuncFrame would
+  //! not contain the information necessary to assign all arguments into the registers and/or stack specified.
+  ASMJIT_API Error updateFuncFrame(FuncFrame& frame) const noexcept;
+
+  //! \}
+};
+
+//! \}
+
+ASMJIT_END_NAMESPACE
+
+#endif // ASMJIT_CORE_FUNC_H_INCLUDED
+
diff --git a/lib/lepton/asmjit/core/funcargscontext.cpp b/lib/lepton/asmjit/core/funcargscontext.cpp
new file mode 100644
index 0000000000..1db50a7082
--- /dev/null
+++ b/lib/lepton/asmjit/core/funcargscontext.cpp
@@ -0,0 +1,293 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#include "../core/funcargscontext_p.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \cond INTERNAL
+//! \addtogroup asmjit_core
+//! \{
+
+FuncArgsContext::FuncArgsContext() noexcept {
+  for (RegGroup group : RegGroupVirtValues{})  // Reset the WorkData of each group yielded by RegGroupVirtValues.
+    _workData[size_t(group)].reset();
+}
+
+ASMJIT_FAVOR_SIZE Error FuncArgsContext::initWorkData(const FuncFrame& frame, const FuncArgsAssignment& args, const RAConstraints* constraints) noexcept {
+  Arch arch = frame.arch();
+  const FuncDetail& func = *args.funcDetail();
+
+  _archTraits = &ArchTraits::byArch(arch);
+  _constraints = constraints;
+  _arch = arch;
+
+  // Initialize `_archRegs`.
+  for (RegGroup group : RegGroupVirtValues{})
+    _workData[group]._archRegs = _constraints->availableRegs(group);
+
+  if (frame.hasPreservedFP())
+    _workData[size_t(RegGroup::kGp)]._archRegs &= ~Support::bitMask(archTraits().fpRegId());
+
+  // Extract information from all function arguments/assignments and build Var[] array.
+  uint32_t varId = 0;
+  for (uint32_t argIndex = 0; argIndex < Globals::kMaxFuncArgs; argIndex++) {
+    for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
+      const FuncValue& dst_ = args.arg(argIndex, valueIndex);
+      if (!dst_.isAssigned())
+        continue;
+
+      const FuncValue& src_ = func.arg(argIndex, valueIndex);
+      if (ASMJIT_UNLIKELY(!src_.isAssigned()))
+        return DebugUtils::errored(kErrorInvalidState);
+
+      Var& var = _vars[varId];
+      var.init(src_, dst_);
+
+      FuncValue& src = var.cur;
+      FuncValue& dst = var.out;
+
+      RegGroup dstGroup = RegGroup::kMaxValue;
+      uint32_t dstId = BaseReg::kIdBad;
+      WorkData* dstWd = nullptr;
+
+      // Not supported.
+      if (src.isIndirect())
+        return DebugUtils::errored(kErrorInvalidAssignment);
+
+      if (dst.isReg()) {
+        RegType dstType = dst.regType();
+        if (ASMJIT_UNLIKELY(!archTraits().hasRegType(dstType)))
+          return DebugUtils::errored(kErrorInvalidRegType);
+
+        // Derive TypeId from the destination register type if the destination doesn't have it. The RA used by
+        // BaseCompiler would never leave TypeId undefined, but users of FuncAPI can assign phys regs without a type.
+        if (!dst.hasTypeId())
+          dst.setTypeId(archTraits().regTypeToTypeId(dst.regType()));
+
+        dstGroup = archTraits().regTypeToGroup(dstType);
+        if (ASMJIT_UNLIKELY(dstGroup > RegGroup::kMaxVirt))
+          return DebugUtils::errored(kErrorInvalidRegGroup);
+
+        dstWd = &_workData[dstGroup];
+        dstId = dst.regId();
+        if (ASMJIT_UNLIKELY(dstId >= 32 || !Support::bitTest(dstWd->archRegs(), dstId)))
+          return DebugUtils::errored(kErrorInvalidPhysId);
+
+        if (ASMJIT_UNLIKELY(Support::bitTest(dstWd->dstRegs(), dstId)))
+          return DebugUtils::errored(kErrorOverlappedRegs);
+
+        dstWd->_dstRegs  |= Support::bitMask(dstId);
+        dstWd->_dstShuf  |= Support::bitMask(dstId);
+        dstWd->_usedRegs |= Support::bitMask(dstId);
+      }
+      else {
+        if (!dst.hasTypeId())
+          dst.setTypeId(src.typeId());
+
+        OperandSignature signature = getSuitableRegForMemToMemMove(arch, dst.typeId(), src.typeId());
+        if (ASMJIT_UNLIKELY(!signature.isValid()))
+          return DebugUtils::errored(kErrorInvalidState);
+        _stackDstMask = uint8_t(_stackDstMask | Support::bitMask(signature.regGroup()));
+      }
+
+      if (src.isReg()) {
+        uint32_t srcId = src.regId();
+        RegGroup srcGroup = archTraits().regTypeToGroup(src.regType());
+
+        if (dstGroup == srcGroup) {
+          ASMJIT_ASSERT(dstWd != nullptr);
+          dstWd->assign(varId, srcId);
+
+          // The best case, register is allocated where it is expected to be.
+          if (dstId == srcId)
+            var.markDone();
+        }
+        else {
+          if (ASMJIT_UNLIKELY(srcGroup > RegGroup::kMaxVirt))
+            return DebugUtils::errored(kErrorInvalidState);
+
+          WorkData& srcData = _workData[size_t(srcGroup)];
+          srcData.assign(varId, srcId);
+        }
+      }
+      else {
+        if (dstWd)
+          dstWd->_numStackArgs++;
+        _hasStackSrc = true;
+      }
+
+      varId++;
+    }
+  }
+
+  // Initialize WorkData::workRegs.
+  for (RegGroup group : RegGroupVirtValues{}) {
+    _workData[group]._workRegs =
+      (_workData[group].archRegs() & (frame.dirtyRegs(group) | ~frame.preservedRegs(group))) | _workData[group].dstRegs() | _workData[group].assignedRegs();
+  }
+
+  // Create a variable that represents `SARegId` if necessary.
+  bool saRegRequired = _hasStackSrc && frame.hasDynamicAlignment() && !frame.hasPreservedFP();
+
+  WorkData& gpRegs = _workData[RegGroup::kGp];
+  uint32_t saCurRegId = frame.saRegId();
+  uint32_t saOutRegId = args.saRegId();
+
+  if (saCurRegId != BaseReg::kIdBad) {
+    // Check if the provided `SARegId` doesn't collide with input registers.
+    if (ASMJIT_UNLIKELY(gpRegs.isAssigned(saCurRegId)))
+      return DebugUtils::errored(kErrorOverlappedRegs);
+  }
+
+  if (saOutRegId != BaseReg::kIdBad) {
+    // Check if the provided `SARegId` doesn't collide with argument assignments.
+    if (ASMJIT_UNLIKELY(Support::bitTest(gpRegs.dstRegs(), saOutRegId)))
+      return DebugUtils::errored(kErrorOverlappedRegs);
+    saRegRequired = true;
+  }
+
+  if (saRegRequired) {
+    TypeId ptrTypeId = Environment::is32Bit(arch) ? TypeId::kUInt32 : TypeId::kUInt64;
+    RegType ptrRegType = Environment::is32Bit(arch) ? RegType::kGp32 : RegType::kGp64;
+
+    _saVarId = uint8_t(varId);
+    _hasPreservedFP = frame.hasPreservedFP();
+
+    Var& var = _vars[varId];
+    var.reset();
+
+    if (saCurRegId == BaseReg::kIdBad) {
+      if (saOutRegId != BaseReg::kIdBad && !gpRegs.isAssigned(saOutRegId)) {
+        saCurRegId = saOutRegId;
+      }
+      else {
+        RegMask availableRegs = gpRegs.availableRegs();
+        if (!availableRegs)
+          availableRegs = gpRegs.archRegs() & ~gpRegs.workRegs();
+
+        if (ASMJIT_UNLIKELY(!availableRegs))
+          return DebugUtils::errored(kErrorNoMorePhysRegs);
+
+        saCurRegId = Support::ctz(availableRegs);
+      }
+    }
+
+    var.cur.initReg(ptrRegType, saCurRegId, ptrTypeId);
+    gpRegs.assign(varId, saCurRegId);
+    gpRegs._workRegs |= Support::bitMask(saCurRegId);
+
+    if (saOutRegId != BaseReg::kIdBad) {
+      var.out.initReg(ptrRegType, saOutRegId, ptrTypeId);
+      gpRegs._dstRegs  |= Support::bitMask(saOutRegId);
+      gpRegs._workRegs |= Support::bitMask(saOutRegId);
+    }
+    else {
+      var.markDone();
+    }
+
+    varId++;
+  }
+
+  _varCount = varId;
+
+  // Detect register swaps.
+  for (varId = 0; varId < _varCount; varId++) {
+    Var& var = _vars[varId];
+    if (var.cur.isReg() && var.out.isReg()) {
+      uint32_t srcId = var.cur.regId();
+      uint32_t dstId = var.out.regId();
+
+      RegGroup group = archTraits().regTypeToGroup(var.cur.regType());
+      if (group != archTraits().regTypeToGroup(var.out.regType()))
+        continue;
+
+      WorkData& wd = _workData[group];
+      if (wd.isAssigned(dstId)) {
+        Var& other = _vars[wd._physToVarId[dstId]];
+        if (archTraits().regTypeToGroup(other.out.regType()) == group && other.out.regId() == srcId) {
+          wd._numSwaps++;
+          _regSwapsMask = uint8_t(_regSwapsMask | Support::bitMask(group));
+        }
+      }
+    }
+  }
+
+  return kErrorOk;
+}
+
+ASMJIT_FAVOR_SIZE Error FuncArgsContext::markDstRegsDirty(FuncFrame& frame) noexcept {
+  for (RegGroup group : RegGroupVirtValues{}) {
+    WorkData& wd = _workData[group];
+    uint32_t regs = wd.usedRegs() | wd._dstShuf;  // Used registers plus destinations that require shuffling.
+
+    wd._workRegs |= regs;  // Let the shuffler work with these registers too.
+    frame.addDirtyRegs(group, regs);  // And report them to the frame as dirty.
+  }
+
+  return kErrorOk;  // This function has no failure path.
+}
+
+ASMJIT_FAVOR_SIZE Error FuncArgsContext::markScratchRegs(FuncFrame& frame) noexcept {
+  uint32_t groupMask = 0;  // Bit-mask of register groups that need a scratch register.
+
+  // Handle stack to stack moves.
+  groupMask |= _stackDstMask;
+
+  // Handle register swaps.
+  groupMask |= _regSwapsMask & ~Support::bitMask(RegGroup::kGp);  // Swaps in the GP group never request one.
+
+  if (!groupMask)
+    return kErrorOk;
+
+  // Selects one dirty register per affected group that can be used as a scratch register.
+  for (RegGroup group : RegGroupVirtValues{}) {
+    if (Support::bitTest(groupMask, group)) {
+      WorkData& wd = _workData[group];
+
+      // Initially, pick some clobbered or dirty register.
+      RegMask workRegs = wd.workRegs();
+      RegMask regs = workRegs & ~(wd.usedRegs() | wd._dstShuf);
+
+      // If that didn't work out pick some register which is not in 'used'.
+      if (!regs)
+        regs = workRegs & ~wd.usedRegs();
+
+      // If that didn't work out pick any other register that is allocable.
+      // This last resort case will, however, result in marking one more
+      // register dirty.
+      if (!regs)
+        regs = wd.archRegs() & ~workRegs;
+
+      // If that didn't work out we will have to use XORs instead of MOVs.
+      if (!regs)
+        continue;
+
+      RegMask regMask = Support::blsi(regs);  // Keep a single candidate (presumably the lowest set bit - confirm).
+      wd._workRegs |= regMask;
+      frame.addDirtyRegs(group, regMask);
+    }
+  }
+
+  return kErrorOk;
+}
+
+ASMJIT_FAVOR_SIZE Error FuncArgsContext::markStackArgsReg(FuncFrame& frame) noexcept {
+  if (_saVarId != kVarIdNone) {  // An SA variable exists: publish its current register id to the frame.
+    const Var& var = _vars[_saVarId];
+    frame.setSARegId(var.cur.regId());
+  }
+  else if (frame.hasPreservedFP()) {  // No SA variable, but FP is preserved: use the frame-pointer register.
+    frame.setSARegId(archTraits().fpRegId());
+  }
+
+  return kErrorOk;  // When neither branch applies the frame is left unchanged.
+}
+
+//! \}
+//! \endcond
+
+ASMJIT_END_NAMESPACE
diff --git a/lib/lepton/asmjit/core/funcargscontext_p.h b/lib/lepton/asmjit/core/funcargscontext_p.h
new file mode 100644
index 0000000000..72ee10585a
--- /dev/null
+++ b/lib/lepton/asmjit/core/funcargscontext_p.h
@@ -0,0 +1,199 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_FUNCARGSCONTEXT_P_H_INCLUDED
+#define ASMJIT_CORE_FUNCARGSCONTEXT_P_H_INCLUDED
+
+#include "../core/archtraits.h"
+#include "../core/environment.h"
+#include "../core/func.h"
+#include "../core/operand.h"
+#include "../core/radefs_p.h"
+#include "../core/support.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \cond INTERNAL
+//! \addtogroup asmjit_core
+//! \{
+
+static inline OperandSignature getSuitableRegForMemToMemMove(Arch arch, TypeId dstTypeId, TypeId srcTypeId) noexcept {
+  const ArchTraits& archTraits = ArchTraits::byArch(arch);
+
+  uint32_t dstSize = TypeUtils::sizeOf(dstTypeId);
+  uint32_t srcSize = TypeUtils::sizeOf(srcTypeId);
+  uint32_t maxSize = Support::max<uint32_t>(dstSize, srcSize);  // The larger of the two types decides the register.
+  uint32_t regSize = Environment::registerSizeFromArch(arch);
+
+  OperandSignature signature{0};  // Stays 0 when no branch below matches.
+  if (maxSize <= regSize || (TypeUtils::isInt(dstTypeId) && TypeUtils::isInt(srcTypeId)))
+    signature = maxSize <= 4 ? archTraits.regTypeToSignature(RegType::kGp32)
+                             : archTraits.regTypeToSignature(RegType::kGp64);  // GP register, 32-bit if it suffices.
+  else if (maxSize <= 8 && archTraits.hasRegType(RegType::kVec64))  // Otherwise: smallest available vector type.
+    signature = archTraits.regTypeToSignature(RegType::kVec64);
+  else if (maxSize <= 16 && archTraits.hasRegType(RegType::kVec128))
+    signature = archTraits.regTypeToSignature(RegType::kVec128);
+  else if (maxSize <= 32 && archTraits.hasRegType(RegType::kVec256))
+    signature = archTraits.regTypeToSignature(RegType::kVec256);
+  else if (maxSize <= 64 && archTraits.hasRegType(RegType::kVec512))
+    signature = archTraits.regTypeToSignature(RegType::kVec512);
+
+  return signature;
+}
+
+class FuncArgsContext {
+public:
+  enum VarId : uint32_t {
+    kVarIdNone = 0xFF
+  };
+
+  //! Contains information about a single argument or SA register that may need shuffling.
+  struct Var {
+    FuncValue cur;
+    FuncValue out;
+
+    inline void init(const FuncValue& cur_, const FuncValue& out_) noexcept {
+      cur = cur_;
+      out = out_;
+    }
+
+    //! Reset the value to its unassigned state.
+    inline void reset() noexcept {
+      cur.reset();
+      out.reset();
+    }
+
+    inline bool isDone() const noexcept { return cur.isDone(); }
+    inline void markDone() noexcept { cur.addFlags(FuncValue::kFlagIsDone); }
+  };
+
+  struct WorkData {
+    //! All allocable registers provided by the architecture.
+    RegMask _archRegs;
+    //! All registers that can be used by the shuffler.
+    RegMask _workRegs;
+    //! Registers used by the shuffler (all).
+    RegMask _usedRegs;
+    //! Assigned registers.
+    RegMask _assignedRegs;
+    //! Destination registers assigned to arguments or SA.
+    RegMask _dstRegs;
+    //! Destination registers that require shuffling.
+    RegMask _dstShuf;
+    //! Number of register swaps.
+    uint8_t _numSwaps;
+    //! Number of stack loads.
+    uint8_t _numStackArgs;
+    //! Reserved (only used as padding).
+    uint8_t _reserved[6];
+    //! Physical ID to variable ID mapping.
+    uint8_t _physToVarId[32];
+
+    inline void reset() noexcept {
+      _archRegs = 0;
+      _workRegs = 0;
+      _usedRegs = 0;
+      _assignedRegs = 0;
+      _dstRegs = 0;
+      _dstShuf = 0;
+      _numSwaps = 0;
+      _numStackArgs = 0;
+      memset(_reserved, 0, sizeof(_reserved));
+      memset(_physToVarId, kVarIdNone, 32);
+    }
+
+    inline bool isAssigned(uint32_t regId) const noexcept {
+      ASMJIT_ASSERT(regId < 32);
+      return Support::bitTest(_assignedRegs, regId);
+    }
+
+    inline void assign(uint32_t varId, uint32_t regId) noexcept {
+      ASMJIT_ASSERT(!isAssigned(regId));
+      ASMJIT_ASSERT(_physToVarId[regId] == kVarIdNone);
+
+      _physToVarId[regId] = uint8_t(varId);
+      _assignedRegs ^= Support::bitMask(regId);
+    }
+
+    inline void reassign(uint32_t varId, uint32_t newId, uint32_t oldId) noexcept {
+      ASMJIT_ASSERT( isAssigned(oldId));
+      ASMJIT_ASSERT(!isAssigned(newId));
+      ASMJIT_ASSERT(_physToVarId[oldId] == varId);
+      ASMJIT_ASSERT(_physToVarId[newId] == kVarIdNone);
+
+      _physToVarId[oldId] = uint8_t(kVarIdNone);
+      _physToVarId[newId] = uint8_t(varId);
+      _assignedRegs ^= Support::bitMask(newId) ^ Support::bitMask(oldId);
+    }
+
+    inline void swap(uint32_t aVarId, uint32_t aRegId, uint32_t bVarId, uint32_t bRegId) noexcept {
+      ASMJIT_ASSERT(isAssigned(aRegId));
+      ASMJIT_ASSERT(isAssigned(bRegId));
+      ASMJIT_ASSERT(_physToVarId[aRegId] == aVarId);
+      ASMJIT_ASSERT(_physToVarId[bRegId] == bVarId);
+
+      _physToVarId[aRegId] = uint8_t(bVarId);
+      _physToVarId[bRegId] = uint8_t(aVarId);
+    }
+
+    inline void unassign(uint32_t varId, uint32_t regId) noexcept {
+      ASMJIT_ASSERT(isAssigned(regId));
+      ASMJIT_ASSERT(_physToVarId[regId] == varId);
+
+      DebugUtils::unused(varId);
+      _physToVarId[regId] = uint8_t(kVarIdNone);
+      _assignedRegs ^= Support::bitMask(regId);
+    }
+
+    inline RegMask archRegs() const noexcept { return _archRegs; }
+    inline RegMask workRegs() const noexcept { return _workRegs; }
+    inline RegMask usedRegs() const noexcept { return _usedRegs; }
+    inline RegMask assignedRegs() const noexcept { return _assignedRegs; }
+    inline RegMask dstRegs() const noexcept { return _dstRegs; }
+    inline RegMask availableRegs() const noexcept { return _workRegs & ~_assignedRegs; }
+  };
+
+  //! Architecture traits.
+  const ArchTraits* _archTraits = nullptr;
+  //! Architecture constraints.
+  const RAConstraints* _constraints = nullptr;
+  //! Target architecture.
+  Arch _arch = Arch::kUnknown;
+  //! Has arguments passed via stack (SRC).
+  bool _hasStackSrc = false;
+  //! Has preserved frame-pointer (FP).
+  bool _hasPreservedFP = false;
+  //! Has arguments assigned to stack (DST).
+  uint8_t _stackDstMask = 0;
+  //! Register swap groups (bit-mask).
+  uint8_t _regSwapsMask = 0;
+  uint8_t _saVarId = kVarIdNone;
+  uint32_t _varCount = 0;
+  Support::Array<WorkData, Globals::kNumVirtGroups> _workData;
+  Var _vars[Globals::kMaxFuncArgs * Globals::kMaxValuePack + 1];
+
+  FuncArgsContext() noexcept;
+
+  inline const ArchTraits& archTraits() const noexcept { return *_archTraits; }
+  inline Arch arch() const noexcept { return _arch; }
+
+  inline uint32_t varCount() const noexcept { return _varCount; }
+  inline size_t indexOf(const Var* var) const noexcept { return (size_t)(var - _vars); }
+
+  inline Var& var(size_t varId) noexcept { return _vars[varId]; }
+  inline const Var& var(size_t varId) const noexcept { return _vars[varId]; }
+
+  Error initWorkData(const FuncFrame& frame, const FuncArgsAssignment& args, const RAConstraints* constraints) noexcept;
+  Error markScratchRegs(FuncFrame& frame) noexcept;
+  Error markDstRegsDirty(FuncFrame& frame) noexcept;
+  Error markStackArgsReg(FuncFrame& frame) noexcept;
+};
+
+//! \}
+//! \endcond
+
+ASMJIT_END_NAMESPACE
+
+#endif // ASMJIT_CORE_FUNCARGSCONTEXT_P_H_INCLUDED
diff --git a/lib/lepton/asmjit/core/globals.cpp b/lib/lepton/asmjit/core/globals.cpp
new file mode 100644
index 0000000000..2bbd0c0577
--- /dev/null
+++ b/lib/lepton/asmjit/core/globals.cpp
@@ -0,0 +1,133 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#include "../core/globals.h"
+#include "../core/support.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+// DebugUtils - Error As String
+// ============================
+
+ASMJIT_FAVOR_SIZE const char* DebugUtils::errorAsString(Error err) noexcept {
+#ifndef ASMJIT_NO_TEXT
+  // @EnumStringBegin{"enum": "ErrorCode", "output": "sError", "strip": "kError"}@
+  static const char sErrorString[] =
+    "Ok\0"
+    "OutOfMemory\0"
+    "InvalidArgument\0"
+    "InvalidState\0"
+    "InvalidArch\0"
+    "NotInitialized\0"
+    "AlreadyInitialized\0"
+    "FeatureNotEnabled\0"
+    "TooManyHandles\0"
+    "TooLarge\0"
+    "NoCodeGenerated\0"
+    "InvalidDirective\0"
+    "InvalidLabel\0"
+    "TooManyLabels\0"
+    "LabelAlreadyBound\0"
+    "LabelAlreadyDefined\0"
+    "LabelNameTooLong\0"
+    "InvalidLabelName\0"
+    "InvalidParentLabel\0"
+    "InvalidSection\0"
+    "TooManySections\0"
+    "InvalidSectionName\0"
+    "TooManyRelocations\0"
+    "InvalidRelocEntry\0"
+    "RelocOffsetOutOfRange\0"
+    "InvalidAssignment\0"
+    "InvalidInstruction\0"
+    "InvalidRegType\0"
+    "InvalidRegGroup\0"
+    "InvalidPhysId\0"
+    "InvalidVirtId\0"
+    "InvalidElementIndex\0"
+    "InvalidPrefixCombination\0"
+    "InvalidLockPrefix\0"
+    "InvalidXAcquirePrefix\0"
+    "InvalidXReleasePrefix\0"
+    "InvalidRepPrefix\0"
+    "InvalidRexPrefix\0"
+    "InvalidExtraReg\0"
+    "InvalidKMaskUse\0"
+    "InvalidKZeroUse\0"
+    "InvalidBroadcast\0"
+    "InvalidEROrSAE\0"
+    "InvalidAddress\0"
+    "InvalidAddressIndex\0"
+    "InvalidAddressScale\0"
+    "InvalidAddress64Bit\0"
+    "InvalidAddress64BitZeroExtension\0"
+    "InvalidDisplacement\0"
+    "InvalidSegment\0"
+    "InvalidImmediate\0"
+    "InvalidOperandSize\0"
+    "AmbiguousOperandSize\0"
+    "OperandSizeMismatch\0"
+    "InvalidOption\0"
+    "OptionAlreadyDefined\0"
+    "InvalidTypeId\0"
+    "InvalidUseOfGpbHi\0"
+    "InvalidUseOfGpq\0"
+    "InvalidUseOfF80\0"
+    "NotConsecutiveRegs\0"
+    "ConsecutiveRegsAllocation\0"
+    "IllegalVirtReg\0"
+    "TooManyVirtRegs\0"
+    "NoMorePhysRegs\0"
+    "OverlappedRegs\0"
+    "OverlappingStackRegWithRegArg\0"
+    "ExpressionLabelNotBound\0"
+    "ExpressionOverflow\0"
+    "FailedToOpenAnonymousMemory\0"
+    "<Unknown>\0";
+
+  static const uint16_t sErrorIndex[] = {
+    0, 3, 15, 31, 44, 56, 71, 90, 108, 123, 132, 148, 165, 178, 192, 210, 230,
+    247, 264, 283, 298, 314, 333, 352, 370, 392, 410, 429, 444, 460, 474, 488,
+    508, 533, 551, 573, 595, 612, 629, 645, 661, 677, 694, 709, 724, 744, 764,
+    784, 817, 837, 852, 869, 888, 909, 929, 943, 964, 978, 996, 1012, 1028, 1047,
+    1073, 1088, 1104, 1119, 1134, 1164, 1188, 1207, 1235
+  };
+  // @EnumStringEnd@
+
+  return sErrorString + sErrorIndex[Support::min<Error>(err, kErrorCount)];
+#else
+  DebugUtils::unused(err);
+  static const char noMessage[] = "";
+  return noMessage;
+#endif
+}
+
+// DebugUtils - Debug Output
+// =========================
+
+ASMJIT_FAVOR_SIZE void DebugUtils::debugOutput(const char* str) noexcept {
+#if defined(_WIN32)
+  ::OutputDebugStringA(str);  // Windows: send the message to the debugger output.
+#else
+  ::fputs(str, stderr);  // Elsewhere: write the message to stderr as-is (no newline is appended).
+#endif
+}
+
+// DebugUtils - Fatal Errors
+// =========================
+
+ASMJIT_FAVOR_SIZE void DebugUtils::assertionFailed(const char* file, int line, const char* msg) noexcept {
+  char str[1024];  // Fixed-size buffer, snprintf() truncates a longer message.
+
+  snprintf(str, 1024,
+    "[asmjit] Assertion failed at %s (line %d):\n"
+    "[asmjit] %s\n", file, line, msg);
+
+  debugOutput(str);
+  ::abort();  // Terminates the process, this function never returns.
+}
+
+ASMJIT_END_NAMESPACE
diff --git a/lib/lepton/asmjit/core/globals.h b/lib/lepton/asmjit/core/globals.h
new file mode 100644
index 0000000000..f2d3c6e63d
--- /dev/null
+++ b/lib/lepton/asmjit/core/globals.h
@@ -0,0 +1,393 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_GLOBALS_H_INCLUDED
+#define ASMJIT_CORE_GLOBALS_H_INCLUDED
+
+#include "../core/api-config.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \cond INTERNAL
+//! \addtogroup asmjit_utilities
+//! \{
+namespace Support {
+  //! Cast designed to cast between function and void* pointers.
+  template<typename Dst, typename Src>
+  static inline Dst ptr_cast_impl(Src p) noexcept { return (Dst)p; }
+} // {Support}
+
+#if defined(ASMJIT_NO_STDCXX)
+namespace Support {
+  ASMJIT_FORCE_INLINE void* operatorNew(size_t n) noexcept { return malloc(n); }
+  ASMJIT_FORCE_INLINE void operatorDelete(void* p) noexcept { if (p) free(p); }
+} // {Support}
+
+#define ASMJIT_BASE_CLASS(TYPE)                                                  \
+  ASMJIT_FORCE_INLINE void* operator new(size_t n) noexcept {                    \
+    return Support::operatorNew(n);                                              \
+  }                                                                              \
+                                                                                 \
+  ASMJIT_FORCE_INLINE void  operator delete(void* p) noexcept {                  \
+    Support::operatorDelete(p);                                                  \
+  }                                                                              \
+                                                                                 \
+  ASMJIT_FORCE_INLINE void* operator new(size_t, void* p) noexcept { return p; } \
+  ASMJIT_FORCE_INLINE void  operator delete(void*, void*) noexcept {}
+#else
+#define ASMJIT_BASE_CLASS(TYPE)
+#endif
+
+//! \}
+//! \endcond
+
+//! \addtogroup asmjit_core
+//! \{
+
+//! Byte order.
+enum class ByteOrder {
+  //! Little endian.
+  kLE = 0,
+  //! Big endian.
+  kBE = 1,
+  //! Native byte order of the target architecture.
+  kNative = ASMJIT_ARCH_LE ? kLE : kBE,
+  //! Swapped byte order of the target architecture.
+  kSwapped = ASMJIT_ARCH_LE ? kBE : kLE
+};
+
+//! A policy that can be used with some `reset()` member functions.
+enum class ResetPolicy : uint32_t {
+  //! Soft reset, doesn't deallocate memory (default).
+  kSoft = 0,
+  //! Hard reset, releases all memory used, if any.
+  kHard = 1
+};
+
+//! Contains typedefs, constants, and variables used globally by AsmJit.
+namespace Globals {
+
+//! Host memory allocator overhead.
+static constexpr uint32_t kAllocOverhead = uint32_t(sizeof(intptr_t) * 4);
+
+//! Host memory allocator alignment.
+static constexpr uint32_t kAllocAlignment = 8;
+
+//! Aggressive growing strategy threshold.
+static constexpr uint32_t kGrowThreshold = 1024 * 1024 * 16;
+
+//! Maximum depth of RB-Tree is:
+//!
+//!   `2 * log2(n + 1)`
+//!
+//! Size of RB node is at least two pointers (without data), so a theoretical architecture limit would be:
+//!
+//!   `2 * log2(addressableMemorySize / sizeof(Node) + 1)`
+//!
+//! Which yields 30 on 32-bit arch and 61 on 64-bit arch. The final value was adjusted by +1 for safety reasons.
+static constexpr uint32_t kMaxTreeHeight = (ASMJIT_ARCH_BITS == 32 ? 30 : 61) + 1;
+
+//! Maximum number of operands per a single instruction.
+static constexpr uint32_t kMaxOpCount = 6;
+
+//! Maximum arguments of a function supported by the Compiler / Function API.
+static constexpr uint32_t kMaxFuncArgs = 16;
+
+//! The number of values that can be assigned to a single function argument or
+//! return value.
+static constexpr uint32_t kMaxValuePack = 4;
+
+//! Maximum number of physical registers AsmJit can use per register group.
+static constexpr uint32_t kMaxPhysRegs = 32;
+
+//! Maximum alignment.
+static constexpr uint32_t kMaxAlignment = 64;
+
+//! Maximum label or symbol size in bytes.
+static constexpr uint32_t kMaxLabelNameSize = 2048;
+
+//! Maximum section name size.
+static constexpr uint32_t kMaxSectionNameSize = 35;
+
+//! Maximum size of comment.
+static constexpr uint32_t kMaxCommentSize = 1024;
+
+//! Invalid identifier.
+static constexpr uint32_t kInvalidId = 0xFFFFFFFFu;
+
+//! Returned by `indexOf()` and similar when working with containers that use 32-bit index/size.
+static constexpr uint32_t kNotFound = 0xFFFFFFFFu;
+
+//! Invalid base address.
+static constexpr uint64_t kNoBaseAddress = ~uint64_t(0);
+
+//! Number of virtual register groups.
+static constexpr uint32_t kNumVirtGroups = 4;
+
+struct Init_ {};
+struct NoInit_ {};
+
+static const constexpr Init_ Init {};
+static const constexpr NoInit_ NoInit {};
+
+} // {Globals}
+
+template<typename Func>
+static inline Func ptr_as_func(void* func) noexcept { return Support::ptr_cast_impl<Func, void*>(func); }
+
+template<typename Func>
+static inline void* func_as_ptr(Func func) noexcept { return Support::ptr_cast_impl<void*, Func>(func); }
+
+//! \}
+
+//! \addtogroup asmjit_error_handling
+//! \{
+
+//! AsmJit error type (uint32_t).
+typedef uint32_t Error;
+
+//! AsmJit error codes.
+enum ErrorCode : uint32_t {
+  // @EnumValuesBegin{"enum": "ErrorCode"}@
+
+  //! No error (success).
+  kErrorOk = 0,
+
+  //! Out of memory.
+  kErrorOutOfMemory,
+
+  //! Invalid argument.
+  kErrorInvalidArgument,
+
+  //! Invalid state.
+  //!
+  //! If this error is returned it means that either you are doing something wrong or AsmJit caught itself by
+  //! doing something wrong. This error should never be ignored.
+  kErrorInvalidState,
+
+  //! Invalid or incompatible architecture.
+  kErrorInvalidArch,
+
+  //! The object is not initialized.
+  kErrorNotInitialized,
+  //! The object is already initialized.
+  kErrorAlreadyInitialized,
+
+  //! Built-in feature was disabled at compile time and it's not available.
+  kErrorFeatureNotEnabled,
+
+  //! Too many handles (Windows) or file descriptors (Unix/Posix).
+  kErrorTooManyHandles,
+  //! Code generated is larger than allowed.
+  kErrorTooLarge,
+
+  //! No code generated.
+  //!
+  //! Returned by runtime if the \ref CodeHolder contains no code.
+  kErrorNoCodeGenerated,
+
+  //! Invalid directive.
+  kErrorInvalidDirective,
+  //! Attempt to use uninitialized label.
+  kErrorInvalidLabel,
+  //! Label index overflow - a single \ref BaseAssembler instance can hold almost 2^32 (4 billion) labels. If
+  //! there is an attempt to create more labels then this error is returned.
+  kErrorTooManyLabels,
+  //! Label is already bound.
+  kErrorLabelAlreadyBound,
+  //! Label is already defined (named labels).
+  kErrorLabelAlreadyDefined,
+  //! Label name is too long.
+  kErrorLabelNameTooLong,
+  //! Label must always be local if it's anonymous (without a name).
+  kErrorInvalidLabelName,
+  //! Parent id passed to \ref CodeHolder::newNamedLabelEntry() was either invalid or parent is not supported
+  //! by the requested `LabelType`.
+  kErrorInvalidParentLabel,
+
+  //! Invalid section.
+  kErrorInvalidSection,
+  //! Too many sections (section index overflow).
+  kErrorTooManySections,
+  //! Invalid section name (most probably too long).
+  kErrorInvalidSectionName,
+
+  //! Relocation index overflow (too many relocations).
+  kErrorTooManyRelocations,
+  //! Invalid relocation entry.
+  kErrorInvalidRelocEntry,
+  //! Reloc entry contains address that is out of range (unencodable).
+  kErrorRelocOffsetOutOfRange,
+
+  //! Invalid assignment to a register, function argument, or function return value.
+  kErrorInvalidAssignment,
+  //! Invalid instruction.
+  kErrorInvalidInstruction,
+  //! Invalid register type.
+  kErrorInvalidRegType,
+  //! Invalid register group.
+  kErrorInvalidRegGroup,
+  //! Invalid physical register id.
+  kErrorInvalidPhysId,
+  //! Invalid virtual register id.
+  kErrorInvalidVirtId,
+  //! Invalid element index (ARM).
+  kErrorInvalidElementIndex,
+  //! Invalid prefix combination (X86|X64).
+  kErrorInvalidPrefixCombination,
+  //! Invalid LOCK prefix (X86|X64).
+  kErrorInvalidLockPrefix,
+  //! Invalid XACQUIRE prefix (X86|X64).
+  kErrorInvalidXAcquirePrefix,
+  //! Invalid XRELEASE prefix (X86|X64).
+  kErrorInvalidXReleasePrefix,
+  //! Invalid REP prefix (X86|X64).
+  kErrorInvalidRepPrefix,
+  //! Invalid REX prefix (X86|X64).
+  kErrorInvalidRexPrefix,
+  //! Invalid {...} register (X86|X64).
+  kErrorInvalidExtraReg,
+  //! Invalid {k} use (not supported by the instruction) (X86|X64).
+  kErrorInvalidKMaskUse,
+  //! Invalid {k}{z} use (not supported by the instruction) (X86|X64).
+  kErrorInvalidKZeroUse,
+  //! Invalid broadcast - Currently only related to invalid use of AVX-512 {1tox} (X86|X64).
+  kErrorInvalidBroadcast,
+  //! Invalid 'embedded-rounding' {er} or 'suppress-all-exceptions' {sae} (AVX-512) (X86|X64).
+  kErrorInvalidEROrSAE,
+  //! Invalid address used (not encodable).
+  kErrorInvalidAddress,
+  //! Invalid index register used in memory address (not encodable).
+  kErrorInvalidAddressIndex,
+  //! Invalid address scale (not encodable).
+  kErrorInvalidAddressScale,
+  //! Invalid use of 64-bit address.
+  kErrorInvalidAddress64Bit,
+  //! Invalid use of 64-bit address that requires 32-bit zero-extension (X64).
+  kErrorInvalidAddress64BitZeroExtension,
+  //! Invalid displacement (not encodable).
+  kErrorInvalidDisplacement,
+  //! Invalid segment (X86).
+  kErrorInvalidSegment,
+
+  //! Invalid immediate (out of bounds on X86 and invalid pattern on ARM).
+  kErrorInvalidImmediate,
+
+  //! Invalid operand size.
+  kErrorInvalidOperandSize,
+  //! Ambiguous operand size (memory has zero size while it's required to determine the operation type).
+  kErrorAmbiguousOperandSize,
+  //! Mismatching operand size (size of multiple operands doesn't match the operation size).
+  kErrorOperandSizeMismatch,
+
+  //! Invalid option.
+  kErrorInvalidOption,
+  //! Option already defined.
+  kErrorOptionAlreadyDefined,
+
+  //! Invalid TypeId.
+  kErrorInvalidTypeId,
+  //! Invalid use of a 8-bit GPB-HIGH register.
+  kErrorInvalidUseOfGpbHi,
+  //! Invalid use of a 64-bit GPQ register in 32-bit mode.
+  kErrorInvalidUseOfGpq,
+  //! Invalid use of an 80-bit float (\ref TypeId::kFloat80).
+  kErrorInvalidUseOfF80,
+  //! Instruction requires the use of consecutive registers, but registers in operands weren't (AVX512, ASIMD load/store, etc...).
+  kErrorNotConsecutiveRegs,
+  //! Failed to allocate consecutive registers - allocable registers either too restricted or a bug in RW info.
+  kErrorConsecutiveRegsAllocation,
+
+  //! Illegal virtual register - reported by instruction validation.
+  kErrorIllegalVirtReg,
+  //! AsmJit cannot create more virtual registers.
+  kErrorTooManyVirtRegs,
+
+  //! AsmJit requires a physical register, but none is available.
+  kErrorNoMorePhysRegs,
+  //! A variable has been assigned more than once to a function argument (BaseCompiler).
+  kErrorOverlappedRegs,
+  //! Invalid register to hold stack arguments offset.
+  kErrorOverlappingStackRegWithRegArg,
+
+  //! Unbound label cannot be evaluated by expression.
+  kErrorExpressionLabelNotBound,
+  //! Arithmetic overflow during expression evaluation.
+  kErrorExpressionOverflow,
+
+  //! Failed to open anonymous memory handle or file descriptor.
+  kErrorFailedToOpenAnonymousMemory,
+
+  // @EnumValuesEnd@
+
+  //! Count of AsmJit error codes.
+  kErrorCount
+};
+
+//! Debugging utilities.
+namespace DebugUtils {
+
+//! \cond INTERNAL
+//! Used to silence warnings about unused arguments or variables.
+template<typename... Args>
+static inline void unused(Args&&...) noexcept {}
+//! \endcond
+
+//! Returns the error `err` passed.
+//!
+//! Provided for debugging purposes. Putting a breakpoint inside `errored` can help with tracing the origin of any
+//! error reported / returned by AsmJit.
+static constexpr Error errored(Error err) noexcept { return err; }
+
+//! Returns a printable version of `asmjit::Error` code.
+ASMJIT_API const char* errorAsString(Error err) noexcept;
+
+//! Called to output debugging message(s).
+ASMJIT_API void debugOutput(const char* str) noexcept;
+
+//! Called on assertion failure.
+//!
+//! \param file Source file name where it happened.
+//! \param line Line in the source file.
+//! \param msg Message to display.
+//!
+//! If you have problems with assertion failures a breakpoint can be put at \ref assertionFailed() function
+//! (asmjit/core/globals.cpp). A call stack will be available when such assertion failure is triggered. AsmJit
+//! always returns errors on failures, assertions are a last resort and usually mean unrecoverable state due to out
+//! of range array access or totally invalid arguments like nullptr where a valid pointer should be provided, etc...
+ASMJIT_API void ASMJIT_NORETURN assertionFailed(const char* file, int line, const char* msg) noexcept;
+
+} // {DebugUtils}
+
+//! \def ASMJIT_ASSERT(...)
+//!
+//! AsmJit's own assert macro used in AsmJit code-base.
+#if defined(ASMJIT_BUILD_DEBUG)
+#define ASMJIT_ASSERT(...)                                                     \
+  do {                                                                         \
+    if (ASMJIT_LIKELY(__VA_ARGS__))                                            \
+      break;                                                                   \
+    ::asmjit::DebugUtils::assertionFailed(__FILE__, __LINE__, #__VA_ARGS__);   \
+  } while (0)
+#else
+#define ASMJIT_ASSERT(...) ((void)0)
+#endif
+
+//! \def ASMJIT_PROPAGATE(...)
+//!
+//! Propagates a possible `Error` produced by `...` to the caller by returning the error immediately. Used by AsmJit
+//! internally, but kept public for users that want to use the same technique to propagate errors to the caller.
+#define ASMJIT_PROPAGATE(...)               \
+  do {                                      \
+    ::asmjit::Error _err = __VA_ARGS__;     \
+    if (ASMJIT_UNLIKELY(_err))              \
+      return _err;                          \
+  } while (0)
+
+//! \}
+
+ASMJIT_END_NAMESPACE
+
+#endif // ASMJIT_CORE_GLOBALS_H_INCLUDED
diff --git a/lib/lepton/asmjit/core/inst.cpp b/lib/lepton/asmjit/core/inst.cpp
new file mode 100644
index 0000000000..8f29d8b758
--- /dev/null
+++ b/lib/lepton/asmjit/core/inst.cpp
@@ -0,0 +1,113 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#include "../core/archtraits.h"
+#include "../core/inst.h"
+
+#if !defined(ASMJIT_NO_X86)
+  #include "../x86/x86instapi_p.h"
+#endif
+
+#if !defined(ASMJIT_NO_AARCH64)
+  #include "../arm/a64instapi_p.h"
+#endif
+
+ASMJIT_BEGIN_NAMESPACE
+
+// InstAPI - InstId <-> String
+// ===========================
+
+#ifndef ASMJIT_NO_TEXT
+Error InstAPI::instIdToString(Arch arch, InstId instId, String& output) noexcept {
+#if !defined(ASMJIT_NO_X86) // Forward to the X86 backend when `arch` belongs to the X86 family.
+  if (Environment::isFamilyX86(arch))
+    return x86::InstInternal::instIdToString(arch, instId, output);
+#endif // !ASMJIT_NO_X86
+
+#if !defined(ASMJIT_NO_AARCH64) // Forward to the AArch64 backend when `arch` belongs to the AArch64 family.
+  if (Environment::isFamilyAArch64(arch))
+    return a64::InstInternal::instIdToString(arch, instId, output);
+#endif // !ASMJIT_NO_AARCH64
+
+  return DebugUtils::errored(kErrorInvalidArch); // `arch` is not handled by any backend compiled into this build.
+}
+
+InstId InstAPI::stringToInstId(Arch arch, const char* s, size_t len) noexcept {
+#if !defined(ASMJIT_NO_X86) // Let the X86 backend parse the name when `arch` belongs to the X86 family.
+  if (Environment::isFamilyX86(arch))
+    return x86::InstInternal::stringToInstId(arch, s, len);
+#endif // !ASMJIT_NO_X86
+
+#if !defined(ASMJIT_NO_AARCH64) // Let the AArch64 backend parse the name when `arch` belongs to the AArch64 family.
+  if (Environment::isFamilyAArch64(arch))
+    return a64::InstInternal::stringToInstId(arch, s, len);
+#endif // !ASMJIT_NO_AARCH64
+
+  return 0; // No backend handles `arch`; zero is the "no instruction" id (same value as BaseInst::kIdNone).
+}
+#endif // !ASMJIT_NO_TEXT
+
+// InstAPI - Validate
+// ==================
+
+#ifndef ASMJIT_NO_VALIDATION
+Error InstAPI::validate(Arch arch, const BaseInst& inst, const Operand_* operands, size_t opCount, ValidationFlags validationFlags) noexcept {
+#if !defined(ASMJIT_NO_X86) // Forward to the X86 validator when `arch` belongs to the X86 family.
+  if (Environment::isFamilyX86(arch))
+    return x86::InstInternal::validate(arch, inst, operands, opCount, validationFlags);
+#endif // !ASMJIT_NO_X86
+
+#if !defined(ASMJIT_NO_AARCH64) // Forward to the AArch64 validator when `arch` belongs to the AArch64 family.
+  if (Environment::isFamilyAArch64(arch))
+    return a64::InstInternal::validate(arch, inst, operands, opCount, validationFlags);
+#endif // !ASMJIT_NO_AARCH64
+
+  return DebugUtils::errored(kErrorInvalidArch); // `arch` is not handled by any backend compiled into this build.
+}
+#endif // !ASMJIT_NO_VALIDATION
+
+// InstAPI - QueryRWInfo
+// =====================
+
+#ifndef ASMJIT_NO_INTROSPECTION
+Error InstAPI::queryRWInfo(Arch arch, const BaseInst& inst, const Operand_* operands, size_t opCount, InstRWInfo* out) noexcept {
+  if (ASMJIT_UNLIKELY(opCount > Globals::kMaxOpCount)) // `InstRWInfo::_operands` holds Globals::kMaxOpCount entries.
+    return DebugUtils::errored(kErrorInvalidArgument);
+
+#if !defined(ASMJIT_NO_X86) // Forward to the X86 backend when `arch` belongs to the X86 family.
+  if (Environment::isFamilyX86(arch))
+    return x86::InstInternal::queryRWInfo(arch, inst, operands, opCount, out);
+#endif // !ASMJIT_NO_X86
+
+#if !defined(ASMJIT_NO_AARCH64) // Forward to the AArch64 backend when `arch` belongs to the AArch64 family.
+  if (Environment::isFamilyAArch64(arch))
+    return a64::InstInternal::queryRWInfo(arch, inst, operands, opCount, out);
+#endif // !ASMJIT_NO_AARCH64
+
+  return DebugUtils::errored(kErrorInvalidArch); // `arch` is not handled by any backend compiled into this build.
+}
+#endif // !ASMJIT_NO_INTROSPECTION
+
+// InstAPI - QueryFeatures
+// =======================
+
+#ifndef ASMJIT_NO_INTROSPECTION
+Error InstAPI::queryFeatures(Arch arch, const BaseInst& inst, const Operand_* operands, size_t opCount, CpuFeatures* out) noexcept {
+#if !defined(ASMJIT_NO_X86) // Forward to the X86 backend when `arch` belongs to the X86 family.
+  if (Environment::isFamilyX86(arch))
+    return x86::InstInternal::queryFeatures(arch, inst, operands, opCount, out);
+#endif // !ASMJIT_NO_X86
+
+#if !defined(ASMJIT_NO_AARCH64) // Forward to the AArch64 backend when `arch` belongs to the AArch64 family.
+  if (Environment::isFamilyAArch64(arch))
+    return a64::InstInternal::queryFeatures(arch, inst, operands, opCount, out);
+#endif // !ASMJIT_NO_AARCH64
+
+  return DebugUtils::errored(kErrorInvalidArch); // `arch` is not handled by any backend compiled into this build.
+}
+#endif // !ASMJIT_NO_INTROSPECTION
+
+ASMJIT_END_NAMESPACE
diff --git a/lib/lepton/asmjit/core/inst.h b/lib/lepton/asmjit/core/inst.h
new file mode 100644
index 0000000000..643678971a
--- /dev/null
+++ b/lib/lepton/asmjit/core/inst.h
@@ -0,0 +1,772 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_INST_H_INCLUDED
+#define ASMJIT_CORE_INST_H_INCLUDED
+
+#include "../core/cpuinfo.h"
+#include "../core/operand.h"
+#include "../core/string.h"
+#include "../core/support.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \addtogroup asmjit_instruction_db
+//! \{
+
+//! Describes an instruction id and modifiers used together with the id.
+//!
+//! Each architecture has a set of valid instructions indexed from 0. Instruction with 0 id is, however, a special
+//! instruction that describes a "no instruction" or "invalid instruction". Different architectures can assign a
+//! different instruction to the same id, each architecture typically has its own instructions indexed from 1.
+//!
+//! Instruction identifiers listed by architecture:
+//!
+//!   - \ref x86::Inst (X86 and X86_64)
+//!   - \ref a64::Inst (AArch64)
+typedef uint32_t InstId;
+
+//! Instruction id parts.
+//!
+//! A mask that specifies a bit-layout of \ref InstId.
+enum class InstIdParts : uint32_t {
+  // Common Masks
+  // ------------
+
+  //! Real id without any modifiers (always 16 least significant bits).
+  kRealId   = 0x0000FFFFu,
+  //! Instruction is abstract (or virtual, IR, etc...) - most significant bit, same value as `BaseInst::kIdAbstract`.
+  kAbstract = 0x80000000u,
+
+  // ARM Specific
+  // ------------
+
+  //! AArch32 first data type, used by ASIMD instructions (`inst.dt.dt2`) - bits 16 to 19.
+  kA32_DT   = 0x000F0000u,
+  //! AArch32 second data type, used by ASIMD instructions (`inst.dt.dt2`) - bits 20 to 23.
+  kA32_DT2  = 0x00F00000u,
+  //! AArch32/AArch64 condition code - bits 27 to 30.
+  kARM_Cond = 0x78000000u
+};
+
+//! Instruction options.
+//!
+//! Instruction options complement instruction identifier and attributes.
+enum class InstOptions : uint32_t {
+  //! No options.
+  kNone = 0,
+
+  //! Used internally by emitters for handling errors and rare cases.
+  kReserved = 0x00000001u,
+
+  //! Prevents following a jump during compilation (Compiler).
+  kUnfollow = 0x00000002u,
+
+  //! Overwrite the destination operand(s) (Compiler).
+  //!
+  //! Hint that is important for register liveness analysis. It tells the compiler that the destination operand will
+  //! be overwritten now or by adjacent instructions. Compiler knows when a register is completely overwritten by a
+  //! single instruction, for example you don't have to mark "movaps" or "pxor x, x", however, if a pair of
+  //! instructions is used and the first of them doesn't completely overwrite the content of the destination,
+  //! Compiler fails to mark that register as dead.
+  //!
+  //! X86 Specific
+  //! ------------
+  //!
+  //!   - All instructions that always overwrite at least the size of the register the virtual-register uses, for
+  //!     example "mov", "movq", "movaps" don't need the overwrite option to be used - conversion, shuffle, and
+  //!     other miscellaneous instructions included.
+  //!
+  //!   - All instructions that clear the destination register if all operands are the same, for example "xor x, x",
+  //!     "pcmpeqb x x", etc...
+  //!
+  //!   - Consecutive instructions that partially overwrite the variable until there is no old content require
+  //!     `BaseCompiler::overwrite()` to be used. Some examples (not always the best use cases though):
+  //!
+  //!     - `movlps xmm0, ?` followed by `movhps xmm0, ?` and vice versa
+  //!     - `movlpd xmm0, ?` followed by `movhpd xmm0, ?` and vice versa
+  //!     - `mov al, ?` followed by `and ax, 0xFF`
+  //!     - `mov al, ?` followed by `mov ah, al`
+  //!     - `pinsrq xmm0, ?, 0` followed by `pinsrq xmm0, ?, 1`
+  //!
+  //!   - If the allocated virtual register is used temporarily for scalar operations. For example if you allocate a
+  //!     full vector like `x86::Compiler::newXmm()` and then use that vector for scalar operations you should use
+  //!     `overwrite()` directive:
+  //!
+  //!     - `sqrtss x, y` - only LO element of `x` is changed, if you don't
+  //!       use HI elements, use `compiler.overwrite().sqrtss(x, y)`.
+  kOverwrite = 0x00000004u,
+
+  //! Emit short-form of the instruction.
+  kShortForm = 0x00000010u,
+  //! Emit long-form of the instruction.
+  kLongForm = 0x00000020u,
+
+  //! Conditional jump is likely to be taken.
+  kTaken = 0x00000040u,
+  //! Conditional jump is unlikely to be taken.
+  kNotTaken = 0x00000080u,
+
+  // X86 & X64 Options
+  // -----------------
+
+  //! Use ModMR instead of ModRM if applicable.
+  kX86_ModMR = 0x00000100u,
+  //! Use ModRM instead of ModMR if applicable.
+  kX86_ModRM = 0x00000200u,
+  //! Use 3-byte VEX prefix if possible (AVX) (must be 0x00000400).
+  kX86_Vex3 = 0x00000400u,
+  //! Use VEX prefix when both VEX|EVEX prefixes are available (HINT: AVX_VNNI).
+  kX86_Vex = 0x00000800u,
+  //! Use 4-byte EVEX prefix if possible (AVX-512) (must be 0x00001000).
+  kX86_Evex = 0x00001000u,
+
+  //! LOCK prefix (lock-enabled instructions only).
+  kX86_Lock = 0x00002000u,
+  //! REP prefix (string instructions only).
+  kX86_Rep = 0x00004000u,
+  //! REPNE prefix (string instructions only).
+  kX86_Repne = 0x00008000u,
+
+  //! XACQUIRE prefix (only allowed instructions).
+  kX86_XAcquire = 0x00010000u,
+  //! XRELEASE prefix (only allowed instructions).
+  kX86_XRelease = 0x00020000u,
+
+  //! AVX-512: embedded-rounding {er} and implicit {sae}.
+  kX86_ER = 0x00040000u,
+  //! AVX-512: suppress-all-exceptions {sae}.
+  kX86_SAE = 0x00080000u,
+  //! AVX-512: round-to-nearest (even) {rn-sae} (bits 00).
+  kX86_RN_SAE = 0x00000000u,
+  //! AVX-512: round-down (toward -inf) {rd-sae} (bits 01).
+  kX86_RD_SAE = 0x00200000u,
+  //! AVX-512: round-up (toward +inf) {ru-sae} (bits 10).
+  kX86_RU_SAE = 0x00400000u,
+  //! AVX-512: round-toward-zero (truncate) {rz-sae} (bits 11).
+  kX86_RZ_SAE = 0x00600000u,
+  //! AVX-512: Use zeroing {k}{z} instead of merging {k}.
+  kX86_ZMask = 0x00800000u,
+
+  //! AVX-512: Mask to get embedded rounding bits (2 bits).
+  kX86_ERMask = kX86_RZ_SAE,
+  //! AVX-512: Mask of all possible AVX-512 options except EVEX prefix flag.
+  kX86_AVX512Mask = 0x00FC0000u,
+
+  //! Force REX.B and/or VEX.B field (X64 only).
+  kX86_OpCodeB = 0x01000000u,
+  //! Force REX.X and/or VEX.X field (X64 only).
+  kX86_OpCodeX = 0x02000000u,
+  //! Force REX.R and/or VEX.R field (X64 only).
+  kX86_OpCodeR = 0x04000000u,
+  //! Force REX.W and/or VEX.W field (X64 only).
+  kX86_OpCodeW = 0x08000000u,
+  //! Force REX prefix (X64 only).
+  kX86_Rex = 0x40000000u,
+  //! Invalid REX prefix (set by X86 or when AH|BH|CH|DH regs are used on X64).
+  kX86_InvalidRex = 0x80000000u
+};
+ASMJIT_DEFINE_ENUM_FLAGS(InstOptions)
+
+//! Instruction control flow.
+enum class InstControlFlow : uint32_t {
+  //! Regular instruction.
+  kRegular = 0u,
+  //! Unconditional jump.
+  kJump = 1u,
+  //! Conditional jump (branch).
+  kBranch = 2u,
+  //! Function call.
+  kCall = 3u,
+  //! Function return.
+  kReturn = 4u,
+
+  //! Maximum value of `InstControlFlow`.
+  kMaxValue = kReturn
+};
+
+//! Hint that is used when both input operands to the instruction are the same.
+//!
+//! Provides hints to the instruction RW query regarding special cases in which two or more operands are the same
+//! registers. This is required by instructions such as XOR, AND, OR, SUB, etc... These hints will influence the
+//! RW operations query.
+enum class InstSameRegHint : uint8_t {
+  //! No special handling.
+  kNone = 0,
+  //! Operands become read-only, the operation doesn't change the content - `X & X` and similar.
+  kRO = 1,
+  //! Operands become write-only, the content of the input(s) doesn't matter - `X ^ X`, `X - X`, and similar.
+  kWO = 2
+};
+
+//! Instruction id, options, and extraReg in a single structure. This structure exists mainly to simplify analysis
+//! and validation API that requires `BaseInst` and `Operand[]` array.
+class BaseInst {
+public:
+  //! \name Members
+  //! \{
+
+  //! Instruction id with modifiers.
+  InstId _id;
+  //! Instruction options.
+  InstOptions _options;
+  //! Extra register used by the instruction (either REP register or AVX-512 selector).
+  RegOnly _extraReg;
+
+  enum Id : uint32_t {
+    //! Invalid or uninitialized instruction id.
+    kIdNone = 0x00000000u,
+    //! Abstract instruction (BaseBuilder and BaseCompiler).
+    kIdAbstract = 0x80000000u
+  };
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Creates a new BaseInst instance with `instId` and `options` set.
+  //!
+  //! Default values of `instId` and `options` are zero, which means 'none' instruction. Such instruction is guaranteed
+  //! to never exist for any architecture supported by AsmJit.
+  inline explicit BaseInst(InstId instId = 0, InstOptions options = InstOptions::kNone) noexcept
+    : _id(instId),
+      _options(options),
+      _extraReg() {}
+  //! Creates a new BaseInst instance with `instId`, `options`, and a copy of `extraReg`.
+  inline BaseInst(InstId instId, InstOptions options, const RegOnly& extraReg) noexcept
+    : _id(instId),
+      _options(options),
+      _extraReg(extraReg) {}
+  //! Creates a new BaseInst instance whose extra register is built from the signature and id of `extraReg`.
+  inline BaseInst(InstId instId, InstOptions options, const BaseReg& extraReg) noexcept
+    : _id(instId),
+      _options(options),
+      _extraReg { extraReg.signature(), extraReg.id() } {}
+
+  //! \}
+
+  //! \name Instruction id and modifiers
+  //! \{
+
+  //! Returns the instruction id with modifiers.
+  inline InstId id() const noexcept { return _id; }
+  //! Sets the instruction id and modifiers from `id`.
+  inline void setId(InstId id) noexcept { _id = id; }
+  //! Resets the instruction id and modifiers to zero, see \ref kIdNone.
+  inline void resetId() noexcept { _id = 0; }
+
+  //! Returns a real instruction id that doesn't contain any modifiers.
+  inline InstId realId() const noexcept { return _id & uint32_t(InstIdParts::kRealId); }
+  //! Returns the `kPart` bits of the id shifted right by `Support::ConstCTZ` of the mask (presumably trailing zeros).
+  template<InstIdParts kPart>
+  inline uint32_t getInstIdPart() const noexcept {
+    return (uint32_t(_id) & uint32_t(kPart)) >> Support::ConstCTZ<uint32_t(kPart)>::value;
+  }
+  //! Replaces the `kPart` bits of the id with `value` shifted into place; `value` itself is not masked to the part.
+  template<InstIdParts kPart>
+  inline void setInstIdPart(uint32_t value) noexcept {
+    _id = (_id & ~uint32_t(kPart)) | (value << Support::ConstCTZ<uint32_t(kPart)>::value);
+  }
+
+  //! \}
+
+  //! \name Instruction Options
+  //! \{
+  // Accessors for `_options`: query, test, replace, add (`|=`), clear (`&= ~`), and reset to `InstOptions::kNone`.
+  inline InstOptions options() const noexcept { return _options; }
+  inline bool hasOption(InstOptions option) const noexcept { return Support::test(_options, option); }
+  inline void setOptions(InstOptions options) noexcept { _options = options; }
+  inline void addOptions(InstOptions options) noexcept { _options |= options; }
+  inline void clearOptions(InstOptions options) noexcept { _options &= ~options; }
+  inline void resetOptions() noexcept { _options = InstOptions::kNone; }
+
+  //! \}
+
+  //! \name Extra Register
+  //! \{
+  // Accessors for `_extraReg`: test via `RegOnly::isReg()`, access, (re)initialize from a register, and reset.
+  inline bool hasExtraReg() const noexcept { return _extraReg.isReg(); }
+  inline RegOnly& extraReg() noexcept { return _extraReg; }
+  inline const RegOnly& extraReg() const noexcept { return _extraReg; }
+  inline void setExtraReg(const BaseReg& reg) noexcept { _extraReg.init(reg); }
+  inline void setExtraReg(const RegOnly& reg) noexcept { _extraReg.init(reg); }
+  inline void resetExtraReg() noexcept { _extraReg.reset(); }
+
+  //! \}
+
+  //! \name ARM Specific
+  //! \{
+  // Read / write the condition code stored in the `InstIdParts::kARM_Cond` bits of the instruction id.
+  inline arm::CondCode armCondCode() const noexcept { return (arm::CondCode)getInstIdPart<InstIdParts::kARM_Cond>(); }
+  inline void setArmCondCode(arm::CondCode cc) noexcept { setInstIdPart<InstIdParts::kARM_Cond>(uint32_t(cc)); }
+
+  //! \}
+
+  //! \name Statics
+  //! \{
+  // Helpers that compose and decompose raw instruction ids without needing a `BaseInst` instance.
+  static inline constexpr InstId composeARMInstId(uint32_t id, arm::CondCode cc) noexcept {
+    return id | (uint32_t(cc) << Support::ConstCTZ<uint32_t(InstIdParts::kARM_Cond)>::value);
+  }
+
+  static inline constexpr InstId extractRealId(uint32_t id) noexcept {
+    return id & uint32_t(InstIdParts::kRealId);
+  }
+
+  static inline constexpr arm::CondCode extractARMCondCode(uint32_t id) noexcept {
+    return (arm::CondCode)((uint32_t(id) & uint32_t(InstIdParts::kARM_Cond)) >> Support::ConstCTZ<uint32_t(InstIdParts::kARM_Cond)>::value);
+  }
+
+  //! \}
+};
+
+//! CPU read/write flags used by \ref InstRWInfo.
+//!
+//! These flags can be used to get a basic overview about CPU specific flags used by instructions.
+enum class CpuRWFlags : uint32_t {
+  //! No flags.
+  kNone = 0x00000000u,
+
+  // Common RW Flags (0x000000FF)
+  // ----------------------------
+
+  //! Carry flag.
+  kCF = 0x00000001u,
+  //! Signed overflow flag.
+  kOF = 0x00000002u,
+  //! Sign flag (negative/sign, if set).
+  kSF = 0x00000004u,
+  //! Zero and/or equality flag (1 if zero/equal).
+  kZF = 0x00000008u,
+
+  // X86 Specific RW Flags (0xFFFFFF00)
+  // ----------------------------------
+
+  //! Carry flag (X86, X86_64).
+  kX86_CF = kCF,
+  //! Overflow flag (X86, X86_64).
+  kX86_OF = kOF,
+  //! Sign flag (X86, X86_64).
+  kX86_SF = kSF,
+  //! Zero flag (X86, X86_64).
+  kX86_ZF = kZF,
+
+  //! Adjust flag (X86, X86_64).
+  kX86_AF = 0x00000100u,
+  //! Parity flag (X86, X86_64).
+  kX86_PF = 0x00000200u,
+  //! Direction flag (X86, X86_64).
+  kX86_DF = 0x00000400u,
+  //! Interrupt enable flag (X86, X86_64).
+  kX86_IF = 0x00000800u,
+
+  //! Alignment check flag (X86, X86_64).
+  kX86_AC = 0x00001000u,
+
+  //! FPU C0 status flag (X86, X86_64).
+  kX86_C0 = 0x00010000u,
+  //! FPU C1 status flag (X86, X86_64).
+  kX86_C1 = 0x00020000u,
+  //! FPU C2 status flag (X86, X86_64).
+  kX86_C2 = 0x00040000u,
+  //! FPU C3 status flag (X86, X86_64).
+  kX86_C3 = 0x00080000u
+};
+ASMJIT_DEFINE_ENUM_FLAGS(CpuRWFlags)
+
+//! Operand read/write flags describe how the operand is accessed and some additional features.
+enum class OpRWFlags : uint32_t {
+  //! No flags.
+  kNone = 0,
+
+  //! Operand is read.
+  kRead = 0x00000001u,
+
+  //! Operand is written.
+  kWrite = 0x00000002u,
+
+  //! Operand is both read and written.
+  kRW = 0x00000003u,
+
+  //! Register operand can be replaced by a memory operand.
+  kRegMem = 0x00000004u,
+
+  //! The register must be allocated to the index of the previous register + 1.
+  //!
+  //! This flag is used by all architectures to describe instructions that use consecutive registers, where only the
+  //! first one is encoded in the instruction, and the others are just a sequence that starts with the first one. On
+  //! X86/X86_64 architecture this is used by instructions such as V4FMADDPS, V4FMADDSS, V4FNMADDPS, V4FNMADDSS,
+  //! VP4DPWSSD, VP4DPWSSDS, VP2INTERSECTD, and VP2INTERSECTQ. On ARM/AArch64 this is used by vector load and store
+  //! instructions that can load or store multiple registers at once.
+  kConsecutive = 0x00000008u,
+
+  //! The `extendByteMask()` represents a zero extension.
+  kZExt = 0x00000010u,
+
+  //! Register operand must use \ref OpRWInfo::physId().
+  kRegPhysId = 0x00000100u,
+  //! Base register of a memory operand must use \ref OpRWInfo::physId().
+  kMemPhysId = 0x00000200u,
+
+  //! This memory operand is only used to encode registers and doesn't access memory.
+  //!
+  //! X86 Specific
+  //! ------------
+  //!
+  //! Instructions that use such feature include BNDLDX, BNDSTX, and LEA.
+  kMemFake = 0x00000400u,
+
+  //! Base register of the memory operand will be read.
+  kMemBaseRead = 0x00001000u,
+  //! Base register of the memory operand will be written.
+  kMemBaseWrite = 0x00002000u,
+  //! Base register of the memory operand will be read & written.
+  kMemBaseRW = 0x00003000u,
+
+  //! Index register of the memory operand will be read.
+  kMemIndexRead = 0x00004000u,
+  //! Index register of the memory operand will be written.
+  kMemIndexWrite = 0x00008000u,
+  //! Index register of the memory operand will be read & written.
+  kMemIndexRW = 0x0000C000u,
+
+  //! Base register of the memory operand will be modified before the operation.
+  kMemBasePreModify = 0x00010000u,
+  //! Base register of the memory operand will be modified after the operation.
+  kMemBasePostModify = 0x00020000u
+};
+ASMJIT_DEFINE_ENUM_FLAGS(OpRWFlags)
+
+// Don't remove these asserts. Read/Write flags are used extensively
+// by Compiler and they must always be compatible with constants below.
+static_assert(uint32_t(OpRWFlags::kRead) == 0x1, "OpRWFlags::kRead flag must be 0x1");
+static_assert(uint32_t(OpRWFlags::kWrite) == 0x2, "OpRWFlags::kWrite flag must be 0x2");
+static_assert(uint32_t(OpRWFlags::kRegMem) == 0x4, "OpRWFlags::kRegMem flag must be 0x4");
+
+//! Read/Write information related to a single operand, used by \ref InstRWInfo.
+struct OpRWInfo {
+  //! \name Members
+  //! \{
+
+  //! Read/Write flags.
+  OpRWFlags _opFlags;
+  //! Physical register index, if required.
+  uint8_t _physId;
+  //! Size of a possible memory operand that can replace a register operand.
+  uint8_t _rmSize;
+  //! If non-zero, then this is a consecutive lead register, and the value describes how many registers follow.
+  uint8_t _consecutiveLeadCount;
+  //! Reserved for future use.
+  uint8_t _reserved[1];
+  //! Read bit-mask where each bit represents one byte read from Reg/Mem.
+  uint64_t _readByteMask;
+  //! Write bit-mask where each bit represents one byte written to Reg/Mem.
+  uint64_t _writeByteMask;
+  //! Zero/Sign extend bit-mask where each bit represents one byte written to Reg/Mem.
+  uint64_t _extendByteMask;
+
+  //! \}
+
+  //! \name Reset
+  //! \{
+
+  //! Resets this operand information to all zeros.
+  inline void reset() noexcept { memset(this, 0, sizeof(*this)); }
+
+  //! Resets this operand info (resets all members) and set common information
+  //! to the given `opFlags`, `regSize`, and possibly `physId`.
+  inline void reset(OpRWFlags opFlags, uint32_t regSize, uint32_t physId = BaseReg::kIdBad) noexcept {
+    _opFlags = opFlags;
+    _physId = uint8_t(physId);
+    _rmSize = Support::test(opFlags, OpRWFlags::kRegMem) ? uint8_t(regSize) : uint8_t(0);
+    _consecutiveLeadCount = 0;
+    _resetReserved();
+
+    uint64_t mask = Support::lsbMask<uint64_t>(regSize);
+    _readByteMask = Support::test(opFlags, OpRWFlags::kRead) ? mask : uint64_t(0);
+    _writeByteMask = Support::test(opFlags, OpRWFlags::kWrite) ? mask : uint64_t(0);
+    _extendByteMask = 0;
+  }
+  //! Zeroes the reserved member.
+  inline void _resetReserved() noexcept {
+    _reserved[0] = 0;
+  }
+
+  //! \}
+
+  //! \name Operand Flags
+  //! \{
+
+  //! Returns operand flags.
+  inline OpRWFlags opFlags() const noexcept { return _opFlags; }
+  //! Tests whether operand flags contain the given `flag`.
+  inline bool hasOpFlag(OpRWFlags flag) const noexcept { return Support::test(_opFlags, flag); }
+
+  //! Adds the given `flags` to operand flags.
+  inline void addOpFlags(OpRWFlags flags) noexcept { _opFlags |= flags; }
+  //! Removes the given `flags` from operand flags.
+  inline void clearOpFlags(OpRWFlags flags) noexcept { _opFlags &= ~flags; }
+
+  //! Tests whether this operand is read from.
+  inline bool isRead() const noexcept { return hasOpFlag(OpRWFlags::kRead); }
+  //! Tests whether this operand is written to.
+  inline bool isWrite() const noexcept { return hasOpFlag(OpRWFlags::kWrite); }
+  //! Tests whether this operand is both read and write.
+  inline bool isReadWrite() const noexcept { return (_opFlags & OpRWFlags::kRW) == OpRWFlags::kRW; }
+  //! Tests whether this operand is read only.
+  inline bool isReadOnly() const noexcept { return (_opFlags & OpRWFlags::kRW) == OpRWFlags::kRead; }
+  //! Tests whether this operand is write only.
+  inline bool isWriteOnly() const noexcept { return (_opFlags & OpRWFlags::kRW) == OpRWFlags::kWrite; }
+
+  //! Returns the consecutive lead count - if non-zero this is a lead register and the value is how many registers follow.
+  inline uint32_t consecutiveLeadCount() const noexcept { return _consecutiveLeadCount; }
+
+  //! Tests whether this operand is Reg/Mem
+  //!
+  //! Reg/Mem operands can use either register or memory.
+  inline bool isRm() const noexcept { return hasOpFlag(OpRWFlags::kRegMem); }
+
+  //! Tests whether the operand will be zero extended.
+  inline bool isZExt() const noexcept { return hasOpFlag(OpRWFlags::kZExt); }
+
+  //! \}
+
+  //! \name Memory Flags
+  //! \{
+
+  //! Tests whether this is a fake memory operand, which is only used, because of encoding. Fake memory operands do
+  //! not access any memory, they are only used to encode registers.
+  inline bool isMemFake() const noexcept { return hasOpFlag(OpRWFlags::kMemFake); }
+
+  //! Tests whether the instruction's memory BASE register is used.
+  inline bool isMemBaseUsed() const noexcept { return hasOpFlag(OpRWFlags::kMemBaseRW); }
+  //! Tests whether the instruction reads from its BASE registers.
+  inline bool isMemBaseRead() const noexcept { return hasOpFlag(OpRWFlags::kMemBaseRead); }
+  //! Tests whether the instruction writes to its BASE registers.
+  inline bool isMemBaseWrite() const noexcept { return hasOpFlag(OpRWFlags::kMemBaseWrite); }
+  //! Tests whether the instruction reads and writes from/to its BASE registers.
+  inline bool isMemBaseReadWrite() const noexcept { return (_opFlags & OpRWFlags::kMemBaseRW) == OpRWFlags::kMemBaseRW; }
+  //! Tests whether the instruction only reads from its BASE registers.
+  inline bool isMemBaseReadOnly() const noexcept { return (_opFlags & OpRWFlags::kMemBaseRW) == OpRWFlags::kMemBaseRead; }
+  //! Tests whether the instruction only writes to its BASE registers.
+  inline bool isMemBaseWriteOnly() const noexcept { return (_opFlags & OpRWFlags::kMemBaseRW) == OpRWFlags::kMemBaseWrite; }
+
+  //! Tests whether the instruction modifies the BASE register before it uses it to calculate the target address.
+  inline bool isMemBasePreModify() const noexcept { return hasOpFlag(OpRWFlags::kMemBasePreModify); }
+  //! Tests whether the instruction modifies the BASE register after it uses it to calculate the target address.
+  inline bool isMemBasePostModify() const noexcept { return hasOpFlag(OpRWFlags::kMemBasePostModify); }
+
+  //! Tests whether the instruction's memory INDEX register is used.
+  inline bool isMemIndexUsed() const noexcept { return hasOpFlag(OpRWFlags::kMemIndexRW); }
+  //! Tests whether the instruction reads the INDEX registers.
+  inline bool isMemIndexRead() const noexcept { return hasOpFlag(OpRWFlags::kMemIndexRead); }
+  //! Tests whether the instruction writes to its INDEX registers.
+  inline bool isMemIndexWrite() const noexcept { return hasOpFlag(OpRWFlags::kMemIndexWrite); }
+  //! Tests whether the instruction reads and writes from/to its INDEX registers.
+  inline bool isMemIndexReadWrite() const noexcept { return (_opFlags & OpRWFlags::kMemIndexRW) == OpRWFlags::kMemIndexRW; }
+  //! Tests whether the instruction only reads from its INDEX registers.
+  inline bool isMemIndexReadOnly() const noexcept { return (_opFlags & OpRWFlags::kMemIndexRW) == OpRWFlags::kMemIndexRead; }
+  //! Tests whether the instruction only writes to its INDEX registers.
+  inline bool isMemIndexWriteOnly() const noexcept { return (_opFlags & OpRWFlags::kMemIndexRW) == OpRWFlags::kMemIndexWrite; }
+
+  //! \}
+
+  //! \name Physical Register ID
+  //! \{
+
+  //! Returns a physical id of the register that is fixed for this operand.
+  //!
+  //! Returns \ref BaseReg::kIdBad if any register can be used.
+  inline uint32_t physId() const noexcept { return _physId; }
+  //! Tests whether \ref physId() would return a valid physical register id.
+  inline bool hasPhysId() const noexcept { return _physId != BaseReg::kIdBad; }
+  //! Sets physical register id, which would be fixed for this operand.
+  inline void setPhysId(uint32_t physId) noexcept { _physId = uint8_t(physId); }
+
+  //! \}
+
+  //! \name Reg/Mem Information
+  //! \{
+
+  //! Returns Reg/Mem size of the operand.
+  inline uint32_t rmSize() const noexcept { return _rmSize; }
+  //! Sets Reg/Mem size of the operand.
+  inline void setRmSize(uint32_t rmSize) noexcept { _rmSize = uint8_t(rmSize); }
+
+  //! \}
+
+  //! \name Read & Write Masks
+  //! \{
+
+  //! Returns read mask.
+  inline uint64_t readByteMask() const noexcept { return _readByteMask; }
+  //! Returns write mask.
+  inline uint64_t writeByteMask() const noexcept { return _writeByteMask; }
+  //! Returns extend mask.
+  inline uint64_t extendByteMask() const noexcept { return _extendByteMask; }
+
+  //! Sets read mask.
+  inline void setReadByteMask(uint64_t mask) noexcept { _readByteMask = mask; }
+  //! Sets write mask.
+  inline void setWriteByteMask(uint64_t mask) noexcept { _writeByteMask = mask; }
+  //! Sets extend mask.
+  inline void setExtendByteMask(uint64_t mask) noexcept { _extendByteMask = mask; }
+
+  //! \}
+};
+
+//! Flags used by \ref InstRWInfo.
+enum class InstRWFlags : uint32_t {
+  //! No flags.
+  kNone = 0x00000000u,
+
+  //! Describes a move operation, see \ref InstRWInfo::isMovOp().
+  //!
+  //! This flag is used by RA to eliminate moves that are guaranteed to be moves only.
+  kMovOp = 0x00000001u
+};
+ASMJIT_DEFINE_ENUM_FLAGS(InstRWFlags)
+
+//! Read/Write information of an instruction.
+struct InstRWInfo {
+  //! \name Members
+  //! \{
+
+  //! Instruction flags, see \ref InstRWFlags.
+  InstRWFlags _instFlags;
+  //! CPU flags read.
+  CpuRWFlags _readFlags;
+  //! CPU flags written.
+  CpuRWFlags _writeFlags;
+  //! Count of operands.
+  uint8_t _opCount;
+  //! CPU feature required for replacing register operand with memory operand.
+  uint8_t _rmFeature;
+  //! Reserved for future use.
+  uint8_t _reserved[18];
+  //! Read/Write info of extra register (rep{} or kz{}).
+  OpRWInfo _extraReg;
+  //! Read/Write info of instruction operands.
+  OpRWInfo _operands[Globals::kMaxOpCount];
+
+  //! \}
+
+  //! \name Commons
+  //! \{
+
+  //! Resets this RW information to all zeros.
+  inline void reset() noexcept { memset(this, 0, sizeof(*this)); }
+
+  //! \}
+
+  //! \name Instruction Flags
+  //! \{
+
+  //! Returns flags associated with the instruction, see \ref InstRWFlags.
+  inline InstRWFlags instFlags() const noexcept { return _instFlags; }
+
+  //! Tests whether the instruction flags contain `flag`.
+  inline bool hasInstFlag(InstRWFlags flag) const noexcept { return Support::test(_instFlags, flag); }
+
+  //! Tests whether the instruction flags contain \ref InstRWFlags::kMovOp.
+  inline bool isMovOp() const noexcept { return hasInstFlag(InstRWFlags::kMovOp); }
+
+  //! \}
+
+  //! \name CPU Flags Information
+  //! \{
+
+  //! Returns a mask of CPU flags read.
+  inline CpuRWFlags readFlags() const noexcept { return _readFlags; }
+  //! Returns a mask of CPU flags written.
+  inline CpuRWFlags writeFlags() const noexcept { return _writeFlags; }
+
+  //! \}
+
+  //! \name Reg/Mem Information
+  //! \{
+
+  //! Returns the CPU feature required to replace a register operand with memory operand. If the returned feature is
+  //! zero (none) then this instruction either doesn't provide memory operand combination or there is no extra CPU
+  //! feature required.
+  //!
+  //! X86 Specific
+  //! ------------
+  //!
+  //! Some AVX+ instructions may require extra features for replacing registers with memory operands, for example
+  //! VPSLLDQ instruction only supports `vpslldq reg, reg, imm` combination on AVX/AVX2 capable CPUs and requires
+  //! AVX-512 for `vpslldq reg, mem, imm` combination.
+  inline uint32_t rmFeature() const noexcept { return _rmFeature; }
+
+  //! \}
+
+  //! \name Operand Read/Write Information
+  //! \{
+
+  //! Returns RW information of extra register operand (extraReg).
+  inline const OpRWInfo& extraReg() const noexcept { return _extraReg; }
+
+  //! Returns RW information of all instruction's operands.
+  inline const OpRWInfo* operands() const noexcept { return _operands; }
+
+  //! Returns RW information of the operand at the given `index`.
+  inline const OpRWInfo& operand(size_t index) const noexcept {
+    ASMJIT_ASSERT(index < Globals::kMaxOpCount);
+    return _operands[index];
+  }
+
+  //! Returns the number of operands this instruction has.
+  inline uint32_t opCount() const noexcept { return _opCount; }
+
+  //! \}
+};
+
+//! Validation flags that can be used with \ref InstAPI::validate().
+enum class ValidationFlags : uint32_t {
+  //! No flags (the default value of the `validationFlags` argument of \ref InstAPI::validate()).
+  kNone = 0,
+  //! Allow virtual registers in the instruction.
+  kEnableVirtRegs = 0x01u
+};
+ASMJIT_DEFINE_ENUM_FLAGS(ValidationFlags)
+
+//! Instruction API.
+namespace InstAPI {
+
+#ifndef ASMJIT_NO_TEXT
+//! Appends the name of the instruction specified by `instId` into the `output` string.
+//!
+//! \note The signature takes no `instOptions`; the text is derived from `arch` and `instId` only.
+//! NOTE(review): whether modifiers encoded in `instId` (see \ref InstId) affect the text is not visible here - confirm.
+ASMJIT_API Error instIdToString(Arch arch, InstId instId, String& output) noexcept;
+
+//! Parses an instruction name in the given string `s`. Length is specified by `len` argument, which can be
+//! `SIZE_MAX` if `s` is known to be null terminated.
+//!
+//! Returns the parsed instruction id or \ref BaseInst::kIdNone if no such instruction exists.
+ASMJIT_API InstId stringToInstId(Arch arch, const char* s, size_t len) noexcept;
+#endif // !ASMJIT_NO_TEXT
+
+#ifndef ASMJIT_NO_VALIDATION
+//! Validates the given instruction considering the given `validationFlags`.
+ASMJIT_API Error validate(Arch arch, const BaseInst& inst, const Operand_* operands, size_t opCount, ValidationFlags validationFlags = ValidationFlags::kNone) noexcept;
+#endif // !ASMJIT_NO_VALIDATION
+
+#ifndef ASMJIT_NO_INTROSPECTION
+//! Gets Read/Write information of the given instruction.
+ASMJIT_API Error queryRWInfo(Arch arch, const BaseInst& inst, const Operand_* operands, size_t opCount, InstRWInfo* out) noexcept;
+
+//! Gets CPU features required by the given instruction.
+ASMJIT_API Error queryFeatures(Arch arch, const BaseInst& inst, const Operand_* operands, size_t opCount, CpuFeatures* out) noexcept;
+#endif // !ASMJIT_NO_INTROSPECTION
+
+} // {InstAPI}
+
+//! \}
+
+ASMJIT_END_NAMESPACE
+
+#endif // ASMJIT_CORE_INST_H_INCLUDED
diff --git a/lib/lepton/asmjit/core/jitallocator.cpp b/lib/lepton/asmjit/core/jitallocator.cpp
new file mode 100644
index 0000000000..19fbe4b233
--- /dev/null
+++ b/lib/lepton/asmjit/core/jitallocator.cpp
@@ -0,0 +1,1242 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#ifndef ASMJIT_NO_JIT
+
+#include "../core/archtraits.h"
+#include "../core/jitallocator.h"
+#include "../core/osutils_p.h"
+#include "../core/support.h"
+#include "../core/virtmem.h"
+#include "../core/zone.h"
+#include "../core/zonelist.h"
+#include "../core/zonetree.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+// JitAllocator - Constants
+// ========================
+
+//! Number of pools to use when `JitAllocatorOptions::kUseMultiplePools` is set.
+//!
+//! Each successive pool doubles the allocation granularity to make memory
+//! management more efficient. The ideal number of pools appears to be 3 to 4
+//! as it distributes small and large functions properly.
+static constexpr uint32_t kJitAllocatorMultiPoolCount = 3;
+
+//! Minimum granularity (and the default granularity for pool #0).
+static constexpr uint32_t kJitAllocatorBaseGranularity = 64;
+
+//! Maximum block size (32MB).
+static constexpr uint32_t kJitAllocatorMaxBlockSize = 1024 * 1024 * 32;
+
+// JitAllocator - Fill Pattern
+// ===========================
+
+static inline uint32_t JitAllocator_defaultFillPattern() noexcept {
+  // Default pattern used when the user doesn't supply a custom one:
+  //   - X86 and X86_64 - 4x 'int3' instruction (0xCC in every byte).
+  //   - Anything else  - unknown, so zero is used.
+  constexpr uint32_t kInt3x4 = 0xCCCCCCCCu;
+  constexpr uint32_t kUnknown = 0u;
+  return ASMJIT_ARCH_X86 ? kInt3x4 : kUnknown;
+}
+
+// JitAllocator - BitVectorRangeIterator
+// =====================================
+
+template<typename T, uint32_t B> // T: bit-word type; B: the bit value (0 or 1) whose runs are iterated.
+class BitVectorRangeIterator { // Yields [start, end) ranges of consecutive bits equal to `B`.
+public:
+  const T* _ptr; // Word that `_bitWord` was loaded from.
+  size_t _idx; // Bit index corresponding to bit 0 of `*_ptr`.
+  size_t _end; // Bit index at which iteration stops (exclusive).
+  T _bitWord; // Remaining bits of the current word, XORed with `kXorMask`.
+
+  enum : uint32_t { kBitWordSize = Support::bitSizeOf<T>() };
+  enum : T { kXorMask = B == 0 ? Support::allOnes<T>() : T(0) }; // Inverts loaded words when B == 0, so searched bits always read as 1.
+
+  ASMJIT_FORCE_INLINE BitVectorRangeIterator(const T* data, size_t numBitWords) noexcept {
+    init(data, numBitWords);
+  }
+
+  ASMJIT_FORCE_INLINE BitVectorRangeIterator(const T* data, size_t numBitWords, size_t start, size_t end) noexcept {
+    init(data, numBitWords, start, end);
+  }
+
+  ASMJIT_FORCE_INLINE void init(const T* data, size_t numBitWords) noexcept {
+    init(data, numBitWords, 0, numBitWords * kBitWordSize); // Iterate over the whole bit-vector.
+  }
+
+  ASMJIT_FORCE_INLINE void init(const T* data, size_t numBitWords, size_t start, size_t end) noexcept {
+    ASMJIT_ASSERT(numBitWords >= (end + kBitWordSize - 1) / kBitWordSize);
+    DebugUtils::unused(numBitWords);
+
+    size_t idx = Support::alignDown(start, kBitWordSize);
+    const T* ptr = data + (idx / kBitWordSize);
+
+    T bitWord = 0;
+    if (idx < end)
+      bitWord = (*ptr ^ kXorMask) & (Support::allOnes<T>() << (start % kBitWordSize)); // Drop the bits below `start`.
+
+    _ptr = ptr;
+    _idx = idx;
+    _end = end;
+    _bitWord = bitWord;
+  }
+
+  ASMJIT_FORCE_INLINE bool nextRange(size_t* rangeStart, size_t* rangeEnd, size_t rangeHint = std::numeric_limits<size_t>::max()) noexcept {
+    // Skip all empty BitWords; returns false when the end is reached without finding another range.
+    while (_bitWord == 0) {
+      _idx += kBitWordSize;
+      if (_idx >= _end)
+        return false;
+      _bitWord = (*++_ptr) ^ kXorMask;
+    }
+
+    size_t i = Support::ctz(_bitWord); // First bit of the range within the current word.
+
+    *rangeStart = _idx + i;
+    _bitWord = ~(_bitWord ^ ~(Support::allOnes<T>() << i)); // Invert so the first bit past the range becomes the lowest set bit.
+
+    if (_bitWord == 0) { // The range reaches the end of the current word, so it may continue in the following words.
+      *rangeEnd = Support::min(_idx + kBitWordSize, _end);
+      while (*rangeEnd - *rangeStart < rangeHint) { // Stop extending once the range is at least `rangeHint` bits long.
+        _idx += kBitWordSize;
+        if (_idx >= _end)
+          break;
+
+        _bitWord = (*++_ptr) ^ kXorMask;
+        if (_bitWord != Support::allOnes<T>()) { // The range ends inside this word.
+          size_t j = Support::ctz(~_bitWord);
+          *rangeEnd = Support::min(_idx + j, _end);
+          _bitWord = _bitWord ^ ~(Support::allOnes<T>() << j); // Clear the `j` low bits consumed by the range.
+          break;
+        }
+
+        *rangeEnd = Support::min(_idx + kBitWordSize, _end); // The whole word belongs to the range.
+        _bitWord = 0;
+        continue;
+      }
+
+      return true;
+    }
+    else {
+      size_t j = Support::ctz(_bitWord); // The range ends at bit `j` of the current word.
+      *rangeEnd = Support::min(_idx + j, _end);
+
+      _bitWord = ~(_bitWord ^ ~(Support::allOnes<T>() << j)); // Restore polarity with the `j` low bits cleared.
+      return true;
+    }
+  }
+};
+
+// JitAllocator - Pool
+// ===================
+
+class JitAllocatorBlock;
+
+class JitAllocatorPool {
+public:
+  ASMJIT_NONCOPYABLE(JitAllocatorPool)
+
+  //! Double linked list of blocks.
+  ZoneList<JitAllocatorBlock> blocks;
+  //! Where to start looking first.
+  JitAllocatorBlock* cursor;
+
+  //! Count of blocks.
+  uint32_t blockCount;
+  //! Allocation granularity (bytes per one bit of a block's bit-vector).
+  uint16_t granularity;
+  //! Log2(granularity).
+  uint8_t granularityLog2;
+  //! Count of empty blocks (either 0 or 1 as we won't keep more blocks empty).
+  uint8_t emptyBlockCount;
+
+  //! Number of bits reserved across all blocks.
+  size_t totalAreaSize;
+  //! Number of bits used across all blocks.
+  size_t totalAreaUsed;
+  //! Overhead of all blocks (in bytes).
+  size_t totalOverheadBytes;
+
+  inline JitAllocatorPool(uint32_t granularity) noexcept
+    : blocks(),
+      cursor(nullptr),
+      blockCount(0),
+      granularity(uint16_t(granularity)),
+      granularityLog2(uint8_t(Support::ctz(granularity))),
+      emptyBlockCount(0),
+      totalAreaSize(0),
+      totalAreaUsed(0),
+      totalOverheadBytes(0) {}
+
+  //! Resets the block list and all counters; the blocks themselves are not deleted here (see JitAllocator::reset()).
+  inline void reset() noexcept {
+    blocks.reset();
+    cursor = nullptr;
+    blockCount = 0;
+    emptyBlockCount = 0; // No block is tracked anymore, so no empty block is being kept either.
+    totalAreaSize = 0;
+    totalAreaUsed = 0;
+    totalOverheadBytes = 0;
+  }
+
+  inline size_t byteSizeFromAreaSize(uint32_t areaSize) const noexcept { return size_t(areaSize) * granularity; }
+  inline uint32_t areaSizeFromByteSize(size_t size) const noexcept { return uint32_t((size + granularity - 1) >> granularityLog2); } // Rounds up.
+  inline size_t bitWordCountFromAreaSize(uint32_t areaSize) const noexcept {
+    return Support::alignUp<size_t>(areaSize, Support::kBitWordSizeInBits) / Support::kBitWordSizeInBits;
+  }
+};
+
+// JitAllocator - Block
+// ====================
+
+class JitAllocatorBlock : public ZoneTreeNodeT<JitAllocatorBlock>,
+                          public ZoneListNode<JitAllocatorBlock> { // Node of both the address RBTree and the owning pool's list.
+public:
+  ASMJIT_NONCOPYABLE(JitAllocatorBlock)
+
+  enum Flags : uint32_t {
+    //! Block is empty.
+    kFlagEmpty = 0x00000001u,
+    //! Block is dirty (largestUnusedArea, searchStart, searchEnd).
+    kFlagDirty = 0x00000002u,
+    //! Block is dual-mapped.
+    kFlagDualMapped = 0x00000004u
+  };
+
+  //! Link to the pool that owns this block.
+  JitAllocatorPool* _pool;
+  //! Virtual memory mapping - either single mapping (both pointers equal) or
+  //! dual mapping, where one pointer is Read+Execute and the second Read+Write.
+  VirtMem::DualMapping _mapping;
+  //! Virtual memory size (block size) [bytes].
+  size_t _blockSize;
+
+  //! Block flags.
+  uint32_t _flags;
+  //! Size of the whole block area (bit-vector size).
+  uint32_t _areaSize;
+  //! Used area (number of bits in bit-vector used).
+  uint32_t _areaUsed;
+  //! The largest unused contiguous area in the bit-vector (or `areaSize` to initiate rescan).
+  uint32_t _largestUnusedArea;
+  //! Start of a search range (for unused bits).
+  uint32_t _searchStart;
+  //! End of a search range (for unused bits).
+  uint32_t _searchEnd;
+
+  //! Used bit-vector (0 = unused, 1 = used).
+  Support::BitWord* _usedBitVector;
+  //! Stop bit-vector (0 = don't care, 1 = stop).
+  Support::BitWord* _stopBitVector;
+
+  inline JitAllocatorBlock(
+    JitAllocatorPool* pool,
+    VirtMem::DualMapping mapping,
+    size_t blockSize,
+    uint32_t blockFlags,
+    Support::BitWord* usedBitVector,
+    Support::BitWord* stopBitVector,
+    uint32_t areaSize) noexcept
+    : ZoneTreeNodeT(),
+      _pool(pool),
+      _mapping(mapping),
+      _blockSize(blockSize),
+      _flags(blockFlags),
+      _areaSize(areaSize),
+      _areaUsed(0),
+      _largestUnusedArea(areaSize),
+      _searchStart(0),
+      _searchEnd(areaSize),
+      _usedBitVector(usedBitVector),
+      _stopBitVector(stopBitVector) {}
+
+  inline JitAllocatorPool* pool() const noexcept { return _pool; }
+
+  inline uint8_t* rxPtr() const noexcept { return static_cast<uint8_t*>(_mapping.rx); }
+  inline uint8_t* rwPtr() const noexcept { return static_cast<uint8_t*>(_mapping.rw); }
+
+  inline bool hasFlag(uint32_t f) const noexcept { return (_flags & f) != 0; }
+  inline void addFlags(uint32_t f) noexcept { _flags |= f; }
+  inline void clearFlags(uint32_t f) noexcept { _flags &= ~f; }
+
+  inline bool isDirty() const noexcept { return hasFlag(kFlagDirty); }
+  inline void makeDirty() noexcept { addFlags(kFlagDirty); }
+
+  inline size_t blockSize() const noexcept { return _blockSize; }
+
+  inline uint32_t areaSize() const noexcept { return _areaSize; }
+  inline uint32_t areaUsed() const noexcept { return _areaUsed; }
+  inline uint32_t areaAvailable() const noexcept { return _areaSize - _areaUsed; }
+  inline uint32_t largestUnusedArea() const noexcept { return _largestUnusedArea; }
+
+  inline void decreaseUsedArea(uint32_t value) noexcept { // Adjusts only the counters; bit-vectors are not touched.
+    _areaUsed -= value;
+    _pool->totalAreaUsed -= value;
+  }
+
+  inline void markAllocatedArea(uint32_t allocatedAreaStart, uint32_t allocatedAreaEnd) noexcept { // Range is [start, end).
+    uint32_t allocatedAreaSize = allocatedAreaEnd - allocatedAreaStart;
+
+    // Mark the newly allocated space as occupied and also the sentinel.
+    Support::bitVectorFill(_usedBitVector, allocatedAreaStart, allocatedAreaSize);
+    Support::bitVectorSetBit(_stopBitVector, allocatedAreaEnd - 1, true);
+
+    // Update search region and statistics.
+    _pool->totalAreaUsed += allocatedAreaSize;
+    _areaUsed += allocatedAreaSize;
+
+    if (areaAvailable() == 0) { // The block is full - leave an empty search range.
+      _searchStart = _areaSize;
+      _searchEnd = 0;
+      _largestUnusedArea = 0;
+      clearFlags(kFlagDirty);
+    }
+    else {
+      if (_searchStart == allocatedAreaStart) // The allocation consumed the beginning of the search range.
+        _searchStart = allocatedAreaEnd;
+      if (_searchEnd == allocatedAreaEnd) // The allocation consumed the end of the search range.
+        _searchEnd = allocatedAreaStart;
+      addFlags(kFlagDirty);
+    }
+  }
+
+  inline void markReleasedArea(uint32_t releasedAreaStart, uint32_t releasedAreaEnd) noexcept { // Range is [start, end).
+    uint32_t releasedAreaSize = releasedAreaEnd - releasedAreaStart;
+
+    // Update the search region and statistics.
+    _pool->totalAreaUsed -= releasedAreaSize;
+    _areaUsed -= releasedAreaSize;
+    _searchStart = Support::min(_searchStart, releasedAreaStart);
+    _searchEnd = Support::max(_searchEnd, releasedAreaEnd);
+
+    // Unmark occupied bits and also the sentinel.
+    Support::bitVectorClear(_usedBitVector, releasedAreaStart, releasedAreaSize);
+    Support::bitVectorSetBit(_stopBitVector, releasedAreaEnd - 1, false);
+
+    if (areaUsed() == 0) { // The block became completely empty - restore the initial search state.
+      _searchStart = 0;
+      _searchEnd = _areaSize;
+      _largestUnusedArea = _areaSize;
+      addFlags(kFlagEmpty);
+      clearFlags(kFlagDirty);
+    }
+    else {
+      addFlags(kFlagDirty);
+    }
+  }
+
+  inline void markShrunkArea(uint32_t shrunkAreaStart, uint32_t shrunkAreaEnd) noexcept { // [start, end) is the tail given back.
+    uint32_t shrunkAreaSize = shrunkAreaEnd - shrunkAreaStart;
+
+    // Shrunk area cannot start at zero as it would mean that we have shrunk the first
+    // block to zero bytes, which is not allowed as such block must be released instead.
+    ASMJIT_ASSERT(shrunkAreaStart != 0);
+    ASMJIT_ASSERT(shrunkAreaSize != 0);
+
+    // Update the search region and statistics.
+    _pool->totalAreaUsed -= shrunkAreaSize;
+    _areaUsed -= shrunkAreaSize;
+    _searchStart = Support::min(_searchStart, shrunkAreaStart);
+    _searchEnd = Support::max(_searchEnd, shrunkAreaEnd);
+
+    // Unmark the released space and move the sentinel.
+    Support::bitVectorClear(_usedBitVector, shrunkAreaStart, shrunkAreaSize);
+    Support::bitVectorSetBit(_stopBitVector, shrunkAreaEnd - 1, false);
+    Support::bitVectorSetBit(_stopBitVector, shrunkAreaStart - 1, true);
+
+    addFlags(kFlagDirty);
+  }
+
+  // RBTree default CMP uses '<' and '>' operators.
+  inline bool operator<(const JitAllocatorBlock& other) const noexcept { return rxPtr() < other.rxPtr(); }
+  inline bool operator>(const JitAllocatorBlock& other) const noexcept { return rxPtr() > other.rxPtr(); }
+
+  // Special implementation for querying blocks by `key`, which must be in `[BlockPtr, BlockPtr + BlockSize)` range.
+  inline bool operator<(const uint8_t* key) const noexcept { return rxPtr() + _blockSize <= key; } // Block ends at or before `key`.
+  inline bool operator>(const uint8_t* key) const noexcept { return rxPtr() > key; } // Block starts after `key`.
+};
+
+// JitAllocator - PrivateImpl
+// ==========================
+
+class JitAllocatorPrivateImpl : public JitAllocator::Impl {
+public:
+  //! Lock for thread safety (taken by alloc(), release(), shrink(), and statistics()).
+  mutable Lock lock;
+  //! System page size (also a minimum block size).
+  uint32_t pageSize;
+  //! Number of active allocations.
+  size_t allocationCount;
+
+  //! Blocks from all pools in RBTree (ordered by address, used to find the block of a pointer).
+  ZoneTree<JitAllocatorBlock> tree;
+  //! Allocator pools (array of `poolCount` pools).
+  JitAllocatorPool* pools;
+  //! Number of allocator pools.
+  size_t poolCount;
+
+  inline JitAllocatorPrivateImpl(JitAllocatorPool* pools, size_t poolCount) noexcept
+    : JitAllocator::Impl {},
+      pageSize(0), // Assigned by JitAllocatorImpl_new() after construction.
+      allocationCount(0),
+      pools(pools),
+      poolCount(poolCount) {}
+  inline ~JitAllocatorPrivateImpl() noexcept {}
+};
+
+static const JitAllocator::Impl JitAllocatorImpl_none {}; // Sentinel implementation used when the real one cannot be allocated.
+static const JitAllocator::CreateParams JitAllocatorParams_none {}; // Default parameters used when the caller passes nullptr.
+
+// JitAllocator - Utilities
+// ========================
+
+static inline JitAllocatorPrivateImpl* JitAllocatorImpl_new(const JitAllocator::CreateParams* params) noexcept {
+  // Creates the implementation and its pools in a single malloc'ed buffer. Parameters that are out of
+  // range fall back to defaults. Returns nullptr when the buffer cannot be allocated.
+  VirtMem::Info vmInfo = VirtMem::info();
+  if (!params)
+    params = &JitAllocatorParams_none;
+
+  JitAllocatorOptions options = params->options;
+  uint32_t blockSize = params->blockSize;
+  uint32_t granularity = params->granularity;
+  uint32_t fillPattern = params->fillPattern;
+
+  // Setup pool count to [1..3].
+  size_t poolCount = 1;
+  if (Support::test(options, JitAllocatorOptions::kUseMultiplePools))
+    poolCount = kJitAllocatorMultiPoolCount;
+
+  // Setup block size [64kB..256MB].
+  if (blockSize < 64 * 1024 || blockSize > 256 * 1024 * 1024 || !Support::isPowerOf2(blockSize))
+    blockSize = vmInfo.pageGranularity;
+
+  // Setup granularity [64..256].
+  if (granularity < 64 || granularity > 256 || !Support::isPowerOf2(granularity))
+    granularity = kJitAllocatorBaseGranularity;
+
+  // Setup fill-pattern (the user-provided pattern is only honored with `kCustomFillPattern`).
+  if (!Support::test(options, JitAllocatorOptions::kCustomFillPattern))
+    fillPattern = JitAllocator_defaultFillPattern();
+
+  size_t size = sizeof(JitAllocatorPrivateImpl) + sizeof(JitAllocatorPool) * poolCount;
+  void* p = ::malloc(size);
+  if (ASMJIT_UNLIKELY(!p))
+    return nullptr;
+
+  // The pools are stored right behind the implementation object in the same buffer.
+  JitAllocatorPool* pools = reinterpret_cast<JitAllocatorPool*>((uint8_t*)p + sizeof(JitAllocatorPrivateImpl));
+  JitAllocatorPrivateImpl* impl = new(p) JitAllocatorPrivateImpl(pools, poolCount);
+  impl->options = options;
+  impl->blockSize = blockSize;
+  impl->granularity = granularity;
+  impl->fillPattern = fillPattern;
+  impl->pageSize = vmInfo.pageSize;
+
+  for (size_t poolId = 0; poolId < poolCount; poolId++)
+    new(&pools[poolId]) JitAllocatorPool(granularity << poolId);
+  return impl;
+}
+
+static inline void JitAllocatorImpl_destroy(JitAllocatorPrivateImpl* impl) noexcept {
+  impl->~JitAllocatorPrivateImpl(); // Constructed by placement-new in JitAllocatorImpl_new(), so it's destroyed explicitly.
+  ::free(impl); // Releases the single buffer that holds the implementation and its pools.
+}
+
+static inline size_t JitAllocatorImpl_sizeToPoolId(const JitAllocatorPrivateImpl* impl, size_t size) noexcept {
+  // Walk from the coarsest pool (highest id) down to pool #0 and pick the first
+  // pool whose granularity `size` is already aligned to. Pool #0 is the fallback
+  // and is never tested.
+  size_t poolId = impl->poolCount - 1;
+  for (size_t poolGranularity = size_t(impl->granularity) << poolId; poolId != 0; poolGranularity >>= 1) {
+    if (Support::alignUp(size, poolGranularity) == size)
+      return poolId;
+    poolId--;
+  }
+
+  return poolId;
+}
+
+static inline size_t JitAllocatorImpl_bitVectorSizeToByteSize(uint32_t areaSize) noexcept {
+  const auto numBitWords = (areaSize + Support::kBitWordSizeInBits - 1u) / Support::kBitWordSizeInBits; // Rounded up to whole words.
+  return numBitWords * sizeof(Support::BitWord);
+}
+
+static inline size_t JitAllocatorImpl_calculateIdealBlockSize(JitAllocatorPrivateImpl* impl, JitAllocatorPool* pool, size_t allocationSize) noexcept {
+  // Start from the size of the pool's last block (or the configured block size
+  // when the pool has no blocks) and double it while it is below the maximum.
+  size_t idealSize = size_t(impl->blockSize);
+  if (JitAllocatorBlock* lastBlock = pool->blocks.last())
+    idealSize = lastBlock->blockSize();
+  if (idealSize < kJitAllocatorMaxBlockSize)
+    idealSize *= 2u;
+  if (allocationSize <= idealSize)
+    return idealSize;
+
+  // The request doesn't fit - size the block for it; zero is returned if the alignment overflows.
+  idealSize = Support::alignUp(allocationSize, impl->blockSize);
+  return ASMJIT_UNLIKELY(idealSize < allocationSize) ? size_t(0) : idealSize;
+}
+
+ASMJIT_FAVOR_SPEED static void JitAllocatorImpl_fillPattern(void* mem, uint32_t pattern, size_t sizeInBytes) noexcept {
+  // Stores `pattern` into every whole 32-bit word of `mem`; a trailing partial word is left untouched.
+  uint32_t* dst = static_cast<uint32_t*>(mem);
+  uint32_t* const dstEnd = dst + sizeInBytes / 4u;
+  while (dst != dstEnd)
+    *dst++ = pattern;
+}
+
+// Allocate a new `JitAllocatorBlock` for the given `blockSize`.
+//
+// NOTE: The block doesn't have `kFlagEmpty` flag set, because the new block
+// is only allocated when it's actually needed, so it would be cleared anyway.
+static JitAllocatorBlock* JitAllocatorImpl_newBlock(JitAllocatorPrivateImpl* impl, JitAllocatorPool* pool, size_t blockSize) noexcept {
+  using Support::BitWord;
+  using Support::kBitWordSizeInBits;
+
+  uint32_t areaSize = uint32_t((blockSize + pool->granularity - 1) >> pool->granularityLog2); // Block size in granularity units.
+  uint32_t numBitWords = (areaSize + kBitWordSizeInBits - 1u) / kBitWordSizeInBits; // Words per bit-vector.
+
+  JitAllocatorBlock* block = static_cast<JitAllocatorBlock*>(::malloc(sizeof(JitAllocatorBlock))); // Raw storage, constructed at the end.
+  BitWord* bitWords = nullptr;
+  VirtMem::DualMapping virtMem {};
+  Error err = kErrorOutOfMemory; // Overwritten only if the virtual memory allocation below is attempted.
+
+  if (block != nullptr)
+    bitWords = static_cast<BitWord*>(::malloc(size_t(numBitWords) * 2 * sizeof(BitWord))); // One buffer for both bit-vectors.
+
+  uint32_t blockFlags = 0;
+  if (bitWords != nullptr) {
+    if (Support::test(impl->options, JitAllocatorOptions::kUseDualMapping)) {
+      err = VirtMem::allocDualMapping(&virtMem, blockSize, VirtMem::MemoryFlags::kAccessRWX);
+      blockFlags |= JitAllocatorBlock::kFlagDualMapped;
+    }
+    else {
+      err = VirtMem::alloc(&virtMem.rx, blockSize, VirtMem::MemoryFlags::kAccessRWX);
+      virtMem.rw = virtMem.rx; // Single mapping - both pointers are equal.
+    }
+  }
+
+  // Out of memory (or the virtual memory allocation failed) - free what was allocated so far.
+  if (ASMJIT_UNLIKELY(!block || !bitWords || err != kErrorOk)) {
+    if (bitWords)
+      ::free(bitWords);
+
+    if (block)
+      ::free(block);
+
+    return nullptr;
+  }
+
+  // Fill the memory if the secure mode is enabled.
+  if (Support::test(impl->options, JitAllocatorOptions::kFillUnusedMemory)) {
+    VirtMem::ProtectJitReadWriteScope scope(virtMem.rw, blockSize);
+    JitAllocatorImpl_fillPattern(virtMem.rw, impl->fillPattern, blockSize);
+  }
+
+  memset(bitWords, 0, size_t(numBitWords) * 2 * sizeof(BitWord)); // Everything starts unused, with no stop bits.
+  return new(block) JitAllocatorBlock(pool, virtMem, blockSize, blockFlags, bitWords, bitWords + numBitWords, areaSize);
+}
+
+static void JitAllocatorImpl_deleteBlock(JitAllocatorPrivateImpl* impl, JitAllocatorBlock* block) noexcept {
+  DebugUtils::unused(impl);
+
+  if (block->hasFlag(JitAllocatorBlock::kFlagDualMapped)) // Release the virtual memory the same way it was allocated.
+    VirtMem::releaseDualMapping(&block->_mapping, block->blockSize());
+  else
+    VirtMem::release(block->rxPtr(), block->blockSize());
+
+  ::free(block->_usedBitVector); // Also frees the stop bit-vector, which shares this buffer (see JitAllocatorImpl_newBlock()).
+  ::free(block); // The block doesn't unlink itself - callers remove it from the tree/list when needed.
+}
+
+static void JitAllocatorImpl_insertBlock(JitAllocatorPrivateImpl* impl, JitAllocatorBlock* block) noexcept {
+  JitAllocatorPool* owner = block->pool();
+  const uint32_t blockAreaSize = block->areaSize();
+
+  if (owner->cursor == nullptr) // A pool without a cursor starts looking at this block.
+    owner->cursor = block;
+
+  // Register the block in the RBTree and append it to the owner's list.
+  impl->tree.insert(block);
+  owner->blocks.append(block);
+  // Account for the block: its area and its overhead (block object + both bit-vectors).
+  owner->blockCount++;
+  owner->totalAreaSize += blockAreaSize;
+  owner->totalOverheadBytes += sizeof(JitAllocatorBlock) + JitAllocatorImpl_bitVectorSizeToByteSize(blockAreaSize) * 2u;
+}
+
+static void JitAllocatorImpl_removeBlock(JitAllocatorPrivateImpl* impl, JitAllocatorBlock* block) noexcept {
+  JitAllocatorPool* owner = block->pool();
+  const uint32_t blockAreaSize = block->areaSize();
+
+  // Move the cursor to a neighbor (the previous block if there is one) before the block goes away.
+  if (owner->cursor == block)
+    owner->cursor = block->hasPrev() ? block->prev() : block->next();
+  impl->tree.remove(block);
+  owner->blocks.unlink(block);
+
+  // Undo the accounting done when the block was inserted.
+  owner->blockCount--;
+  owner->totalAreaSize -= blockAreaSize;
+  owner->totalOverheadBytes -= sizeof(JitAllocatorBlock) + JitAllocatorImpl_bitVectorSizeToByteSize(blockAreaSize) * 2u;
+}
+
+static void JitAllocatorImpl_wipeOutBlock(JitAllocatorPrivateImpl* impl, JitAllocatorBlock* block) noexcept {
+  if (block->hasFlag(JitAllocatorBlock::kFlagEmpty)) // A block already marked empty needs no wiping.
+    return;
+
+  JitAllocatorPool* pool = block->pool();
+  uint32_t areaSize = block->areaSize();
+  uint32_t granularity = pool->granularity;
+  size_t numBitWords = pool->bitWordCountFromAreaSize(areaSize);
+
+  VirtMem::protectJitMemory(VirtMem::ProtectJitAccess::kReadWrite);
+  if (Support::test(impl->options, JitAllocatorOptions::kFillUnusedMemory)) {
+    uint8_t* rwPtr = block->rwPtr();
+    BitVectorRangeIterator<Support::BitWord, 0> it(block->_usedBitVector, pool->bitWordCountFromAreaSize(block->areaSize())); // NOTE(review): B == 0 iterates runs of unused (zero) bits - confirm that filling those, not the used runs, is intended.
+
+    size_t rangeStart;
+    size_t rangeEnd;
+
+    while (it.nextRange(&rangeStart, &rangeEnd)) {
+      uint8_t* spanPtr = rwPtr + rangeStart * granularity; // Bit indexes are converted to byte offsets.
+      size_t spanSize = (rangeEnd - rangeStart) * granularity;
+
+      JitAllocatorImpl_fillPattern(spanPtr, impl->fillPattern, spanSize);
+      VirtMem::flushInstructionCache(spanPtr, spanSize);
+    }
+  }
+  VirtMem::protectJitMemory(VirtMem::ProtectJitAccess::kReadExecute);
+
+  memset(block->_usedBitVector, 0, size_t(numBitWords) * sizeof(Support::BitWord)); // Every area unit becomes unused.
+  memset(block->_stopBitVector, 0, size_t(numBitWords) * sizeof(Support::BitWord)); // No allocation ends anywhere.
+
+  block->_areaUsed = 0; // NOTE(review): pool->totalAreaUsed is not adjusted here; the only visible caller resets the pool first.
+  block->_largestUnusedArea = areaSize;
+  block->_searchStart = 0;
+  block->_searchEnd = areaSize;
+  block->addFlags(JitAllocatorBlock::kFlagEmpty);
+  block->clearFlags(JitAllocatorBlock::kFlagDirty);
+}
+
+// JitAllocator - Construction & Destruction
+// =========================================
+
+JitAllocator::JitAllocator(const CreateParams* params) noexcept {
+  _impl = JitAllocatorImpl_new(params); // `params` may be null, in which case defaults are used.
+  if (ASMJIT_UNLIKELY(!_impl)) // Allocation failed - fall back to the shared sentinel so every method can detect it.
+    _impl = const_cast<JitAllocator::Impl*>(&JitAllocatorImpl_none);
+}
+
+JitAllocator::~JitAllocator() noexcept {
+  if (_impl == &JitAllocatorImpl_none) // The sentinel is static and owns nothing.
+    return;
+
+  reset(ResetPolicy::kHard); // A hard reset deletes every block of every pool.
+  JitAllocatorImpl_destroy(static_cast<JitAllocatorPrivateImpl*>(_impl));
+}
+
+// JitAllocator - Reset
+// ====================
+
+void JitAllocator::reset(ResetPolicy resetPolicy) noexcept {
+  if (_impl == &JitAllocatorImpl_none)
+    return;
+
+  // Every allocation is invalidated by a reset, so the tree and the allocation counter start over.
+  JitAllocatorPrivateImpl* impl = static_cast<JitAllocatorPrivateImpl*>(_impl);
+  impl->tree.reset();
+  impl->allocationCount = 0;
+
+  for (size_t poolId = 0; poolId < impl->poolCount; poolId++) {
+    JitAllocatorPool& pool = impl->pools[poolId];
+    JitAllocatorBlock* block = pool.blocks.first();
+    // A soft reset keeps the pool's first block for reuse; a pool without blocks (`block` is null) has nothing to keep.
+    JitAllocatorBlock* blockToKeep = nullptr;
+    if (block && resetPolicy != ResetPolicy::kHard && uint32_t(impl->options & JitAllocatorOptions::kImmediateRelease) == 0) {
+      blockToKeep = block;
+      block = block->next();
+    }
+
+    while (block) {
+      JitAllocatorBlock* next = block->next();
+      JitAllocatorImpl_deleteBlock(impl, block);
+      block = next;
+    }
+
+    pool.reset();
+    if (blockToKeep) {
+      blockToKeep->_listNodes[0] = nullptr;
+      blockToKeep->_listNodes[1] = nullptr;
+      JitAllocatorImpl_wipeOutBlock(impl, blockToKeep);
+      JitAllocatorImpl_insertBlock(impl, blockToKeep);
+      pool.emptyBlockCount = 1;
+    }
+  }
+}
+
+// JitAllocator - Statistics
+// =========================
+
+JitAllocator::Statistics JitAllocator::statistics() const noexcept {
+  Statistics result;
+  result.reset();
+
+  // Without a real implementation there is nothing to sum up - return the freshly reset statistics.
+  if (ASMJIT_UNLIKELY(_impl == &JitAllocatorImpl_none))
+    return result;
+
+  JitAllocatorPrivateImpl* impl = static_cast<JitAllocatorPrivateImpl*>(_impl);
+  LockGuard guard(impl->lock);
+
+  for (size_t poolId = 0, poolCount = impl->poolCount; poolId < poolCount; poolId++) {
+    const JitAllocatorPool& pool = impl->pools[poolId];
+    result._blockCount   += size_t(pool.blockCount);
+    result._reservedSize += size_t(pool.totalAreaSize) * pool.granularity;
+    result._usedSize     += size_t(pool.totalAreaUsed) * pool.granularity;
+    result._overheadSize += size_t(pool.totalOverheadBytes);
+  }
+
+  result._allocationCount = impl->allocationCount;
+  return result;
+}
+
+// JitAllocator - Alloc & Release
+// ==============================
+
+Error JitAllocator::alloc(void** rxPtrOut, void** rwPtrOut, size_t size) noexcept {
+  if (ASMJIT_UNLIKELY(_impl == &JitAllocatorImpl_none))
+    return DebugUtils::errored(kErrorNotInitialized);
+
+  JitAllocatorPrivateImpl* impl = static_cast<JitAllocatorPrivateImpl*>(_impl);
+  constexpr uint32_t kNoIndex = std::numeric_limits<uint32_t>::max();
+
+  *rxPtrOut = nullptr; // Both outputs stay null on every failure path below.
+  *rwPtrOut = nullptr;
+
+  // Align to the minimum granularity by default.
+  size = Support::alignUp<size_t>(size, impl->granularity);
+  if (ASMJIT_UNLIKELY(size == 0))
+    return DebugUtils::errored(kErrorInvalidArgument);
+
+  if (ASMJIT_UNLIKELY(size > std::numeric_limits<uint32_t>::max() / 2)) // Sizes above half of the uint32_t range are rejected.
+    return DebugUtils::errored(kErrorTooLarge);
+
+  LockGuard guard(impl->lock);
+  JitAllocatorPool* pool = &impl->pools[JitAllocatorImpl_sizeToPoolId(impl, size)];
+
+  uint32_t areaIndex = kNoIndex;
+  uint32_t areaSize = uint32_t(pool->areaSizeFromByteSize(size)); // Requested size in units of the pool's granularity.
+
+  // Try to find the requested memory area in existing blocks.
+  JitAllocatorBlock* block = pool->blocks.first();
+  if (block) {
+    JitAllocatorBlock* initial = block;
+    do {
+      JitAllocatorBlock* next = block->hasNext() ? block->next() : pool->blocks.first();
+      if (block->areaAvailable() >= areaSize) {
+        if (block->isDirty() || block->largestUnusedArea() >= areaSize) { // Scan only if the cached info is stale or promises a fit.
+          BitVectorRangeIterator<Support::BitWord, 0> it(block->_usedBitVector, pool->bitWordCountFromAreaSize(block->areaSize()), block->_searchStart, block->_searchEnd);
+
+          size_t rangeStart = 0;
+          size_t rangeEnd = block->areaSize();
+
+          size_t searchStart = SIZE_MAX;
+          size_t largestArea = 0;
+
+          while (it.nextRange(&rangeStart, &rangeEnd, areaSize)) { // Each range is a run of unused bits.
+            size_t rangeSize = rangeEnd - rangeStart;
+            if (rangeSize >= areaSize) {
+              areaIndex = uint32_t(rangeStart);
+              break;
+            }
+
+            searchStart = Support::min(searchStart, rangeStart);
+            largestArea = Support::max(largestArea, rangeSize);
+          }
+
+          if (areaIndex != kNoIndex) // Found - `block` is the block to allocate from.
+            break;
+
+          if (searchStart != SIZE_MAX) {
+            // Because we have iterated over the entire block, we can now mark the
+            // largest unused area that can be used to cache the next traversal.
+            size_t searchEnd = rangeEnd;
+
+            block->_searchStart = uint32_t(searchStart);
+            block->_searchEnd = uint32_t(searchEnd);
+            block->_largestUnusedArea = uint32_t(largestArea);
+            block->clearFlags(JitAllocatorBlock::kFlagDirty);
+          }
+        }
+      }
+
+      block = next;
+    } while (block != initial);
+  }
+
+  // Allocate a new block if there is no region of a required width.
+  if (areaIndex == kNoIndex) {
+    size_t blockSize = JitAllocatorImpl_calculateIdealBlockSize(impl, pool, size);
+    if (ASMJIT_UNLIKELY(!blockSize))
+      return DebugUtils::errored(kErrorOutOfMemory);
+
+    block = JitAllocatorImpl_newBlock(impl, pool, blockSize);
+    areaIndex = 0; // The allocation is placed at the very beginning of the new block.
+
+    if (ASMJIT_UNLIKELY(!block))
+      return DebugUtils::errored(kErrorOutOfMemory);
+
+    JitAllocatorImpl_insertBlock(impl, block);
+    block->_searchStart = areaSize;
+    block->_largestUnusedArea = block->areaSize() - areaSize;
+  }
+  else if (block->hasFlag(JitAllocatorBlock::kFlagEmpty)) { // Reusing the pool's kept empty block.
+    pool->emptyBlockCount--;
+    block->clearFlags(JitAllocatorBlock::kFlagEmpty);
+  }
+
+  // Update statistics.
+  impl->allocationCount++;
+  block->markAllocatedArea(areaIndex, areaIndex + areaSize);
+
+  // Return a pointer to the allocated memory.
+  size_t offset = pool->byteSizeFromAreaSize(areaIndex);
+  ASMJIT_ASSERT(offset <= block->blockSize() - size);
+
+  *rxPtrOut = block->rxPtr() + offset;
+  *rwPtrOut = block->rwPtr() + offset; // Equals the RX pointer when the block is not dual-mapped.
+  return kErrorOk;
+}
+
+Error JitAllocator::release(void* rxPtr) noexcept {
+  if (ASMJIT_UNLIKELY(_impl == &JitAllocatorImpl_none))
+    return DebugUtils::errored(kErrorNotInitialized);
+
+  if (ASMJIT_UNLIKELY(!rxPtr))
+    return DebugUtils::errored(kErrorInvalidArgument);
+
+  JitAllocatorPrivateImpl* impl = static_cast<JitAllocatorPrivateImpl*>(_impl);
+  LockGuard guard(impl->lock);
+
+  JitAllocatorBlock* block = impl->tree.get(static_cast<uint8_t*>(rxPtr)); // Block whose address range contains `rxPtr`.
+  if (ASMJIT_UNLIKELY(!block))
+    return DebugUtils::errored(kErrorInvalidState);
+
+  // Offset relative to the start of the block.
+  JitAllocatorPool* pool = block->pool();
+  size_t offset = (size_t)((uint8_t*)rxPtr - block->rxPtr());
+
+  // The first bit representing the allocated area and its size.
+  uint32_t areaIndex = uint32_t(offset >> pool->granularityLog2);
+  uint32_t areaEnd = uint32_t(Support::bitVectorIndexOf(block->_stopBitVector, areaIndex, true)) + 1; // The stop bit marks the allocation's last unit.
+  uint32_t areaSize = areaEnd - areaIndex;
+
+  impl->allocationCount--;
+  block->markReleasedArea(areaIndex, areaEnd);
+
+  // Fill the released memory if the secure mode is enabled.
+  if (Support::test(impl->options, JitAllocatorOptions::kFillUnusedMemory)) {
+    uint8_t* spanPtr = block->rwPtr() + areaIndex * pool->granularity;
+    size_t spanSize = areaSize * pool->granularity;
+
+    VirtMem::ProtectJitReadWriteScope scope(spanPtr, spanSize);
+    JitAllocatorImpl_fillPattern(spanPtr, impl->fillPattern, spanSize);
+  }
+
+  // Release the whole block if it became empty.
+  if (block->areaUsed() == 0) {
+    if (pool->emptyBlockCount || Support::test(impl->options, JitAllocatorOptions::kImmediateRelease)) { // At most one empty block is kept per pool.
+      JitAllocatorImpl_removeBlock(impl, block);
+      JitAllocatorImpl_deleteBlock(impl, block);
+    }
+    else {
+      pool->emptyBlockCount++; // Keep this block around for later allocations.
+    }
+  }
+
+  return kErrorOk;
+}
+
+Error JitAllocator::shrink(void* rxPtr, size_t newSize) noexcept {
+  // Shrinks the allocation at `rxPtr` to `newSize` bytes (rounded up to the pool's granularity) and gives
+  // the tail back to the block; zero `newSize` releases it. Growing fails with `kErrorInvalidState`.
+  if (ASMJIT_UNLIKELY(_impl == &JitAllocatorImpl_none))
+    return DebugUtils::errored(kErrorNotInitialized);
+  if (ASMJIT_UNLIKELY(!rxPtr))
+    return DebugUtils::errored(kErrorInvalidArgument);
+  if (ASMJIT_UNLIKELY(newSize == 0))
+    return release(rxPtr);
+
+  JitAllocatorPrivateImpl* impl = static_cast<JitAllocatorPrivateImpl*>(_impl);
+  LockGuard guard(impl->lock);
+  JitAllocatorBlock* block = impl->tree.get(static_cast<uint8_t*>(rxPtr));
+  if (ASMJIT_UNLIKELY(!block))
+    return DebugUtils::errored(kErrorInvalidArgument);
+
+  // Offset relative to the start of the block.
+  JitAllocatorPool* pool = block->pool();
+  size_t offset = (size_t)((uint8_t*)rxPtr - block->rxPtr());
+
+  // The first bit representing the allocated area and its size.
+  uint32_t areaStart = uint32_t(offset >> pool->granularityLog2);
+  uint32_t areaEnd = uint32_t(Support::bitVectorIndexOf(block->_stopBitVector, areaStart, true)) + 1;
+  uint32_t areaPrevSize = areaEnd - areaStart;
+  uint32_t areaShrunkSize = pool->areaSizeFromByteSize(newSize);
+  if (ASMJIT_UNLIKELY(areaShrunkSize > areaPrevSize))
+    return DebugUtils::errored(kErrorInvalidState);
+
+  uint32_t areaDiff = areaPrevSize - areaShrunkSize;
+  if (areaDiff) {
+    block->markShrunkArea(areaStart + areaShrunkSize, areaEnd);
+    // Fill released memory if the secure mode is enabled (under a write scope, exactly as release() does).
+    if (Support::test(impl->options, JitAllocatorOptions::kFillUnusedMemory)) {
+      uint8_t* spanPtr = block->rwPtr() + (areaStart + areaShrunkSize) * pool->granularity;
+      size_t spanSize = areaDiff * pool->granularity;
+      VirtMem::ProtectJitReadWriteScope scope(spanPtr, spanSize);
+      JitAllocatorImpl_fillPattern(spanPtr, impl->fillPattern, spanSize);
+    }
+  }
+
+  return kErrorOk;
+}
+
+// JitAllocator - Tests
+// ====================
+
+#if defined(ASMJIT_TEST)
+// A pseudo random number generator based on a paper by Sebastiano Vigna:
+//   http://vigna.di.unimi.it/ftp/papers/xorshiftplus.pdf
+class Random {
+public:
+  // Constants suggested as `23/18/5`.
+  enum Steps : uint32_t {
+    kStep1_SHL = 23,
+    kStep2_SHR = 18,
+    kStep3_SHR = 5
+  };
+
+  inline explicit Random(uint64_t seed = 0) noexcept { reset(seed); }
+  inline Random(const Random& other) noexcept = default;
+  // Re-seeds both state words from `seed`; a word that would come out as zero is replaced by kZeroSeed.
+  inline void reset(uint64_t seed = 0) noexcept {
+    // The number is arbitrary, it means nothing.
+    constexpr uint64_t kZeroSeed = 0x1F0A2BE71D163FA0u;
+
+    // Generate the state data by using splitmix64.
+    for (uint32_t i = 0; i < 2; i++) {
+      seed += 0x9E3779B97F4A7C15u;
+      uint64_t x = seed;
+      x = (x ^ (x >> 30)) * 0xBF58476D1CE4E5B9u;
+      x = (x ^ (x >> 27)) * 0x94D049BB133111EBu;
+      x = (x ^ (x >> 31));
+      _state[i] = x != 0 ? x : kZeroSeed;
+    }
+  }
+  // Returns the upper 32 bits of the next 64-bit value.
+  inline uint32_t nextUInt32() noexcept {
+    return uint32_t(nextUInt64() >> 32);
+  }
+  // Advances the two-word state by one step and returns the sum of the two new state words.
+  inline uint64_t nextUInt64() noexcept {
+    uint64_t x = _state[0];
+    uint64_t y = _state[1];
+
+    x ^= x << kStep1_SHL;
+    y ^= y >> kStep3_SHR;
+    x ^= x >> kStep2_SHR;
+    x ^= y;
+
+    _state[0] = y;
+    _state[1] = x;
+    return x + y;
+  }
+  // Generator state: two 64-bit words, both non-zero right after reset().
+  uint64_t _state[2];
+};
+
+// Helper class to verify that JitAllocator doesn't return addresses that overlap.
+class JitAllocatorWrapper {
+public:
+  // Address to a memory region of a given size.
+  class Range {
+  public:
+    inline Range(uint8_t* addr, size_t size) noexcept
+      : addr(addr),
+        size(size) {}
+    uint8_t* addr;
+    size_t size;
+  };
+
+  // Based on JitAllocator::Block, serves our purpose well...
+  class Record : public ZoneTreeNodeT<Record>,
+                 public Range {
+  public:
+    inline Record(uint8_t* addr, size_t size)
+      : ZoneTreeNodeT<Record>(),
+        Range(addr, size) {}
+    // Records are ordered by their start address.
+    inline bool operator<(const Record& other) const noexcept { return addr < other.addr; }
+    inline bool operator>(const Record& other) const noexcept { return addr > other.addr; }
+    // Against a raw address neither comparison holds when the address lies in [addr, addr + size).
+    inline bool operator<(const uint8_t* key) const noexcept { return addr + size <= key; }
+    inline bool operator>(const uint8_t* key) const noexcept { return addr > key; }
+  };
+
+  Zone _zone;
+  ZoneAllocator _heap;
+  ZoneTree<Record> _records;
+  JitAllocator _allocator;
+
+  explicit JitAllocatorWrapper(const JitAllocator::CreateParams* params) noexcept
+    : _zone(1024 * 1024),
+      _heap(&_zone),
+      _allocator(params) {}
+  // Tracks a new region [p_, p_ + size) after checking that neither its first nor its last byte is already tracked.
+  void _insert(void* p_, size_t size) noexcept {
+    uint8_t* p = static_cast<uint8_t*>(p_);
+    uint8_t* pEnd = p + size - 1;
+
+    Record* record;
+
+    record = _records.get(p);
+    if (record)
+      EXPECT(record == nullptr, "Address [%p:%p] collides with a newly allocated [%p:%p]\n", record->addr, record->addr + record->size, p, p + size);
+
+    record = _records.get(pEnd);
+    if (record)
+      EXPECT(record == nullptr, "Address [%p:%p] collides with a newly allocated [%p:%p]\n", record->addr, record->addr + record->size, p, p + size);
+
+    record = _heap.newT<Record>(p, size);
+    EXPECT(record != nullptr, "Out of memory, cannot allocate 'Record'");
+
+    _records.insert(record);
+  }
+  // Stops tracking the region looked up by `p`; the test fails if no such region is tracked.
+  void _remove(void* p) noexcept {
+    Record* record = _records.get(static_cast<uint8_t*>(p));
+    EXPECT(record != nullptr, "Address [%p] doesn't exist\n", p);
+
+    _records.remove(record);
+    _heap.release(record, sizeof(Record));
+  }
+  // Allocates `size` bytes from the wrapped allocator, tracks the region, and returns its RX pointer.
+  void* alloc(size_t size) noexcept {
+    void* rxPtr;
+    void* rwPtr;
+
+    Error err = _allocator.alloc(&rxPtr, &rwPtr, size);
+    EXPECT(err == kErrorOk, "JitAllocator failed to allocate %zu bytes\n", size);
+
+    _insert(rxPtr, size);
+    return rxPtr;
+  }
+  // Stops tracking `p` first and then releases it through the wrapped allocator.
+  void release(void* p) noexcept {
+    _remove(p);
+    EXPECT(_allocator.release(p) == kErrorOk, "JitAllocator failed to release '%p'\n", p);
+  }
+  // Shrinks the tracked region at `p` to `newSize` bytes (zero means release) and updates the tracked size.
+  void shrink(void* p, size_t newSize) noexcept {
+    Record* record = _records.get(static_cast<uint8_t*>(p));
+    EXPECT(record != nullptr, "Address [%p] doesn't exist\n", p);
+
+    if (!newSize)
+      return release(p);
+
+    Error err = _allocator.shrink(p, newSize);
+    EXPECT(err == kErrorOk, "JitAllocator failed to shrink %p to %zu bytes\n", p, newSize);
+
+    record->size = newSize;
+  }
+};
+
+static void JitAllocatorTest_shuffle(void** ptrArray, size_t count, Random& prng) noexcept {
+  for (size_t i = 0; i < count; ++i) // Swap every slot with a slot picked at random from the whole array.
+    std::swap(ptrArray[i], ptrArray[size_t(prng.nextUInt32() % count)]);
+}
+
+static void JitAllocatorTest_usage(JitAllocator& allocator) noexcept {
+  JitAllocator::Statistics stats = allocator.statistics(); // One snapshot, so every line below reports the same state.
+  INFO("    Block Count       : %9llu [Blocks]"        , (unsigned long long)(stats.blockCount()));
+  INFO("    Reserved (VirtMem): %9llu [Bytes]"         , (unsigned long long)(stats.reservedSize()));
+  INFO("    Used     (VirtMem): %9llu [Bytes] (%.1f%%)", (unsigned long long)(stats.usedSize()), stats.usedSizeAsPercent());
+  INFO("    Overhead (HeapMem): %9llu [Bytes] (%.1f%%)", (unsigned long long)(stats.overheadSize()), stats.overheadSizeAsPercent());
+}
+
+template<typename T, size_t kPatternSize, bool Bit>
+static void BitVectorRangeIterator_testRandom(Random& rnd, size_t count) noexcept {
+  for (size_t i = 0; i < count; i++) {
+    T in[kPatternSize];
+    T out[kPatternSize];
+    // Each input word repeats one random byte; `out` starts all-ones for Bit == 0 and all-zeros otherwise.
+    for (size_t j = 0; j < kPatternSize; j++) {
+      in[j] = T(uint64_t(rnd.nextUInt32() & 0xFFu) * 0x0101010101010101);
+      out[j] = Bit == 0 ? Support::allOnes<T>() : T(0);
+    }
+    // Replay every range reported by the iterator into `out` (fill when Bit is set, clear when it is not).
+    {
+      BitVectorRangeIterator<T, Bit> it(in, kPatternSize);
+      size_t rangeStart, rangeEnd;
+      while (it.nextRange(&rangeStart, &rangeEnd)) {
+        if (Bit)
+          Support::bitVectorFill(out, rangeStart, rangeEnd - rangeStart);
+        else
+          Support::bitVectorClear(out, rangeStart, rangeEnd - rangeStart);
+      }
+    }
+    // The replayed ranges must reproduce the input exactly.
+    for (size_t j = 0; j < kPatternSize; j++) {
+      EXPECT(in[j] == out[j], "Invalid pattern detected at [%zu] (%llX != %llX)", j, (unsigned long long)in[j], (unsigned long long)out[j]);
+    }
+  }
+}
+
+UNIT(jit_allocator) {
+  size_t kCount = BrokenAPI::hasArg("--quick") ? 1000 : 100000; // Allocations per phase; "--quick" shortens the run.
+
+  struct TestParams {
+    const char* name;
+    JitAllocatorOptions options;
+    uint32_t blockSize;
+    uint32_t granularity;
+  };
+  // Allocator configurations to exercise; a zero blockSize/granularity selects the allocator's default.
+  static TestParams testParams[] = {
+    { "Default", JitAllocatorOptions::kNone, 0, 0 },
+    { "16MB blocks", JitAllocatorOptions::kNone, 16 * 1024 * 1024, 0 },
+    { "256B granularity", JitAllocatorOptions::kNone, 0, 256 },
+    { "kUseDualMapping", JitAllocatorOptions::kUseDualMapping, 0, 0 },
+    { "kUseMultiplePools", JitAllocatorOptions::kUseMultiplePools, 0, 0 },
+    { "kFillUnusedMemory", JitAllocatorOptions::kFillUnusedMemory, 0, 0 },
+    { "kImmediateRelease", JitAllocatorOptions::kImmediateRelease, 0, 0 },
+    { "kUseDualMapping | kFillUnusedMemory", JitAllocatorOptions::kUseDualMapping | JitAllocatorOptions::kFillUnusedMemory, 0, 0 }
+  };
+
+  INFO("BitVectorRangeIterator<uint32_t>");
+  {
+    Random rnd;
+    BitVectorRangeIterator_testRandom<uint32_t, 64, 0>(rnd, kCount);
+  }
+
+  INFO("BitVectorRangeIterator<uint64_t>");
+  {
+    Random rnd;
+    BitVectorRangeIterator_testRandom<uint64_t, 64, 0>(rnd, kCount);
+  }
+
+  for (uint32_t testId = 0; testId < ASMJIT_ARRAY_SIZE(testParams); testId++) {
+    INFO("JitAllocator(%s)", testParams[testId].name);
+
+    JitAllocator::CreateParams params {};
+    params.options = testParams[testId].options;
+    params.blockSize = testParams[testId].blockSize;
+    params.granularity = testParams[testId].granularity;
+
+    size_t fixedBlockSize = 256;
+
+    JitAllocatorWrapper wrapper(&params);
+    Random prng(100);
+
+    size_t i;
+
+    INFO("  Memory alloc/release test - %zu allocations", kCount);
+
+    void** ptrArray = (void**)::malloc(sizeof(void*) * size_t(kCount));
+    EXPECT(ptrArray != nullptr,
+          "Couldn't allocate '%u' bytes for pointer-array", unsigned(sizeof(void*) * size_t(kCount)));
+
+    // Random blocks tests...
+    INFO("  Allocating random blocks...");
+    for (i = 0; i < kCount; i++)
+      ptrArray[i] = wrapper.alloc((prng.nextUInt32() % 1024) + 8);
+    JitAllocatorTest_usage(wrapper._allocator);
+
+    INFO("  Releasing all allocated blocks from the beginning...");
+    for (i = 0; i < kCount; i++)
+      wrapper.release(ptrArray[i]);
+    JitAllocatorTest_usage(wrapper._allocator);
+
+    INFO("  Allocating random blocks again...");
+    for (i = 0; i < kCount; i++)
+      ptrArray[i] = wrapper.alloc((prng.nextUInt32() % 1024) + 8);
+    JitAllocatorTest_usage(wrapper._allocator);
+
+    INFO("  Shuffling allocated blocks...");
+    JitAllocatorTest_shuffle(ptrArray, unsigned(kCount), prng);
+
+    INFO("  Releasing 50%% of allocated blocks...");
+    for (i = 0; i < kCount / 2; i++)
+      wrapper.release(ptrArray[i]);
+    JitAllocatorTest_usage(wrapper._allocator);
+
+    INFO("  Allocating 50%% more blocks again...");
+    for (i = 0; i < kCount / 2; i++)
+      ptrArray[i] = wrapper.alloc((prng.nextUInt32() % 1024) + 8);
+    JitAllocatorTest_usage(wrapper._allocator);
+
+    INFO("  Releasing all allocated blocks from the end...");
+    for (i = 0; i < kCount; i++)
+      wrapper.release(ptrArray[kCount - i - 1]);
+    JitAllocatorTest_usage(wrapper._allocator);
+
+    // Fixed blocks tests...
+    INFO("  Allocating %zuB blocks...", fixedBlockSize);
+    for (i = 0; i < kCount / 2; i++)
+      ptrArray[i] = wrapper.alloc(fixedBlockSize);
+    JitAllocatorTest_usage(wrapper._allocator);
+
+    INFO("  Shrinking each %zuB block to 1 byte", fixedBlockSize);
+    for (i = 0; i < kCount / 2; i++)
+      wrapper.shrink(ptrArray[i], 1);
+    JitAllocatorTest_usage(wrapper._allocator);
+
+    INFO("  Allocating more 64B blocks...");
+    for (i = kCount / 2; i < kCount; i++)
+      ptrArray[i] = wrapper.alloc(64);
+    JitAllocatorTest_usage(wrapper._allocator);
+
+    INFO("  Releasing all blocks from the beginning...");
+    for (i = 0; i < kCount; i++)
+      wrapper.release(ptrArray[i]);
+    JitAllocatorTest_usage(wrapper._allocator);
+
+    INFO("  Allocating %zuB blocks...", fixedBlockSize);
+    for (i = 0; i < kCount; i++)
+      ptrArray[i] = wrapper.alloc(fixedBlockSize);
+    JitAllocatorTest_usage(wrapper._allocator);
+
+    INFO("  Shuffling allocated blocks...");
+    JitAllocatorTest_shuffle(ptrArray, unsigned(kCount), prng);
+
+    INFO("  Releasing 50%% of allocated blocks...");
+    for (i = 0; i < kCount / 2; i++)
+      wrapper.release(ptrArray[i]);
+    JitAllocatorTest_usage(wrapper._allocator);
+
+    INFO("  Allocating 50%% more %zuB blocks again...", fixedBlockSize);
+    for (i = 0; i < kCount / 2; i++)
+      ptrArray[i] = wrapper.alloc(fixedBlockSize);
+    JitAllocatorTest_usage(wrapper._allocator);
+
+    INFO("  Releasing all allocated blocks from the end...");
+    for (i = 0; i < kCount; i++)
+      wrapper.release(ptrArray[kCount - i - 1]);
+    JitAllocatorTest_usage(wrapper._allocator);
+
+    ::free(ptrArray);
+  }
+}
+#endif
+
+ASMJIT_END_NAMESPACE
+
+#endif
diff --git a/lib/lepton/asmjit/core/jitallocator.h b/lib/lepton/asmjit/core/jitallocator.h
new file mode 100644
index 0000000000..e8fe69519e
--- /dev/null
+++ b/lib/lepton/asmjit/core/jitallocator.h
@@ -0,0 +1,261 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_JITALLOCATOR_H_INCLUDED
+#define ASMJIT_CORE_JITALLOCATOR_H_INCLUDED
+
+#include "../core/api-config.h"
+#ifndef ASMJIT_NO_JIT
+
+#include "../core/globals.h"
+#include "../core/virtmem.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \addtogroup asmjit_virtual_memory
+//! \{
+
+//! Options used by \ref JitAllocator.
+enum class JitAllocatorOptions : uint32_t {
+  //! No options.
+  kNone = 0,
+
+  //! Enables the use of an anonymous memory-mapped memory that is mapped into two buffers having a different pointer.
+  //! The first buffer has read and execute permissions and the second buffer has read+write permissions.
+  //!
+  //! See \ref VirtMem::allocDualMapping() for more details about this feature.
+  kUseDualMapping = 0x00000001u,
+
+  //! Enables the use of multiple pools with increasing granularity instead of a single pool. This flag would enable
+  //! 3 internal pools in total having 64, 128, and 256 bytes granularity.
+  //!
+  //! This feature is only recommended for users that generate a lot of code and would like to minimize the overhead
+  //! of `JitAllocator` itself by having blocks of different allocation granularities. Using this feature only for
+  //! few allocations won't pay off as the allocator may need to create more blocks initially before it can take the
+  //! advantage of variable block granularity.
+  kUseMultiplePools = 0x00000002u,
+
+  //! Always fill reserved memory by a fill-pattern.
+  //!
+  //! Causes a new block to be cleared by the fill pattern and freshly released memory to be cleared before making
+  //! it ready for another use.
+  kFillUnusedMemory = 0x00000004u,
+
+  //! When this flag is set the allocator would immediately release unused blocks during `release()` or `reset()`.
+  //! When this flag is not set the allocator would keep one empty block in each pool to prevent excessive virtual
+  //! memory allocations and deallocations in border cases, which involve constantly allocating and deallocating a
+  //! single block caused by repetitive calling `alloc()` and `release()` when the allocator has either no blocks
+  //! or have all blocks fully occupied.
+  kImmediateRelease = 0x00000008u,
+
+  //! Use a custom fill pattern, must be combined with \ref kFillUnusedMemory.
+  kCustomFillPattern = 0x10000000u
+};
+ASMJIT_DEFINE_ENUM_FLAGS(JitAllocatorOptions)
+
+//! A simple implementation of memory manager that uses `asmjit::VirtMem`
+//! functions to manage virtual memory for JIT compiled code.
+//!
+//! Implementation notes:
+//!
+//! - Granularity of allocated blocks is different than granularity for a typical C malloc. In addition, the allocator
+//!   can use several memory pools having a different granularity to minimize the maintenance overhead. Multiple pools
+//!   feature requires the \ref JitAllocatorOptions::kUseMultiplePools option to be set.
+//!
+//! - The allocator doesn't store any information in executable memory, instead, the implementation uses two
+//!   bit-vectors to manage allocated memory of each allocator-block. The first bit-vector called 'used' is used to
+//!   track used memory (where each bit represents memory size defined by granularity) and the second bit vector called
+//!   'stop' is used as a sentinel to mark where the allocated area ends.
+//!
+//! - Internally, the allocator also uses RB tree to keep track of all blocks across all pools. Each inserted block is
+//!   added to the tree so it can be matched fast during `release()` and `shrink()`.
+class JitAllocator {
+public:
+  ASMJIT_NONCOPYABLE(JitAllocator)
+
+  struct Impl {
+    //! Allocator options.
+    JitAllocatorOptions options;
+    //! Base block size (0 if the allocator is not initialized).
+    uint32_t blockSize;
+    //! Base granularity (0 if the allocator is not initialized).
+    uint32_t granularity;
+    //! A pattern that is used to fill unused memory if secure mode is enabled.
+    uint32_t fillPattern;
+  };
+
+  //! Allocator implementation (private).
+  Impl* _impl;
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Parameters that can be passed to `JitAllocator` constructor.
+  //!
+  //! Use it like this:
+  //!
+  //! ```
+  //! // Zero initialize (zero means the default value) and change what you need.
+  //! JitAllocator::CreateParams params {};
+  //! params.blockSize = 1024 * 1024;
+  //!
+  //! // Create the allocator.
+  //! JitAllocator allocator(&params);
+  //! ```
+  struct CreateParams {
+    //! Allocator options.
+    //!
+    //! No options are used by default.
+    JitAllocatorOptions options = JitAllocatorOptions::kNone;
+
+    //! Base size of a single block in bytes (default 64kB).
+    //!
+    //! \remarks Block size must be equal to or greater than page size and must be power of 2. If the input is not
+    //! valid then the default block size will be used instead.
+    uint32_t blockSize = 0;
+
+    //! Base granularity (and also natural alignment) of allocations in bytes (default 64).
+    //!
+    //! Since the `JitAllocator` uses bit-arrays to mark used memory the granularity also specifies how many bytes
+    //! correspond to a single bit in such bit-array. Higher granularity means more waste of virtual memory (as it
+    //! increases the natural alignment), but smaller bit-arrays as less bits would be required per a single block.
+    uint32_t granularity = 0;
+
+    //! Pattern to use to fill unused memory.
+    //!
+    //! Only used if \ref JitAllocatorOptions::kCustomFillPattern is set.
+    uint32_t fillPattern = 0;
+
+    //! Resets the content of `CreateParams` to zeros (zero means the default value).
+    inline void reset() noexcept { memset(this, 0, sizeof(*this)); }
+  };
+
+  //! Creates a `JitAllocator` instance.
+  ASMJIT_API explicit JitAllocator(const CreateParams* params = nullptr) noexcept;
+  //! Destroys the `JitAllocator` instance and release all blocks held.
+  ASMJIT_API ~JitAllocator() noexcept;
+
+  //! Tests whether the allocator is initialized (an uninitialized allocator has a zero block size).
+  inline bool isInitialized() const noexcept { return _impl->blockSize != 0; }
+
+  //! Free all allocated memory - makes all pointers returned by `alloc()` invalid.
+  //!
+  //! \remarks This function is not thread-safe as it's designed to be used when nobody else is using allocator.
+  //! The reason is that there is no point of calling `reset()` when the allocator is still in use.
+  ASMJIT_API void reset(ResetPolicy resetPolicy = ResetPolicy::kSoft) noexcept;
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns allocator options, see \ref JitAllocatorOptions.
+  inline JitAllocatorOptions options() const noexcept { return _impl->options; }
+  //! Tests whether the allocator has the given `option` set.
+  inline bool hasOption(JitAllocatorOptions option) const noexcept { return uint32_t(_impl->options & option) != 0; }
+
+  //! Returns a base block size (a minimum size of block that the allocator would allocate).
+  inline uint32_t blockSize() const noexcept { return _impl->blockSize; }
+  //! Returns granularity of the allocator.
+  inline uint32_t granularity() const noexcept { return _impl->granularity; }
+  //! Returns pattern that is used to fill unused memory if \ref JitAllocatorOptions::kFillUnusedMemory is set.
+  inline uint32_t fillPattern() const noexcept { return _impl->fillPattern; }
+
+  //! \}
+
+  //! \name Alloc & Release
+  //! \{
+
+  //! Allocates a new memory block of the requested `size`.
+  //!
+  //! When the function is successful it stores two pointers in `rxPtrOut` and `rwPtrOut`. The pointers will be
+  //! different only if \ref JitAllocatorOptions::kUseDualMapping was used to setup the allocator (in that case `rxPtrOut`
+  //! would point to a Read+Execute region and `rwPtrOut` to a Read+Write region of the same memory-mapped block).
+  ASMJIT_API Error alloc(void** rxPtrOut, void** rwPtrOut, size_t size) noexcept;
+
+  //! Releases a memory block returned by `alloc()`.
+  //!
+  //! \remarks This function is thread-safe.
+  ASMJIT_API Error release(void* rxPtr) noexcept;
+
+  //! Frees extra memory allocated with `rxPtr` by shrinking it to the given `newSize`.
+  //!
+  //! \remarks This function is thread-safe.
+  ASMJIT_API Error shrink(void* rxPtr, size_t newSize) noexcept;
+
+  //! Queries information about an allocated memory block that contains the given `rxPtr`.
+  //!
+  //! The function returns `kErrorOk` when `rxPtr` is matched and fills `rxPtrOut`, `rwPtrOut`, and `sizeOut` output
+  //! arguments. The returned `rxPtrOut` and `rwPtrOut` pointers point to the beginning of the block, and `sizeOut`
+  //! describes the total amount of bytes this allocation uses - `sizeOut` will always be aligned to the allocation
+  //! granularity, so for example if an allocation was 1 byte and the size granularity is 64, the returned `sizeOut`
+  //! will be 64 bytes, because that's what the allocator sees.
+  ASMJIT_API Error query(void* rxPtr, void** rxPtrOut, void** rwPtrOut, size_t* sizeOut) const noexcept;
+
+  //! \}
+
+  //! \name Statistics
+  //! \{
+
+  //! Statistics about `JitAllocator`.
+  struct Statistics {
+    //! Number of blocks `JitAllocator` maintains.
+    size_t _blockCount;
+    //! Number of active allocations.
+    size_t _allocationCount;
+    //! How many bytes are currently used / allocated.
+    size_t _usedSize;
+    //! How many bytes are currently reserved by the allocator.
+    size_t _reservedSize;
+    //! Allocation overhead (in bytes) required to maintain all blocks.
+    size_t _overheadSize;
+
+    //! Resets every counter to zero, including `_allocationCount`, by assigning a value-initialized instance.
+    inline void reset() noexcept { *this = Statistics {}; }
+
+    //! Returns count of blocks managed by `JitAllocator` at the moment.
+    inline size_t blockCount() const noexcept { return _blockCount; }
+    //! Returns the number of active allocations.
+    inline size_t allocationCount() const noexcept { return _allocationCount; }
+
+    //! Returns how many bytes are currently used.
+    inline size_t usedSize() const noexcept { return _usedSize; }
+    //! Returns the number of bytes unused by the allocator at the moment.
+    inline size_t unusedSize() const noexcept { return _reservedSize - _usedSize; }
+    //! Returns the total number of bytes reserved by the allocator (sum of sizes of all blocks).
+    inline size_t reservedSize() const noexcept { return _reservedSize; }
+    //! Returns the number of bytes the allocator needs to manage the allocated memory.
+    inline size_t overheadSize() const noexcept { return _overheadSize; }
+
+    //! Returns the used size as a percentage of the reserved size (the epsilon avoids a division by zero).
+    inline double usedSizeAsPercent() const noexcept {
+      return (double(usedSize()) / (double(reservedSize()) + 1e-16)) * 100.0;
+    }
+
+    //! Returns the unused size as a percentage of the reserved size (the epsilon avoids a division by zero).
+    inline double unusedSizeAsPercent() const noexcept {
+      return (double(unusedSize()) / (double(reservedSize()) + 1e-16)) * 100.0;
+    }
+
+    //! Returns the overhead size as a percentage of the reserved size (the epsilon avoids a division by zero).
+    inline double overheadSizeAsPercent() const noexcept {
+      return (double(overheadSize()) / (double(reservedSize()) + 1e-16)) * 100.0;
+    }
+  };
+
+  //! Returns JIT allocator statistics.
+  //!
+  //! \remarks This function is thread-safe.
+  ASMJIT_API Statistics statistics() const noexcept;
+
+  //! \}
+};
+
+//! \}
+
+ASMJIT_END_NAMESPACE
+
+#endif
+#endif
diff --git a/lib/lepton/asmjit/core/jitruntime.cpp b/lib/lepton/asmjit/core/jitruntime.cpp
new file mode 100644
index 0000000000..491c2040fb
--- /dev/null
+++ b/lib/lepton/asmjit/core/jitruntime.cpp
@@ -0,0 +1,80 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#ifndef ASMJIT_NO_JIT
+
+#include "../core/cpuinfo.h"
+#include "../core/jitruntime.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+JitRuntime::JitRuntime(const JitAllocator::CreateParams* params) noexcept
+  : _allocator(params) { // `params` is passed straight to the JitAllocator constructor.
+  _environment = Environment::host(); // Start from the host environment...
+  _environment.setObjectFormat(ObjectFormat::kJIT); // ...and mark its object format as JIT.
+}
+
+JitRuntime::~JitRuntime() noexcept {} // Nothing explicit to do; `_allocator` is destroyed as a member.
+
+// Copies the code held by `code` into newly allocated executable memory and stores the address
+// of that memory in `dst`. On failure `dst` is left as nullptr and the error is returned; memory
+// that was already allocated when relocation fails is released again.
+Error JitRuntime::_add(void** dst, CodeHolder* code) noexcept {
+  *dst = nullptr;
+
+  ASMJIT_PROPAGATE(code->flatten());
+  ASMJIT_PROPAGATE(code->resolveUnresolvedLinks());
+
+  size_t estimatedCodeSize = code->codeSize();
+  if (ASMJIT_UNLIKELY(estimatedCodeSize == 0))
+    return DebugUtils::errored(kErrorNoCodeGenerated);
+
+  uint8_t* rx;
+  uint8_t* rw;
+  ASMJIT_PROPAGATE(_allocator.alloc((void**)&rx, (void**)&rw, estimatedCodeSize));
+
+  // Relocate the code.
+  Error err = code->relocateToBase(uintptr_t((void*)rx));
+  if (ASMJIT_UNLIKELY(err)) {
+    _allocator.release(rx);
+    return err;
+  }
+
+  // Recalculate the final code size and give back the surplus in case some relocations didn't require
+  // records in an address table. Shrinking is best-effort, so its result is deliberately not checked.
+  size_t codeSize = code->codeSize();
+  if (codeSize < estimatedCodeSize)
+    _allocator.shrink(rx, codeSize);
+
+  {
+    VirtMem::ProtectJitReadWriteScope rwScope(rx, codeSize);
+
+    for (Section* section : code->_sections) {
+      size_t offset = size_t(section->offset());
+      size_t bufferSize = size_t(section->bufferSize());
+      size_t virtualSize = size_t(section->virtualSize());
+
+      ASMJIT_ASSERT(offset + bufferSize <= codeSize);
+      memcpy(rw + offset, section->data(), bufferSize);
+
+      if (virtualSize > bufferSize) {
+        ASMJIT_ASSERT(offset + virtualSize <= codeSize);
+        memset(rw + offset + bufferSize, 0, virtualSize - bufferSize);
+      }
+    }
+  }
+
+  *dst = rx;
+  return kErrorOk;
+}
+
+Error JitRuntime::_release(void* p) noexcept {
+  return _allocator.release(p); // Forwards to the allocator and returns its error code unchanged.
+}
+
+ASMJIT_END_NAMESPACE
+
+#endif
diff --git a/lib/lepton/asmjit/core/jitruntime.h b/lib/lepton/asmjit/core/jitruntime.h
new file mode 100644
index 0000000000..6f35e21277
--- /dev/null
+++ b/lib/lepton/asmjit/core/jitruntime.h
@@ -0,0 +1,89 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_JITRUNTIME_H_INCLUDED
+#define ASMJIT_CORE_JITRUNTIME_H_INCLUDED
+
+#include "../core/api-config.h"
+#ifndef ASMJIT_NO_JIT
+
+#include "../core/codeholder.h"
+#include "../core/jitallocator.h"
+#include "../core/target.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+class CodeHolder;
+
+//! \addtogroup asmjit_virtual_memory
+//! \{
+
+//! JIT execution runtime is a special `Target` that is designed to store and
+//! execute the generated code.
+class ASMJIT_VIRTAPI JitRuntime : public Target {
+public:
+  ASMJIT_NONCOPYABLE(JitRuntime)
+
+  //! Virtual memory allocator.
+  JitAllocator _allocator;
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Creates a `JitRuntime` instance.
+  ASMJIT_API explicit JitRuntime(const JitAllocator::CreateParams* params = nullptr) noexcept;
+  //! Destroys the `JitRuntime` instance.
+  ASMJIT_API virtual ~JitRuntime() noexcept;
+  //! Resets the runtime by forwarding `resetPolicy` to \ref JitAllocator::reset() of the associated allocator.
+  inline void reset(ResetPolicy resetPolicy = ResetPolicy::kSoft) noexcept {
+    _allocator.reset(resetPolicy);
+  }
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns the associated `JitAllocator`.
+  inline JitAllocator* allocator() const noexcept { return const_cast<JitAllocator*>(&_allocator); }
+
+  //! \}
+
+  //! \name Utilities
+  //! \{
+
+  // NOTE: To allow passing function pointers to `add()` and `release()` the
+  // virtual methods are prefixed with `_` and called from templates instead.
+
+  //! Allocates memory needed for a code stored in the `CodeHolder` and relocates the code to the pointer allocated.
+  //!
+  //! The beginning of the memory allocated for the function is returned in `dst`. On failure an `Error` code is returned
+  //! and `dst` is explicitly set to `nullptr` (this means that you don't have to set it to null before calling `add()`).
+  template<typename Func>
+  inline Error add(Func* dst, CodeHolder* code) noexcept {
+    return _add(Support::ptr_cast_impl<void**, Func*>(dst), code);
+  }
+
+  //! Releases `p` which was obtained by calling `add()`.
+  template<typename Func>
+  inline Error release(Func p) noexcept {
+    return _release(Support::ptr_cast_impl<void*, Func>(p));
+  }
+
+  //! Type-unsafe version of `add()`.
+  ASMJIT_API virtual Error _add(void** dst, CodeHolder* code) noexcept;
+
+  //! Type-unsafe version of `release()`.
+  ASMJIT_API virtual Error _release(void* p) noexcept;
+
+  //! \}
+};
+
+//! \}
+
+ASMJIT_END_NAMESPACE
+
+#endif
+#endif
diff --git a/lib/lepton/asmjit/core/logger.cpp b/lib/lepton/asmjit/core/logger.cpp
new file mode 100644
index 0000000000..4567b3c938
--- /dev/null
+++ b/lib/lepton/asmjit/core/logger.cpp
@@ -0,0 +1,69 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#ifndef ASMJIT_NO_LOGGING
+
+#include "../core/logger.h"
+#include "../core/string.h"
+#include "../core/support.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+// Logger - Implementation
+// =======================
+
+Logger::Logger() noexcept
+  : _options() {} // Value-initializes the format options.
+Logger::~Logger() noexcept {} // Nothing to release.
+
+Error Logger::logf(const char* fmt, ...) noexcept {
+  // Collect the variadic arguments and let logv() do the formatting and the logging.
+  va_list args;
+  va_start(args, fmt);
+
+  Error result = logv(fmt, args);
+  va_end(args);
+
+  return result;
+}
+
+Error Logger::logv(const char* fmt, va_list ap) noexcept {
+  StringTmp<2048> sb; // Temporary string the message is formatted into.
+  ASMJIT_PROPAGATE(sb.appendVFormat(fmt, ap)); // Bail out with the error if formatting fails.
+  return log(sb); // Hand the formatted text to log().
+}
+
+// FileLogger - Implementation
+// ===========================
+
+FileLogger::FileLogger(FILE* file) noexcept
+  : _file(file) {} // A null `file` is accepted; _log() then discards everything it is given.
+FileLogger::~FileLogger() noexcept {} // Does not close `_file`.
+
+Error FileLogger::_log(const char* data, size_t size) noexcept {
+  // Logging is best-effort: without a file there is nothing to do, and write errors are not reported.
+  if (_file) {
+    // SIZE_MAX means `data` is null-terminated and its length has to be measured first.
+    size_t length = (size == SIZE_MAX) ? strlen(data) : size;
+    fwrite(data, 1, length, _file);
+  }
+
+  return kErrorOk;
+}
+
+// StringLogger - Implementation
+// =============================
+
+StringLogger::StringLogger() noexcept {} // `_content` is default-constructed.
+StringLogger::~StringLogger() noexcept {} // Nothing explicit to release.
+
+Error StringLogger::_log(const char* data, size_t size) noexcept {
+  return _content.append(data, size); // Appends to the accumulated log and returns the append result.
+}
+
+ASMJIT_END_NAMESPACE
+
+#endif
diff --git a/lib/lepton/asmjit/core/logger.h b/lib/lepton/asmjit/core/logger.h
new file mode 100644
index 0000000000..d416a50d84
--- /dev/null
+++ b/lib/lepton/asmjit/core/logger.h
@@ -0,0 +1,198 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_LOGGING_H_INCLUDED
+#define ASMJIT_CORE_LOGGING_H_INCLUDED
+
+#include "../core/inst.h"
+#include "../core/string.h"
+#include "../core/formatter.h"
+
+#ifndef ASMJIT_NO_LOGGING
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \addtogroup asmjit_logging
+//! \{
+
+//! Logging interface.
+//!
+//! This class can be inherited and reimplemented to fit into your own logging needs. When reimplementing a logger
+//! use \ref Logger::_log() method to customize the output.
+//!
+//! There are two `Logger` implementations offered by AsmJit:
+//!   - \ref FileLogger - logs into a `FILE*`.
+//!   - \ref StringLogger - concatenates all logs into a \ref String.
+class ASMJIT_VIRTAPI Logger {
+public:
+  ASMJIT_BASE_CLASS(Logger)
+  ASMJIT_NONCOPYABLE(Logger)
+
+  //! Format options.
+  FormatOptions _options;
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Creates a `Logger` instance.
+  ASMJIT_API Logger() noexcept;
+  //! Destroys the `Logger` instance.
+  ASMJIT_API virtual ~Logger() noexcept;
+
+  //! \}
+
+  //! \name Format Options
+  //! \{
+
+  //! Returns \ref FormatOptions of this logger.
+  inline FormatOptions& options() noexcept { return _options; }
+  //! \overload
+  inline const FormatOptions& options() const noexcept { return _options; }
+  //! Sets formatting options of this Logger to `options`.
+  inline void setOptions(const FormatOptions& options) noexcept { _options = options; }
+  //! Resets formatting options of this Logger to defaults.
+  inline void resetOptions() noexcept { _options.reset(); }
+
+  //! Returns formatting flags.
+  inline FormatFlags flags() const noexcept { return _options.flags(); }
+  //! Tests whether the logger has the given `flag` enabled.
+  inline bool hasFlag(FormatFlags flag) const noexcept { return _options.hasFlag(flag); }
+  //! Sets formatting flags to `flags`.
+  inline void setFlags(FormatFlags flags) noexcept { _options.setFlags(flags); }
+  //! Enables the given formatting `flags`.
+  inline void addFlags(FormatFlags flags) noexcept { _options.addFlags(flags); }
+  //! Disables the given formatting `flags`.
+  inline void clearFlags(FormatFlags flags) noexcept { _options.clearFlags(flags); }
+
+  //! Returns indentation of the indentation group given by `type`.
+  inline uint32_t indentation(FormatIndentationGroup type) const noexcept { return _options.indentation(type); }
+  //! Sets indentation of the indentation group given by `type` to `n` spaces.
+  inline void setIndentation(FormatIndentationGroup type, uint32_t n) noexcept { _options.setIndentation(type, n); }
+  //! Resets indentation of the indentation group given by `type` to 0 spaces.
+  inline void resetIndentation(FormatIndentationGroup type) noexcept { _options.resetIndentation(type); }
+
+  //! Returns padding of the padding group given by `type`.
+  inline size_t padding(FormatPaddingGroup type) const noexcept { return _options.padding(type); }
+  //! Sets padding of the padding group given by `type` to `n`.
+  inline void setPadding(FormatPaddingGroup type, uint32_t n) noexcept { _options.setPadding(type, n); }
+  //! Resets padding of the padding group given by `type` to 0, which means that a default will be used.
+  inline void resetPadding(FormatPaddingGroup type) noexcept { _options.resetPadding(type); }
+
+  //! \}
+
+  //! \name Logging Interface
+  //! \{
+
+  //! Logs `data` - must be reimplemented.
+  //!
+  //! The function can accept either a null terminated string if `size` is `SIZE_MAX` or a non-null terminated
+  //! string of the given `size`. The function cannot assume that the data is null terminated and must handle
+  //! non-null terminated inputs.
+  virtual Error _log(const char* data, size_t size) noexcept = 0;
+
+  //! Logs string `data`, which is either null terminated (`size` is `SIZE_MAX`) or has the given `size`.
+  inline Error log(const char* data, size_t size = SIZE_MAX) noexcept { return _log(data, size); }
+  //! Logs content of a string `str`.
+  inline Error log(const String& str) noexcept { return _log(str.data(), str.size()); }
+
+  //! Formats the message like `printf()` by forwarding to \ref logv(), which passes the result to \ref _log().
+  ASMJIT_API Error logf(const char* fmt, ...) noexcept;
+
+  //! Formats the message from `fmt` and `ap` into a temporary string and then passes it to \ref _log().
+  ASMJIT_API Error logv(const char* fmt, va_list ap) noexcept;
+
+  //! \}
+};
+
+//! Logger that can log to a `FILE*`.
+class ASMJIT_VIRTAPI FileLogger : public Logger {
+public:
+  ASMJIT_NONCOPYABLE(FileLogger)
+
+  FILE* _file; //!< Output stream, nothing is written when it's null.
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Creates a new `FileLogger` that logs to `FILE*`.
+  ASMJIT_API FileLogger(FILE* file = nullptr) noexcept;
+  //! Destroys the `FileLogger`.
+  ASMJIT_API virtual ~FileLogger() noexcept;
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns the logging output stream or null if the logger has no output stream.
+  inline FILE* file() const noexcept { return _file; }
+
+  //! Sets the logging output stream to `file`, which can be null.
+  //!
+  //! \note If the `file` is null the logging will be disabled. When a logger is attached to `CodeHolder` or any
+  //! emitter the logging API will always be called regardless of the output file. This means that if you really
+  //! want to disable logging at emitter level you must not attach a logger to it.
+  inline void setFile(FILE* file) noexcept { _file = file; }
+
+  //! \}
+
+  ASMJIT_API Error _log(const char* data, size_t size = SIZE_MAX) noexcept override;
+};
+
+//! Logger that stores everything in an internal string buffer.
+class ASMJIT_VIRTAPI StringLogger : public Logger {
+public:
+  ASMJIT_NONCOPYABLE(StringLogger)
+
+  //! Logger data as string.
+  String _content;
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Creates a new `StringLogger`.
+  ASMJIT_API StringLogger() noexcept;
+  //! Destroys the `StringLogger`.
+  ASMJIT_API virtual ~StringLogger() noexcept;
+
+  //! \}
+
+  //! \name Logger Data Accessors
+  //! \{
+
+  //! Returns the content of the logger as \ref String.
+  //!
+  //! It can be moved, if desired.
+  inline String& content() noexcept { return _content; }
+  //! \overload
+  inline const String& content() const noexcept { return _content; }
+
+  //! Returns aggregated logger data as `char*` pointer.
+  //!
+  //! The pointer is owned by `StringLogger`, it can't be modified or freed.
+  inline const char* data() const noexcept { return _content.data(); }
+  //! Returns size of the data returned by `data()`.
+  inline size_t dataSize() const noexcept { return _content.size(); }
+
+  //! \}
+
+  //! \name Logger Data Manipulation
+  //! \{
+
+  //! Clears the accumulated logger data.
+  inline void clear() noexcept { _content.clear(); }
+
+  //! \}
+
+  ASMJIT_API Error _log(const char* data, size_t size = SIZE_MAX) noexcept override; //!< Appends `data` to the content.
+};
+
+//! \}
+
+ASMJIT_END_NAMESPACE
+
+#endif
+
+#endif // ASMJIT_CORE_LOGGING_H_INCLUDED
diff --git a/lib/lepton/asmjit/core/misc_p.h b/lib/lepton/asmjit/core/misc_p.h
new file mode 100644
index 0000000000..5cd934e462
--- /dev/null
+++ b/lib/lepton/asmjit/core/misc_p.h
@@ -0,0 +1,33 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_MISC_P_H_INCLUDED
+#define ASMJIT_CORE_MISC_P_H_INCLUDED
+
+#include "../core/api-config.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \cond INTERNAL
+//! \addtogroup asmjit_utilities
+//! \{
+
+#define ASMJIT_LOOKUP_TABLE_4(T, I) T((I)), T((I+1)), T((I+2)), T((I+3))
+#define ASMJIT_LOOKUP_TABLE_8(T, I) ASMJIT_LOOKUP_TABLE_4(T, I), ASMJIT_LOOKUP_TABLE_4(T, I + 4)
+#define ASMJIT_LOOKUP_TABLE_16(T, I) ASMJIT_LOOKUP_TABLE_8(T, I), ASMJIT_LOOKUP_TABLE_8(T, I + 8)
+#define ASMJIT_LOOKUP_TABLE_32(T, I) ASMJIT_LOOKUP_TABLE_16(T, I), ASMJIT_LOOKUP_TABLE_16(T, I + 16)
+#define ASMJIT_LOOKUP_TABLE_40(T, I) ASMJIT_LOOKUP_TABLE_16(T, I), ASMJIT_LOOKUP_TABLE_16(T, I + 16), ASMJIT_LOOKUP_TABLE_8(T, I + 32)
+#define ASMJIT_LOOKUP_TABLE_64(T, I) ASMJIT_LOOKUP_TABLE_32(T, I), ASMJIT_LOOKUP_TABLE_32(T, I + 32)
+#define ASMJIT_LOOKUP_TABLE_128(T, I) ASMJIT_LOOKUP_TABLE_64(T, I), ASMJIT_LOOKUP_TABLE_64(T, I + 64)
+#define ASMJIT_LOOKUP_TABLE_256(T, I) ASMJIT_LOOKUP_TABLE_128(T, I), ASMJIT_LOOKUP_TABLE_128(T, I + 128)
+#define ASMJIT_LOOKUP_TABLE_512(T, I) ASMJIT_LOOKUP_TABLE_256(T, I), ASMJIT_LOOKUP_TABLE_256(T, I + 256)
+#define ASMJIT_LOOKUP_TABLE_1024(T, I) ASMJIT_LOOKUP_TABLE_512(T, I), ASMJIT_LOOKUP_TABLE_512(T, I + 512)
+
+//! \}
+//! \endcond
+
+ASMJIT_END_NAMESPACE
+
+#endif // ASMJIT_CORE_MISC_P_H_INCLUDED
diff --git a/lib/lepton/asmjit/core/operand.cpp b/lib/lepton/asmjit/core/operand.cpp
new file mode 100644
index 0000000000..ee026817f8
--- /dev/null
+++ b/lib/lepton/asmjit/core/operand.cpp
@@ -0,0 +1,132 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#include "../core/operand.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+// Operand - Tests
+// ===============
+
+#if defined(ASMJIT_TEST)
+enum class StrongEnumForImmTests : uint32_t {
+  kValue0,
+  kValue0xFFFFFFFF = 0xFFFFFFFFu
+};
+
+UNIT(operand) {
+  INFO("Checking operand sizes");
+  EXPECT(sizeof(Operand) == 16);
+  EXPECT(sizeof(BaseReg) == 16);
+  EXPECT(sizeof(BaseMem) == 16);
+  EXPECT(sizeof(Imm)     == 16);
+  EXPECT(sizeof(Label)   == 16);
+
+  INFO("Checking basic functionality of Operand");
+  Operand a, b;
+  Operand dummy;
+
+  EXPECT(a.isNone() == true);
+  EXPECT(a.isReg() == false);
+  EXPECT(a.isMem() == false);
+  EXPECT(a.isImm() == false);
+  EXPECT(a.isLabel() == false);
+  EXPECT(a == b);
+  EXPECT(a._data[0] == 0);
+  EXPECT(a._data[1] == 0);
+
+  INFO("Checking basic functionality of Label");
+  Label label;
+  EXPECT(label.isValid() == false);
+  EXPECT(label.id() == Globals::kInvalidId);
+
+  INFO("Checking basic functionality of BaseReg");
+  EXPECT(BaseReg().isReg() == true);
+  EXPECT(BaseReg().isValid() == false);
+  EXPECT(BaseReg()._data[0] == 0);
+  EXPECT(BaseReg()._data[1] == 0);
+  EXPECT(dummy.as<BaseReg>().isValid() == false);
+
+  // Create some register (not specific to any architecture).
+  OperandSignature rSig = OperandSignature::fromOpType(OperandType::kReg) |
+                          OperandSignature::fromRegType(RegType::kVec128) |
+                          OperandSignature::fromRegGroup(RegGroup::kVec) |
+                          OperandSignature::fromSize(8);
+  BaseReg r1(rSig, 5);
+
+  EXPECT(r1.isValid()   == true);
+  EXPECT(r1.isReg()     == true);
+  EXPECT(r1.isReg(RegType::kVec128) == true);
+  EXPECT(r1.isPhysReg() == true);
+  EXPECT(r1.isVirtReg() == false);
+  EXPECT(r1.signature() == rSig);
+  EXPECT(r1.type()      == RegType::kVec128);
+  EXPECT(r1.group()     == RegGroup::kVec);
+  EXPECT(r1.size()      == 8);
+  EXPECT(r1.id()        == 5);
+  EXPECT(r1.isReg(RegType::kVec128, 5) == true); // RegType and Id.
+  EXPECT(r1._data[0]    == 0);
+  EXPECT(r1._data[1]    == 0);
+
+  // The same type of register having different id.
+  BaseReg r2(r1, 6);
+  EXPECT(r2.isValid()   == true);
+  EXPECT(r2.isReg()     == true);
+  EXPECT(r2.isReg(RegType::kVec128) == true);
+  EXPECT(r2.isPhysReg() == true);
+  EXPECT(r2.isVirtReg() == false);
+  EXPECT(r2.signature() == rSig);
+  EXPECT(r2.type()      == r1.type());
+  EXPECT(r2.group()     == r1.group());
+  EXPECT(r2.size()      == r1.size());
+  EXPECT(r2.id()        == 6);
+  EXPECT(r2.isReg(RegType::kVec128, 6) == true);
+
+  r1.reset();
+  EXPECT(!r1.isReg());
+  EXPECT(!r1.isValid());
+
+  INFO("Checking basic functionality of BaseMem");
+  BaseMem m;
+  EXPECT(m.isMem());
+  EXPECT(m == BaseMem());
+  EXPECT(m.hasBase() == false);
+  EXPECT(m.hasIndex() == false);
+  EXPECT(m.hasOffset() == false);
+  EXPECT(m.isOffset64Bit() == true);
+  EXPECT(m.offset() == 0);
+
+  m.setOffset(-1);
+  EXPECT(m.offsetLo32() == -1);
+  EXPECT(m.offset() == -1);
+
+  int64_t x = int64_t(0xFF00FF0000000001u);
+  int32_t xHi = int32_t(0xFF00FF00u);
+
+  m.setOffset(x);
+  EXPECT(m.offset() == x);
+  EXPECT(m.offsetLo32() == 1);
+  EXPECT(m.offsetHi32() == xHi);
+
+  INFO("Checking basic functionality of Imm");
+  Imm immValue(-42);
+  EXPECT(immValue.type() == ImmType::kInt);
+  EXPECT(Imm(-1).value() == -1);
+  EXPECT(imm(-1).value() == -1);
+  EXPECT(immValue.value() == -42);
+  EXPECT(imm(0xFFFFFFFF).value() == int64_t(0xFFFFFFFF));
+
+  Imm immDouble(0.4);
+  EXPECT(immDouble.type() == ImmType::kDouble);
+  EXPECT(immDouble.valueAs<double>() == 0.4);
+  EXPECT(immDouble == imm(0.4));
+
+  EXPECT(Imm(StrongEnumForImmTests::kValue0).value() == 0);
+  EXPECT(Imm(StrongEnumForImmTests::kValue0xFFFFFFFF).value() == 0xFFFFFFFFu);
+}
+#endif
+
+ASMJIT_END_NAMESPACE
diff --git a/lib/lepton/asmjit/core/operand.h b/lib/lepton/asmjit/core/operand.h
new file mode 100644
index 0000000000..2f81a217f1
--- /dev/null
+++ b/lib/lepton/asmjit/core/operand.h
@@ -0,0 +1,1611 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_OPERAND_H_INCLUDED
+#define ASMJIT_CORE_OPERAND_H_INCLUDED
+
+#include "../core/archcommons.h"
+#include "../core/support.h"
+#include "../core/type.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \addtogroup asmjit_assembler
+//! \{
+
+//! Operand type used by \ref Operand_.
+enum class OperandType : uint32_t {
+  //! Not an operand or not initialized.
+  kNone = 0,
+  //! Operand is a register.
+  kReg = 1,
+  //! Operand is a memory.
+  kMem = 2,
+  //! Operand is an immediate value.
+  kImm = 3,
+  //! Operand is a label.
+  kLabel = 4,
+
+  //! Maximum value of `OperandType`.
+  kMaxValue = kLabel
+};
+
+static_assert(uint32_t(OperandType::kMem) == uint32_t(OperandType::kReg) + 1,
+              "AsmJit requires that `OperandType::kMem` equals to `OperandType::kReg + 1`");
+
+//! Register mask is a convenience typedef that describes a mask where each bit describes a physical register id
+//! in the same \ref RegGroup. At the moment 32 bits are enough as AsmJit doesn't support any architecture that
+//! would provide more than 32 registers for a register group.
+typedef uint32_t RegMask;
+
+//! Register type.
+//!
+//! Provides a unique type that can be used to identify a register or its view.
+enum class RegType : uint8_t {
+  //! No register - unused, invalid, multiple meanings.
+  kNone = 0,
+
+  //! This is not a register type. This value is reserved for a \ref Label that is used in \ref BaseMem as a base.
+  //!
+  //! Label tag is used as a sub-type, forming a unique signature across all operand types as 0x1 is never associated
+  //! with any register type. This means that a memory operand's BASE register can be constructed from virtually any
+  //! operand (register vs. label) by just assigning its type (register type or label-tag) and operand id.
+  kLabelTag = 1,
+
+  //! Universal type describing program counter (PC) or instruction pointer (IP) register, if the target architecture
+  //! actually exposes it as a separate register type, which most modern targets do.
+  kPC = 2,
+
+  //! 8-bit low general purpose register (X86).
+  kGp8Lo = 3,
+  //! 8-bit high general purpose register (X86).
+  kGp8Hi = 4,
+  //! 16-bit general purpose register (X86).
+  kGp16 = 5,
+  //! 32-bit general purpose register (X86|ARM).
+  kGp32 = 6,
+  //! 64-bit general purpose register (X86|ARM).
+  kGp64 = 7,
+  //! 8-bit view of a vector register (ARM).
+  kVec8 = 8,
+  //! 16-bit view of a vector register (ARM).
+  kVec16 = 9,
+  //! 32-bit view of a vector register (ARM).
+  kVec32 = 10,
+  //! 64-bit view of a vector register (ARM).
+  //!
+  //! \note This is never used for MMX registers on X86, MMX registers have its own category.
+  kVec64 = 11,
+  //! 128-bit view of a vector register (X86|ARM).
+  kVec128 = 12,
+  //! 256-bit view of a vector register (X86).
+  kVec256 = 13,
+  //! 512-bit view of a vector register (X86).
+  kVec512 = 14,
+  //! 1024-bit view of a vector register (future).
+  kVec1024 = 15,
+  //! View of a vector register, which width is implementation specific (AArch64).
+  kVecNLen = 16,
+
+  //! Mask register (X86).
+  kMask = 17,
+
+  //! Start of architecture dependent register types.
+  kExtra = 18,
+
+  // X86 Specific Register Types
+  // ---------------------------
+
+  // X86 Specific Register Types
+  // ===========================
+
+  //! Instruction pointer (RIP), only addressable in \ref x86::Mem in 64-bit targets.
+  kX86_Rip = kPC,
+  //! Low GPB register (AL, BL, CL, DL, ...).
+  kX86_GpbLo = kGp8Lo,
+  //! High GPB register (AH, BH, CH, DH only).
+  kX86_GpbHi = kGp8Hi,
+  //! GPW register.
+  kX86_Gpw = kGp16,
+  //! GPD register.
+  kX86_Gpd = kGp32,
+  //! GPQ register (64-bit).
+  kX86_Gpq = kGp64,
+  //! XMM register (SSE+).
+  kX86_Xmm = kVec128,
+  //! YMM register (AVX+).
+  kX86_Ymm = kVec256,
+  //! ZMM register (AVX512+).
+  kX86_Zmm = kVec512,
+  //! K register (AVX512+).
+  kX86_KReg = kMask,
+  //! MMX register.
+  kX86_Mm = kExtra + 0,
+  //! Segment register (None, ES, CS, SS, DS, FS, GS).
+  kX86_SReg = kExtra + 1,
+  //! Control register (CR).
+  kX86_CReg = kExtra + 2,
+  //! Debug register (DR).
+  kX86_DReg = kExtra + 3,
+  //! FPU (x87) register.
+  kX86_St = kExtra + 4,
+  //! Bound register (BND).
+  kX86_Bnd = kExtra + 5,
+  //! TMM register (AMX_TILE)
+  kX86_Tmm = kExtra + 6,
+
+  // ARM Specific Register Types
+  // ===========================
+
+  //! Program pointer (PC) register (AArch64).
+  kARM_PC = kPC,
+  //! 32-bit general purpose register (R or W).
+  kARM_GpW = kGp32,
+  //! 64-bit general purpose register (X).
+  kARM_GpX = kGp64,
+  //! 8-bit view of VFP/ASIMD register (B).
+  kARM_VecB = kVec8,
+  //! 16-bit view of VFP/ASIMD register (H).
+  kARM_VecH = kVec16,
+  //! 32-bit view of VFP/ASIMD register (S).
+  kARM_VecS = kVec32,
+  //! 64-bit view of VFP/ASIMD register (D).
+  kARM_VecD = kVec64,
+  //! 128-bit view of VFP/ASIMD register (Q|V).
+  kARM_VecV = kVec128,
+
+  //! Maximum value of `RegType`.
+  kMaxValue = 31
+};
+ASMJIT_DEFINE_ENUM_COMPARE(RegType)
+
+//! Register group.
+//!
+//! Provides a unique value that identifies groups of registers and their views.
+enum class RegGroup : uint8_t {
+  //! General purpose register group compatible with all backends.
+  kGp = 0,
+  //! Vector register group compatible with all backends.
+  //!
+  //! Describes X86 XMM|YMM|ZMM registers and ARM/AArch64 V registers.
+  kVec = 1,
+
+  //! Extra virtual group #2 that can be used by Compiler for register allocation.
+  kExtraVirt2 = 2,
+  //! Extra virtual group #3 that can be used by Compiler for register allocation.
+  kExtraVirt3 = 3,
+
+  //! Program counter group.
+  kPC = 4,
+
+  //! Extra non-virtual group that can be used by registers not managed by Compiler.
+  kExtraNonVirt = 5,
+
+  // X86 Specific Register Groups
+  // ----------------------------
+
+  //! K register group (KReg) - maps to \ref RegGroup::kExtraVirt2 (X86, X86_64).
+  kX86_K = kExtraVirt2,
+  //! MMX register group (MM) - maps to \ref RegGroup::kExtraVirt3 (X86, X86_64).
+  kX86_MM = kExtraVirt3,
+
+  //! Instruction pointer (X86, X86_64).
+  kX86_Rip = kPC,
+  //! Segment register group (X86, X86_64).
+  kX86_SReg = kExtraNonVirt + 0,
+  //! CR register group (X86, X86_64).
+  kX86_CReg = kExtraNonVirt + 1,
+  //! DR register group (X86, X86_64).
+  kX86_DReg = kExtraNonVirt + 2,
+  //! FPU register group (X86, X86_64).
+  kX86_St = kExtraNonVirt + 3,
+  //! BND register group (X86, X86_64).
+  kX86_Bnd = kExtraNonVirt + 4,
+  //! TMM register group (X86, X86_64).
+  kX86_Tmm = kExtraNonVirt + 5,
+
+  //! First group - only used in loops.
+  k0 = 0,
+  //! Last value of a virtual register that is managed by \ref BaseCompiler.
+  kMaxVirt = Globals::kNumVirtGroups - 1,
+  //! Maximum value of `RegGroup`.
+  kMaxValue = 15
+};
+ASMJIT_DEFINE_ENUM_COMPARE(RegGroup)
+
+typedef Support::EnumValues<RegGroup, RegGroup::kGp, RegGroup::kMaxVirt> RegGroupVirtValues;
+
+//! Operand signature is a 32-bit number describing \ref Operand and some of its payload.
+//!
+//! In AsmJit operand signature is used to store additional payload of register, memory, and immediate operands.
+//! In practice the biggest pressure on OperandSignature is from \ref BaseMem and architecture specific memory
+//! operands that need to store additional payload that cannot be stored elsewhere as values of all other members
+//! are fully specified by \ref BaseMem.
+struct OperandSignature {
+  //! \name Constants
+  //! \{
+
+  enum : uint32_t {
+    // Operand type (3 least significant bits).
+    // |........|........|........|.....XXX|
+    kOpTypeShift = 0,
+    kOpTypeMask = 0x07u << kOpTypeShift,
+
+    // Register type (5 bits).
+    // |........|........|........|XXXXX...|
+    kRegTypeShift = 3,
+    kRegTypeMask = 0x1Fu << kRegTypeShift,
+
+    // Register group (4 bits).
+    // |........|........|....XXXX|........|
+    kRegGroupShift = 8,
+    kRegGroupMask = 0x0Fu << kRegGroupShift,
+
+    // Memory base type (5 bits).
+    // |........|........|........|XXXXX...|
+    kMemBaseTypeShift = 3,
+    kMemBaseTypeMask = 0x1Fu << kMemBaseTypeShift,
+
+    // Memory index type (5 bits).
+    // |........|........|...XXXXX|........|
+    kMemIndexTypeShift = 8,
+    kMemIndexTypeMask = 0x1Fu << kMemIndexTypeShift,
+
+    // Memory base+index combined (10 bits).
+    // |........|........|...XXXXX|XXXXX...|
+    kMemBaseIndexShift = 3,
+    kMemBaseIndexMask = 0x3FFu << kMemBaseIndexShift,
+
+    // This memory operand represents a home-slot or stack (Compiler) (1 bit).
+    // |........|........|..X.....|........|
+    kMemRegHomeShift = 13,
+    kMemRegHomeFlag = 0x01u << kMemRegHomeShift,
+
+    // Immediate type (1 bit).
+    // |........|........|........|....X...|
+    kImmTypeShift = 3,
+    kImmTypeMask = 0x01u << kImmTypeShift,
+
+    // Predicate used by either registers or immediate values (4 bits).
+    // |........|XXXX....|........|........|
+    kPredicateShift = 20,
+    kPredicateMask = 0x0Fu << kPredicateShift,
+
+    // Operand size (8 most significant bits).
+    // |XXXXXXXX|........|........|........|
+    kSizeShift = 24,
+    kSizeMask = 0xFFu << kSizeShift
+  };
+
+  //! \}
+
+  //! \name Members
+  //! \{
+
+  uint32_t _bits;
+
+  //! \}
+
+  //! \name Overloaded Operators
+  //!
+  //! Overloaded operators make `OperandSignature` behave like regular integer.
+  //! Like an integer, `!sig` is true only when no bit is set and `bool(sig)` is true when any bit is set.
+  //! \{
+
+  inline constexpr bool operator!() const noexcept { return _bits == 0; }
+  inline constexpr explicit operator bool() const noexcept { return _bits != 0; }
+
+  inline OperandSignature& operator|=(uint32_t x) noexcept { _bits |= x; return *this; }
+  inline OperandSignature& operator&=(uint32_t x) noexcept { _bits &= x; return *this; }
+  inline OperandSignature& operator^=(uint32_t x) noexcept { _bits ^= x; return *this; }
+
+  inline OperandSignature& operator|=(const OperandSignature& other) noexcept { return operator|=(other._bits); }
+  inline OperandSignature& operator&=(const OperandSignature& other) noexcept { return operator&=(other._bits); }
+  inline OperandSignature& operator^=(const OperandSignature& other) noexcept { return operator^=(other._bits); }
+
+  inline constexpr OperandSignature operator~() const noexcept { return OperandSignature{~_bits}; }
+
+  inline constexpr OperandSignature operator|(uint32_t x) const noexcept { return OperandSignature{_bits | x}; }
+  inline constexpr OperandSignature operator&(uint32_t x) const noexcept { return OperandSignature{_bits & x}; }
+  inline constexpr OperandSignature operator^(uint32_t x) const noexcept { return OperandSignature{_bits ^ x}; }
+
+  inline constexpr OperandSignature operator|(const OperandSignature& other) const noexcept { return OperandSignature{_bits | other._bits}; }
+  inline constexpr OperandSignature operator&(const OperandSignature& other) const noexcept { return OperandSignature{_bits & other._bits}; }
+  inline constexpr OperandSignature operator^(const OperandSignature& other) const noexcept { return OperandSignature{_bits ^ other._bits}; }
+
+  inline constexpr bool operator==(uint32_t x) const noexcept { return _bits == x; }
+  inline constexpr bool operator!=(uint32_t x) const noexcept { return _bits != x; }
+
+  inline constexpr bool operator==(const OperandSignature& other) const noexcept { return _bits == other._bits; }
+  inline constexpr bool operator!=(const OperandSignature& other) const noexcept { return _bits != other._bits; }
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  inline void reset() noexcept { _bits = 0; }
+
+  inline constexpr uint32_t bits() const noexcept { return _bits; }
+  inline void setBits(uint32_t bits) noexcept { _bits = bits; }
+
+  template<uint32_t kFieldMask, uint32_t kFieldShift = Support::ConstCTZ<kFieldMask>::value>
+  inline constexpr bool hasField() const noexcept {
+    return (_bits & kFieldMask) != 0;
+  }
+
+  template<uint32_t kFieldMask, uint32_t kFieldShift = Support::ConstCTZ<kFieldMask>::value>
+  inline constexpr bool hasField(uint32_t value) const noexcept {
+    return (_bits & kFieldMask) == (value << kFieldShift); // True only when the masked field holds exactly `value`.
+  }
+
+  template<uint32_t kFieldMask, uint32_t kFieldShift = Support::ConstCTZ<kFieldMask>::value>
+  inline constexpr uint32_t getField() const noexcept {
+    return (_bits >> kFieldShift) & (kFieldMask >> kFieldShift);
+  }
+
+  template<uint32_t kFieldMask, uint32_t kFieldShift = Support::ConstCTZ<kFieldMask>::value>
+  inline void setField(uint32_t value) noexcept {
+    ASMJIT_ASSERT((value & ~(kFieldMask >> kFieldShift)) == 0);
+    _bits = (_bits & ~kFieldMask) | (value << kFieldShift);
+  }
+
+  inline constexpr OperandSignature subset(uint32_t mask) const noexcept { return OperandSignature{_bits & mask}; }
+
+  template<uint32_t kFieldMask>
+  inline constexpr bool matchesSignature(const OperandSignature& signature) const noexcept {
+    return (_bits & kFieldMask) == signature._bits;
+  }
+
+  template<uint32_t kFieldMask>
+  inline constexpr bool matchesFields(uint32_t bits) const noexcept {
+    return (_bits & kFieldMask) == bits;
+  }
+
+  template<uint32_t kFieldMask>
+  inline constexpr bool matchesFields(const OperandSignature& fields) const noexcept {
+    return (_bits & kFieldMask) == fields._bits;
+  }
+
+  inline constexpr bool isValid() const noexcept { return _bits != 0; }
+
+  inline constexpr OperandType opType() const noexcept { return (OperandType)getField<kOpTypeMask>(); }
+
+  inline constexpr RegType regType() const noexcept { return (RegType)getField<kRegTypeMask>(); }
+  inline constexpr RegGroup regGroup() const noexcept { return (RegGroup)getField<kRegGroupMask>(); }
+
+  inline constexpr RegType memBaseType() const noexcept { return (RegType)getField<kMemBaseTypeMask>(); }
+  inline constexpr RegType memIndexType() const noexcept { return (RegType)getField<kMemIndexTypeMask>(); }
+
+  inline constexpr uint32_t predicate() const noexcept { return getField<kPredicateMask>(); }
+  inline constexpr uint32_t size() const noexcept { return getField<kSizeMask>(); }
+
+  inline void setOpType(OperandType opType) noexcept { setField<kOpTypeMask>(uint32_t(opType)); }
+  inline void setRegType(RegType regType) noexcept { setField<kRegTypeMask>(uint32_t(regType)); }
+  inline void setRegGroup(RegGroup regGroup) noexcept { setField<kRegGroupMask>(uint32_t(regGroup)); }
+
+  inline void setMemBaseType(RegGroup baseType) noexcept { setField<kMemBaseTypeMask>(uint32_t(baseType)); } // NOTE(review): takes `RegGroup`, memBaseType() returns `RegType` - confirm.
+  inline void setMemIndexType(RegGroup indexType) noexcept { setField<kMemIndexTypeMask>(uint32_t(indexType)); } // NOTE(review): takes `RegGroup`, memIndexType() returns `RegType` - confirm.
+
+  inline void setPredicate(uint32_t predicate) noexcept { setField<kPredicateMask>(predicate); }
+  inline void setSize(uint32_t size) noexcept { setField<kSizeMask>(size); }
+
+  //! \}
+
+  //! \name Static Constructors
+  //! \{
+
+  static inline constexpr OperandSignature fromBits(uint32_t bits) noexcept {
+    return OperandSignature{bits};
+  }
+
+  template<uint32_t kFieldMask, typename T>
+  static inline constexpr OperandSignature fromValue(const T& value) noexcept {
+    return OperandSignature{uint32_t(value) << Support::ConstCTZ<kFieldMask>::value};
+  }
+
+  static inline constexpr OperandSignature fromOpType(OperandType opType) noexcept {
+    return OperandSignature{uint32_t(opType) << kOpTypeShift};
+  }
+
+  static inline constexpr OperandSignature fromRegType(RegType regType) noexcept {
+    return OperandSignature{uint32_t(regType) << kRegTypeShift};
+  }
+
+  static inline constexpr OperandSignature fromRegGroup(RegGroup regGroup) noexcept {
+    return OperandSignature{uint32_t(regGroup) << kRegGroupShift};
+  }
+
+  static inline constexpr OperandSignature fromRegTypeAndGroup(RegType regType, RegGroup regGroup) noexcept {
+    return fromRegType(regType) | fromRegGroup(regGroup);
+  }
+
+  static inline constexpr OperandSignature fromMemBaseType(RegType baseType) noexcept {
+    return OperandSignature{uint32_t(baseType) << kMemBaseTypeShift};
+  }
+
+  static inline constexpr OperandSignature fromMemIndexType(RegType indexType) noexcept {
+    return OperandSignature{uint32_t(indexType) << kMemIndexTypeShift};
+  }
+
+  static inline constexpr OperandSignature fromPredicate(uint32_t predicate) noexcept {
+    return OperandSignature{predicate << kPredicateShift};
+  }
+
+  static inline constexpr OperandSignature fromSize(uint32_t size) noexcept {
+    return OperandSignature{size << kSizeShift};
+  }
+
+  //! \}
+};
+
+//! Base class representing an operand in AsmJit (non-default constructed version).
+//!
+//! Contains no initialization code and can be used safely to define an array of operands that won't be initialized.
+//! This is a \ref Operand base structure designed to be statically initialized, static const, or to be used by user
+//! code to define an array of operands without having them default initialized at construction time.
+//!
+//! The key difference between \ref Operand and \ref Operand_ is:
+//!
+//! ```
+//! Operand_ xArray[10];    // Not initialized, contains garbage.
+//! Operand_ yArray[10] {}; // All operands initialized to none explicitly (zero initialized).
+//! Operand  zArray[10];    // All operands initialized to none implicitly (zero initialized).
+//! ```
+struct Operand_ {
+  //! \name Types
+  //! \{
+
+  typedef OperandSignature Signature;
+
+  //! \}
+
+  //! \name Constants
+  //! \{
+
+  // Indexes to `_data` array.
+  enum DataIndex : uint32_t {
+    kDataMemIndexId = 0,
+    kDataMemOffsetLo = 1,
+
+    kDataImmValueLo = ASMJIT_ARCH_LE ? 0 : 1,
+    kDataImmValueHi = ASMJIT_ARCH_LE ? 1 : 0
+  };
+
+  //! Constants useful for VirtId <-> Index translation.
+  enum VirtIdConstants : uint32_t {
+    //! Minimum valid packed-id.
+    kVirtIdMin = 256,
+    //! Maximum valid packed-id, excludes Globals::kInvalidId.
+    kVirtIdMax = Globals::kInvalidId - 1,
+    //! Count of valid packed-ids.
+    kVirtIdCount = uint32_t(kVirtIdMax - kVirtIdMin + 1)
+  };
+
+  //! \}
+
+  //! \name Members
+  //! \{
+
+  //! Provides operand type and additional payload.
+  Signature _signature;
+  //! Either base id as used by memory operand or any id as used by others.
+  uint32_t _baseId;
+
+  //! Data specific to the operand type.
+  //!
+  //! The reason we don't use union is that we have `constexpr` constructors that construct operands and other
+  //! `constexpr` functions that return either another Operand or something else. These cannot generally work with
+  //! unions so we also cannot use `union` if we want to be standard compliant.
+  uint32_t _data[2];
+
+  //! \}
+
+  //! Tests whether the given `id` is a valid virtual register id. Since AsmJit supports both physical and virtual
+  //! registers it must be able to distinguish between these two. The idea is that physical registers are always
+  //! limited in size, so virtual identifiers start from `kVirtIdMin` and end at `kVirtIdMax`.
+  static inline bool isVirtId(uint32_t id) noexcept { return id - kVirtIdMin < uint32_t(kVirtIdCount); }
+  //! Converts a real-id into a packed-id that can be stored in Operand.
+  static inline uint32_t indexToVirtId(uint32_t id) noexcept { return id + kVirtIdMin; }
+  //! Converts a packed-id back to real-id.
+  static inline uint32_t virtIdToIndex(uint32_t id) noexcept { return id - kVirtIdMin; }
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! \cond INTERNAL
+  //! Initializes a `BaseReg` operand from `signature` and register `id`.
+  inline void _initReg(const Signature& signature, uint32_t id) noexcept {
+    _signature = signature;
+    _baseId = id;
+    _data[0] = 0;
+    _data[1] = 0;
+  }
+  //! \endcond
+
+  //! Initializes the operand from `other` operand (used by operator overloads).
+  inline void copyFrom(const Operand_& other) noexcept { memcpy(this, &other, sizeof(Operand_)); }
+
+  //! Resets the `Operand` to none.
+  //!
+  //! None operand is defined the following way:
+  //!   - Its signature is zero (OperandType::kNone, and the rest zero as well).
+  //!   - Its id is `0`.
+  //!   - The reserved8_4 field is set to `0`.
+  //!   - The reserved12_4 field is set to zero.
+  //!
+  //! In other words, reset operands have all members set to zero. Reset operand must match the Operand state
+  //! right after its construction. Alternatively, if you have an array of operands, you can simply use `memset()`.
+  //!
+  //! ```
+  //! using namespace asmjit;
+  //!
+  //! Operand a;
+  //! Operand b;
+  //! assert(a == b);
+  //!
+  //! b = x86::eax;
+  //! assert(a != b);
+  //!
+  //! b.reset();
+  //! assert(a == b);
+  //!
+  //! memset(&b, 0, sizeof(Operand));
+  //! assert(a == b);
+  //! ```
+  inline void reset() noexcept {
+    _signature.reset();
+    _baseId = 0;
+    _data[0] = 0;
+    _data[1] = 0;
+  }
+
+  //! \}
+
+  //! \name Overloaded Operators
+  //! \{
+
+  //! Tests whether this operand is the same as `other`.
+  inline constexpr bool operator==(const Operand_& other) const noexcept { return  equals(other); }
+  //! Tests whether this operand is not the same as `other`.
+  inline constexpr bool operator!=(const Operand_& other) const noexcept { return !equals(other); }
+
+  //! \}
+
+  //! \name Cast
+  //! \{
+
+  //! Casts this operand to `T` type.
+  template<typename T>
+  inline T& as() noexcept { return static_cast<T&>(*this); }
+
+  //! Casts this operand to `T` type (const).
+  template<typename T>
+  inline const T& as() const noexcept { return static_cast<const T&>(*this); }
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Tests whether the operand's signature matches the signature of the `other` operand.
+  inline constexpr bool hasSignature(const Operand_& other) const noexcept { return _signature == other._signature; }
+  //! Tests whether the operand's signature matches the given signature `sign`.
+  inline constexpr bool hasSignature(const Signature& other) const noexcept { return _signature == other; }
+
+  //! Returns operand signature as unsigned 32-bit integer.
+  //!
+  //! Signature is first 4 bytes of the operand data. It's used mostly for operand checking as it's
+  //! much faster to check packed 4 bytes at once than having to check these bytes individually.
+  inline constexpr Signature signature() const noexcept { return _signature; }
+
+  //! Sets the operand signature, see `signature()`.
+  //!
+  //! \note Improper use of `setSignature()` can lead to hard-to-debug errors.
+  inline void setSignature(const Signature& signature) noexcept { _signature = signature; }
+
+  //! Returns the type of the operand, see `OpType`.
+  inline constexpr OperandType opType() const noexcept { return _signature.opType(); }
+  //! Tests whether the operand is none (`OperandType::kNone`).
+  inline constexpr bool isNone() const noexcept { return _signature == Signature::fromBits(0); }
+  //! Tests whether the operand is a register (`OperandType::kReg`).
+  inline constexpr bool isReg() const noexcept { return opType() == OperandType::kReg; }
+  //! Tests whether the operand is a memory location (`OperandType::kMem`).
+  inline constexpr bool isMem() const noexcept { return opType() == OperandType::kMem; }
+  //! Tests whether the operand is an immediate (`OperandType::kImm`).
+  inline constexpr bool isImm() const noexcept { return opType() == OperandType::kImm; }
+  //! Tests whether the operand is a label (`OperandType::kLabel`).
+  inline constexpr bool isLabel() const noexcept { return opType() == OperandType::kLabel; }
+
+  //! Tests whether the operand is a physical register.
+  inline constexpr bool isPhysReg() const noexcept { return isReg() && _baseId < 0xFFu; }
+  //! Tests whether the operand is a virtual register.
+  inline constexpr bool isVirtReg() const noexcept { return isReg() && _baseId > 0xFFu; }
+
+  //! Tests whether the operand specifies a size (i.e. the size is not zero).
+  inline constexpr bool hasSize() const noexcept { return _signature.hasField<Signature::kSizeMask>(); }
+  //! Tests whether the size of the operand matches `size`.
+  inline constexpr bool hasSize(uint32_t s) const noexcept { return size() == s; }
+
+  //! Returns the size of the operand in bytes.
+  //!
+  //! The value returned depends on the operand type:
+  //!   * None  - Should always return zero size.
+  //!   * Reg   - Should always return the size of the register. If the register size depends on architecture
+  //!             (like `x86::CReg` and `x86::DReg`) the size returned should be the greatest possible (so it
+  //!             should return 64-bit size in such case).
+  //!   * Mem   - Size is optional and will be in most cases zero.
+  //!   * Imm   - Should always return zero size.
+  //!   * Label - Should always return zero size.
+  inline constexpr uint32_t size() const noexcept { return _signature.getField<Signature::kSizeMask>(); }
+
+  //! Returns the operand id.
+  //!
+  //! The value returned should be interpreted accordingly to the operand type:
+  //!   * None  - Should be `0`.
+  //!   * Reg   - Physical or virtual register id.
+  //!   * Mem   - Multiple meanings - BASE address (register or label id), or high value of a 64-bit absolute address.
+  //!   * Imm   - Should be `0`.
+  //!   * Label - Label id if it was created by using `newLabel()` or `Globals::kInvalidId` if the label is invalid or
+  //!             not initialized.
+  inline constexpr uint32_t id() const noexcept { return _baseId; }
+
+  //! Tests whether the operand is 100% equal to `other` operand.
+  //!
+  //! \note This basically performs a binary comparison, if aby bit is
+  //! different the operands are not equal.
+  inline constexpr bool equals(const Operand_& other) const noexcept {
+    return (_signature == other._signature) &
+           (_baseId    == other._baseId   ) &
+           (_data[0]   == other._data[0]  ) &
+           (_data[1]   == other._data[1]  ) ;
+  }
+
+  //! Tests whether the operand is a register matching the given register `type`.
+  inline constexpr bool isReg(RegType type) const noexcept {
+    return _signature.subset(Signature::kOpTypeMask | Signature::kRegTypeMask) == (Signature::fromOpType(OperandType::kReg) | Signature::fromRegType(type));
+  }
+
+  //! Tests whether the operand is register and of register `type` and `id`.
+  inline constexpr bool isReg(RegType type, uint32_t id) const noexcept {
+    return isReg(type) && this->id() == id;
+  }
+
+  //! Tests whether the operand is a register or memory.
+  inline constexpr bool isRegOrMem() const noexcept {
+    return Support::isBetween<uint32_t>(uint32_t(opType()), uint32_t(OperandType::kReg), uint32_t(OperandType::kMem));
+  }
+
+  //! \}
+};
+
+//! Base class representing an operand in AsmJit (default constructed version).
+class Operand : public Operand_ {
+public:
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Creates an `OperandType::kNone` operand having all members initialized to zero.
+  inline constexpr Operand() noexcept
+    : Operand_{ Signature::fromOpType(OperandType::kNone), 0u, { 0u, 0u }} {}
+
+  //! Creates a cloned `other` operand.
+  inline constexpr Operand(const Operand& other) noexcept = default;
+
+  //! Creates an operand from a plain `Operand_` (`other` does not have to be an `Operand` instance).
+  inline constexpr explicit Operand(const Operand_& other)
+    : Operand_(other) {}
+
+  //! Creates an operand initialized to raw `[u0, u1, u2, u3]` values.
+  inline constexpr Operand(Globals::Init_, const Signature& u0, uint32_t u1, uint32_t u2, uint32_t u3) noexcept
+    : Operand_{ u0, u1, { u2, u3 }} {}
+
+  //! Creates an uninitialized operand (dangerous).
+  inline explicit Operand(Globals::NoInit_) noexcept {}
+
+  //! \}
+
+  //! \name Overloaded Operators
+  //! \{
+
+  inline Operand& operator=(const Operand& other) noexcept = default;
+  inline Operand& operator=(const Operand_& other) noexcept { Operand_::operator=(other); return *this; } // Base-class assignment, no downcast of `other`.
+
+  //! \}
+
+  //! \name Clone
+  //! \{
+
+  //! Clones this operand and returns its copy.
+  inline constexpr Operand clone() const noexcept { return Operand(*this); }
+
+  //! \}
+};
+
+static_assert(sizeof(Operand) == 16, "asmjit::Operand must be exactly 16 bytes long");
+
+//! Label (jump target or data location).
+//!
+//! Label represents a location in code typically used as a jump target, but may be also a reference to some data or
+//! a static variable. Label has to be explicitly created by BaseEmitter.
+//!
+//! Example of using labels:
+//!
+//! ```
+//! // Create some emitter (for example x86::Assembler).
+//! x86::Assembler a;
+//!
+//! // Create Label instance.
+//! Label L1 = a.newLabel();
+//!
+//! // ... your code ...
+//!
+//! // Using label.
+//! a.jump(L1);
+//!
+//! // ... your code ...
+//!
+//! // Bind label to the current position, see `BaseEmitter::bind()`.
+//! a.bind(L1);
+//! ```
+class Label : public Operand {
+public:
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Creates a label operand whose id is `Globals::kInvalidId` (you must set the ID to make it valid).
+  inline constexpr Label() noexcept
+    : Operand(Globals::Init, Signature::fromOpType(OperandType::kLabel), Globals::kInvalidId, 0, 0) {}
+
+  //! Creates a cloned label operand of `other`.
+  inline constexpr Label(const Label& other) noexcept
+    : Operand(other) {}
+
+  //! Creates a label operand of the given `id`.
+  inline constexpr explicit Label(uint32_t id) noexcept
+    : Operand(Globals::Init, Signature::fromOpType(OperandType::kLabel), id, 0, 0) {}
+
+  inline explicit Label(Globals::NoInit_) noexcept
+    : Operand(Globals::NoInit) {}
+
+  //! Resets the label, will reset all properties and set its ID to `Globals::kInvalidId`.
+  inline void reset() noexcept {
+    _signature = Signature::fromOpType(OperandType::kLabel);
+    _baseId = Globals::kInvalidId;
+    _data[0] = 0;
+    _data[1] = 0;
+  }
+
+  //! \}
+
+  //! \name Overloaded Operators
+  //! \{
+
+  inline Label& operator=(const Label& other) noexcept = default;
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Tests whether the label is valid, i.e. its id is not `Globals::kInvalidId`.
+  inline constexpr bool isValid() const noexcept { return _baseId != Globals::kInvalidId; }
+  //! Sets the label `id`.
+  inline void setId(uint32_t id) noexcept { _baseId = id; }
+
+  //! \}
+};
+
+//! \cond INTERNAL
+//! Default register traits.
+struct BaseRegTraits {
+  enum : uint32_t {
+    //! \ref TypeId representing this register type, could be \ref TypeId::kVoid if such type doesn't exist.
+    kTypeId = uint32_t(TypeId::kVoid),
+    //! RegType is not valid by default.
+    kValid = 0,
+    //! Count of registers (0 if none).
+    kCount = 0,
+
+    //! Zero type by default (defaults to None).
+    kType = uint32_t(RegType::kNone),
+    //! Zero group by default (defaults to GP).
+    kGroup = uint32_t(RegGroup::kGp),
+    //! No size by default.
+    kSize = 0,
+
+    //! Empty signature by default (not even having operand type set to register).
+    kSignature = 0
+  };
+};
+//! \endcond
+
+//! Physical or virtual register operand.
+class BaseReg : public Operand {
+public:
+  //! \name Constants
+  //! \{
+
+  enum : uint32_t {
+    //! None or any register (mostly internal).
+    kIdBad = 0xFFu,
+
+    kBaseSignatureMask =
+      Signature::kOpTypeMask   |
+      Signature::kRegTypeMask  |
+      Signature::kRegGroupMask |
+      Signature::kSizeMask,
+
+    kTypeNone = uint32_t(RegType::kNone),
+    kSignature = Signature::fromOpType(OperandType::kReg).bits()
+  };
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Creates a dummy register operand (register operand type with `kIdBad` as its id).
+  inline constexpr BaseReg() noexcept
+    : Operand(Globals::Init, Signature::fromOpType(OperandType::kReg), kIdBad, 0, 0) {}
+
+  //! Creates a new register operand which is the same as `other`.
+  inline constexpr BaseReg(const BaseReg& other) noexcept
+    : Operand(other) {}
+
+  //! Creates a new register operand compatible with `other`, but with a different `id`.
+  inline constexpr BaseReg(const BaseReg& other, uint32_t id) noexcept
+    : Operand(Globals::Init, other._signature, id, 0, 0) {}
+
+  //! Creates a register initialized to the given `signature` and `id`.
+  inline constexpr BaseReg(const Signature& signature, uint32_t id) noexcept
+    : Operand(Globals::Init, signature, id, 0, 0) {}
+
+  inline explicit BaseReg(Globals::NoInit_) noexcept
+    : Operand(Globals::NoInit) {}
+
+  //! \}
+
+  //! \name Overloaded Operators
+  //! \{
+
+  inline BaseReg& operator=(const BaseReg& other) noexcept = default;
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns base signature of the register associated with each register type.
+  //!
+  //! Base signature only contains the operand type, register type, register group, and register size. It doesn't
+  //! contain element type, predicate, or other architecture-specific data. Base signature is a signature that is
+  //! provided by architecture-specific `RegTraits`, like \ref x86::RegTraits.
+  inline constexpr OperandSignature baseSignature() const noexcept { return _signature & kBaseSignatureMask; }
+
+  //! Tests whether the operand's base signature matches the given `signature` (passed as raw 32-bit value).
+  inline constexpr bool hasBaseSignature(uint32_t signature) const noexcept { return baseSignature() == signature; }
+  //! Tests whether the operand's base signature matches the given `signature`.
+  inline constexpr bool hasBaseSignature(const OperandSignature& signature) const noexcept { return baseSignature() == signature; }
+  //! Tests whether the operand's base signature matches the base signature of the `other` operand.
+  inline constexpr bool hasBaseSignature(const BaseReg& other) const noexcept { return baseSignature() == other.baseSignature(); }
+
+  //! Tests whether this register is the same as `other`.
+  //!
+  //! This is just an optimization. Registers by default only use the first 8 bytes of Operand data, so this method
+  //! takes advantage of this knowledge and only compares these 8 bytes. If both operands were created correctly
+  //! both \ref equals() and \ref isSame() should give the same answer, however, if any of these two contains garbage
+  //! or other metadata in the upper 8 bytes then \ref isSame() may return `true` in cases in which \ref equals()
+  //! returns false.
+  inline constexpr bool isSame(const BaseReg& other) const noexcept {
+    return (_signature == other._signature) & (_baseId == other._baseId);
+  }
+
+  //! Tests whether the register is valid - its signature is non-zero and its id is not `kIdBad`.
+  inline constexpr bool isValid() const noexcept { return (_signature != 0) & (_baseId != kIdBad); }
+
+  //! Tests whether this is a physical register (id below `kIdBad`).
+  inline constexpr bool isPhysReg() const noexcept { return _baseId < kIdBad; }
+  //! Tests whether this is a virtual register (id above `kIdBad`).
+  inline constexpr bool isVirtReg() const noexcept { return _baseId > kIdBad; }
+
+  //! Tests whether the register type matches `type` - same as `isReg(type)`, provided for convenience.
+  inline constexpr bool isType(RegType type) const noexcept { return _signature.subset(Signature::kRegTypeMask) == Signature::fromRegType(type); }
+  //! Tests whether the register group matches `group`.
+  inline constexpr bool isGroup(RegGroup group) const noexcept { return _signature.subset(Signature::kRegGroupMask) == Signature::fromRegGroup(group); }
+
+  //! Tests whether the register is a general purpose register (any size).
+  inline constexpr bool isGp() const noexcept { return isGroup(RegGroup::kGp); }
+  //! Tests whether the register is a vector register.
+  inline constexpr bool isVec() const noexcept { return isGroup(RegGroup::kVec); }
+
+  using Operand_::isReg;
+
+  //! Same as `isType()`, provided for convenience.
+  inline constexpr bool isReg(RegType rType) const noexcept { return isType(rType); }
+  //! Tests whether the register type matches `rType` and register id matches `id`.
+  inline constexpr bool isReg(RegType rType, uint32_t id) const noexcept { return isType(rType) && this->id() == id; }
+
+  //! Returns the register type.
+  inline constexpr RegType type() const noexcept { return _signature.regType(); }
+  //! Returns the register group.
+  inline constexpr RegGroup group() const noexcept { return _signature.regGroup(); }
+
+  //! Returns operation predicate of the register (ARM/AArch64).
+  //!
+  //! The meaning depends on architecture, for example on ARM hardware this describes \ref arm::ShiftOp
+  //! of the register.
+  inline constexpr uint32_t predicate() const noexcept { return _signature.getField<Signature::kPredicateMask>(); }
+
+  //! Sets operation predicate of the register to `predicate` (ARM/AArch64).
+  //!
+  //! The meaning depends on architecture, for example on ARM hardware this describes \ref arm::ShiftOp
+  //! of the register.
+  inline void setPredicate(uint32_t predicate) noexcept { _signature.setField<Signature::kPredicateMask>(predicate); }
+
+  //! Resets the operation predicate of the register to zero (ARM/AArch64).
+  inline void resetPredicate() noexcept { _signature.setField<Signature::kPredicateMask>(0); }
+
+  //! Clones the register operand.
+  inline constexpr BaseReg clone() const noexcept { return BaseReg(*this); }
+
+  //! Clones this register as `RegT` - the result has `RegT::kSignature` and the id of this register.
+  //!
+  //! \note Improper use of `cloneAs()` can lead to hard-to-debug errors.
+  template<typename RegT>
+  inline constexpr RegT cloneAs() const noexcept { return RegT(Signature(RegT::kSignature), id()); }
+
+  //! Clones this register as `RegT` - the result has the signature of `other` and the id of this register.
+  //!
+  //! \note Improper use of `cloneAs()` can lead to hard-to-debug errors.
+  template<typename RegT>
+  inline constexpr RegT cloneAs(const RegT& other) const noexcept { return RegT(other.signature(), id()); }
+
+  //! Sets the register id to `id`.
+  inline void setId(uint32_t id) noexcept { _baseId = id; }
+
+  //! Sets the operand signature to `RegT::kSignature`.
+  template<typename RegT>
+  inline void setSignatureT() noexcept { _signature = RegT::kSignature; }
+
+  //! Sets the register `signature` and `id`.
+  inline void setSignatureAndId(const OperandSignature& signature, uint32_t id) noexcept {
+    _signature = signature;
+    _baseId = id;
+  }
+
+  //! \}
+
+  //! \name Static Functions
+  //! \{
+
+  //! Tests whether the `op` operand is a general purpose register.
+  static inline bool isGp(const Operand_& op) noexcept {
+    // Check operand type and register group. Not interested in register type and size.
+    return op.signature().subset(Signature::kOpTypeMask | Signature::kRegGroupMask) == (Signature::fromOpType(OperandType::kReg) | Signature::fromRegGroup(RegGroup::kGp));
+  }
+
+  //! Tests whether the `op` operand is a vector register.
+  static inline bool isVec(const Operand_& op) noexcept {
+    // Check operand type and register group. Not interested in register type and size.
+    return op.signature().subset(Signature::kOpTypeMask | Signature::kRegGroupMask) == (Signature::fromOpType(OperandType::kReg) | Signature::fromRegGroup(RegGroup::kVec));
+  }
+
+  //! Tests whether the `op` is a general purpose register of the given `id`.
+  static inline bool isGp(const Operand_& op, uint32_t id) noexcept { return isGp(op) & (op.id() == id); }
+  //! Tests whether the `op` is a vector register of the given `id`.
+  static inline bool isVec(const Operand_& op, uint32_t id) noexcept { return isVec(op) & (op.id() == id); }
+
+  //! \}
+};
+
+//! RegOnly is 8-byte version of `BaseReg` that allows to store either register or nothing.
+//!
+//! It's designed to decrease the space consumed by an extra "operand" in \ref BaseEmitter and \ref InstNode.
+struct RegOnly {
+  //! \name Types
+  //! \{
+
+  typedef OperandSignature Signature;
+
+  //! \}
+
+  //! Operand signature - only \ref OperandType::kNone and \ref OperandType::kReg are supported.
+  Signature _signature;
+  //! Physical or virtual register id.
+  uint32_t _id;
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Initializes the `RegOnly` instance to hold register `signature` and `id`.
+  inline void init(const OperandSignature& signature, uint32_t id) noexcept {
+    _signature = signature;
+    _id = id;
+  }
+
+  inline void init(const BaseReg& reg) noexcept { init(reg.signature(), reg.id()); }
+  inline void init(const RegOnly& reg) noexcept { init(reg.signature(), reg.id()); }
+
+  //! Resets the `RegOnly` members to zeros (none).
+  inline void reset() noexcept { init(Signature::fromBits(0), 0); }
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Tests whether this `RegOnly` is none, i.e. its signature is zero (same as calling `Operand_::isNone()`).
+  inline constexpr bool isNone() const noexcept { return _signature == 0; }
+  //! Tests whether a register is assigned, i.e. the signature is not zero.
+  inline constexpr bool isReg() const noexcept { return _signature != 0; }
+
+  //! Tests whether this is a physical register.
+  inline constexpr bool isPhysReg() const noexcept { return _id < BaseReg::kIdBad; }
+  //! Tests whether this is a virtual register (used by `BaseCompiler`).
+  inline constexpr bool isVirtReg() const noexcept { return _id > BaseReg::kIdBad; }
+
+  //! Returns the register signature or 0 if no register is assigned.
+  inline constexpr OperandSignature signature() const noexcept { return _signature; }
+  //! Returns the register id.
+  //!
+  //! \note Always check whether the register is assigned before using the returned identifier as
+  //! non-assigned `RegOnly` instance would return zero id, which is still a valid register id.
+  inline constexpr uint32_t id() const noexcept { return _id; }
+
+  //! Sets the register id.
+  inline void setId(uint32_t id) noexcept { _id = id; }
+
+  //! Returns the register type.
+  inline constexpr RegType type() const noexcept { return _signature.regType(); }
+  //! Returns the register group.
+  inline constexpr RegGroup group() const noexcept { return _signature.regGroup(); }
+
+  //! \}
+
+  //! \name Utilities
+  //! \{
+
+  //! Converts this `RegOnly` to a real `RegT` operand constructed from the stored signature and id.
+  template<typename RegT>
+  inline constexpr RegT toReg() const noexcept { return RegT(_signature, _id); }
+
+  //! \}
+};
+
+//! \cond INTERNAL
+//! Adds a specialization of the local `RegTraits` template for `REG_TYPE` (constants plus a packed `kSignature`).
+#define ASMJIT_DEFINE_REG_TRAITS(REG, REG_TYPE, GROUP, SIZE, COUNT, TYPE_ID) \
+template<>                                                                   \
+struct RegTraits<REG_TYPE> {                                                 \
+  typedef REG RegT;                                                          \
+                                                                             \
+  enum : uint32_t {                                                          \
+    kValid = uint32_t(true),                                                 \
+    kCount = uint32_t(COUNT),                                                \
+    kType = uint32_t(REG_TYPE),                                              \
+    kGroup = uint32_t(GROUP),                                                \
+    kSize = uint32_t(SIZE),                                                  \
+    kTypeId = uint32_t(TYPE_ID),                                             \
+                                                                             \
+    kSignature = (OperandSignature::fromOpType(OperandType::kReg) |          \
+                  OperandSignature::fromRegType(REG_TYPE)         |          \
+                  OperandSignature::fromRegGroup(GROUP)           |          \
+                  OperandSignature::fromSize(kSize)).bits(),                 \
+  };                                                                         \
+}
+
+//! Adds constructors and member functions to a class that implements an abstract register. An abstract register is
+//! a register that doesn't have a type or signature yet, it's a base class like `x86::Reg` or `arm::Reg`.
+#define ASMJIT_DEFINE_ABSTRACT_REG(REG, BASE)                                \
+public:                                                                      \
+  /*! Default constructor that only sets up basics. */                       \
+  inline constexpr REG() noexcept                                            \
+    : BASE(Signature{kSignature}, kIdBad) {}                                 \
+                                                                             \
+  /*! Makes a copy of the `other` register operand. */                       \
+  inline constexpr REG(const REG& other) noexcept                            \
+    : BASE(other) {}                                                         \
+                                                                             \
+  /*! Makes a copy of the `other` register having id set to `id` */          \
+  inline constexpr REG(const BaseReg& other, uint32_t id) noexcept           \
+    : BASE(other, id) {}                                                     \
+                                                                             \
+  /*! Creates a register based on `signature` and `id`. */                   \
+  inline constexpr REG(const OperandSignature& sgn, uint32_t id) noexcept    \
+    : BASE(sgn, id) {}                                                       \
+                                                                             \
+  /*! Creates a completely uninitialized REG register operand (garbage). */  \
+  inline explicit REG(Globals::NoInit_) noexcept                             \
+    : BASE(Globals::NoInit) {}                                               \
+                                                                             \
+  /*! Creates a new register from register type and id. */                   \
+  static inline REG fromTypeAndId(RegType type, uint32_t id) noexcept {      \
+    return REG(signatureOf(type), id);                                       \
+  }                                                                          \
+                                                                             \
+  /*! Clones the register operand. */                                        \
+  inline constexpr REG clone() const noexcept { return REG(*this); }         \
+                                                                             \
+  inline REG& operator=(const REG& other) noexcept = default;
+
+//! Adds constructors and member functions to a class that implements a final register. Final registers MUST HAVE
+//! a valid signature - `kSignature` (and type, group, and size constants) are taken from `TRAITS`.
+#define ASMJIT_DEFINE_FINAL_REG(REG, BASE, TRAITS)                           \
+public:                                                                      \
+  enum : uint32_t {                                                          \
+    kThisType  = TRAITS::kType,                                              \
+    kThisGroup = TRAITS::kGroup,                                             \
+    kThisSize  = TRAITS::kSize,                                              \
+    kSignature = TRAITS::kSignature                                          \
+  };                                                                         \
+                                                                             \
+  ASMJIT_DEFINE_ABSTRACT_REG(REG, BASE)                                      \
+                                                                             \
+  /*! Creates a register operand having its id set to `id`. */               \
+  inline constexpr explicit REG(uint32_t id) noexcept                        \
+    : BASE(Signature{kSignature}, id) {}
+//! \endcond
+
+//! Base class for all memory operands.
+//!
+//! The data is split into the following parts:
+//!
+//!   - BASE - Base register or label - requires 36 bits total. 4 bits are used to encode the type of the BASE operand
+//!     (label vs. register type) and the remaining 32 bits define the BASE id, which can be a physical or virtual
+//!     register index. If BASE type is zero, which is never used as a register type and label doesn't use it as well
+//!     then BASE field contains a high DWORD of a possible 64-bit absolute address, which is possible on X64.
+//!
+//!   - INDEX - Index register (or theoretically Label, which doesn't make sense). Encoding is similar to BASE - it
+//!     also requires 36 bits and splits the encoding to INDEX type (4 bits defining the register type) and 32-bit id.
+//!
+//!   - OFFSET - A relative offset of the address. Basically if BASE is specified the relative displacement adjusts
+//!     BASE and an optional INDEX. If BASE is not specified then the OFFSET should be considered an ABSOLUTE address
+//!     (at least on X86). In that case its low 32 bits are stored in DISPLACEMENT field and the remaining high 32
+//!     bits are stored in BASE.
+//!
+//!   - OTHER - The remaining 8 bits can be used for whatever purpose. For example \ref x86::Mem operand uses
+//!     these bits to store segment override prefix and index shift (or scale).
+class BaseMem : public Operand {
+public:
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Creates a default `BaseMem` operand, that points to [0].
+  inline constexpr BaseMem() noexcept
+      : Operand(Globals::Init, Signature::fromOpType(OperandType::kMem), 0, 0, 0) {}
+
+  //! Creates a `BaseMem` operand that is a clone of `other`.
+  inline constexpr BaseMem(const BaseMem& other) noexcept
+    : Operand(other) {}
+
+  //! Creates a `BaseMem` operand from `baseReg` and `offset`.
+  //!
+  //! \note This is an architecture independent constructor that can be used to create an architecture
+  //! independent memory operand to be used in portable code that can handle multiple architectures.
+  inline constexpr explicit BaseMem(const BaseReg& baseReg, int32_t offset = 0) noexcept
+    : Operand(Globals::Init,
+              Signature::fromOpType(OperandType::kMem) | Signature::fromMemBaseType(baseReg.type()),
+              baseReg.id(),
+              0,
+              uint32_t(offset)) {}
+
+  //! \cond INTERNAL
+  //! Creates a `BaseMem` operand from 4 integers as used by `Operand_` struct.
+  inline constexpr BaseMem(const OperandSignature& u0, uint32_t baseId, uint32_t indexId, int32_t offset) noexcept
+    : Operand(Globals::Init, u0, baseId, indexId, uint32_t(offset)) {}
+  //! \endcond
+
+  //! Creates a completely uninitialized `BaseMem` operand.
+  inline explicit BaseMem(Globals::NoInit_) noexcept
+    : Operand(Globals::NoInit) {}
+
+  //! Resets the memory operand - after the reset the memory points to [0].
+  inline void reset() noexcept {
+    _signature = Signature::fromOpType(OperandType::kMem);
+    _baseId = 0;
+    _data[0] = 0;
+    _data[1] = 0;
+  }
+
+  //! \}
+
+  //! \name Overloaded Operators
+  //! \{
+
+  inline BaseMem& operator=(const BaseMem& other) noexcept { copyFrom(other); return *this; }
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Clones the memory operand.
+  inline constexpr BaseMem clone() const noexcept { return BaseMem(*this); }
+
+  //! Creates a new copy of this memory operand adjusted by `off`.
+  inline BaseMem cloneAdjusted(int64_t off) const noexcept {
+    BaseMem result(*this);
+    result.addOffset(off);
+    return result;
+  }
+
+  //! Tests whether this memory operand is a register home (only used by \ref asmjit_compiler)
+  inline constexpr bool isRegHome() const noexcept { return _signature.hasField<Signature::kMemRegHomeFlag>(); }
+  //! Mark this memory operand as register home (only used by \ref asmjit_compiler).
+  inline void setRegHome() noexcept { _signature |= Signature::kMemRegHomeFlag; }
+  //! Marks this operand to not be a register home (only used by \ref asmjit_compiler).
+  inline void clearRegHome() noexcept { _signature &= ~Signature::kMemRegHomeFlag; }
+
+  //! Tests whether the memory operand has a BASE register or label specified.
+  inline constexpr bool hasBase() const noexcept {
+    return (_signature & Signature::kMemBaseTypeMask) != 0;
+  }
+
+  //! Tests whether the memory operand has an INDEX register specified.
+  inline constexpr bool hasIndex() const noexcept {
+    return (_signature & Signature::kMemIndexTypeMask) != 0;
+  }
+
+  //! Tests whether the memory operand has BASE or INDEX register.
+  inline constexpr bool hasBaseOrIndex() const noexcept {
+    return (_signature & Signature::kMemBaseIndexMask) != 0;
+  }
+
+  //! Tests whether the memory operand has BASE and INDEX register.
+  inline constexpr bool hasBaseAndIndex() const noexcept {
+    return (_signature & Signature::kMemBaseTypeMask) != 0 && (_signature & Signature::kMemIndexTypeMask) != 0;
+  }
+
+  //! Tests whether the BASE operand is a label.
+  inline constexpr bool hasBaseLabel() const noexcept {
+    return _signature.subset(Signature::kMemBaseTypeMask) == Signature::fromMemBaseType(RegType::kLabelTag);
+  }
+
+  //! Tests whether the BASE operand is a register (registers start after `RegType::kLabelTag`).
+  inline constexpr bool hasBaseReg() const noexcept {
+    return _signature.subset(Signature::kMemBaseTypeMask).bits() > Signature::fromMemBaseType(RegType::kLabelTag).bits();
+  }
+
+  //! Tests whether the INDEX operand is a register (registers start after `RegType::kLabelTag`).
+  inline constexpr bool hasIndexReg() const noexcept {
+    return _signature.subset(Signature::kMemIndexTypeMask).bits() > Signature::fromMemIndexType(RegType::kLabelTag).bits();
+  }
+
+  //! Returns the type of the BASE register (0 if this memory operand doesn't use the BASE register).
+  //!
+  //! \note If the returned type is one (a value never associated to a register type) the BASE is not register, but it
+  //! is a label. One equals to `kLabelTag`. You should always check `hasBaseLabel()` before using `baseId()` result.
+  inline constexpr RegType baseType() const noexcept { return _signature.memBaseType(); }
+
+  //! Returns the type of an INDEX register (0 if this memory operand doesn't
+  //! use the INDEX register).
+  inline constexpr RegType indexType() const noexcept { return _signature.memIndexType(); }
+
+  //! Returns both BASE (4:0 bits) and INDEX (9:5 bits) types combined into a single value (used internally for BASE+INDEX validation).
+  inline constexpr uint32_t baseAndIndexTypes() const noexcept { return _signature.getField<Signature::kMemBaseIndexMask>(); }
+
+  //! Returns the id of the BASE register.
+  //!
+  //! \remarks Returns id of the BASE register or label (if the BASE was specified as label).
+  inline constexpr uint32_t baseId() const noexcept { return _baseId; }
+
+  //! Returns the id of the INDEX register.
+  inline constexpr uint32_t indexId() const noexcept { return _data[kDataMemIndexId]; }
+
+  //! Sets the id of the BASE register (without modifying its type).
+  inline void setBaseId(uint32_t id) noexcept { _baseId = id; }
+  //! Sets the id of the INDEX register (without modifying its type).
+  inline void setIndexId(uint32_t id) noexcept { _data[kDataMemIndexId] = id; }
+
+  //! Sets the base register to type and id of the given `base` operand.
+  inline void setBase(const BaseReg& base) noexcept { return _setBase(base.type(), base.id()); }
+  //! Sets the index register to type and id of the given `index` operand.
+  inline void setIndex(const BaseReg& index) noexcept { return _setIndex(index.type(), index.id()); }
+
+  //! \cond INTERNAL
+  inline void _setBase(RegType type, uint32_t id) noexcept {
+    _signature.setField<Signature::kMemBaseTypeMask>(uint32_t(type));
+    _baseId = id;
+  }
+
+  inline void _setIndex(RegType type, uint32_t id) noexcept {
+    _signature.setField<Signature::kMemIndexTypeMask>(uint32_t(type));
+    _data[kDataMemIndexId] = id;
+  }
+  //! \endcond
+
+  //! Resets the memory operand's BASE register or label.
+  inline void resetBase() noexcept { _setBase(RegType::kNone, 0); }
+  //! Resets the memory operand's INDEX register.
+  inline void resetIndex() noexcept { _setIndex(RegType::kNone, 0); }
+
+  //! Sets the memory operand size (in bytes).
+  inline void setSize(uint32_t size) noexcept { _signature.setField<Signature::kSizeMask>(size); }
+
+  //! Tests whether the memory operand has a 64-bit offset or absolute address.
+  //!
+  //! If this is true then `hasBase()` must always report false.
+  inline constexpr bool isOffset64Bit() const noexcept { return baseType() == RegType::kNone; }
+
+  //! Tests whether the memory operand has a non-zero offset or absolute address.
+  inline constexpr bool hasOffset() const noexcept {
+    return (_data[kDataMemOffsetLo] | uint32_t(_baseId & Support::bitMaskFromBool<uint32_t>(isOffset64Bit()))) != 0;
+  }
+
+  //! Returns either relative offset or absolute address as 64-bit integer.
+  inline constexpr int64_t offset() const noexcept {
+    return isOffset64Bit() ? int64_t(uint64_t(_data[kDataMemOffsetLo]) | (uint64_t(_baseId) << 32))
+                           : int64_t(int32_t(_data[kDataMemOffsetLo])); // Sign extend 32-bit offset.
+  }
+
+  //! Returns a 32-bit low part of a 64-bit offset or absolute address.
+  inline constexpr int32_t offsetLo32() const noexcept { return int32_t(_data[kDataMemOffsetLo]); }
+  //! Returns a 32-bit high part of a 64-bit offset or absolute address.
+  //!
+  //! \note This function is UNSAFE and returns garbage if `isOffset64Bit()`
+  //! returns false. Never use it blindly without checking it first.
+  inline constexpr int32_t offsetHi32() const noexcept { return int32_t(_baseId); }
+
+  //! Sets a 64-bit offset or an absolute address to `offset`.
+  //!
+  //! \note This function attempts to set both high and low parts of a 64-bit offset, however, if the operand has
+  //! a BASE register it will store only the low 32 bits of the offset / address as there is no way to store both
+  //! BASE and 64-bit offset, and there is currently no architecture that has such capability targeted by AsmJit.
+  inline void setOffset(int64_t offset) noexcept {
+    uint32_t lo = uint32_t(uint64_t(offset) & 0xFFFFFFFFu);
+    uint32_t hi = uint32_t(uint64_t(offset) >> 32);
+    uint32_t hiMsk = Support::bitMaskFromBool<uint32_t>(isOffset64Bit());
+
+    _data[kDataMemOffsetLo] = lo;
+    _baseId = (hi & hiMsk) | (_baseId & ~hiMsk);
+  }
+  //! Sets a low 32-bit offset to `offset` (don't use without knowing how BaseMem works).
+  inline void setOffsetLo32(int32_t offset) noexcept { _data[kDataMemOffsetLo] = uint32_t(offset); }
+
+  //! Adjusts the memory operand offset by `offset`.
+  //!
+  //! \note If the operand has a BASE register only the low 32 bits of the offset are adjusted, otherwise the
+  //! whole 64-bit offset is updated. `addOffsetLo32()` is a faster alternative that never uses the HI 32-bits
+  //! of a 64-bit offset; use it only if you know that there is a BASE register and the offset is only 32 bits
+  //! anyway.
+  inline void addOffset(int64_t offset) noexcept {
+    if (isOffset64Bit()) {
+      int64_t result = offset + int64_t(uint64_t(_data[kDataMemOffsetLo]) | (uint64_t(_baseId) << 32));
+      _data[kDataMemOffsetLo] = uint32_t(uint64_t(result) & 0xFFFFFFFFu);
+      _baseId                 = uint32_t(uint64_t(result) >> 32);
+    }
+    else {
+      _data[kDataMemOffsetLo] += uint32_t(uint64_t(offset) & 0xFFFFFFFFu);
+    }
+  }
+
+  //! Adds `offset` to a low 32-bit offset part (don't use without knowing how BaseMem works).
+  inline void addOffsetLo32(int32_t offset) noexcept { _data[kDataMemOffsetLo] += uint32_t(offset); }
+
+  //! Resets the memory offset to zero.
+  inline void resetOffset() noexcept { setOffset(0); }
+
+  //! Resets the lo part of the memory offset to zero (don't use without knowing how BaseMem works).
+  inline void resetOffsetLo32() noexcept { setOffsetLo32(0); }
+
+  //! \}
+};
+
+//! Type of an immediate value.
+enum class ImmType : uint32_t {
+  //! Immediate is an integer.
+  kInt = 0,
+  //! Immediate is a floating point value stored as double-precision.
+  kDouble = 1
+};
+
+//! Immediate operands are encoded with instruction data.
+class Imm : public Operand {
+public:
+  //! \cond INTERNAL
+  template<typename T>
+  struct IsConstexprConstructibleAsImmType
+    : public std::integral_constant<bool, std::is_enum<T>::value ||
+                                          std::is_pointer<T>::value ||
+                                          std::is_integral<T>::value ||
+                                          std::is_function<T>::value> {};
+
+  template<typename T>
+  struct IsConvertibleToImmType
+    : public std::integral_constant<bool, IsConstexprConstructibleAsImmType<T>::value ||
+                                          std::is_floating_point<T>::value> {};
+  //! \endcond
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Creates a new immediate value (initial value is 0).
+  inline constexpr Imm() noexcept
+    : Operand(Globals::Init, Signature::fromOpType(OperandType::kImm), 0, 0, 0) {}
+
+  //! Creates a new immediate value from `other`.
+  inline constexpr Imm(const Imm& other) noexcept
+    : Operand(other) {}
+
+  //! Creates a new immediate value from ARM/AArch64 specific `shift`.
+  inline constexpr Imm(const arm::Shift& shift) noexcept
+    : Operand(Globals::Init,
+              Signature::fromOpType(OperandType::kImm) | Signature::fromPredicate(uint32_t(shift.op())),
+              0,
+              Support::unpackU32At0(shift.value()),
+              Support::unpackU32At1(shift.value())) {}
+
+  //! Creates a new signed immediate value, assigning the value to `val` and an architecture-specific predicate
+  //! to `predicate`.
+  //!
+  //! \note Predicate is currently only used by ARM architectures.
+  template<typename T, typename = typename std::enable_if<IsConstexprConstructibleAsImmType<typename std::decay<T>::type>::value>::type>
+  inline constexpr Imm(const T& val, const uint32_t predicate = 0) noexcept
+    : Operand(Globals::Init,
+              Signature::fromOpType(OperandType::kImm) | Signature::fromPredicate(predicate),
+              0,
+              Support::unpackU32At0(int64_t(val)),
+              Support::unpackU32At1(int64_t(val))) {}
+
+  inline Imm(const float& val, const uint32_t predicate = 0) noexcept
+    : Operand(Globals::Init,
+              Signature::fromOpType(OperandType::kImm) | Signature::fromPredicate(predicate),
+              0,
+              0,
+              0) { setValue(val); }
+
+  inline Imm(const double& val, const uint32_t predicate = 0) noexcept
+    : Operand(Globals::Init,
+              Signature::fromOpType(OperandType::kImm) | Signature::fromPredicate(predicate),
+              0,
+              0,
+              0) { setValue(val); }
+
+  inline explicit Imm(Globals::NoInit_) noexcept
+    : Operand(Globals::NoInit) {}
+
+  //! \}
+
+  //! \name Overloaded Operators
+  //! \{
+
+  //! Assigns the value of the `other` operand to this immediate.
+  inline Imm& operator=(const Imm& other) noexcept { copyFrom(other); return *this; }
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns immediate type.
+  inline constexpr ImmType type() const noexcept { return (ImmType)_signature.getField<Signature::kImmTypeMask>(); }
+  //! Sets the immediate type to `type`.
+  inline void setType(ImmType type) noexcept { _signature.setField<Signature::kImmTypeMask>(uint32_t(type)); }
+  //! Resets immediate type to \ref ImmType::kInt.
+  inline void resetType() noexcept { setType(ImmType::kInt); }
+
+  //! Returns operation predicate of the immediate.
+  //!
+  //! The meaning depends on architecture, for example on ARM hardware this describes \ref arm::ShiftOp
+  //! of the immediate.
+  inline constexpr uint32_t predicate() const noexcept { return _signature.getField<Signature::kPredicateMask>(); }
+
+  //! Sets operation predicate of the immediate to `predicate`.
+  //!
+  //! The meaning depends on architecture, for example on ARM hardware this describes \ref arm::ShiftOp
+  //! of the immediate.
+  inline void setPredicate(uint32_t predicate) noexcept { _signature.setField<Signature::kPredicateMask>(predicate); }
+
+  //! Resets the shift operation type of the immediate to the default value (no operation).
+  inline void resetPredicate() noexcept { _signature.setField<Signature::kPredicateMask>(0); }
+
+  //! Returns the immediate value as `int64_t`, which is the internal format Imm uses.
+  inline constexpr int64_t value() const noexcept {
+    return int64_t((uint64_t(_data[kDataImmValueHi]) << 32) | _data[kDataImmValueLo]);
+  }
+
+  //! Tests whether this immediate value is an integer of any size (returns `bool` like the other `is...()` predicates).
+  inline constexpr bool isInt() const noexcept { return type() == ImmType::kInt; }
+  //! Tests whether this immediate value is a double precision floating point value.
+  inline constexpr bool isDouble() const noexcept { return type() == ImmType::kDouble; }
+
+  //! Tests whether the immediate can be casted to 8-bit signed integer.
+  inline constexpr bool isInt8() const noexcept { return type() == ImmType::kInt && Support::isInt8(value()); }
+  //! Tests whether the immediate can be casted to 8-bit unsigned integer.
+  inline constexpr bool isUInt8() const noexcept { return type() == ImmType::kInt && Support::isUInt8(value()); }
+  //! Tests whether the immediate can be casted to 16-bit signed integer.
+  inline constexpr bool isInt16() const noexcept { return type() == ImmType::kInt && Support::isInt16(value()); }
+  //! Tests whether the immediate can be casted to 16-bit unsigned integer.
+  inline constexpr bool isUInt16() const noexcept { return type() == ImmType::kInt && Support::isUInt16(value()); }
+  //! Tests whether the immediate can be casted to 32-bit signed integer.
+  inline constexpr bool isInt32() const noexcept { return type() == ImmType::kInt && Support::isInt32(value()); }
+  //! Tests whether the immediate can be casted to 32-bit unsigned integer.
+  inline constexpr bool isUInt32() const noexcept { return type() == ImmType::kInt && _data[kDataImmValueHi] == 0; }
+
+  //! Returns the immediate value casted to `T`.
+  //!
+  //! The value is masked before it's casted to `T` so the returned value is simply the representation of `T`
+  //! considering the original value's lowest bits.
+  template<typename T>
+  inline T valueAs() const noexcept { return Support::immediateToT<T>(value()); }
+
+  //! Returns low 32-bit signed integer.
+  inline constexpr int32_t int32Lo() const noexcept { return int32_t(_data[kDataImmValueLo]); }
+  //! Returns high 32-bit signed integer.
+  inline constexpr int32_t int32Hi() const noexcept { return int32_t(_data[kDataImmValueHi]); }
+  //! Returns low 32-bit unsigned integer.
+  inline constexpr uint32_t uint32Lo() const noexcept { return _data[kDataImmValueLo]; }
+  //! Returns high 32-bit unsigned integer.
+  inline constexpr uint32_t uint32Hi() const noexcept { return _data[kDataImmValueHi]; }
+
+  //! Sets immediate value to `val`, the value is casted to a signed 64-bit integer.
+  template<typename T>
+  inline void setValue(const T& val) noexcept {
+    _setValueInternal(Support::immediateFromT(val), std::is_floating_point<T>::value ? ImmType::kDouble : ImmType::kInt);
+  }
+
+  inline void _setValueInternal(int64_t val, ImmType type) noexcept {
+    setType(type);
+    _data[kDataImmValueHi] = uint32_t(uint64_t(val) >> 32);
+    _data[kDataImmValueLo] = uint32_t(uint64_t(val) & 0xFFFFFFFFu);
+  }
+
+  //! \}
+
+  //! \name Utilities
+  //! \{
+
+  //! Clones the immediate operand.
+  inline constexpr Imm clone() const noexcept { return Imm(*this); }
+
+  inline void signExtend8Bits() noexcept { setValue(int64_t(valueAs<int8_t>())); }
+  inline void signExtend16Bits() noexcept { setValue(int64_t(valueAs<int16_t>())); }
+  inline void signExtend32Bits() noexcept { setValue(int64_t(valueAs<int32_t>())); }
+
+  inline void zeroExtend8Bits() noexcept { setValue(valueAs<uint8_t>()); }
+  inline void zeroExtend16Bits() noexcept { setValue(valueAs<uint16_t>()); }
+  inline void zeroExtend32Bits() noexcept { _data[kDataImmValueHi] = 0u; }
+
+  //! \}
+};
+
+//! Creates a new immediate operand.
+template<typename T>
+static inline constexpr Imm imm(const T& val) noexcept { return Imm(val); }
+
+//! \}
+
+namespace Globals {
+  //! \ingroup asmjit_assembler
+  //!
+  //! A default-constructed operand of `Operand_::kOpNone` type.
+  static constexpr const Operand none;
+}
+
+//! \cond INTERNAL
+namespace Support {
+
+template<typename T, bool kIsImm>
+struct ForwardOpImpl {
+  static inline const T& forward(const T& value) noexcept { return value; }
+};
+
+template<typename T>
+struct ForwardOpImpl<T, true> {
+  static inline Imm forward(const T& value) noexcept { return Imm(value); }
+};
+
+//! Either forwards operand T or returns a new operand that wraps it if T is a type convertible to operand.
+//! At the moment this is only used to convert integers, floats, and enumerations to \ref Imm operands.
+template<typename T>
+struct ForwardOp : public ForwardOpImpl<T, Imm::IsConvertibleToImmType<typename std::decay<T>::type>::value> {};
+
+} // {Support}
+//! \endcond
+
+ASMJIT_END_NAMESPACE
+
+#endif // ASMJIT_CORE_OPERAND_H_INCLUDED
diff --git a/lib/lepton/asmjit/core/osutils.cpp b/lib/lepton/asmjit/core/osutils.cpp
new file mode 100644
index 0000000000..fa900bfbb4
--- /dev/null
+++ b/lib/lepton/asmjit/core/osutils.cpp
@@ -0,0 +1,84 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#include "../core/osutils.h"
+#include "../core/support.h"
+
+#if defined(_WIN32)
+  #include <atomic>
+#elif defined(__APPLE__)
+  #include <mach/mach_time.h>
+#else
+  #include <time.h>
+  #include <unistd.h>
+#endif
+
+ASMJIT_BEGIN_NAMESPACE
+
+uint32_t OSUtils::getTickCount() noexcept {
+#if defined(_WIN32)
+  enum HiResStatus : uint32_t {
+    kHiResUnknown      = 0,
+    kHiResAvailable    = 1,
+    kHiResNotAvailable = 2
+  };
+
+  static std::atomic<uint32_t> _hiResStatus(kHiResUnknown);
+  static volatile double _hiResFreq(0);
+
+  uint32_t status = _hiResStatus.load();
+  LARGE_INTEGER now, qpf;
+
+  if (status != kHiResNotAvailable && ::QueryPerformanceCounter(&now)) {
+    double freq = _hiResFreq;
+    if (status == kHiResUnknown) {
+      // Detects the availability of high resolution counter.
+      if (::QueryPerformanceFrequency(&qpf)) {
+        freq = double(qpf.QuadPart) / 1000.0;
+        _hiResFreq = freq;
+        _hiResStatus.compare_exchange_strong(status, kHiResAvailable);
+        status = kHiResAvailable;
+      }
+      else {
+        // High resolution not available.
+        _hiResStatus.compare_exchange_strong(status, kHiResNotAvailable);
+      }
+    }
+
+    if (status == kHiResAvailable)
+      return uint32_t(uint64_t(int64_t(double(now.QuadPart) / freq)) & 0xFFFFFFFFu);
+  }
+
+  // Bail to `GetTickCount()` if we cannot use high resolution.
+  return ::GetTickCount();
+#elif defined(__APPLE__)
+  // See Apple's QA1398.
+  static mach_timebase_info_data_t _machTime;
+
+  uint32_t denom = _machTime.denom;
+  if (ASMJIT_UNLIKELY(!denom)) {
+    if (mach_timebase_info(&_machTime) != KERN_SUCCESS || !(denom = _machTime.denom))
+      return 0;
+  }
+
+  // `mach_absolute_time()` returns ticks - scale to nanoseconds first so milliseconds don't lose precision.
+  uint64_t t = (mach_absolute_time() * _machTime.numer) / denom;
+  t /= 1000000u;
+  return uint32_t(t & 0xFFFFFFFFu);
+#elif defined(_POSIX_MONOTONIC_CLOCK) && _POSIX_MONOTONIC_CLOCK >= 0
+  struct timespec ts;
+  if (ASMJIT_UNLIKELY(clock_gettime(CLOCK_MONOTONIC, &ts) != 0))
+    return 0;
+
+  uint64_t t = (uint64_t(ts.tv_sec ) * 1000u) + (uint64_t(ts.tv_nsec) / 1000000u);
+  return uint32_t(t & 0xFFFFFFFFu);
+#else
+  #pragma message("asmjit::OSUtils::getTickCount() doesn't have implementation for the target OS.")
+  return 0;
+#endif
+}
+
+ASMJIT_END_NAMESPACE
diff --git a/lib/lepton/asmjit/core/osutils.h b/lib/lepton/asmjit/core/osutils.h
new file mode 100644
index 0000000000..3c5c3d94c1
--- /dev/null
+++ b/lib/lepton/asmjit/core/osutils.h
@@ -0,0 +1,61 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_OSUTILS_H_INCLUDED
+#define ASMJIT_CORE_OSUTILS_H_INCLUDED
+
+#include "../core/globals.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \addtogroup asmjit_utilities
+//! \{
+
+//! Operating system utilities.
+namespace OSUtils {
+  //! Gets the current tick count in milliseconds, used for benchmarking (the value wraps around at 32 bits).
+  ASMJIT_API uint32_t getTickCount() noexcept;
+} // {OSUtils}
+
+
+//! \cond INTERNAL
+//! Lock.
+//!
+//! Lock is internal, it cannot be used outside of AsmJit, however, its internal
+//! layout is exposed as it's used by some other classes, which are public.
+class Lock {
+public:
+  ASMJIT_NONCOPYABLE(Lock)
+
+#if defined(_WIN32)
+#pragma pack(push, 8)
+  struct ASMJIT_MAY_ALIAS Handle {
+    void* DebugInfo;
+    long LockCount;
+    long RecursionCount;
+    void* OwningThread;
+    void* LockSemaphore;
+    unsigned long* SpinCount;
+  };
+  Handle _handle;
+#pragma pack(pop)
+#elif !defined(__EMSCRIPTEN__)
+  typedef pthread_mutex_t Handle;
+  Handle _handle;
+#endif
+
+  ASMJIT_FORCE_INLINE Lock() noexcept;
+  ASMJIT_FORCE_INLINE ~Lock() noexcept;
+
+  ASMJIT_FORCE_INLINE void lock() noexcept;
+  ASMJIT_FORCE_INLINE void unlock() noexcept;
+};
+//! \endcond
+
+//! \}
+
+ASMJIT_END_NAMESPACE
+
+#endif // ASMJIT_CORE_OSUTILS_H_INCLUDED
diff --git a/lib/lepton/asmjit/core/osutils_p.h b/lib/lepton/asmjit/core/osutils_p.h
new file mode 100644
index 0000000000..fd87e73112
--- /dev/null
+++ b/lib/lepton/asmjit/core/osutils_p.h
@@ -0,0 +1,68 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_OSUTILS_P_H_INCLUDED
+#define ASMJIT_CORE_OSUTILS_P_H_INCLUDED
+
+#include "../core/osutils.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \cond INTERNAL
+//! \addtogroup asmjit_utilities
+//! \{
+
+#if defined(_WIN32)
+
+// Windows implementation.
+static_assert(sizeof(Lock::Handle) == sizeof(CRITICAL_SECTION), "asmjit::Lock::Handle layout must match CRITICAL_SECTION");
+static_assert(alignof(Lock::Handle) == alignof(CRITICAL_SECTION), "asmjit::Lock::Handle alignment must match CRITICAL_SECTION");
+
+ASMJIT_FORCE_INLINE Lock::Lock() noexcept { InitializeCriticalSection(reinterpret_cast<CRITICAL_SECTION*>(&_handle)); }
+ASMJIT_FORCE_INLINE Lock::~Lock() noexcept { DeleteCriticalSection(reinterpret_cast<CRITICAL_SECTION*>(&_handle)); }
+ASMJIT_FORCE_INLINE void Lock::lock() noexcept { EnterCriticalSection(reinterpret_cast<CRITICAL_SECTION*>(&_handle)); }
+ASMJIT_FORCE_INLINE void Lock::unlock() noexcept { LeaveCriticalSection(reinterpret_cast<CRITICAL_SECTION*>(&_handle)); }
+
+#elif !defined(__EMSCRIPTEN__)
+
+// PThread implementation.
+#ifdef PTHREAD_MUTEX_INITIALIZER
+ASMJIT_FORCE_INLINE Lock::Lock() noexcept : _handle(PTHREAD_MUTEX_INITIALIZER) {}
+#else
+ASMJIT_FORCE_INLINE Lock::Lock() noexcept { pthread_mutex_init(&_handle, nullptr); }
+#endif
+ASMJIT_FORCE_INLINE Lock::~Lock() noexcept { pthread_mutex_destroy(&_handle); }
+ASMJIT_FORCE_INLINE void Lock::lock() noexcept { pthread_mutex_lock(&_handle); }
+ASMJIT_FORCE_INLINE void Lock::unlock() noexcept { pthread_mutex_unlock(&_handle); }
+
+#else
+
+// Dummy implementation - Emscripten or other unsupported platform.
+ASMJIT_FORCE_INLINE Lock::Lock() noexcept {}
+ASMJIT_FORCE_INLINE Lock::~Lock() noexcept {}
+ASMJIT_FORCE_INLINE void Lock::lock() noexcept {}
+ASMJIT_FORCE_INLINE void Lock::unlock() noexcept {}
+
+#endif
+
+//! Scoped lock - calls `lock()` on `target` when constructed and `unlock()` when destroyed.
+class LockGuard {
+public:
+  ASMJIT_NONCOPYABLE(LockGuard)
+
+  Lock& _target;
+
+  inline LockGuard(Lock& target) noexcept
+    : _target(target) { _target.lock(); }
+  inline ~LockGuard() noexcept { _target.unlock(); }
+};
+
+//! \}
+//! \endcond
+
+ASMJIT_END_NAMESPACE
+
+#endif // ASMJIT_CORE_OSUTILS_P_H_INCLUDED
+
diff --git a/lib/lepton/asmjit/core/raassignment_p.h b/lib/lepton/asmjit/core/raassignment_p.h
new file mode 100644
index 0000000000..5418329311
--- /dev/null
+++ b/lib/lepton/asmjit/core/raassignment_p.h
@@ -0,0 +1,418 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_RAASSIGNMENT_P_H_INCLUDED
+#define ASMJIT_CORE_RAASSIGNMENT_P_H_INCLUDED
+
+#include "../core/api-config.h"
+#ifndef ASMJIT_NO_COMPILER
+
+#include "../core/radefs_p.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \cond INTERNAL
+//! \addtogroup asmjit_ra
+//! \{
+
+//! Holds the current register assignment.
+//!
+//! Has two purposes:
+//!
+//!   1. Holds register assignment of a local register allocator (see \ref RALocalAllocator).
+//!   2. Holds register assignment of the entry of basic blocks (see \ref RABlock).
+class RAAssignment {
+public:
+  ASMJIT_NONCOPYABLE(RAAssignment)
+
+  enum Ids : uint32_t {
+    kPhysNone = 0xFF,
+    kWorkNone = RAWorkReg::kIdNone
+  };
+
+  enum DirtyBit : uint32_t {
+    kClean = 0,
+    kDirty = 1
+  };
+
+  struct Layout {
+    //! Index of architecture registers per group.
+    RARegIndex physIndex;
+    //! Count of architecture registers per group.
+    RARegCount physCount;
+    //! Count of physical registers of all groups.
+    uint32_t physTotal;
+    //! Count of work registers.
+    uint32_t workCount;
+    //! WorkRegs data (vector).
+    const RAWorkRegs* workRegs;
+
+    inline void reset() noexcept {
+      physIndex.reset();
+      physCount.reset();
+      physTotal = 0;
+      workCount = 0;
+      workRegs = nullptr;
+    }
+  };
+
+  struct PhysToWorkMap {
+    //! Assigned registers (each bit represents one physical reg).
+    RARegMask assigned;
+    //! Dirty registers (spill slot out of sync or no spill slot).
+    RARegMask dirty;
+    //! PhysReg to WorkReg mapping.
+    uint32_t workIds[1 /* ... */];
+
+    static inline size_t sizeOf(size_t count) noexcept {
+      return sizeof(PhysToWorkMap) - sizeof(uint32_t) + count * sizeof(uint32_t);
+    }
+
+    inline void reset(size_t count) noexcept {
+      assigned.reset();
+      dirty.reset();
+
+      for (size_t i = 0; i < count; i++)
+        workIds[i] = kWorkNone;
+    }
+
+    inline void copyFrom(const PhysToWorkMap* other, size_t count) noexcept {
+      size_t size = sizeOf(count);
+      memcpy(this, other, size);
+    }
+
+    inline void unassign(RegGroup group, uint32_t physId, uint32_t indexInWorkIds) noexcept {
+      assigned.clear(group, Support::bitMask(physId));
+      dirty.clear(group, Support::bitMask(physId));
+      workIds[indexInWorkIds] = kWorkNone;
+    }
+  };
+
+  struct WorkToPhysMap {
+    //! WorkReg to PhysReg mapping
+    uint8_t physIds[1 /* ... */];
+
+    static inline size_t sizeOf(size_t count) noexcept {
+      return size_t(count) * sizeof(uint8_t);
+    }
+
+    inline void reset(size_t count) noexcept {
+      for (size_t i = 0; i < count; i++)
+        physIds[i] = kPhysNone;
+    }
+
+    inline void copyFrom(const WorkToPhysMap* other, size_t count) noexcept {
+      size_t size = sizeOf(count);
+      if (ASMJIT_LIKELY(size))
+        memcpy(this, other, size);
+    }
+  };
+
+  //! \name Members
+  //! \{
+
+  //! Physical registers layout.
+  Layout _layout;
+  //! WorkReg to PhysReg mapping.
+  WorkToPhysMap* _workToPhysMap;
+  //! PhysReg to WorkReg mapping and assigned/dirty bits.
+  PhysToWorkMap* _physToWorkMap;
+  //! Optimization to translate PhysRegs to WorkRegs faster.
+  Support::Array<uint32_t*, Globals::kNumVirtGroups> _physToWorkIds;
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  inline RAAssignment() noexcept {
+    _layout.reset();
+    resetMaps();
+  }
+
+  ASMJIT_FORCE_INLINE void initLayout(const RARegCount& physCount, const RAWorkRegs& workRegs) noexcept {
+    // Layout must be initialized before data.
+    ASMJIT_ASSERT(_physToWorkMap == nullptr);
+    ASMJIT_ASSERT(_workToPhysMap == nullptr);
+
+    _layout.physIndex.buildIndexes(physCount);
+    _layout.physCount = physCount;
+    _layout.physTotal = uint32_t(_layout.physIndex[RegGroup::kMaxVirt]) +
+                        uint32_t(_layout.physCount[RegGroup::kMaxVirt]) ;
+    _layout.workCount = workRegs.size();
+    _layout.workRegs = &workRegs;
+  }
+
+  ASMJIT_FORCE_INLINE void initMaps(PhysToWorkMap* physToWorkMap, WorkToPhysMap* workToPhysMap) noexcept {
+    _physToWorkMap = physToWorkMap;
+    _workToPhysMap = workToPhysMap;
+    for (RegGroup group : RegGroupVirtValues{})
+      _physToWorkIds[group] = physToWorkMap->workIds + _layout.physIndex.get(group);
+  }
+
+  ASMJIT_FORCE_INLINE void resetMaps() noexcept {
+    _physToWorkMap = nullptr;
+    _workToPhysMap = nullptr;
+    _physToWorkIds.fill(nullptr);
+  }
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  inline PhysToWorkMap* physToWorkMap() const noexcept { return _physToWorkMap; }
+  inline WorkToPhysMap* workToPhysMap() const noexcept { return _workToPhysMap; }
+
+  inline RARegMask& assigned() noexcept { return _physToWorkMap->assigned; }
+  inline const RARegMask& assigned() const noexcept { return _physToWorkMap->assigned; }
+  inline uint32_t assigned(RegGroup group) const noexcept { return _physToWorkMap->assigned[group]; }
+
+  inline RARegMask& dirty() noexcept { return _physToWorkMap->dirty; }
+  inline const RARegMask& dirty() const noexcept { return _physToWorkMap->dirty; }
+  inline RegMask dirty(RegGroup group) const noexcept { return _physToWorkMap->dirty[group]; }
+
+  inline uint32_t workToPhysId(RegGroup group, uint32_t workId) const noexcept {
+    DebugUtils::unused(group);
+    ASMJIT_ASSERT(workId != kWorkNone);
+    ASMJIT_ASSERT(workId < _layout.workCount);
+    return _workToPhysMap->physIds[workId];
+  }
+
+  inline uint32_t physToWorkId(RegGroup group, uint32_t physId) const noexcept {
+    ASMJIT_ASSERT(physId < Globals::kMaxPhysRegs);
+    return _physToWorkIds[group][physId];
+  }
+
+  inline bool isPhysAssigned(RegGroup group, uint32_t physId) const noexcept {
+    ASMJIT_ASSERT(physId < Globals::kMaxPhysRegs);
+    return Support::bitTest(_physToWorkMap->assigned[group], physId);
+  }
+
+  inline bool isPhysDirty(RegGroup group, uint32_t physId) const noexcept {
+    ASMJIT_ASSERT(physId < Globals::kMaxPhysRegs);
+    return Support::bitTest(_physToWorkMap->dirty[group], physId);
+  }
+
+  //! \}
+
+  //! \name Assignment
+  //!
+  //! These are low-level allocation helpers that are used to update the current mappings between physical and
+  //! virt/work registers and also to update masks that represent allocated and dirty registers. These functions
+  //! don't emit any code; they are only used to update and keep all mappings in sync.
+  //!
+  //! \{
+
+  //! Binds `workId` to `physId` within `group`; both must be unassigned and `physId` must be clean.
+  inline void assign(RegGroup group, uint32_t workId, uint32_t physId, bool dirty) noexcept {
+    ASMJIT_ASSERT(workToPhysId(group, workId) == kPhysNone);
+    ASMJIT_ASSERT(physToWorkId(group, physId) == kWorkNone);
+    ASMJIT_ASSERT(!isPhysAssigned(group, physId));
+    ASMJIT_ASSERT(!isPhysDirty(group, physId));
+
+    _physToWorkIds[group][physId] = workId;
+    _workToPhysMap->physIds[workId] = uint8_t(physId);
+
+    const RegMask physBit = Support::bitMask(physId);
+    _physToWorkMap->dirty[group] |= dirty ? physBit : RegMask(0);
+    _physToWorkMap->assigned[group] |= physBit;
+
+    verify();
+  }
+
+  //! Reassign [VirtReg/WorkReg] to `dstPhysId` from `srcPhysId` (the dirty state moves with the register).
+  inline void reassign(RegGroup group, uint32_t workId, uint32_t dstPhysId, uint32_t srcPhysId) noexcept {
+    ASMJIT_ASSERT(dstPhysId != srcPhysId);
+    ASMJIT_ASSERT(workToPhysId(group, workId) == srcPhysId);
+    ASMJIT_ASSERT(physToWorkId(group, srcPhysId) == workId);
+    ASMJIT_ASSERT(isPhysAssigned(group, srcPhysId) == true);
+    ASMJIT_ASSERT(isPhysAssigned(group, dstPhysId) == false);
+
+    _workToPhysMap->physIds[workId] = uint8_t(dstPhysId);
+    _physToWorkIds[group][srcPhysId] = kWorkNone;
+    _physToWorkIds[group][dstPhysId] = workId;
+
+    RegMask srcMask = Support::bitMask(srcPhysId);
+    RegMask dstMask = Support::bitMask(dstPhysId);
+
+    bool dirty = (_physToWorkMap->dirty[group] & srcMask) != 0; // Remember whether the source register was dirty.
+    RegMask regMask = dstMask | srcMask;
+
+    _physToWorkMap->assigned[group] ^= regMask; // Flips both bits: clears `srcPhysId`, sets `dstPhysId`.
+    _physToWorkMap->dirty[group] ^= regMask & Support::bitMaskFromBool<RegMask>(dirty); // Flipped only if source was dirty.
+
+    verify();
+  }
+
+  inline void swap(RegGroup group, uint32_t aWorkId, uint32_t aPhysId, uint32_t bWorkId, uint32_t bPhysId) noexcept {
+    ASMJIT_ASSERT(aPhysId != bPhysId);
+    ASMJIT_ASSERT(workToPhysId(group, aWorkId) == aPhysId);
+    ASMJIT_ASSERT(workToPhysId(group, bWorkId) == bPhysId);
+    ASMJIT_ASSERT(physToWorkId(group, aPhysId) == aWorkId);
+    ASMJIT_ASSERT(physToWorkId(group, bPhysId) == bWorkId);
+    ASMJIT_ASSERT(isPhysAssigned(group, aPhysId));
+    ASMJIT_ASSERT(isPhysAssigned(group, bPhysId));
+
+    _workToPhysMap->physIds[aWorkId] = uint8_t(bPhysId); // Exchange the physical registers of the two work registers.
+    _workToPhysMap->physIds[bWorkId] = uint8_t(aPhysId);
+    _physToWorkIds[group][aPhysId] = bWorkId;
+    _physToWorkIds[group][bPhysId] = aWorkId;
+
+    RegMask aMask = Support::bitMask(aPhysId);
+    RegMask bMask = Support::bitMask(bPhysId);
+    RegMask flipMask = Support::bitMaskFromBool<RegMask>(((_physToWorkMap->dirty[group] & aMask) != 0) ^ ((_physToWorkMap->dirty[group] & bMask) != 0));
+    RegMask regMask = aMask | bMask; // The `assigned` mask is not touched: both registers stay assigned.
+    _physToWorkMap->dirty[group] ^= regMask & flipMask; // Exchanges the two dirty bits (no-op when they are equal).
+
+    verify();
+  }
+
+  //! Releases the binding between `workId` and `physId` in `group` and clears the register's assigned/dirty bits.
+  inline void unassign(RegGroup group, uint32_t workId, uint32_t physId) noexcept {
+    ASMJIT_ASSERT(physId < Globals::kMaxPhysRegs);
+    ASMJIT_ASSERT(workToPhysId(group, workId) == physId);
+    ASMJIT_ASSERT(physToWorkId(group, physId) == workId);
+    ASMJIT_ASSERT(isPhysAssigned(group, physId));
+
+    _physToWorkIds[group][physId] = kWorkNone;
+    _workToPhysMap->physIds[workId] = kPhysNone;
+
+    const RegMask keepMask = ~Support::bitMask(physId);
+    _physToWorkMap->dirty[group] &= keepMask;
+    _physToWorkMap->assigned[group] &= keepMask;
+
+    verify();
+  }
+
+  inline void makeClean(RegGroup group, uint32_t workId, uint32_t physId) noexcept {
+    DebugUtils::unused(workId);
+    // Only the dirty bit of `physId` is dropped; no mapping is modified.
+    _physToWorkMap->dirty[group] &= ~Support::bitMask(physId);
+  }
+
+  inline void makeDirty(RegGroup group, uint32_t workId, uint32_t physId) noexcept {
+    DebugUtils::unused(workId);
+    // Only the dirty bit of `physId` is raised; no mapping is modified.
+    _physToWorkMap->dirty[group] |= Support::bitMask(physId);
+  }
+
+  //! \}
+
+  //! \name Utilities
+  //! \{
+
+  ASMJIT_FORCE_INLINE void swap(RAAssignment& other) noexcept { // Exchanges both map pointers and the per-group pointers.
+    std::swap(_workToPhysMap, other._workToPhysMap);
+    std::swap(_physToWorkMap, other._physToWorkMap);
+    _physToWorkIds.swap(other._physToWorkIds); // NOTE(review): `_layout` is not swapped - presumably both share a layout; confirm.
+  }
+
+  inline void assignWorkIdsFromPhysIds() noexcept { // Rebuilds `_workToPhysMap` from the content of `_physToWorkMap`.
+    memset(_workToPhysMap, uint8_t(BaseReg::kIdBad), WorkToPhysMap::sizeOf(_layout.workCount)); // Reset every byte first.
+
+    for (RegGroup group : RegGroupVirtValues{}) {
+      uint32_t physBaseIndex = _layout.physIndex[group]; // Offset of this group's entries inside `workIds`.
+      Support::BitWordIterator<RegMask> it(_physToWorkMap->assigned[group]);
+
+      while (it.hasNext()) {
+        uint32_t physId = it.next();
+        uint32_t workId = _physToWorkMap->workIds[physBaseIndex + physId];
+
+        ASMJIT_ASSERT(workId != kWorkNone);
+        _workToPhysMap->physIds[workId] = uint8_t(physId); // Record the reverse mapping.
+      }
+    }
+  }
+
+  inline void copyFrom(const PhysToWorkMap* physToWorkMap) noexcept {
+    memcpy(_physToWorkMap, physToWorkMap, PhysToWorkMap::sizeOf(_layout.physTotal));
+    assignWorkIdsFromPhysIds(); // No work-to-phys map was given, so derive it from the copied data.
+  }
+
+  inline void copyFrom(const PhysToWorkMap* physToWorkMap, const WorkToPhysMap* workToPhysMap) noexcept {
+    memcpy(_physToWorkMap, physToWorkMap, PhysToWorkMap::sizeOf(_layout.physTotal)); // Sizes come from this object's layout.
+    memcpy(_workToPhysMap, workToPhysMap, WorkToPhysMap::sizeOf(_layout.workCount));
+  }
+
+  inline void copyFrom(const RAAssignment& other) noexcept {
+    copyFrom(other.physToWorkMap(), other.workToPhysMap()); // Copies both maps of `other` verbatim.
+  }
+
+  // Compares the layouts, both mappings, and the assigned/dirty masks. Not really useful outside of debugging.
+  bool equals(const RAAssignment& other) const noexcept {
+    // Layout should always match.
+    if (_layout.physIndex != other._layout.physIndex ||
+        _layout.physCount != other._layout.physCount ||
+        _layout.physTotal != other._layout.physTotal ||
+        _layout.workCount != other._layout.workCount ||
+        _layout.workRegs  != other._layout.workRegs)
+      return false;
+
+    uint32_t physTotal = _layout.physTotal;
+    uint32_t workCount = _layout.workCount;
+
+    for (uint32_t physId = 0; physId < physTotal; physId++) { // Here `physId` indexes the flat `workIds` array.
+      uint32_t thisWorkId = _physToWorkMap->workIds[physId];
+      uint32_t otherWorkId = other._physToWorkMap->workIds[physId];
+      if (thisWorkId != otherWorkId)
+        return false;
+    }
+
+    for (uint32_t workId = 0; workId < workCount; workId++) {
+      uint32_t thisPhysId = _workToPhysMap->physIds[workId];
+      uint32_t otherPhysId = other._workToPhysMap->physIds[workId];
+      if (thisPhysId != otherPhysId)
+        return false;
+    }
+
+    if (_physToWorkMap->assigned != other._physToWorkMap->assigned ||
+        _physToWorkMap->dirty    != other._physToWorkMap->dirty    )
+      return false;
+
+    return true;
+  }
+
+#if defined(ASMJIT_BUILD_DEBUG)
+  ASMJIT_NOINLINE void verify() noexcept {
+    // Verify WorkToPhysMap: every assigned work register must be mapped back by its physical register.
+    {
+      for (uint32_t workId = 0; workId < _layout.workCount; workId++) {
+        uint32_t physId = _workToPhysMap->physIds[workId];
+        if (physId != kPhysNone) {
+          const RAWorkReg* workReg = _layout.workRegs->at(workId);
+          RegGroup group = workReg->group();
+          ASMJIT_ASSERT(_physToWorkIds[group][physId] == workId);
+        }
+      }
+    }
+
+    // Verify PhysToWorkMap: every occupied physical register must be mapped back by its work register.
+    {
+      for (RegGroup group : RegGroupVirtValues{}) {
+        uint32_t physCount = _layout.physCount[group];
+        for (uint32_t physId = 0; physId < physCount; physId++) {
+          uint32_t workId = _physToWorkIds[group][physId];
+          if (workId != kWorkNone) {
+            ASMJIT_ASSERT(_workToPhysMap->physIds[workId] == physId);
+          }
+        }
+      }
+    }
+  }
+#else
+  inline void verify() noexcept {} // No-op unless ASMJIT_BUILD_DEBUG is defined.
+#endif
+
+  //! \}
+};
+
+//! \}
+//! \endcond
+
+ASMJIT_END_NAMESPACE
+
+#endif // !ASMJIT_NO_COMPILER
+#endif // ASMJIT_CORE_RAASSIGNMENT_P_H_INCLUDED
diff --git a/lib/lepton/asmjit/core/rabuilders_p.h b/lib/lepton/asmjit/core/rabuilders_p.h
new file mode 100644
index 0000000000..1b763030c4
--- /dev/null
+++ b/lib/lepton/asmjit/core/rabuilders_p.h
@@ -0,0 +1,612 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_RABUILDERS_P_H_INCLUDED
+#define ASMJIT_CORE_RABUILDERS_P_H_INCLUDED
+
+#include "../core/api-config.h"
+#ifndef ASMJIT_NO_COMPILER
+
+#include "../core/formatter.h"
+#include "../core/rapass_p.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \cond INTERNAL
+//! \addtogroup asmjit_ra
+//! \{
+
+template<typename This>
+class RACFGBuilderT {
+public:
+  enum : uint32_t {
+    kRootIndentation = 2,
+    kCodeIndentation = 4,
+
+    // NOTE: This is a bit hacky. There are some nodes which are processed twice (see `onBeforeInvoke()` and
+    // `onBeforeRet()`) as they can insert some nodes around them. Since we don't have any flags to mark these
+    // we just use their position that is [at that time] unassigned.
+    kNodePositionDidOnBefore = 0xFFFFFFFFu
+  };
+
+  //! \name Members
+  //! \{
+
+  BaseRAPass* _pass = nullptr;
+  BaseCompiler* _cc = nullptr;
+  RABlock* _curBlock = nullptr;
+  RABlock* _retBlock = nullptr;
+  FuncNode* _funcNode = nullptr;
+  RARegsStats _blockRegStats {};
+  uint32_t _exitLabelId = Globals::kInvalidId;
+  ZoneVector<uint32_t> _sharedAssignmentsMap {};
+
+  // Only used by logging, it's fine to be here to prevent more #ifdefs...
+  bool _hasCode = false;
+  RABlock* _lastLoggedBlock = nullptr;
+
+#ifndef ASMJIT_NO_LOGGING
+  Logger* _logger = nullptr;
+  FormatOptions _formatOptions {};
+  StringTmp<512> _sb;
+#endif
+
+  //! \}
+
+  inline RACFGBuilderT(BaseRAPass* pass) noexcept
+    : _pass(pass),
+      _cc(pass->cc()) {
+#ifndef ASMJIT_NO_LOGGING // The logger is picked up only if the pass has the kRADebugCFG diagnostic option set.
+    _logger = _pass->hasDiagnosticOption(DiagnosticOptions::kRADebugCFG) ? _pass->logger() : nullptr;
+    if (_logger)
+      _formatOptions = _logger->options();
+#endif
+  }
+
+  inline BaseCompiler* cc() const noexcept { return _cc; } //!< Returns the compiler taken from the pass at construction.
+
+  //! \name Run
+  //! \{
+
+  //! Called per function by an architecture-specific CFG builder.
+  Error run() noexcept {
+    log("[BuildCFG]\n");
+    ASMJIT_PROPAGATE(prepare());
+
+    logNode(_funcNode, kRootIndentation);
+    logBlock(_curBlock, kRootIndentation);
+
+    RABlock* entryBlock = _curBlock;
+    BaseNode* node = _funcNode->next();
+    if (ASMJIT_UNLIKELY(!node))
+      return DebugUtils::errored(kErrorInvalidState);
+
+    _curBlock->setFirst(_funcNode);
+    _curBlock->setLast(_funcNode);
+
+    RAInstBuilder ib;
+    ZoneVector<RABlock*> blocksWithUnknownJumps;
+
+    for (;;) {
+      BaseNode* next = node->next();
+      ASMJIT_ASSERT(node->position() == 0 || node->position() == kNodePositionDidOnBefore);
+
+      if (node->isInst()) {
+        // Instruction | Jump | Invoke | Return
+        // ------------------------------------
+
+        // Handle `InstNode`, `InvokeNode`, and `FuncRetNode`. All of them share the same interface that provides
+        // operands that have read/write semantics.
+        if (ASMJIT_UNLIKELY(!_curBlock)) {
+          // Unreachable code has to be removed, we cannot allocate registers in such code as we cannot do proper
+          // liveness analysis in such case.
+          removeNode(node);
+          node = next;
+          continue;
+        }
+
+        _hasCode = true;
+
+        if (node->isInvoke() || node->isFuncRet()) {
+          if (node->position() != kNodePositionDidOnBefore) {
+            // Call and Reg are complicated as they may insert some surrounding code around them. The simplest
+            // approach is to get the previous node, call the `onBefore()` handlers and then check whether
+            // anything changed and restart if so. By restart we mean that the current `node` would go back to
+            // the first possible inserted node by `onBeforeInvoke()` or `onBeforeRet()`.
+            BaseNode* prev = node->prev();
+
+            if (node->type() == NodeType::kInvoke)
+              ASMJIT_PROPAGATE(static_cast<This*>(this)->onBeforeInvoke(node->as<InvokeNode>()));
+            else
+              ASMJIT_PROPAGATE(static_cast<This*>(this)->onBeforeRet(node->as<FuncRetNode>()));
+
+            if (prev != node->prev()) {
+              // If this was the first node in the block and something was
+              // inserted before it then we have to update the first block.
+              if (_curBlock->first() == node)
+                _curBlock->setFirst(prev->next());
+
+              node->setPosition(kNodePositionDidOnBefore);
+              node = prev->next();
+
+              // `onBeforeInvoke()` and `onBeforeRet()` can only insert instructions.
+              ASMJIT_ASSERT(node->isInst());
+            }
+
+            // Necessary if something was inserted after `node`, but nothing before.
+            next = node->next();
+          }
+          else {
+            // Change the position back to its original value.
+            node->setPosition(0);
+          }
+        }
+
+        InstNode* inst = node->as<InstNode>();
+        logNode(inst, kCodeIndentation);
+
+        InstControlFlow cf = InstControlFlow::kRegular;
+        ib.reset();
+        ASMJIT_PROPAGATE(static_cast<This*>(this)->onInst(inst, cf, ib));
+
+        if (node->isInvoke()) {
+          ASMJIT_PROPAGATE(static_cast<This*>(this)->onInvoke(inst->as<InvokeNode>(), ib));
+        }
+
+        if (node->isFuncRet()) {
+          ASMJIT_PROPAGATE(static_cast<This*>(this)->onRet(inst->as<FuncRetNode>(), ib));
+          cf = InstControlFlow::kReturn;
+        }
+
+        if (cf == InstControlFlow::kJump) {
+          uint32_t fixedRegCount = 0;
+          for (RATiedReg& tiedReg : ib) {
+            RAWorkReg* workReg = _pass->workRegById(tiedReg.workId());
+            if (workReg->group() == RegGroup::kGp) {
+              uint32_t useId = tiedReg.useId();
+              if (useId == BaseReg::kIdBad) {
+                useId = _pass->_scratchRegIndexes[fixedRegCount++];
+                tiedReg.setUseId(useId);
+              }
+              _curBlock->addExitScratchGpRegs(Support::bitMask(useId));
+            }
+          }
+        }
+
+        ASMJIT_PROPAGATE(_pass->assignRAInst(inst, _curBlock, ib));
+        _blockRegStats.combineWith(ib._stats);
+
+        if (cf != InstControlFlow::kRegular) {
+          // Support for conditional and unconditional jumps.
+          if (cf == InstControlFlow::kJump || cf == InstControlFlow::kBranch) {
+            _curBlock->setLast(node);
+            _curBlock->addFlags(RABlockFlags::kHasTerminator);
+            _curBlock->makeConstructed(_blockRegStats);
+
+            if (!inst->hasOption(InstOptions::kUnfollow)) {
+              // Jmp/Jcc/Call/Loop/etc...
+              uint32_t opCount = inst->opCount();
+              const Operand* opArray = inst->operands();
+
+              // Cannot jump anywhere without operands.
+              if (ASMJIT_UNLIKELY(!opCount))
+                return DebugUtils::errored(kErrorInvalidState);
+
+              if (opArray[opCount - 1].isLabel()) {
+                // Labels are easy for constructing the control flow.
+                LabelNode* labelNode;
+                ASMJIT_PROPAGATE(cc()->labelNodeOf(&labelNode, opArray[opCount - 1].as<Label>()));
+
+                RABlock* targetBlock = _pass->newBlockOrExistingAt(labelNode);
+                if (ASMJIT_UNLIKELY(!targetBlock))
+                  return DebugUtils::errored(kErrorOutOfMemory);
+
+                targetBlock->makeTargetable();
+                ASMJIT_PROPAGATE(_curBlock->appendSuccessor(targetBlock));
+              }
+              else {
+                // Not a label - could be jump with reg/mem operand, which means that it can go anywhere. Such jumps
+                // must either be annotated so the CFG can be properly constructed, otherwise we assume the worst case
+                // - can jump to any basic block.
+                JumpAnnotation* jumpAnnotation = nullptr;
+                _curBlock->addFlags(RABlockFlags::kHasJumpTable);
+
+                if (inst->type() == NodeType::kJump)
+                  jumpAnnotation = inst->as<JumpNode>()->annotation();
+
+                if (jumpAnnotation) {
+                  uint64_t timestamp = _pass->nextTimestamp();
+                  for (uint32_t id : jumpAnnotation->labelIds()) {
+                    LabelNode* labelNode;
+                    ASMJIT_PROPAGATE(cc()->labelNodeOf(&labelNode, id));
+
+                    RABlock* targetBlock = _pass->newBlockOrExistingAt(labelNode);
+                    if (ASMJIT_UNLIKELY(!targetBlock))
+                      return DebugUtils::errored(kErrorOutOfMemory);
+
+                    // Prevents adding basic-block successors multiple times.
+                    if (!targetBlock->hasTimestamp(timestamp)) {
+                      targetBlock->setTimestamp(timestamp);
+                      targetBlock->makeTargetable();
+                      ASMJIT_PROPAGATE(_curBlock->appendSuccessor(targetBlock));
+                    }
+                  }
+                  ASMJIT_PROPAGATE(shareAssignmentAcrossSuccessors(_curBlock));
+                }
+                else {
+                  ASMJIT_PROPAGATE(blocksWithUnknownJumps.append(_pass->allocator(), _curBlock));
+                }
+              }
+            }
+
+            if (cf == InstControlFlow::kJump) {
+              // Unconditional jump makes the code after the jump unreachable, which will be removed instantly during
+              // the CFG construction; as we cannot allocate registers for instructions that are not part of any block.
+              // Of course we can leave these instructions as they are, however, that would only postpone the problem
+              // as assemblers can't encode instructions that use virtual registers.
+              _curBlock = nullptr;
+            }
+            else {
+              node = next;
+              if (ASMJIT_UNLIKELY(!node))
+                return DebugUtils::errored(kErrorInvalidState);
+
+              RABlock* consecutiveBlock;
+              if (node->type() == NodeType::kLabel) {
+                if (node->hasPassData()) {
+                  consecutiveBlock = node->passData<RABlock>();
+                }
+                else {
+                  consecutiveBlock = _pass->newBlock(node);
+                  if (ASMJIT_UNLIKELY(!consecutiveBlock))
+                    return DebugUtils::errored(kErrorOutOfMemory);
+                  node->setPassData<RABlock>(consecutiveBlock);
+                }
+              }
+              else {
+                consecutiveBlock = _pass->newBlock(node);
+                if (ASMJIT_UNLIKELY(!consecutiveBlock))
+                  return DebugUtils::errored(kErrorOutOfMemory);
+              }
+
+              _curBlock->addFlags(RABlockFlags::kHasConsecutive);
+              ASMJIT_PROPAGATE(_curBlock->prependSuccessor(consecutiveBlock));
+
+              _curBlock = consecutiveBlock;
+              _hasCode = false;
+              _blockRegStats.reset();
+
+              if (_curBlock->isConstructed())
+                break;
+              ASMJIT_PROPAGATE(_pass->addBlock(consecutiveBlock));
+
+              logBlock(_curBlock, kRootIndentation);
+              continue;
+            }
+          }
+
+          if (cf == InstControlFlow::kReturn) {
+            _curBlock->setLast(node);
+            _curBlock->makeConstructed(_blockRegStats);
+            ASMJIT_PROPAGATE(_curBlock->appendSuccessor(_retBlock));
+
+            _curBlock = nullptr;
+          }
+        }
+      }
+      else if (node->type() == NodeType::kLabel) {
+        // Label - Basic-Block Management
+        // ------------------------------
+
+        if (!_curBlock) {
+          // If the current code is unreachable the label makes it reachable again. We may remove the whole block in
+          // the future if it's not referenced though.
+          _curBlock = node->passData<RABlock>();
+
+          if (_curBlock) {
+            // If the label has a block assigned we can either continue with it or skip it if the block has been
+            // constructed already.
+            if (_curBlock->isConstructed())
+              break;
+          }
+          else {
+            // No block assigned - create a new one and assign it.
+            _curBlock = _pass->newBlock(node);
+            if (ASMJIT_UNLIKELY(!_curBlock))
+              return DebugUtils::errored(kErrorOutOfMemory);
+            node->setPassData<RABlock>(_curBlock);
+          }
+
+          _curBlock->makeTargetable();
+          _hasCode = false;
+          _blockRegStats.reset();
+          ASMJIT_PROPAGATE(_pass->addBlock(_curBlock));
+        }
+        else {
+          if (node->hasPassData()) {
+            RABlock* consecutive = node->passData<RABlock>();
+            consecutive->makeTargetable();
+
+            if (_curBlock == consecutive) {
+              // The label currently processed is part of the current block. This is only possible for multiple labels
+              // that are right next to each other or labels that are separated by non-code nodes like directives and
+              // comments.
+              if (ASMJIT_UNLIKELY(_hasCode))
+                return DebugUtils::errored(kErrorInvalidState);
+            }
+            else {
+              // Label makes the current block constructed. There is a chance that the Label is not used, but we don't
+              // know that at this point. In the worst case there would be two blocks next to each other, it's just fine.
+              ASMJIT_ASSERT(_curBlock->last() != node);
+              _curBlock->setLast(node->prev());
+              _curBlock->addFlags(RABlockFlags::kHasConsecutive);
+              _curBlock->makeConstructed(_blockRegStats);
+
+              ASMJIT_PROPAGATE(_curBlock->appendSuccessor(consecutive));
+              ASMJIT_PROPAGATE(_pass->addBlock(consecutive));
+
+              _curBlock = consecutive;
+              _hasCode = false;
+              _blockRegStats.reset();
+            }
+          }
+          else {
+            // First time we see this label.
+            if (_hasCode || _curBlock == entryBlock) {
+              // Cannot continue the current block if it already contains some code or it's a block entry. We need to
+              // create a new block and make it a successor.
+              ASMJIT_ASSERT(_curBlock->last() != node);
+              _curBlock->setLast(node->prev());
+              _curBlock->addFlags(RABlockFlags::kHasConsecutive);
+              _curBlock->makeConstructed(_blockRegStats);
+
+              RABlock* consecutive = _pass->newBlock(node);
+              if (ASMJIT_UNLIKELY(!consecutive))
+                return DebugUtils::errored(kErrorOutOfMemory);
+              consecutive->makeTargetable();
+
+              ASMJIT_PROPAGATE(_curBlock->appendSuccessor(consecutive));
+              ASMJIT_PROPAGATE(_pass->addBlock(consecutive));
+
+              _curBlock = consecutive;
+              _hasCode = false;
+              _blockRegStats.reset();
+            }
+
+            node->setPassData<RABlock>(_curBlock);
+          }
+        }
+
+        if (_curBlock && _curBlock != _lastLoggedBlock)
+          logBlock(_curBlock, kRootIndentation);
+        logNode(node, kRootIndentation);
+
+        // Unlikely: Assume that the exit label is reached only once per function.
+        if (ASMJIT_UNLIKELY(node->as<LabelNode>()->labelId() == _exitLabelId)) {
+          _curBlock->setLast(node);
+          _curBlock->makeConstructed(_blockRegStats);
+          ASMJIT_PROPAGATE(_pass->addExitBlock(_curBlock));
+
+          _curBlock = nullptr;
+        }
+      }
+      else {
+        // Other Nodes | Function Exit
+        // ---------------------------
+
+        logNode(node, kCodeIndentation);
+
+        if (node->type() == NodeType::kSentinel) {
+          if (node == _funcNode->endNode()) {
+            // Make sure we didn't flow here if this is the end of the function sentinel.
+            if (ASMJIT_UNLIKELY(_curBlock))
+              return DebugUtils::errored(kErrorInvalidState);
+            break;
+          }
+        }
+        else if (node->type() == NodeType::kFunc) {
+          // RAPass can only compile a single function at a time. If we
+          // encountered a function it must be the current one, bail if not.
+          if (ASMJIT_UNLIKELY(node != _funcNode))
+            return DebugUtils::errored(kErrorInvalidState);
+          // PASS if this is the first node.
+        }
+        else {
+          // PASS if this is a non-interesting or unknown node.
+        }
+      }
+
+      // Advance to the next node.
+      node = next;
+
+      // NOTE: We cannot encounter a NULL node, because every function must be terminated by a sentinel (`stop`)
+      // node. If we encountered a NULL node it means that something went wrong and this node list is corrupted;
+      // bail in such case.
+      if (ASMJIT_UNLIKELY(!node))
+        return DebugUtils::errored(kErrorInvalidState);
+    }
+
+    if (_pass->hasDanglingBlocks())
+      return DebugUtils::errored(kErrorInvalidState);
+
+    for (RABlock* block : blocksWithUnknownJumps)
+      handleBlockWithUnknownJump(block);
+
+    return _pass->initSharedAssignments(_sharedAssignmentsMap);
+  }
+
+  //! \}
+
+  //! \name Prepare
+  //! \{
+
+  //! Prepares the CFG builder of the current function: sets up `_retBlock` and `_curBlock` and resets per-run state.
+  Error prepare() noexcept {
+    FuncNode* func = _pass->func();
+    BaseNode* node = nullptr;
+
+    // Create entry and exit blocks.
+    _funcNode = func;
+    _retBlock = _pass->newBlockOrExistingAt(func->exitNode(), &node); // NOTE(review): `node` is an out-parameter set there.
+
+    if (ASMJIT_UNLIKELY(!_retBlock))
+      return DebugUtils::errored(kErrorOutOfMemory);
+
+    _retBlock->makeTargetable();
+    ASMJIT_PROPAGATE(_pass->addExitBlock(_retBlock));
+
+    if (node != func) {
+      _curBlock = _pass->newBlock();
+      if (ASMJIT_UNLIKELY(!_curBlock))
+        return DebugUtils::errored(kErrorOutOfMemory);
+    }
+    else {
+      // Function that has no code at all - the entry block and the exit block are the same block.
+      _curBlock = _retBlock;
+    }
+
+    // Reset everything we may need.
+    _blockRegStats.reset();
+    _exitLabelId = func->exitNode()->labelId();
+
+    // Initially we assume there is no code in the function body.
+    _hasCode = false;
+
+    return _pass->addBlock(_curBlock);
+  }
+
+  //! \}
+
+  //! \name Utilities
+  //! \{
+
+  //! Logs `node` with a "<Removed>" prefix and removes it via the compiler, e.g. because of a dead code elimination.
+  void removeNode(BaseNode* node) noexcept {
+    logNode(node, kRootIndentation, "<Removed>");
+    cc()->removeNode(node);
+  }
+
+  //! Handles block with unknown jump, which could be a jump to a jump table.
+  //!
+  //! Every targetable block except the function entry block and the natural (consecutive) successor, if such block
+  //! exists, is appended as a successor of `block`. The first error reported while doing so is returned.
+  Error handleBlockWithUnknownJump(RABlock* block) noexcept {
+    RABlocks& blocks = _pass->blocks();
+    size_t blockCount = blocks.size();
+
+    // NOTE: Iterate from `1` as the first block is the entry block, we don't
+    // allow the entry to be a successor of any block.
+    RABlock* consecutive = block->consecutive();
+    for (size_t i = 1; i < blockCount; i++) {
+      RABlock* candidate = blocks[i];
+      if (candidate == consecutive || !candidate->isTargetable())
+        continue;
+      ASMJIT_PROPAGATE(block->appendSuccessor(candidate));
+    }
+
+    return shareAssignmentAcrossSuccessors(block);
+  }
+
+  Error shareAssignmentAcrossSuccessors(RABlock* block) noexcept {
+    if (block->successors().size() <= 1) // Nothing to share with zero or one successor.
+      return kErrorOk;
+
+    RABlock* consecutive = block->consecutive();
+    uint32_t sharedAssignmentId = Globals::kInvalidId; // Chosen lazily by the first non-consecutive successor.
+
+    for (RABlock* successor : block->successors()) {
+      if (successor == consecutive) // The consecutive successor does not take part in the sharing.
+        continue;
+
+      if (successor->hasSharedAssignmentId()) {
+        if (sharedAssignmentId == Globals::kInvalidId)
+          sharedAssignmentId = successor->sharedAssignmentId(); // Adopt the id the successor already has.
+        else
+          _sharedAssignmentsMap[successor->sharedAssignmentId()] = sharedAssignmentId; // Redirect its id to the chosen one.
+      }
+      else {
+        if (sharedAssignmentId == Globals::kInvalidId)
+          ASMJIT_PROPAGATE(newSharedAssignmentId(&sharedAssignmentId));
+        successor->setSharedAssignmentId(sharedAssignmentId);
+      }
+    }
+    return kErrorOk;
+  }
+
+  Error newSharedAssignmentId(uint32_t* out) noexcept {
+    uint32_t id = _sharedAssignmentsMap.size(); // The new id is the current size of the map.
+    ASMJIT_PROPAGATE(_sharedAssignmentsMap.append(_pass->allocator(), id)); // The appended entry holds its own id.
+
+    *out = id; // `*out` is written only when the append succeeded.
+    return kErrorOk;
+  }
+
+  //! \}
+
+  //! \name Logging
+  //! \{
+
+#ifndef ASMJIT_NO_LOGGING // Real implementations; each public helper does nothing when `_logger` is null.
+  template<typename... Args>
+  inline void log(const char* fmt, Args&&... args) noexcept {
+    if (_logger)
+      _logger->logf(fmt, std::forward<Args>(args)...);
+  }
+
+  inline void logBlock(RABlock* block, uint32_t indentation = 0) noexcept {
+    if (_logger)
+      _logBlock(block, indentation);
+  }
+
+  inline void logNode(BaseNode* node, uint32_t indentation = 0, const char* action = nullptr) noexcept {
+    if (_logger)
+      _logNode(node, indentation, action);
+  }
+
+  void _logBlock(RABlock* block, uint32_t indentation) noexcept { // Requires a non-null `_logger`.
+    _sb.clear();
+    _sb.appendChars(' ', indentation);
+    _sb.appendFormat("{#%u}\n", block->blockId());
+    _logger->log(_sb);
+    _lastLoggedBlock = block; // Remembered so that run() can skip logging the same block header again.
+  }
+
+  void _logNode(BaseNode* node, uint32_t indentation, const char* action) noexcept { // Requires a non-null `_logger`.
+    _sb.clear();
+    _sb.appendChars(' ', indentation);
+    if (action) { // Optional prefix such as "<Removed>".
+      _sb.append(action);
+      _sb.append(' ');
+    }
+    Formatter::formatNode(_sb, _formatOptions, cc(), node);
+    _sb.append('\n');
+    _logger->log(_sb);
+  }
+#else // Logging compiled out: same signatures, arguments are only marked as unused.
+  template<typename... Args>
+  inline void log(const char* fmt, Args&&... args) noexcept {
+    DebugUtils::unused(fmt);
+    DebugUtils::unused(std::forward<Args>(args)...);
+  }
+
+  inline void logBlock(RABlock* block, uint32_t indentation = 0) noexcept {
+    DebugUtils::unused(block, indentation);
+  }
+
+  inline void logNode(BaseNode* node, uint32_t indentation = 0, const char* action = nullptr) noexcept {
+    DebugUtils::unused(node, indentation, action);
+  }
+#endif
+
+  //! \}
+};
+
+//! \}
+//! \endcond
+
+ASMJIT_END_NAMESPACE
+
+#endif // !ASMJIT_NO_COMPILER
+#endif // ASMJIT_CORE_RABUILDERS_P_H_INCLUDED
diff --git a/lib/lepton/asmjit/core/radefs_p.h b/lib/lepton/asmjit/core/radefs_p.h
new file mode 100644
index 0000000000..15c50ff728
--- /dev/null
+++ b/lib/lepton/asmjit/core/radefs_p.h
@@ -0,0 +1,1204 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_RADEFS_P_H_INCLUDED
+#define ASMJIT_CORE_RADEFS_P_H_INCLUDED
+
+#include "../core/api-config.h"
+#include "../core/archtraits.h"
+#include "../core/compilerdefs.h"
+#include "../core/logger.h"
+#include "../core/operand.h"
+#include "../core/support.h"
+#include "../core/type.h"
+#include "../core/zone.h"
+#include "../core/zonevector.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \cond INTERNAL
+//! \addtogroup asmjit_ra
+//! \{
+
+#ifndef ASMJIT_NO_LOGGING // Both macros reference a variable named `logger` that must be visible where they expand.
+# define ASMJIT_RA_LOG_FORMAT(...)  \
+  do {                              \
+    if (logger)                     \
+      logger->logf(__VA_ARGS__);    \
+  } while (0)
+# define ASMJIT_RA_LOG_COMPLEX(...) \
+  do {                              \
+    if (logger) {                   \
+      __VA_ARGS__                   \
+    }                               \
+  } while (0)
+#else // ASMJIT_NO_LOGGING - both macros expand to a no-op expression and never evaluate their arguments.
+# define ASMJIT_RA_LOG_FORMAT(...) ((void)0)
+# define ASMJIT_RA_LOG_COMPLEX(...) ((void)0)
+#endif // !ASMJIT_NO_LOGGING
+
+class BaseRAPass;
+class RABlock;
+class BaseNode;
+struct RAStackSlot;
+
+typedef ZoneVector<RABlock*> RABlocks;
+typedef ZoneVector<RAWorkReg*> RAWorkRegs;
+
+//! Maximum number of consecutive registers aggregated from all supported backends.
+static constexpr uint32_t kMaxConsecutiveRegs = 4;
+
+//! Provides architecture constraints used by register allocator.
+class RAConstraints {
+public:
+  //! \name Members
+  //! \{
+
+  Support::Array<RegMask, Globals::kNumVirtGroups> _availableRegs {}; //!< Mask of available registers, one entry per virtual register group.
+
+  //! \}
+
+  ASMJIT_NOINLINE Error init(Arch arch) noexcept { // Fills `_availableRegs` for `arch`; any architecture not listed below yields `kErrorInvalidArch`.
+    switch (arch) {
+      case Arch::kX86:
+      case Arch::kX64: {
+        uint32_t registerCount = arch == Arch::kX86 ? 8 : 16; // 8 registers per GP/vector group on X86, 16 on X64.
+        _availableRegs[RegGroup::kGp] = Support::lsbMask<RegMask>(registerCount) & ~Support::bitMask(4u); // Register id 4 is excluded (presumably the stack pointer - TODO confirm).
+        _availableRegs[RegGroup::kVec] = Support::lsbMask<RegMask>(registerCount);
+        _availableRegs[RegGroup::kExtraVirt2] = Support::lsbMask<RegMask>(8);
+        _availableRegs[RegGroup::kExtraVirt3] = Support::lsbMask<RegMask>(8);
+        return kErrorOk;
+      }
+
+      case Arch::kAArch64: {
+        _availableRegs[RegGroup::kGp] = 0xFFFFFFFFu & ~Support::bitMask(18, 31u); // Register ids 18 and 31 are excluded (presumably platform register and SP/ZR - TODO confirm).
+        _availableRegs[RegGroup::kVec] = 0xFFFFFFFFu;
+        _availableRegs[RegGroup::kExtraVirt2] = 0; // The extra virtual groups have no available registers on AArch64.
+        _availableRegs[RegGroup::kExtraVirt3] = 0;
+        return kErrorOk;
+      }
+
+      default:
+        return DebugUtils::errored(kErrorInvalidArch);
+    }
+  }
+
+  inline RegMask availableRegs(RegGroup group) const noexcept { return _availableRegs[group]; } // Mask computed by `init()` for `group`.
+};
+
+enum class RAStrategyType : uint8_t {
+  kSimple  = 0, //!< Simple strategy; the value `RAStrategy` starts with and resets to.
+  kComplex = 1  //!< Complex strategy; `RAStrategy::isComplex()` is true for this value and anything above it.
+};
+ASMJIT_DEFINE_ENUM_COMPARE(RAStrategyType)
+
+enum class RAStrategyFlags : uint8_t {
+  kNone = 0 //!< No flags (currently the only enumerator).
+};
+ASMJIT_DEFINE_ENUM_FLAGS(RAStrategyFlags)
+
+//! Register allocation strategy.
+//!
+//! The idea is to select the best register allocation strategy for each virtual register group based on the
+//! complexity of the code.
+struct RAStrategy {
+  //! \name Members
+  //! \{
+
+  RAStrategyType _type = RAStrategyType::kSimple; //!< Selected strategy type.
+  RAStrategyFlags _flags = RAStrategyFlags::kNone; //!< Strategy flags.
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  inline void reset() noexcept { // Restores the defaults: simple strategy, no flags.
+    _type = RAStrategyType::kSimple;
+    _flags = RAStrategyFlags::kNone;
+  }
+
+  inline RAStrategyType type() const noexcept { return _type; }
+  inline void setType(RAStrategyType type) noexcept { _type = type; }
+
+  inline bool isSimple() const noexcept { return _type == RAStrategyType::kSimple; }
+  inline bool isComplex() const noexcept { return _type >= RAStrategyType::kComplex; } // `>=`: every type at or above kComplex counts as complex.
+
+  inline RAStrategyFlags flags() const noexcept { return _flags; }
+  inline bool hasFlag(RAStrategyFlags flag) const noexcept { return Support::test(_flags, flag); }
+  inline void addFlags(RAStrategyFlags flags) noexcept { _flags |= flags; } // ORs `flags` into the existing set; nothing is cleared.
+
+  //! \}
+};
+
+//! Count of virtual or physical registers per group.
+//!
+//! \note This class uses 8-bit integers to represent counters, it's only used in places where this is sufficient,
+//! for example total count of machine's physical registers, count of virtual registers per instruction, etc...
+//! There is also `RALiveCount`, which uses 32-bit integers and is indeed much safer.
+struct RARegCount {
+  //! \name Members
+  //! \{
+
+  union {
+    uint8_t _regs[4]; //!< One 8-bit counter per register group.
+    uint32_t _packed; //!< All four counters viewed as one 32-bit value.
+  };
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Resets all counters to zero.
+  inline void reset() noexcept { _packed = 0; }
+
+  //! \}
+
+  //! \name Overloaded Operators
+  //! \{
+
+  inline uint8_t& operator[](RegGroup group) noexcept { // Mutable access to the counter of `group`.
+    ASMJIT_ASSERT(group <= RegGroup::kMaxVirt);
+    return _regs[size_t(group)];
+  }
+
+  inline const uint8_t& operator[](RegGroup group) const noexcept { // Read-only access to the counter of `group`.
+    ASMJIT_ASSERT(group <= RegGroup::kMaxVirt);
+    return _regs[size_t(group)];
+  }
+
+  inline bool operator==(const RARegCount& other) const noexcept { return _packed == other._packed; } // Compares all four counters at once.
+  inline bool operator!=(const RARegCount& other) const noexcept { return _packed != other._packed; }
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns the count of registers by the given register `group`.
+  inline uint32_t get(RegGroup group) const noexcept {
+    ASMJIT_ASSERT(group <= RegGroup::kMaxVirt);
+
+    uint32_t shift = Support::byteShiftOfDWordStruct(uint32_t(group)); // Bit offset of the group's byte inside `_packed`.
+    return (_packed >> shift) & uint32_t(0xFF);
+  }
+
+  //! Sets the register count by a register `group`.
+  inline void set(RegGroup group, uint32_t n) noexcept {
+    ASMJIT_ASSERT(group <= RegGroup::kMaxVirt);
+    ASMJIT_ASSERT(n <= 0xFF);
+
+    uint32_t shift = Support::byteShiftOfDWordStruct(uint32_t(group));
+    _packed = (_packed & ~(uint32_t(0xFF) << shift)) + (n << shift); // Mask is built unsigned: `0xFF << 24` on `int` would shift into the sign bit.
+  }
+
+  //! Adds the register count by a register `group`.
+  inline void add(RegGroup group, uint32_t n = 1) noexcept {
+    ASMJIT_ASSERT(group <= RegGroup::kMaxVirt);
+    ASMJIT_ASSERT(0xFF - uint32_t(_regs[size_t(group)]) >= n); // The 8-bit counter must not overflow into the neighboring group.
+
+    uint32_t shift = Support::byteShiftOfDWordStruct(uint32_t(group));
+    _packed += n << shift;
+  }
+
+  //! \}
+};
+
+//! Provides mapping that can be used to fast index architecture register groups.
+struct RARegIndex : public RARegCount {
+  //! Build register indexes based on the given `count` of registers.
+  ASMJIT_FORCE_INLINE void buildIndexes(const RARegCount& count) noexcept {
+    uint32_t x = uint32_t(count._regs[0]);     // Running sum after group 0.
+    uint32_t y = uint32_t(count._regs[1]) + x; // Running sum after groups 0..1.
+    uint32_t z = uint32_t(count._regs[2]) + y; // Running sum after groups 0..2.
+
+    ASMJIT_ASSERT(y <= 0xFF); // Each index is stored in 8 bits.
+    ASMJIT_ASSERT(z <= 0xFF);
+    _packed = Support::bytepack32_4x8(0, x, y, z); // Exclusive prefix sums 0, x, y, z (presumably packed in group order - confirm against Support::bytepack32_4x8).
+  }
+};
+
+//! Registers mask.
+struct RARegMask {
+  //! \name Members
+  //! \{
+
+  Support::Array<RegMask, Globals::kNumVirtGroups> _masks; //!< One register mask per virtual register group (no default initializer; call reset() or init()).
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  inline void init(const RARegMask& other) noexcept { _masks = other._masks; } // Copies all masks from `other`.
+  //! Reset all register masks to zero.
+  inline void reset() noexcept { _masks.fill(0); }
+
+  //! \}
+
+  //! \name Overloaded Operators
+  //! \{
+
+  inline bool operator==(const RARegMask& other) const noexcept { return _masks == other._masks; }
+  inline bool operator!=(const RARegMask& other) const noexcept { return _masks != other._masks; }
+
+  template<typename Index>
+  inline uint32_t& operator[](const Index& index) noexcept { return _masks[index]; } // `Index` is forwarded unchanged to the underlying array.
+
+  template<typename Index>
+  inline const uint32_t& operator[](const Index& index) const noexcept { return _masks[index]; }
+
+  //! \}
+
+  //! \name Utilities
+  //! \{
+
+  //! Tests whether all register masks are zero (empty).
+  inline bool empty() const noexcept {
+    return _masks.aggregate<Support::Or>() == 0; // OR of all groups is zero only if every mask is zero.
+  }
+
+  inline bool has(RegGroup group, RegMask mask = 0xFFFFFFFFu) const noexcept { // True if `group` has at least one of the bits in `mask` set.
+    return (_masks[group] & mask) != 0;
+  }
+
+  template<class Operator>
+  inline void op(const RARegMask& other) noexcept { // Combines every group mask with the matching mask of `other` using `Operator`.
+    _masks.combine<Operator>(other._masks);
+  }
+
+  template<class Operator>
+  inline void op(RegGroup group, RegMask mask) noexcept { // Applies `Operator::op` to a single group's mask.
+    _masks[group] = Operator::op(_masks[group], mask);
+  }
+
+  inline void clear(RegGroup group, RegMask mask) noexcept { // Clears the bits of `mask` in the mask of `group`.
+    _masks[group] = _masks[group] & ~mask;
+  }
+
+  //! \}
+};
+
+//! Information associated with each instruction, propagated to blocks, loops, and the whole function. This
+//! information can be used to do minor decisions before the register allocator tries to do its job. For
+//! example to use fast register allocation inside a block or loop it cannot have clobbered and/or fixed
+//! registers, etc...
+class RARegsStats {
+public:
+  //! \name Constants
+  //! \{
+
+  enum Index : uint32_t { // Bit offset of each 8-bit field inside `_packed`; a register group selects a bit within the field.
+    kIndexUsed       = 0,
+    kIndexFixed      = 8,
+    kIndexClobbered  = 16
+  };
+
+  enum Mask : uint32_t { // Mask covering the whole 8-bit field of each category.
+    kMaskUsed        = 0xFFu << kIndexUsed,
+    kMaskFixed       = 0xFFu << kIndexFixed,
+    kMaskClobbered   = 0xFFu << kIndexClobbered
+  };
+
+  //! \}
+
+  //! \name Members
+  //! \{
+
+  uint32_t _packed = 0; //!< Used / fixed / clobbered bits, one bit per register group in each field.
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  inline void reset() noexcept { _packed = 0; }
+  inline void combineWith(const RARegsStats& other) noexcept { _packed |= other._packed; } // Union of both statistics.
+
+  inline bool hasUsed() const noexcept { return (_packed & kMaskUsed) != 0u; } // Any group marked as used.
+  inline bool hasUsed(RegGroup group) const noexcept { return (_packed & Support::bitMask(kIndexUsed + uint32_t(group))) != 0u; }
+  inline void makeUsed(RegGroup group) noexcept { _packed |= Support::bitMask(kIndexUsed + uint32_t(group)); }
+
+  inline bool hasFixed() const noexcept { return (_packed & kMaskFixed) != 0u; } // Any group marked as fixed.
+  inline bool hasFixed(RegGroup group) const noexcept { return (_packed & Support::bitMask(kIndexFixed + uint32_t(group))) != 0u; }
+  inline void makeFixed(RegGroup group) noexcept { _packed |= Support::bitMask(kIndexFixed + uint32_t(group)); }
+
+  inline bool hasClobbered() const noexcept { return (_packed & kMaskClobbered) != 0u; } // Any group marked as clobbered.
+  inline bool hasClobbered(RegGroup group) const noexcept { return (_packed & Support::bitMask(kIndexClobbered + uint32_t(group))) != 0u; }
+  inline void makeClobbered(RegGroup group) noexcept { _packed |= Support::bitMask(kIndexClobbered + uint32_t(group)); }
+
+  //! \}
+};
+
+//! Count of live registers, per group.
+class RALiveCount {
+public:
+  //! \name Members
+  //! \{
+
+  Support::Array<uint32_t, Globals::kNumVirtGroups> n {}; //!< 32-bit counter per virtual register group, zero-initialized.
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  inline RALiveCount() noexcept = default;
+  inline RALiveCount(const RALiveCount& other) noexcept = default;
+
+  inline void init(const RALiveCount& other) noexcept { n = other.n; } // Copies all counters from `other`.
+  inline void reset() noexcept { n.fill(0); } // Sets all counters to zero.
+
+  //! \}
+
+  //! \name Overloaded Operators
+  //! \{
+
+  inline RALiveCount& operator=(const RALiveCount& other) noexcept = default;
+
+  inline uint32_t& operator[](RegGroup group) noexcept { return n[group]; } // Counter of `group`.
+  inline const uint32_t& operator[](RegGroup group) const noexcept { return n[group]; }
+
+  //! \}
+
+  //! \name Utilities
+  //! \{
+
+  template<class Operator>
+  inline void op(const RALiveCount& other) noexcept { n.combine<Operator>(other.n); } // Combines each counter with the matching counter of `other` using `Operator`.
+
+  //! \}
+};
+
+struct RALiveInterval { // Pair of positions `a` and `b`; valid only when `a < b` (see isValid()).
+  //! \name Constants
+  //! \{
+
+  enum : uint32_t {
+    kNaN = 0,
+    kInf = 0xFFFFFFFFu // Value of `b` that RALiveSpans::isOpen() treats as a still-open span.
+  };
+
+  //! \}
+
+  //! \name Members
+  //! \{
+
+  uint32_t a, b; // Start (`a`) and end (`b`) positions of the interval.
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  inline RALiveInterval() noexcept : a(0), b(0) {} // Empty interval (0, 0), which is not valid.
+  inline RALiveInterval(uint32_t a, uint32_t b) noexcept : a(a), b(b) {}
+  inline RALiveInterval(const RALiveInterval& other) noexcept : a(other.a), b(other.b) {}
+
+  inline void init(uint32_t aVal, uint32_t bVal) noexcept {
+    a = aVal;
+    b = bVal;
+  }
+  inline void init(const RALiveInterval& other) noexcept { init(other.a, other.b); }
+  inline void reset() noexcept { init(0, 0); } // Back to the empty interval (0, 0).
+
+  //! \}
+
+  //! \name Overloaded Operators
+  //! \{
+
+  inline RALiveInterval& operator=(const RALiveInterval& other) = default;
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  inline bool isValid() const noexcept { return a < b; } // Strict comparison: an interval with `a == b` is not valid.
+  inline uint32_t width() const noexcept { return b - a; } // Unsigned subtraction; only meaningful when `a <= b`.
+
+  //! \}
+};
+
+//! Live span with payload of type `T` (an interval that also derives from its payload type).
+template<typename T>
+class RALiveSpan : public RALiveInterval, public T {
+public:
+  //! \name Types
+  //! \{
+
+  typedef T DataType; //!< Payload type, used by `RALiveSpans` to name the data passed alongside spans.
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  inline RALiveSpan() noexcept : RALiveInterval(), T() {} // Empty interval with default-constructed payload.
+  inline RALiveSpan(const RALiveSpan<T>& other) noexcept : RALiveInterval(other), T(static_cast<const T&>(other)) {} // Copies the payload too, like operator= does.
+  inline RALiveSpan(const RALiveInterval& interval, const T& data) noexcept : RALiveInterval(interval), T(data) {}
+  inline RALiveSpan(uint32_t a, uint32_t b) noexcept : RALiveInterval(a, b), T() {} // Interval [a, b) data with default-constructed payload.
+  inline RALiveSpan(uint32_t a, uint32_t b, const T& data) noexcept : RALiveInterval(a, b), T(data) {}
+
+  inline void init(const RALiveSpan<T>& other) noexcept { // Copies both the interval and the payload of `other`.
+    RALiveInterval::init(static_cast<const RALiveInterval&>(other));
+    T::init(static_cast<const T&>(other));
+  }
+
+  inline void init(const RALiveSpan<T>& span, const T& data) noexcept { // Interval from `span`, payload from `data` (the payload of `span` is ignored).
+    RALiveInterval::init(static_cast<const RALiveInterval&>(span));
+    T::init(data);
+  }
+
+  inline void init(const RALiveInterval& interval, const T& data) noexcept { // Interval from `interval`, payload from `data`.
+    RALiveInterval::init(interval);
+    T::init(data);
+  }
+
+  //! \}
+
+  //! \name Overloaded Operators
+  //! \{
+
+  inline RALiveSpan& operator=(const RALiveSpan& other) { // Assigns interval and payload via init().
+    init(other);
+    return *this;
+  }
+
+  //! \}
+};
+
+//! Vector of `RALiveSpan<T>` with additional convenience API.
+template<typename T>
+class RALiveSpans {
+public:
+  ASMJIT_NONCOPYABLE(RALiveSpans)
+
+  typedef typename T::DataType DataType; // Payload type of the span type `T`.
+  ZoneVector<T> _data; // Spans; the merge/intersection code below walks them in order and assumes they are sorted by position.
+
+  //! \name Construction & Destruction
+  //! \{
+
+  inline RALiveSpans() noexcept : _data() {}
+
+  inline void reset() noexcept { _data.reset(); }
+  inline void release(ZoneAllocator* allocator) noexcept { _data.release(allocator); }
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  inline bool empty() const noexcept { return _data.empty(); }
+  inline uint32_t size() const noexcept { return _data.size(); }
+
+  inline T* data() noexcept { return _data.data(); }
+  inline const T* data() const noexcept { return _data.data(); }
+
+  inline bool isOpen() const noexcept { // True when the last span ends at RALiveInterval::kInf.
+    uint32_t size = _data.size();
+    return size > 0 && _data[size - 1].b == RALiveInterval::kInf;
+  }
+
+  //! \}
+
+  //! \name Utilities
+  //! \{
+
+  inline void swap(RALiveSpans<T>& other) noexcept { _data.swap(other._data); }
+
+  //! Open the current live span.
+  ASMJIT_FORCE_INLINE Error openAt(ZoneAllocator* allocator, uint32_t start, uint32_t end) noexcept {
+    bool wasOpen; // Output of the overload below; the value is discarded here.
+    return openAt(allocator, start, end, wasOpen);
+  }
+
+  ASMJIT_FORCE_INLINE Error openAt(ZoneAllocator* allocator, uint32_t start, uint32_t end, bool& wasOpen) noexcept {
+    uint32_t size = _data.size();
+    wasOpen = false;
+
+    if (size > 0) {
+      T& last = _data[size - 1];
+      if (last.b >= start) { // The last span touches or overlaps `start`: extend it instead of appending a new span.
+        wasOpen = last.b > start; // Reported only when the last span ended strictly after `start`.
+        last.b = end;
+        return kErrorOk;
+      }
+    }
+
+    return _data.append(allocator, T(start, end)); // Otherwise append a new span [start, end).
+  }
+
+  ASMJIT_FORCE_INLINE void closeAt(uint32_t end) noexcept { // Sets the end of the last span to `end`; requires at least one span.
+    ASMJIT_ASSERT(!empty());
+
+    uint32_t size = _data.size();
+    _data[size - 1].b = end;
+  }
+
+  //! Returns the sum of width of all spans.
+  //!
+  //! \note Don't overuse, this iterates over all spans so it's O(N). It should be only called once and then cached.
+  inline uint32_t width() const noexcept {
+    uint32_t width = 0;
+    for (const T& span : _data)
+      width += span.width();
+    return width;
+  }
+
+  inline T& operator[](uint32_t index) noexcept { return _data[index]; }
+  inline const T& operator[](uint32_t index) const noexcept { return _data[index]; }
+
+  inline bool intersects(const RALiveSpans<T>& other) const noexcept { // Member convenience over the static intersects(x, y).
+    return intersects(*this, other);
+  }
+
+  ASMJIT_FORCE_INLINE Error nonOverlappingUnionOf(ZoneAllocator* allocator, const RALiveSpans<T>& x, const RALiveSpans<T>& y, const DataType& yData) noexcept {
+    uint32_t finalSize = x.size() + y.size(); // The union holds every span of both inputs.
+    ASMJIT_PROPAGATE(_data.reserve(allocator, finalSize));
+
+    T* dstPtr = _data.data(); // Output is written through a raw pointer; the vector end is fixed up at `Done`.
+    const T* xSpan = x.data();
+    const T* ySpan = y.data();
+
+    const T* xEnd = xSpan + x.size();
+    const T* yEnd = ySpan + y.size();
+
+    // Loop until we have intersection or either `xSpan == xEnd` or `ySpan == yEnd`, which means that there is no
+    // intersection. We advance either `xSpan` or `ySpan` depending on their ranges.
+    if (xSpan != xEnd && ySpan != yEnd) {
+      uint32_t xa, ya;
+      xa = xSpan->a;
+      for (;;) {
+        while (ySpan->b <= xa) { // Emit every y span that ends before the current x span starts.
+          dstPtr->init(*ySpan, yData); // Spans taken from `y` get `yData` as their payload.
+          dstPtr++;
+          if (++ySpan == yEnd)
+            goto Done;
+        }
+
+        ya = ySpan->a;
+        while (xSpan->b <= ya) { // Emit every x span that ends before the current y span starts.
+          *dstPtr++ = *xSpan; // Spans taken from `x` keep their own payload.
+          if (++xSpan == xEnd)
+            goto Done;
+        }
+
+        // We know that `xSpan->b > ySpan->a`, so check if `ySpan->b > xSpan->a`.
+        xa = xSpan->a;
+        if (ySpan->b > xa)
+          return 0xFFFFFFFFu; // Spans overlap: reported with the 0xFFFFFFFF sentinel; the union is not completed.
+      }
+    }
+
+  Done:
+    while (xSpan != xEnd) { // Copy whatever remains of `x`.
+      *dstPtr++ = *xSpan++;
+    }
+
+    while (ySpan != yEnd) { // Copy whatever remains of `y`, again tagging it with `yData`.
+      dstPtr->init(*ySpan, yData);
+      dstPtr++;
+      ySpan++;
+    }
+
+    _data._setEndPtr(dstPtr); // Commit the number of spans actually written.
+    return kErrorOk;
+  }
+
+  static ASMJIT_FORCE_INLINE bool intersects(const RALiveSpans<T>& x, const RALiveSpans<T>& y) noexcept {
+    const T* xSpan = x.data();
+    const T* ySpan = y.data();
+
+    const T* xEnd = xSpan + x.size();
+    const T* yEnd = ySpan + y.size();
+
+    // Loop until we have intersection or either `xSpan == xEnd` or `ySpan == yEnd`, which means that there is no
+    // intersection. We advance either `xSpan` or `ySpan` depending on their end positions.
+    if (xSpan == xEnd || ySpan == yEnd)
+      return false;
+
+    uint32_t xa, ya;
+    xa = xSpan->a;
+
+    for (;;) {
+      while (ySpan->b <= xa) // Skip y spans that end before the current x span starts.
+        if (++ySpan == yEnd)
+          return false;
+
+      ya = ySpan->a;
+      while (xSpan->b <= ya) // Skip x spans that end before the current y span starts.
+        if (++xSpan == xEnd)
+          return false;
+
+      // We know that `xSpan->b > ySpan->a`, so check if `ySpan->b > xSpan->a`.
+      xa = xSpan->a;
+      if (ySpan->b > xa)
+        return true;
+    }
+  }
+
+  //! \}
+};
+
+//! Statistics about a register liveness.
+class RALiveStats {
+public:
+  uint32_t _width = 0; // Presumably the summed width of the register's live spans - TODO confirm against the code that fills it.
+  float _freq = 0.0f; // Frequency value; how it is computed is not visible in this header.
+  float _priority = 0.0f; // Priority value; how it is computed is not visible in this header.
+
+  //! \name Accessors
+  //! \{
+
+  inline uint32_t width() const noexcept { return _width; }
+  inline float freq() const noexcept { return _freq; }
+  inline float priority() const noexcept { return _priority; }
+
+  //! \}
+};
+
+struct LiveRegData { // Payload type of `LiveRegSpan`: a single register id.
+  uint32_t id; // Register id; `BaseReg::kIdBad` when default-constructed.
+
+  inline explicit LiveRegData(uint32_t id = BaseReg::kIdBad) noexcept : id(id) {}
+  inline LiveRegData(const LiveRegData& other) noexcept : id(other.id) {}
+
+  inline void init(const LiveRegData& other) noexcept { id = other.id; } // Required by RALiveSpan<T>::init(), which calls T::init().
+
+  inline bool operator==(const LiveRegData& other) const noexcept { return id == other.id; }
+  inline bool operator!=(const LiveRegData& other) const noexcept { return id != other.id; }
+};
+
+typedef RALiveSpan<LiveRegData> LiveRegSpan;
+typedef RALiveSpans<LiveRegSpan> LiveRegSpans;
+
+//! Flags used by \ref RATiedReg.
+//!
+//! Register access information is encoded in 4 flags in total:
+//!
+//!   - `kRead`  - Register is Read    (ReadWrite if combined with `kWrite`).
+//!   - `kWrite` - Register is Written (ReadWrite if combined with `kRead`).
+//!   - `kUse`   - Encoded as Read or ReadWrite.
+//!   - `kOut`   - Encoded as WriteOnly.
+//!
+//! Let's describe all of these on two X86 instructions:
+//!
+//!   - ADD x{R|W|Use},  x{R|Use}              -> {x:R|W|Use            }
+//!   - LEA x{  W|Out}, [x{R|Use} + x{R|Out}]  -> {x:R|W|Use|Out        }
+//!   - ADD x{R|W|Use},  y{R|Use}              -> {x:R|W|Use     y:R|Use}
+//!   - LEA x{  W|Out}, [x{R|Use} + y{R|Out}]  -> {x:R|W|Use|Out y:R|Use}
+//!
+//! It should be obvious from the example above how these flags get created. Each operand contains READ/WRITE
+//! information, which is then merged to RATiedReg's flags. However, we also need to represent the possibility
+//! to view the operation as two independent operations - USE and OUT, because the register allocator first
+//! allocates USE registers, and then assigns OUT registers independently of USE registers.
+enum class RATiedFlags : uint32_t {
+  //! No flags.
+  kNone = 0,
+
+  // Access Flags
+  // ------------
+
+  //! Register is read.
+  kRead = uint32_t(OpRWFlags::kRead),
+  //! Register is written.
+  kWrite = uint32_t(OpRWFlags::kWrite),
+  //! Register both read and written.
+  kRW = uint32_t(OpRWFlags::kRW),
+
+  // Use / Out Flags
+  // ---------------
+
+  //! Register has a USE slot (read/rw).
+  kUse = 0x00000004u,
+  //! Register has an OUT slot (write-only).
+  kOut = 0x00000008u,
+  //! Register in USE slot can be patched to memory.
+  kUseRM = 0x00000010u,
+  //! Register in OUT slot can be patched to memory.
+  kOutRM = 0x00000020u,
+
+  //! Register has a fixed USE slot.
+  kUseFixed = 0x00000040u,
+  //! Register has a fixed OUT slot.
+  kOutFixed = 0x00000080u,
+  //! Register USE slot has been allocated.
+  kUseDone = 0x00000100u,
+  //! Register OUT slot has been allocated.
+  kOutDone = 0x00000200u,
+
+  // Consecutive Flags / Data
+  // ------------------------
+
+  kUseConsecutive = 0x00000400u,  //!< Tested by RATiedReg::isUseConsecutive() (presumably: USE slot belongs to a consecutive register sequence).
+  kOutConsecutive = 0x00000800u,  //!< Tested by RATiedReg::isOutConsecutive() (presumably: OUT slot belongs to a consecutive register sequence).
+  kLeadConsecutive = 0x00001000u, //!< Tested by RATiedReg::isLeadConsecutive() (presumably: first register of a consecutive sequence).
+  kConsecutiveData = 0x00006000u, //!< Two-bit field (values 0..3) accessed via RATiedReg::consecutiveDataToFlags() / consecutiveDataFromFlags().
+
+  // Liveness Flags
+  // --------------
+
+  //! Register must be duplicated (function call only).
+  kDuplicate = 0x00010000u,
+  //! Last occurrence of this VirtReg in basic block.
+  kLast = 0x00020000u,
+  //! Kill this VirtReg after use.
+  kKill = 0x00040000u,
+
+  // X86 Specific Flags
+  // ------------------
+
+  // Architecture specific flags are used during RATiedReg building to ensure that architecture-specific constraints
+  // are handled properly. These flags are not really needed after RATiedReg[] is built and copied to `RAInst`.
+
+  //! This RATiedReg references GPB-LO or GPB-HI.
+  kX86_Gpb = 0x01000000u,
+
+  // Instruction Flags (Never used by RATiedReg)
+  // -------------------------------------------
+
+  //! Instruction is transformable to another instruction if necessary.
+  //!
+  //! This is a flag that is only used by \ref RAInst to inform the register allocator that the instruction has some
+  //! constraints that can only be solved by transforming the instruction into another instruction, most likely
+  //! by changing its InstId.
+  kInst_IsTransformable = 0x80000000u
+};
+ASMJIT_DEFINE_ENUM_FLAGS(RATiedFlags)
+
+static_assert(uint32_t(RATiedFlags::kRead ) == 0x1, "RATiedFlags::kRead must be 0x1");
+static_assert(uint32_t(RATiedFlags::kWrite) == 0x2, "RATiedFlags::kWrite must be 0x2");
+static_assert(uint32_t(RATiedFlags::kRW   ) == 0x3, "RATiedFlags::kRW must be 0x3");
+
+//! Tied register merges one or more register operands into a single entity. It contains information about its access
+//! (Read|Write) and allocation slots (Use|Out) that are used by the register allocator and liveness analysis.
+struct RATiedReg {
+  //! \name Members
+  //! \{
+
+  //! WorkReg id.
+  uint32_t _workId;
+  //! WorkReg id that is an immediate consecutive parent of this register, or Globals::kInvalidId if it has no parent.
+  uint32_t _consecutiveParent;
+  //! Allocation flags.
+  RATiedFlags _flags;
+
+  union {
+    struct {
+      //! How many times the VirtReg is referenced in all operands.
+      uint8_t _refCount;
+      //! Size of a memory operand in case it's used instead of the register.
+      uint8_t _rmSize;
+      //! Physical register for use operation (ReadOnly / ReadWrite).
+      uint8_t _useId;
+      //! Physical register for out operation (WriteOnly).
+      uint8_t _outId;
+    };
+    //! Packed data.
+    uint32_t _packed;
+  };
+
+  //! Registers where inputs {R|X} can be allocated to.
+  RegMask _useRegMask;
+  //! Registers where outputs {W} can be allocated to.
+  RegMask _outRegMask;
+  //! Indexes used to rewrite USE regs.
+  uint32_t _useRewriteMask;
+  //! Indexes used to rewrite OUT regs.
+  uint32_t _outRewriteMask;
+
+  //! \}
+
+  //! \name Statics
+  //! \{
+
+  static inline RATiedFlags consecutiveDataToFlags(uint32_t offset) noexcept { // Encodes `offset` (0..3) into the kConsecutiveData bit-field.
+    ASMJIT_ASSERT(offset < 4);
+    constexpr uint32_t kOffsetShift = Support::ConstCTZ<uint32_t(RATiedFlags::kConsecutiveData)>::value;
+    return (RATiedFlags)(offset << kOffsetShift);
+  }
+
+  static inline uint32_t consecutiveDataFromFlags(RATiedFlags flags) noexcept { // Decodes the kConsecutiveData bit-field of `flags`.
+    constexpr uint32_t kOffsetShift = Support::ConstCTZ<uint32_t(RATiedFlags::kConsecutiveData)>::value;
+    return uint32_t(flags & RATiedFlags::kConsecutiveData) >> kOffsetShift;
+  }
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  inline void init(uint32_t workId, RATiedFlags flags, RegMask useRegMask, uint32_t useId, uint32_t useRewriteMask, RegMask outRegMask, uint32_t outId, uint32_t outRewriteMask, uint32_t rmSize = 0, uint32_t consecutiveParent = Globals::kInvalidId) noexcept {
+    _workId = workId;
+    _consecutiveParent = consecutiveParent;
+    _flags = flags;
+    _refCount = 1; // A freshly initialized tied register counts as referenced once.
+    _rmSize = uint8_t(rmSize); // `rmSize`, `useId` and `outId` are truncated to 8 bits.
+    _useId = uint8_t(useId);
+    _outId = uint8_t(outId);
+    _useRegMask = useRegMask;
+    _outRegMask = outRegMask;
+    _useRewriteMask = useRewriteMask;
+    _outRewriteMask = outRewriteMask;
+  }
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns the associated WorkReg id.
+  inline uint32_t workId() const noexcept { return _workId; }
+
+  inline bool hasConsecutiveParent() const noexcept { return _consecutiveParent != Globals::kInvalidId; }
+  inline uint32_t consecutiveParent() const noexcept { return _consecutiveParent; }
+  inline uint32_t consecutiveData() const noexcept { return consecutiveDataFromFlags(_flags); }
+
+  //! Returns TiedReg flags.
+  inline RATiedFlags flags() const noexcept { return _flags; }
+  //! Checks if the given `flag` is set.
+  inline bool hasFlag(RATiedFlags flag) const noexcept { return Support::test(_flags, flag); }
+  //! Adds tied register flags.
+  inline void addFlags(RATiedFlags flags) noexcept { _flags |= flags; }
+
+  //! Tests whether the register is read (returns `true` also if it's Read/Write).
+  inline bool isRead() const noexcept { return hasFlag(RATiedFlags::kRead); }
+  //! Tests whether the register is written (returns `true` also if it's Read/Write).
+  inline bool isWrite() const noexcept { return hasFlag(RATiedFlags::kWrite); }
+  //! Tests whether the register is read only.
+  inline bool isReadOnly() const noexcept { return (_flags & RATiedFlags::kRW) == RATiedFlags::kRead; }
+  //! Tests whether the register is write only.
+  inline bool isWriteOnly() const noexcept { return (_flags & RATiedFlags::kRW) == RATiedFlags::kWrite; }
+  //! Tests whether the register is read and written.
+  inline bool isReadWrite() const noexcept { return (_flags & RATiedFlags::kRW) == RATiedFlags::kRW; }
+
+  //! Tests whether the tied register has use operand (Read/ReadWrite).
+  inline bool isUse() const noexcept { return hasFlag(RATiedFlags::kUse); }
+  //! Tests whether the tied register has out operand (Write).
+  inline bool isOut() const noexcept { return hasFlag(RATiedFlags::kOut); }
+
+  //! Tests whether the tied register has \ref RATiedFlags::kLeadConsecutive flag set.
+  inline bool isLeadConsecutive() const noexcept { return hasFlag(RATiedFlags::kLeadConsecutive); }
+  //! Tests whether the tied register has \ref RATiedFlags::kUseConsecutive flag set.
+  inline bool isUseConsecutive() const noexcept { return hasFlag(RATiedFlags::kUseConsecutive); }
+  //! Tests whether the tied register has \ref RATiedFlags::kOutConsecutive flag set.
+  inline bool isOutConsecutive() const noexcept { return hasFlag(RATiedFlags::kOutConsecutive); }
+
+  //! Tests whether the tied register has any consecutive flag.
+  inline bool hasAnyConsecutiveFlag() const noexcept { return hasFlag(RATiedFlags::kLeadConsecutive | RATiedFlags::kUseConsecutive | RATiedFlags::kOutConsecutive); }
+
+  //! Tests whether the USE slot can be patched to memory operand.
+  inline bool hasUseRM() const noexcept { return hasFlag(RATiedFlags::kUseRM); }
+  //! Tests whether the OUT slot can be patched to memory operand.
+  inline bool hasOutRM() const noexcept { return hasFlag(RATiedFlags::kOutRM); }
+
+  inline uint32_t rmSize() const noexcept { return _rmSize; }
+
+  inline void makeReadOnly() noexcept { // Drops Out/Write, forces Use, and moves the OUT rewrite indexes to the USE side.
+    _flags = (_flags & ~(RATiedFlags::kOut | RATiedFlags::kWrite)) | RATiedFlags::kUse;
+    _useRewriteMask |= _outRewriteMask;
+    _outRewriteMask = 0;
+  }
+
+  inline void makeWriteOnly() noexcept { // Drops Use/Read, forces Out, and moves the USE rewrite indexes to the OUT side.
+    _flags = (_flags & ~(RATiedFlags::kUse | RATiedFlags::kRead)) | RATiedFlags::kOut;
+    _outRewriteMask |= _useRewriteMask;
+    _useRewriteMask = 0;
+  }
+
+  //! Tests whether the register would duplicate.
+  inline bool isDuplicate() const noexcept { return hasFlag(RATiedFlags::kDuplicate); }
+
+  //! Tests whether the register (and the instruction it's part of) appears last in the basic block.
+  inline bool isLast() const noexcept { return hasFlag(RATiedFlags::kLast); }
+  //! Tests whether the register should be killed after USEd and/or OUTed.
+  inline bool isKill() const noexcept { return hasFlag(RATiedFlags::kKill); }
+
+  //! Tests whether the register is OUT or KILL (used internally by local register allocator).
+  inline bool isOutOrKill() const noexcept { return hasFlag(RATiedFlags::kOut | RATiedFlags::kKill); }
+
+  //! Returns a register mask that describes allocable USE registers (Read/ReadWrite access).
+  inline RegMask useRegMask() const noexcept { return _useRegMask; }
+  //! Returns a register mask that describes allocable OUT registers (WriteOnly access).
+  inline RegMask outRegMask() const noexcept { return _outRegMask; }
+
+  inline uint32_t refCount() const noexcept { return _refCount; }
+  inline void addRefCount(uint32_t n = 1) noexcept { _refCount = uint8_t(_refCount + n); } // Stored in 8 bits, so the sum is truncated.
+
+  //! Tests whether the register must be allocated to a fixed physical register before it's used.
+  inline bool hasUseId() const noexcept { return _useId != BaseReg::kIdBad; }
+  //! Tests whether the register must be allocated to a fixed physical register before it's written.
+  inline bool hasOutId() const noexcept { return _outId != BaseReg::kIdBad; }
+
+  //! Returns a physical register id used for 'use' operation.
+  inline uint32_t useId() const noexcept { return _useId; }
+  //! Returns a physical register id used for 'out' operation.
+  inline uint32_t outId() const noexcept { return _outId; }
+
+  inline uint32_t useRewriteMask() const noexcept { return _useRewriteMask; }
+  inline uint32_t outRewriteMask() const noexcept { return _outRewriteMask; }
+
+  //! Sets a physical register used for 'use' operation.
+  inline void setUseId(uint32_t index) noexcept { _useId = uint8_t(index); }
+  //! Sets a physical register used for 'out' operation.
+  inline void setOutId(uint32_t index) noexcept { _outId = uint8_t(index); }
+
+  inline bool isUseDone() const noexcept { return hasFlag(RATiedFlags::kUseDone); } // USE slot has been allocated.
+  inline bool isOutDone() const noexcept { return hasFlag(RATiedFlags::kOutDone); } // OUT slot has been allocated (tracked separately from USE).
+
+  inline void markUseDone() noexcept { addFlags(RATiedFlags::kUseDone); }
+  inline void markOutDone() noexcept { addFlags(RATiedFlags::kOutDone); }
+
+  //! \}
+};
+
+//! Flags used by \ref RAWorkReg.
+enum class RAWorkRegFlags : uint32_t {
+  //! No flags.
+  kNone = 0,
+
+  //! This register has already been allocated.
+  kAllocated = 0x00000001u,
+  //! Has been coalesced to another WorkReg.
+  kCoalesced = 0x00000002u,
+
+  //! Set when this register is used as a LEAD consecutive register at least once.
+  kLeadConsecutive = 0x00000004u,
+  //! Used to mark consecutive registers during processing.
+  kProcessedConsecutive = 0x00000008u,
+
+  //! Stack slot has to be allocated.
+  kStackUsed = 0x00000010u,
+  //! Stack allocation is preferred.
+  kStackPreferred = 0x00000020u,
+  //! Marked for stack argument reassignment.
+  kStackArgToStack = 0x00000040u
+};
+ASMJIT_DEFINE_ENUM_FLAGS(RAWorkRegFlags) // Presumably provides the bitwise operators for this enum, as used on RATiedFlags elsewhere in this header - confirm.
+
+//! Work register provides additional data of \ref VirtReg that is used by register allocator.
+//!
+//! In general when a virtual register is found by register allocator it maps it to \ref RAWorkReg
+//! and then only works with it. The reason for such mapping is that users can create many virtual
+//! registers, which are not used inside a register allocation scope (which is currently always a
+//! function). So register allocator basically scans the function for virtual registers and maps
+//! them into WorkRegs, which receive a temporary ID (workId), which starts from zero. This WorkId
+//! is then used in bit-arrays and other mappings.
+class RAWorkReg {
+public:
+  ASMJIT_NONCOPYABLE(RAWorkReg)
+
+  //! \name Constants
+  //! \{
+
+  enum : uint32_t {
+    kIdNone = 0xFFFFFFFFu
+  };
+
+  enum : uint32_t {
+    kNoArgIndex = 0xFFu // Value of `_argIndex` when the register is not bound to a function argument.
+  };
+
+  //! \}
+
+  //! \name Members
+  //! \{
+
+  //! RAPass specific ID used during analysis and allocation.
+  uint32_t _workId = 0;
+  //! Copy of ID used by \ref VirtReg.
+  uint32_t _virtId = 0;
+
+  //! Permanent association with \ref VirtReg.
+  VirtReg* _virtReg = nullptr;
+  //! Temporary association with \ref RATiedReg.
+  RATiedReg* _tiedReg = nullptr;
+  //! Stack slot associated with the register.
+  RAStackSlot* _stackSlot = nullptr;
+
+  //! Copy of a signature used by \ref VirtReg.
+  OperandSignature _signature {};
+  //! RAPass specific flags used during analysis and allocation.
+  RAWorkRegFlags _flags = RAWorkRegFlags::kNone;
+
+  //! Contains all USE ids collected from all instructions.
+  //!
+  //! If this mask is non-zero and not a power of two, it means that the register is used multiple times in
+  //! instructions where it requires to have a different use ID. This means that in general it's not possible
+  //! to keep this register in a single home.
+  RegMask _useIdMask = 0;
+  //! Preferred mask of registers (if non-zero) to allocate this register to.
+  //!
+  //! If this mask is zero it means that either there is no intersection of preferred registers collected from all
+  //! TiedRegs or there is no preference at all (the register can be allocated to any register all the time).
+  RegMask _preferredMask = 0xFFFFFFFFu; // Starts with all bits set, narrowed by `restrictPreferredMask()`.
+  //! Consecutive mask, which was collected from all instructions where this register was used as a lead consecutive
+  //! register.
+  RegMask _consecutiveMask = 0xFFFFFFFFu; // Starts with all bits set, narrowed by `restrictConsecutiveMask()`.
+  //! IDs of all physical registers that are clobbered during the lifetime of this WorkReg.
+  //!
+  //! This mask should be updated by `RAPass::buildLiveness()`, because it's global and should
+  //! be updated after unreachable code has been removed.
+  RegMask _clobberSurvivalMask = 0;
+  //! IDs of all physical registers this WorkReg has been allocated to.
+  RegMask _allocatedMask = 0;
+
+  //! A byte-mask where each bit represents one valid byte of the register.
+  uint64_t _regByteMask = 0;
+
+  //! Argument index (or `kNoArgIndex` if none).
+  uint8_t _argIndex = kNoArgIndex;
+  //! Argument value index in the pack (0 by default).
+  uint8_t _argValueIndex = 0;
+  //! Global home register ID (if any, assigned by RA).
+  uint8_t _homeRegId = BaseReg::kIdBad;
+  //! Global hint register ID (provided by RA or user).
+  uint8_t _hintRegId = BaseReg::kIdBad;
+
+  //! Live spans of the `VirtReg`.
+  LiveRegSpans _liveSpans {};
+  //! Live statistics.
+  RALiveStats _liveStats {};
+
+  //! All nodes that read/write this VirtReg/WorkReg.
+  ZoneVector<BaseNode*> _refs {};
+  //! All nodes that write to this VirtReg/WorkReg.
+  ZoneVector<BaseNode*> _writes {};
+
+  //! Contains work IDs of all immediate consecutive registers of this register.
+  //!
+  //! \note This bit array only contains immediate consecutives. This means that if this is a register that is
+  //! followed by 3 more registers, then it would still have only a single immediate. The rest registers would
+  //! have immediate consecutive registers as well, except the last one.
+  ZoneBitVector _immediateConsecutives {};
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  inline RAWorkReg(VirtReg* vReg, uint32_t workId) noexcept
+    : _workId(workId),
+      _virtId(vReg->id()), // `vReg` is dereferenced here, so it must not be null.
+      _virtReg(vReg),
+      _signature(vReg->signature()) {}
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  inline uint32_t workId() const noexcept { return _workId; }
+  inline uint32_t virtId() const noexcept { return _virtId; }
+
+  inline const char* name() const noexcept { return _virtReg->name(); }
+  inline uint32_t nameSize() const noexcept { return _virtReg->nameSize(); }
+
+  inline TypeId typeId() const noexcept { return _virtReg->typeId(); }
+
+  inline RAWorkRegFlags flags() const noexcept { return _flags; }
+  inline bool hasFlag(RAWorkRegFlags flag) const noexcept { return Support::test(_flags, flag); }
+  inline void addFlags(RAWorkRegFlags flags) noexcept { _flags |= flags; }
+
+  inline bool isAllocated() const noexcept { return hasFlag(RAWorkRegFlags::kAllocated); }
+  inline void markAllocated() noexcept { addFlags(RAWorkRegFlags::kAllocated); }
+
+  inline bool isLeadConsecutive() const noexcept { return hasFlag(RAWorkRegFlags::kLeadConsecutive); }
+  inline void markLeadConsecutive() noexcept { addFlags(RAWorkRegFlags::kLeadConsecutive); }
+
+  inline bool isProcessedConsecutive() const noexcept { return hasFlag(RAWorkRegFlags::kProcessedConsecutive); }
+  inline void markProcessedConsecutive() noexcept { addFlags(RAWorkRegFlags::kProcessedConsecutive); }
+
+  inline bool isStackUsed() const noexcept { return hasFlag(RAWorkRegFlags::kStackUsed); }
+  inline void markStackUsed() noexcept { addFlags(RAWorkRegFlags::kStackUsed); }
+
+  inline bool isStackPreferred() const noexcept { return hasFlag(RAWorkRegFlags::kStackPreferred); }
+  inline void markStackPreferred() noexcept { addFlags(RAWorkRegFlags::kStackPreferred); }
+
+  //! Tests whether this RAWorkReg has been coalesced with another one (cannot be used anymore).
+  inline bool isCoalesced() const noexcept { return hasFlag(RAWorkRegFlags::kCoalesced); }
+
+  inline OperandSignature signature() const noexcept { return _signature; }
+  inline RegType type() const noexcept { return _signature.regType(); }
+  inline RegGroup group() const noexcept { return _signature.regGroup(); }
+
+  inline VirtReg* virtReg() const noexcept { return _virtReg; }
+
+  inline bool hasTiedReg() const noexcept { return _tiedReg != nullptr; }
+  inline RATiedReg* tiedReg() const noexcept { return _tiedReg; }
+  inline void setTiedReg(RATiedReg* tiedReg) noexcept { _tiedReg = tiedReg; }
+  inline void resetTiedReg() noexcept { _tiedReg = nullptr; }
+
+  inline bool hasStackSlot() const noexcept { return _stackSlot != nullptr; }
+  inline RAStackSlot* stackSlot() const noexcept { return _stackSlot; }
+
+  inline LiveRegSpans& liveSpans() noexcept { return _liveSpans; }
+  inline const LiveRegSpans& liveSpans() const noexcept { return _liveSpans; }
+
+  inline RALiveStats& liveStats() noexcept { return _liveStats; }
+  inline const RALiveStats& liveStats() const noexcept { return _liveStats; }
+
+  inline bool hasArgIndex() const noexcept { return _argIndex != kNoArgIndex; }
+  inline uint32_t argIndex() const noexcept { return _argIndex; }
+  inline uint32_t argValueIndex() const noexcept { return _argValueIndex; }
+
+  inline void setArgIndex(uint32_t argIndex, uint32_t valueIndex) noexcept {
+    _argIndex = uint8_t(argIndex); // Stored in 8 bits, higher bits of the argument are discarded.
+    _argValueIndex = uint8_t(valueIndex); // Stored in 8 bits as well.
+  }
+
+  inline bool hasHomeRegId() const noexcept { return _homeRegId != BaseReg::kIdBad; }
+  inline uint32_t homeRegId() const noexcept { return _homeRegId; }
+  inline void setHomeRegId(uint32_t physId) noexcept { _homeRegId = uint8_t(physId); } // Truncated to 8 bits.
+
+  inline bool hasHintRegId() const noexcept { return _hintRegId != BaseReg::kIdBad; }
+  inline uint32_t hintRegId() const noexcept { return _hintRegId; }
+  inline void setHintRegId(uint32_t physId) noexcept { _hintRegId = uint8_t(physId); } // Truncated to 8 bits.
+
+  inline RegMask useIdMask() const noexcept { return _useIdMask; }
+  inline bool hasUseIdMask() const noexcept { return _useIdMask != 0u; }
+  inline bool hasMultipleUseIds() const noexcept { return _useIdMask != 0u && !Support::isPowerOf2(_useIdMask); } // More than one bit set.
+  inline void addUseIdMask(RegMask mask) noexcept { _useIdMask |= mask; }
+
+  inline RegMask preferredMask() const noexcept { return _preferredMask; }
+  inline bool hasPrereffedMask() const noexcept { return _preferredMask != 0xFFFFFFFFu; } // True when the mask differs from its all-ones default; the name is a misspelling of "Preferred".
+  inline void restrictPreferredMask(RegMask mask) noexcept { _preferredMask &= mask; }
+
+  inline RegMask consecutiveMask() const noexcept { return _consecutiveMask; }
+  inline bool hasConsecutiveMask() const noexcept { return _consecutiveMask != 0xFFFFFFFFu; } // True when the mask differs from its all-ones default.
+  inline void restrictConsecutiveMask(RegMask mask) noexcept { _consecutiveMask &= mask; }
+
+  inline RegMask clobberSurvivalMask() const noexcept { return _clobberSurvivalMask; }
+  inline void addClobberSurvivalMask(RegMask mask) noexcept { _clobberSurvivalMask |= mask; }
+
+  inline RegMask allocatedMask() const noexcept { return _allocatedMask; }
+  inline void addAllocatedMask(RegMask mask) noexcept { _allocatedMask |= mask; }
+
+  inline uint64_t regByteMask() const noexcept { return _regByteMask; }
+  inline void setRegByteMask(uint64_t mask) noexcept { _regByteMask = mask; }
+
+  inline bool hasImmediateConsecutives() const noexcept { return !_immediateConsecutives.empty(); }
+  inline const ZoneBitVector& immediateConsecutives() const noexcept { return _immediateConsecutives; }
+
+  inline Error addImmediateConsecutive(ZoneAllocator* allocator, uint32_t workId) noexcept {
+    if (_immediateConsecutives.size() <= workId) // Grow the bit-vector first so that bit `workId` exists.
+      ASMJIT_PROPAGATE(_immediateConsecutives.resize(allocator, workId + 1));
+
+    _immediateConsecutives.setBit(workId, true);
+    return kErrorOk;
+  }
+
+  //! \}
+};
+
+//! \}
+//! \endcond
+
+ASMJIT_END_NAMESPACE
+
+#endif // ASMJIT_CORE_RADEFS_P_H_INCLUDED
diff --git a/lib/lepton/asmjit/core/ralocal.cpp b/lib/lepton/asmjit/core/ralocal.cpp
new file mode 100644
index 0000000000..b4d92446b3
--- /dev/null
+++ b/lib/lepton/asmjit/core/ralocal.cpp
@@ -0,0 +1,1166 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#ifndef ASMJIT_NO_COMPILER
+
+#include "../core/ralocal_p.h"
+#include "../core/support.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+// RALocalAllocator - Utilities
+// ============================
+
+static ASMJIT_FORCE_INLINE RATiedReg* RALocal_findTiedRegByWorkId(RATiedReg* tiedRegs, size_t count, uint32_t workId) noexcept {
+  for (RATiedReg* cur = tiedRegs, *end = tiedRegs + count; cur != end; cur++) // Linear scan, first match wins.
+    if (cur->workId() == workId)
+      return cur;
+  return nullptr; // No tied register refers to `workId`.
+}
+
+// RALocalAllocator - Init & Reset
+// ===============================
+
+Error RALocalAllocator::init() noexcept {
+  // Maps that back the current assignment (`_curAssignment`).
+  PhysToWorkMap* curPhysToWork = _pass->newPhysToWorkMap();
+  WorkToPhysMap* curWorkToPhys = _pass->newWorkToPhysMap();
+
+  if (curPhysToWork == nullptr || curWorkToPhys == nullptr)
+    return DebugUtils::errored(kErrorOutOfMemory);
+
+  _curAssignment.initLayout(_pass->_physRegCount, _pass->workRegs());
+  _curAssignment.initMaps(curPhysToWork, curWorkToPhys);
+
+  // Maps that back the temporary assignment (`_tmpAssignment`) plus a spare map kept in `_tmpWorkToPhysMap`.
+  PhysToWorkMap* tmpPhysToWork = _pass->newPhysToWorkMap();
+  WorkToPhysMap* tmpWorkToPhys = _pass->newWorkToPhysMap();
+  _tmpWorkToPhysMap = _pass->newWorkToPhysMap();
+
+  if (tmpPhysToWork == nullptr || tmpWorkToPhys == nullptr || _tmpWorkToPhysMap == nullptr)
+    return DebugUtils::errored(kErrorOutOfMemory);
+
+  _tmpAssignment.initLayout(_pass->_physRegCount, _pass->workRegs());
+  _tmpAssignment.initMaps(tmpPhysToWork, tmpWorkToPhys);
+
+  return kErrorOk;
+}
+
+// RALocalAllocator - Assignment
+// =============================
+
+Error RALocalAllocator::makeInitialAssignment() noexcept {
+  FuncNode* func = _pass->func();
+  RABlock* entry = _pass->entryBlock();
+
+  ZoneBitVector& liveIn = entry->liveIn();
+  uint32_t argCount = func->argCount();
+  uint32_t numIter = 1; // Raised to 2 when the first iteration leaves at least one live argument unassigned.
+
+  for (uint32_t iter = 0; iter < numIter; iter++) {
+    for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) {
+      for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
+        // Unassigned argument.
+        const RegOnly& regArg = func->argPack(argIndex)[valueIndex];
+        if (!regArg.isReg() || !_cc->isVirtIdValid(regArg.id()))
+          continue;
+
+        VirtReg* virtReg = _cc->virtRegById(regArg.id());
+
+        // Unreferenced argument.
+        RAWorkReg* workReg = virtReg->workReg();
+        if (!workReg)
+          continue;
+
+        // Overwritten argument.
+        uint32_t workId = workReg->workId();
+        if (!liveIn.bitAt(workId))
+          continue;
+
+        RegGroup group = workReg->group();
+        if (_curAssignment.workToPhysId(group, workId) != RAAssignment::kPhysNone) // Already has a physical register.
+          continue;
+
+        RegMask allocableRegs = _availableRegs[group] & ~_curAssignment.assigned(group); // Available and not yet taken.
+        if (iter == 0) {
+          // First iteration: Try to allocate to home RegId.
+          if (workReg->hasHomeRegId()) {
+            uint32_t physId = workReg->homeRegId();
+            if (Support::bitTest(allocableRegs, physId)) {
+              _curAssignment.assign(group, workId, physId, true); // NOTE(review): `true` presumably marks it dirty - confirm.
+              _pass->_argsAssignment.assignRegInPack(argIndex, valueIndex, workReg->type(), physId, workReg->typeId());
+              continue;
+            }
+          }
+
+          numIter = 2; // Home register missing or taken, request the second iteration.
+        }
+        else {
+          // Second iteration: Pick any other register if there is an unassigned one or assign to stack.
+          if (allocableRegs) {
+            uint32_t physId = Support::ctz(allocableRegs); // Lowest allocable register id.
+            _curAssignment.assign(group, workId, physId, true);
+            _pass->_argsAssignment.assignRegInPack(argIndex, valueIndex, workReg->type(), physId, workReg->typeId());
+          }
+          else {
+            // This register will definitely need stack, create the slot now and assign also `argIndex`
+            // to it. We will patch `_argsAssignment` later after RAStackAllocator finishes.
+            RAStackSlot* slot = _pass->getOrCreateStackSlot(workReg);
+            if (ASMJIT_UNLIKELY(!slot))
+              return DebugUtils::errored(kErrorOutOfMemory);
+
+            // This means STACK_ARG may be moved to STACK.
+            workReg->addFlags(RAWorkRegFlags::kStackArgToStack);
+            _pass->_numStackArgsToStackSlots++;
+          }
+        }
+      }
+    }
+  }
+
+  return kErrorOk;
+}
+
+Error RALocalAllocator::replaceAssignment(const PhysToWorkMap* physToWorkMap) noexcept {
+  _curAssignment.copyFrom(physToWorkMap); // Overwrites the current assignment with the given map.
+  return kErrorOk; // Always succeeds.
+}
+
+Error RALocalAllocator::switchToAssignment(PhysToWorkMap* dstPhysToWorkMap, const ZoneBitVector& liveIn, bool dstReadOnly, bool tryMode) noexcept {
+  RAAssignment dst;
+  RAAssignment& cur = _curAssignment;
+
+  dst.initLayout(_pass->_physRegCount, _pass->workRegs());
+  dst.initMaps(dstPhysToWorkMap, _tmpWorkToPhysMap);
+  dst.assignWorkIdsFromPhysIds();
+
+  if (tryMode) // NOTE: returns before any change is made, so `tryMode` is always false in the code below.
+    return kErrorOk;
+
+  for (RegGroup group : RegGroupVirtValues{}) {
+    // STEP 1
+    // ------
+    //
+    //   - KILL all registers that are not live at `dst`,
+    //   - SPILL all registers that are not assigned at `dst`.
+
+    if (!tryMode) { // Always taken, see the early return above.
+      Support::BitWordIterator<RegMask> it(cur.assigned(group));
+      while (it.hasNext()) {
+        uint32_t physId = it.next();
+        uint32_t workId = cur.physToWorkId(group, physId);
+
+        // Must be true as we iterate over assigned registers.
+        ASMJIT_ASSERT(workId != RAAssignment::kWorkNone);
+
+        // KILL if it's not live on entry.
+        if (!liveIn.bitAt(workId)) {
+          onKillReg(group, workId, physId);
+          continue;
+        }
+
+        // SPILL if it's not assigned on entry.
+        uint32_t altId = dst.workToPhysId(group, workId);
+        if (altId == RAAssignment::kPhysNone) {
+          ASMJIT_PROPAGATE(onSpillReg(group, workId, physId));
+        }
+      }
+    }
+
+    // STEP 2
+    // ------
+    //
+    //   - MOVE and SWAP registers from their current assignments into their DST assignments.
+    //   - Build `willLoadRegs` mask of registers scheduled for `onLoadReg()`.
+
+    // Current run-id (1 means more aggressive decisions).
+    int32_t runId = -1;
+    // Remaining registers scheduled for `onLoadReg()`.
+    RegMask willLoadRegs = 0;
+    // Remaining registers to be allocated in this loop.
+    RegMask affectedRegs = dst.assigned(group);
+
+    while (affectedRegs) {
+      if (++runId == 2) { // Two consecutive rounds without progress (`runId` is reset to -1 on every change).
+        if (!tryMode)
+          return DebugUtils::errored(kErrorInvalidState);
+
+        // Stop in `tryMode` if we haven't done anything in past two rounds.
+        break;
+      }
+
+      Support::BitWordIterator<RegMask> it(affectedRegs);
+      while (it.hasNext()) {
+        uint32_t physId = it.next();
+        RegMask physMask = Support::bitMask<RegMask>(physId);
+
+        uint32_t curWorkId = cur.physToWorkId(group, physId);
+        uint32_t dstWorkId = dst.physToWorkId(group, physId);
+
+        // The register must have assigned `dstWorkId` as we only iterate over assigned regs.
+        ASMJIT_ASSERT(dstWorkId != RAAssignment::kWorkNone);
+
+        if (curWorkId != RAAssignment::kWorkNone) {
+          // Both assigned.
+          if (curWorkId != dstWorkId) {
+            // Wait a bit if this is the first run, we may avoid this if `curWorkId` moves out.
+            if (runId <= 0)
+              continue;
+
+            uint32_t altPhysId = cur.workToPhysId(group, dstWorkId);
+            if (altPhysId == RAAssignment::kPhysNone)
+              continue;
+
+            // Reset as we will do some changes to the current assignment.
+            runId = -1;
+
+            if (_archTraits->hasInstRegSwap(group)) {
+              ASMJIT_PROPAGATE(onSwapReg(group, curWorkId, physId, dstWorkId, altPhysId));
+            }
+            else {
+              // KILL the reg if it's not dirty in CUR, otherwise try to MOVE (falls back to SPILL if no register is free).
+              if (!cur.isPhysDirty(group, physId)) {
+                ASMJIT_PROPAGATE(onKillReg(group, curWorkId, physId));
+              }
+              else {
+                RegMask allocableRegs = _pass->_availableRegs[group] & ~cur.assigned(group);
+
+                // If possible don't conflict with assigned regs at DST.
+                if (allocableRegs & ~dst.assigned(group))
+                  allocableRegs &= ~dst.assigned(group);
+
+                if (allocableRegs) {
+                  // MOVE is possible, thus preferred.
+                  uint32_t tmpPhysId = Support::ctz(allocableRegs);
+
+                  ASMJIT_PROPAGATE(onMoveReg(group, curWorkId, tmpPhysId, physId));
+                  _pass->_clobberedRegs[group] |= Support::bitMask(tmpPhysId);
+                }
+                else {
+                  // MOVE is impossible, must SPILL.
+                  ASMJIT_PROPAGATE(onSpillReg(group, curWorkId, physId));
+                }
+              }
+
+              goto Cleared; // `physId` is free in CUR now, continue as in the "CUR unassigned" case.
+            }
+          }
+        }
+        else {
+Cleared:
+          // DST assigned, CUR unassigned.
+          uint32_t altPhysId = cur.workToPhysId(group, dstWorkId);
+          if (altPhysId == RAAssignment::kPhysNone) {
+            if (liveIn.bitAt(dstWorkId))
+              willLoadRegs |= physMask; // Scheduled for `onLoadReg()`.
+            affectedRegs &= ~physMask;  // Unaffected from now.
+            continue;
+          }
+          ASMJIT_PROPAGATE(onMoveReg(group, dstWorkId, physId, altPhysId));
+        }
+
+        // Both DST and CUR assigned to the same reg or CUR just moved to DST.
+        if ((dst.dirty(group) & physMask) != (cur.dirty(group) & physMask)) {
+          if ((dst.dirty(group) & physMask) == 0) {
+            // CUR dirty, DST not dirty (the assert is just to visualize the condition).
+            ASMJIT_ASSERT(!dst.isPhysDirty(group, physId) && cur.isPhysDirty(group, physId));
+
+            // If `dstReadOnly` is true it means that the block was already processed and we cannot change from
+            // CLEAN to DIRTY. In that case the register has to be saved as it cannot enter the block DIRTY.
+            if (dstReadOnly)
+              ASMJIT_PROPAGATE(onSaveReg(group, dstWorkId, physId));
+            else
+              dst.makeDirty(group, dstWorkId, physId);
+          }
+          else {
+            // DST dirty, CUR not dirty (the assert is just to visualize the condition).
+            ASMJIT_ASSERT(dst.isPhysDirty(group, physId) && !cur.isPhysDirty(group, physId));
+
+            cur.makeDirty(group, dstWorkId, physId);
+          }
+        }
+
+        // Must match now...
+        ASMJIT_ASSERT(dst.physToWorkId(group, physId) == cur.physToWorkId(group, physId));
+        ASMJIT_ASSERT(dst.isPhysDirty(group, physId) == cur.isPhysDirty(group, physId));
+
+        runId = -1;
+        affectedRegs &= ~physMask;
+      }
+    }
+
+    // STEP 3
+    // ------
+    //
+    //   - Load registers specified by `willLoadRegs`.
+
+    {
+      Support::BitWordIterator<RegMask> it(willLoadRegs);
+      while (it.hasNext()) {
+        uint32_t physId = it.next();
+
+        if (!cur.isPhysAssigned(group, physId)) {
+          uint32_t workId = dst.physToWorkId(group, physId);
+
+          // The algorithm is broken if it tries to load a register that is not in LIVE-IN.
+          ASMJIT_ASSERT(liveIn.bitAt(workId) == true);
+
+          ASMJIT_PROPAGATE(onLoadReg(group, workId, physId));
+          if (dst.isPhysDirty(group, physId))
+            cur.makeDirty(group, workId, physId);
+          ASMJIT_ASSERT(dst.isPhysDirty(group, physId) == cur.isPhysDirty(group, physId));
+        }
+        else {
+          // Not possible otherwise.
+          ASMJIT_ASSERT(tryMode == true);
+        }
+      }
+    }
+  }
+
+  if (!tryMode) { // Always taken, see the early return at the top of the function.
+    // Here is a code that dumps the conflicting part if something fails here:
+    // if (!dst.equals(cur)) {
+    //   uint32_t physTotal = dst._layout.physTotal;
+    //   uint32_t workCount = dst._layout.workCount;
+    //
+    //   fprintf(stderr, "Dirty    DST=0x%08X CUR=0x%08X\n", dst.dirty(RegGroup::kGp), cur.dirty(RegGroup::kGp));
+    //   fprintf(stderr, "Assigned DST=0x%08X CUR=0x%08X\n", dst.assigned(RegGroup::kGp), cur.assigned(RegGroup::kGp));
+    //
+    //   for (uint32_t physId = 0; physId < physTotal; physId++) {
+    //     uint32_t dstWorkId = dst._physToWorkMap->workIds[physId];
+    //     uint32_t curWorkId = cur._physToWorkMap->workIds[physId];
+    //     if (dstWorkId != curWorkId)
+    //       fprintf(stderr, "[PhysIdWork] PhysId=%u WorkId[DST(%u) != CUR(%u)]\n", physId, dstWorkId, curWorkId);
+    //   }
+    //
+    //   for (uint32_t workId = 0; workId < workCount; workId++) {
+    //     uint32_t dstPhysId = dst._workToPhysMap->physIds[workId];
+    //     uint32_t curPhysId = cur._workToPhysMap->physIds[workId];
+    //     if (dstPhysId != curPhysId)
+    //       fprintf(stderr, "[WorkToPhys] WorkId=%u PhysId[DST(%u) != CUR(%u)]\n", workId, dstPhysId, curPhysId);
+    //   }
+    // }
+    ASMJIT_ASSERT(dst.equals(cur));
+  }
+
+  return kErrorOk;
+}
+
+Error RALocalAllocator::spillScratchGpRegsBeforeEntry(RegMask scratchRegs) noexcept {
+  // Spills every GP register selected by `scratchRegs` that is currently assigned to a work register.
+  const RegGroup gp = RegGroup::kGp;
+
+  for (Support::BitWordIterator<RegMask> regIt(scratchRegs); regIt.hasNext();) {
+    const uint32_t scratchId = regIt.next();
+    if (!_curAssignment.isPhysAssigned(gp, scratchId))
+      continue;
+
+    ASMJIT_PROPAGATE(onSpillReg(gp, _curAssignment.physToWorkId(gp, scratchId), scratchId));
+  }
+
+  return kErrorOk;
+}
+
+// RALocalAllocator - Allocation
+// =============================
+
+Error RALocalAllocator::allocInst(InstNode* node) noexcept {
+  RAInst* raInst = node->passData<RAInst>();
+
+  RATiedReg* outTiedRegs[Globals::kMaxPhysRegs];
+  RATiedReg* dupTiedRegs[Globals::kMaxPhysRegs];
+  RATiedReg* consecutiveRegs[kMaxConsecutiveRegs];
+
+  // The cursor must point to the previous instruction for a possible instruction insertion.
+  _cc->_setCursor(node->prev());
+
+  _node = node;
+  _raInst = raInst;
+  _tiedTotal = raInst->_tiedTotal;
+  _tiedCount = raInst->_tiedCount;
+
+  // Whether we already replaced register operand with memory operand.
+  bool rmAllocated = false;
+
+  for (RegGroup group : RegGroupVirtValues{}) {
+    uint32_t i, count = this->tiedCount(group);
+    RATiedReg* tiedRegs = this->tiedRegs(group);
+
+    RegMask willUse = _raInst->_usedRegs[group];
+    RegMask willOut = _raInst->_clobberedRegs[group];
+    RegMask willFree = 0;
+
+    uint32_t usePending = count;
+    uint32_t outTiedCount = 0;
+    uint32_t dupTiedCount = 0;
+    uint32_t consecutiveMask = 0;
+
+    // STEP 1
+    // ------
+    //
+    // Calculate `willUse` and `willFree` masks based on tied registers we have. In addition, aggregate information
+    // regarding consecutive registers used by this instruction. We need that to make USE/OUT assignments.
+    //
+    // We don't do any assignment decisions at this stage as we just need to collect some information first. Then,
+    // after we populate all masks needed we can finally make some decisions in the second loop. The main reason
+    // for this is that we really need `willFree` to make assignment decisions for `willUse`, because if we mark
+    // some registers that will be freed, we can consider them in decision making afterwards.
+
+    for (i = 0; i < count; i++) {
+      RATiedReg* tiedReg = &tiedRegs[i];
+
+      if (tiedReg->hasAnyConsecutiveFlag()) {
+        uint32_t consecutiveOffset = tiedReg->isLeadConsecutive() ? uint32_t(0) : tiedReg->consecutiveData();
+
+        if (ASMJIT_UNLIKELY(Support::bitTest(consecutiveMask, consecutiveOffset)))
+          return DebugUtils::errored(kErrorInvalidState);
+
+        consecutiveMask |= Support::bitMask(consecutiveOffset);
+        consecutiveRegs[consecutiveOffset] = tiedReg;
+      }
+
+      // Add OUT and KILL to `outPending` for CLOBBERing and/or OUT assignment.
+      if (tiedReg->isOutOrKill())
+        outTiedRegs[outTiedCount++] = tiedReg;
+
+      if (tiedReg->isDuplicate())
+        dupTiedRegs[dupTiedCount++] = tiedReg;
+
+      if (!tiedReg->isUse()) {
+        tiedReg->markUseDone();
+        usePending--;
+        continue;
+      }
+
+      // Don't assign anything here if this is a consecutive USE - we will handle this in STEP 2 instead.
+      if (tiedReg->isUseConsecutive())
+        continue;
+
+      uint32_t workId = tiedReg->workId();
+      uint32_t assignedId = _curAssignment.workToPhysId(group, workId);
+
+      if (tiedReg->hasUseId()) {
+        // If the register has `useId` it means it can only be allocated in that register.
+        RegMask useMask = Support::bitMask(tiedReg->useId());
+
+        // RAInstBuilder must have collected `usedRegs` on-the-fly.
+        ASMJIT_ASSERT((willUse & useMask) != 0);
+
+        if (assignedId == tiedReg->useId()) {
+          // If the register is already allocated in this one, mark it done and continue.
+          tiedReg->markUseDone();
+          if (tiedReg->isWrite())
+            _curAssignment.makeDirty(group, workId, assignedId);
+          usePending--;
+          willUse |= useMask;
+        }
+        else {
+          willFree |= useMask & _curAssignment.assigned(group);
+        }
+      }
+      else {
+        // Check if the register must be moved to `allocableRegs`.
+        RegMask allocableRegs = tiedReg->useRegMask();
+        if (assignedId != RAAssignment::kPhysNone) {
+          RegMask assignedMask = Support::bitMask(assignedId);
+          if ((allocableRegs & ~willUse) & assignedMask) {
+            tiedReg->setUseId(assignedId);
+            tiedReg->markUseDone();
+            if (tiedReg->isWrite())
+              _curAssignment.makeDirty(group, workId, assignedId);
+            usePending--;
+            willUse |= assignedMask;
+          }
+          else {
+            willFree |= assignedMask;
+          }
+        }
+      }
+    }
+
+    // STEP 2
+    // ------
+    //
+    // Verify that all the consecutive registers are really consecutive. Terminate if there is a gap. In addition,
+    // decide which USE ids will be used in case that this consecutive sequence is USE (OUT registers are allocated
+    // in a different step).
+    uint32_t consecutiveCount = 0;
+
+    if (consecutiveMask) {
+      if ((consecutiveMask & (consecutiveMask + 1u)) != 0)
+        return DebugUtils::errored(kErrorInvalidState);
+
+      // Count of trailing ones is the count of consecutive registers. There cannot be gap.
+      consecutiveCount = Support::ctz(~consecutiveMask);
+
+      // Prioritize allocation that would result in least moves even when moving registers away from their homes.
+      RATiedReg* lead = consecutiveRegs[0];
+
+      // Assign the best possible USE Ids to all consecutives.
+      if (lead->isUseConsecutive()) {
+        uint32_t bestScore = 0;
+        uint32_t bestLeadReg = 0xFFFFFFFF;
+        RegMask allocableRegs = (_availableRegs[group] | willFree) & ~willUse;
+
+        uint32_t assignments[kMaxConsecutiveRegs];
+
+        for (i = 0; i < consecutiveCount; i++)
+          assignments[i] = _curAssignment.workToPhysId(group, consecutiveRegs[i]->workId());
+
+        Support::BitWordIterator<uint32_t> it(lead->useRegMask());
+        while (it.hasNext()) {
+          uint32_t regIndex = it.next();
+          if (Support::bitTest(lead->useRegMask(), regIndex)) {
+            uint32_t score = 15;
+
+            for (i = 0; i < consecutiveCount; i++) {
+              uint32_t consecutiveIndex = regIndex + i;
+              if (!Support::bitTest(allocableRegs, consecutiveIndex)) {
+                score = 0;
+                break;
+              }
+
+              RAWorkReg* workReg = workRegById(consecutiveRegs[i]->workId());
+              score += uint32_t(workReg->homeRegId() == consecutiveIndex);
+              score += uint32_t(assignments[i] == consecutiveIndex) * 2;
+            }
+
+            if (score > bestScore) {
+              bestScore = score;
+              bestLeadReg = regIndex;
+            }
+          }
+        }
+
+        if (bestLeadReg == 0xFFFFFFFF)
+          return DebugUtils::errored(kErrorConsecutiveRegsAllocation);
+
+        for (i = 0; i < consecutiveCount; i++) {
+          uint32_t consecutiveIndex = bestLeadReg + i;
+
+          RATiedReg* tiedReg = consecutiveRegs[i];
+          RegMask useMask = Support::bitMask(consecutiveIndex);
+
+          uint32_t workId = tiedReg->workId();
+          uint32_t assignedId = _curAssignment.workToPhysId(group, workId);
+
+          tiedReg->setUseId(consecutiveIndex);
+
+          if (assignedId == consecutiveIndex) {
+            // If the register is already allocated in this one, mark it done and continue.
+            tiedReg->markUseDone();
+            if (tiedReg->isWrite())
+              _curAssignment.makeDirty(group, workId, assignedId);
+            usePending--;
+            willUse |= useMask;
+          }
+          else {
+            willUse |= useMask;
+            willFree |= useMask & _curAssignment.assigned(group);
+          }
+        }
+      }
+    }
+
+    // STEP 3
+    // ------
+    //
+    // Do some decision making to find the best candidates of registers that need to be assigned, moved, and/or
+    // spilled. Only USE registers are considered here, OUT will be decided later after all CLOBBERed and OUT
+    // registers are unassigned.
+
+    if (usePending) {
+      // TODO: Not sure `liveRegs` should be used, maybe willUse and willFree would be enough and much more clear.
+
+      // All registers that are currently alive without registers that will be freed.
+      RegMask liveRegs = _curAssignment.assigned(group) & ~willFree;
+
+      for (i = 0; i < count; i++) {
+        RATiedReg* tiedReg = &tiedRegs[i];
+        if (tiedReg->isUseDone())
+          continue;
+
+        uint32_t workId = tiedReg->workId();
+        uint32_t assignedId = _curAssignment.workToPhysId(group, workId);
+
+        // REG/MEM: Patch register operand to memory operand if not allocated.
+        if (!rmAllocated && tiedReg->hasUseRM()) {
+          if (assignedId == RAAssignment::kPhysNone && Support::isPowerOf2(tiedReg->useRewriteMask())) {
+            RAWorkReg* workReg = workRegById(tiedReg->workId());
+            uint32_t opIndex = Support::ctz(tiedReg->useRewriteMask()) / uint32_t(sizeof(Operand) / sizeof(uint32_t));
+            uint32_t rmSize = tiedReg->rmSize();
+
+            if (rmSize <= workReg->virtReg()->virtSize()) {
+              Operand& op = node->operands()[opIndex];
+              op = _pass->workRegAsMem(workReg);
+              op.as<BaseMem>().setSize(rmSize);
+              tiedReg->_useRewriteMask = 0;
+
+              tiedReg->markUseDone();
+              usePending--;
+
+              rmAllocated = true;
+              continue;
+            }
+          }
+        }
+
+        if (!tiedReg->hasUseId()) {
+          // DECIDE where to assign the USE register.
+          RegMask allocableRegs = tiedReg->useRegMask() & ~(willFree | willUse);
+          uint32_t useId = decideOnAssignment(group, workId, assignedId, allocableRegs);
+
+          RegMask useMask = Support::bitMask(useId);
+          willUse |= useMask;
+          willFree |= useMask & liveRegs;
+          tiedReg->setUseId(useId);
+
+          if (assignedId != RAAssignment::kPhysNone) {
+            RegMask assignedMask = Support::bitMask(assignedId);
+
+            willFree |= assignedMask;
+            liveRegs &= ~assignedMask;
+
+            // OPTIMIZATION: Assign the USE register here if it's possible.
+            if (!(liveRegs & useMask)) {
+              ASMJIT_PROPAGATE(onMoveReg(group, workId, useId, assignedId));
+              tiedReg->markUseDone();
+              if (tiedReg->isWrite())
+                _curAssignment.makeDirty(group, workId, useId);
+              usePending--;
+            }
+          }
+          else {
+            // OPTIMIZATION: Assign the USE register here if it's possible.
+            if (!(liveRegs & useMask)) {
+              ASMJIT_PROPAGATE(onLoadReg(group, workId, useId));
+              tiedReg->markUseDone();
+              if (tiedReg->isWrite())
+                _curAssignment.makeDirty(group, workId, useId);
+              usePending--;
+            }
+          }
+
+          liveRegs |= useMask;
+        }
+      }
+    }
+
+    // Initially all used regs will be marked as clobbered.
+    RegMask clobberedByInst = willUse | willOut;
+
+    // STEP 4
+    // ------
+    //
+    // Free all registers that we marked as `willFree`. Only registers that are not USEd by the instruction are
+    // considered as we don't want to free regs we need.
+
+    if (willFree) {
+      RegMask allocableRegs = _availableRegs[group] & ~(_curAssignment.assigned(group) | willFree | willUse | willOut);
+      Support::BitWordIterator<RegMask> it(willFree);
+
+      do {
+        uint32_t assignedId = it.next();
+        if (_curAssignment.isPhysAssigned(group, assignedId)) {
+          uint32_t workId = _curAssignment.physToWorkId(group, assignedId);
+
+          // DECIDE whether to MOVE or SPILL.
+          if (allocableRegs) {
+            uint32_t reassignedId = decideOnReassignment(group, workId, assignedId, allocableRegs);
+            if (reassignedId != RAAssignment::kPhysNone) {
+              ASMJIT_PROPAGATE(onMoveReg(group, workId, reassignedId, assignedId));
+              allocableRegs ^= Support::bitMask(reassignedId);
+              continue;
+            }
+          }
+
+          ASMJIT_PROPAGATE(onSpillReg(group, workId, assignedId));
+        }
+      } while (it.hasNext());
+    }
+
+    // STEP 5
+    // ------
+    //
+    // ALLOCATE / SHUFFLE all registers that we marked as `willUse` and weren't allocated yet. This is a bit
+    // complicated as the allocation is iterative. In some cases we have to wait before allocating a particular
+    // physical register as it's still occupied by some other one, which we need to move before we can use it.
+    // In this case we skip it and allocate some other one instead (making it free for another iteration).
+    //
+    // NOTE: Iterations are mostly important for complicated allocations like function calls, where there can
+    // be up to N registers used at once. Asm instructions won't run the loop more than once in 99.9% of cases
+    // as they use 2..3 registers in average.
+
+    if (usePending) {
+      bool mustSwap = false;
+      do {
+        uint32_t oldPending = usePending;
+
+        for (i = 0; i < count; i++) {
+          RATiedReg* thisTiedReg = &tiedRegs[i];
+          if (thisTiedReg->isUseDone())
+            continue;
+
+          uint32_t thisWorkId = thisTiedReg->workId();
+          uint32_t thisPhysId = _curAssignment.workToPhysId(group, thisWorkId);
+
+          // This would be a bug, fatal one!
+          uint32_t targetPhysId = thisTiedReg->useId();
+          ASMJIT_ASSERT(targetPhysId != thisPhysId);
+
+          uint32_t targetWorkId = _curAssignment.physToWorkId(group, targetPhysId);
+          if (targetWorkId != RAAssignment::kWorkNone) {
+            RAWorkReg* targetWorkReg = workRegById(targetWorkId);
+
+            // Swapping two registers can solve two allocation tasks by emitting just a single instruction. However,
+            // swap is only available on few architectures and it's definitely not available for each register group.
+            // Calling `onSwapReg()` before checking these would be fatal.
+            if (_archTraits->hasInstRegSwap(group) && thisPhysId != RAAssignment::kPhysNone) {
+              ASMJIT_PROPAGATE(onSwapReg(group, thisWorkId, thisPhysId, targetWorkId, targetPhysId));
+
+              thisTiedReg->markUseDone();
+              if (thisTiedReg->isWrite())
+                _curAssignment.makeDirty(group, thisWorkId, targetPhysId);
+              usePending--;
+
+              // Double-hit.
+              RATiedReg* targetTiedReg = RALocal_findTiedRegByWorkId(tiedRegs, count, targetWorkReg->workId());
+              if (targetTiedReg && targetTiedReg->useId() == thisPhysId) {
+                targetTiedReg->markUseDone();
+                if (targetTiedReg->isWrite())
+                  _curAssignment.makeDirty(group, targetWorkId, thisPhysId);
+                usePending--;
+              }
+              continue;
+            }
+
+            if (!mustSwap)
+              continue;
+
+            // Only branched here if the previous iteration did nothing. This is essentially a SWAP operation without
+            // having a dedicated instruction for that purpose (vector registers, etc). The simplest way to handle
+            // such case is to SPILL the target register.
+            ASMJIT_PROPAGATE(onSpillReg(group, targetWorkId, targetPhysId));
+          }
+
+          if (thisPhysId != RAAssignment::kPhysNone) {
+            ASMJIT_PROPAGATE(onMoveReg(group, thisWorkId, targetPhysId, thisPhysId));
+
+            thisTiedReg->markUseDone();
+            if (thisTiedReg->isWrite())
+              _curAssignment.makeDirty(group, thisWorkId, targetPhysId);
+            usePending--;
+          }
+          else {
+            ASMJIT_PROPAGATE(onLoadReg(group, thisWorkId, targetPhysId));
+
+            thisTiedReg->markUseDone();
+            if (thisTiedReg->isWrite())
+              _curAssignment.makeDirty(group, thisWorkId, targetPhysId);
+            usePending--;
+          }
+        }
+
+        mustSwap = (oldPending == usePending);
+      } while (usePending);
+    }
+
+    // STEP 6
+    // ------
+    //
+    // KILL registers marked as KILL/OUT.
+
+    uint32_t outPending = outTiedCount;
+    if (outTiedCount) {
+      for (i = 0; i < outTiedCount; i++) {
+        RATiedReg* tiedReg = outTiedRegs[i];
+
+        uint32_t workId = tiedReg->workId();
+        uint32_t physId = _curAssignment.workToPhysId(group, workId);
+
+        // Must check if it's allocated as KILL can be related to OUT (like KILL immediately after OUT, which could
+        // mean the register is not assigned).
+        if (physId != RAAssignment::kPhysNone) {
+          ASMJIT_PROPAGATE(onKillReg(group, workId, physId));
+          willOut &= ~Support::bitMask(physId);
+        }
+
+        // We still maintain number of pending registers for OUT assignment. So, if this is only KILL, not OUT, we
+        // can safely decrement it.
+        outPending -= !tiedReg->isOut();
+      }
+    }
+
+    // STEP 7
+    // ------
+    //
+    // SPILL registers that will be CLOBBERed. Since OUT and KILL were already processed this is used mostly to
+    // handle function CALLs.
+
+    if (willOut) {
+      Support::BitWordIterator<RegMask> it(willOut);
+      do {
+        uint32_t physId = it.next();
+        uint32_t workId = _curAssignment.physToWorkId(group, physId);
+
+        if (workId == RAAssignment::kWorkNone)
+          continue;
+
+        ASMJIT_PROPAGATE(onSpillReg(group, workId, physId));
+      } while (it.hasNext());
+    }
+
+    // STEP 8
+    // ------
+    //
+    // Duplication.
+
+    for (i = 0; i < dupTiedCount; i++) {
+      RATiedReg* tiedReg = dupTiedRegs[i];
+      uint32_t workId = tiedReg->workId();
+      uint32_t srcId = tiedReg->useId();
+
+      Support::BitWordIterator<RegMask> it(tiedReg->useRegMask());
+      while (it.hasNext()) {
+        uint32_t dstId = it.next();
+        if (dstId == srcId)
+          continue;
+        _pass->emitMove(workId, dstId, srcId);
+      }
+    }
+
+    // STEP 9
+    // ------
+    //
+    // Vector registers can be clobbered partially by invoke - find if that's the case and clobber when necessary.
+
+    if (node->isInvoke() && group == RegGroup::kVec) {
+      const InvokeNode* invokeNode = node->as<InvokeNode>();
+
+      RegMask maybeClobberedRegs = invokeNode->detail().callConv().preservedRegs(group) & _curAssignment.assigned(group);
+      if (maybeClobberedRegs) {
+        uint32_t saveRestoreVecSize = invokeNode->detail().callConv().saveRestoreRegSize(group);
+        Support::BitWordIterator<RegMask> it(maybeClobberedRegs);
+
+        do {
+          uint32_t physId = it.next();
+          uint32_t workId = _curAssignment.physToWorkId(group, physId);
+
+          RAWorkReg* workReg = workRegById(workId);
+          uint32_t virtSize = workReg->virtReg()->virtSize();
+
+          if (virtSize > saveRestoreVecSize) {
+            ASMJIT_PROPAGATE(onSpillReg(group, workId, physId));
+          }
+
+        } while (it.hasNext());
+      }
+    }
+
+    // STEP 10
+    // -------
+    //
+    // Assign OUT registers.
+
+    if (outPending) {
+      // Live registers, we need a separate register (outside of `_curAssignment`) to hold these because of KILLed
+      // registers. If we KILL a register here it will go out from `_curAssignment`, but we cannot assign to it in
+      // here.
+      RegMask liveRegs = _curAssignment.assigned(group);
+
+      // Must avoid as they have been already OUTed (added during the loop).
+      RegMask outRegs = 0;
+
+      // Must avoid as they collide with already allocated ones.
+      RegMask avoidRegs = willUse & ~clobberedByInst;
+
+      // Assign the best possible OUT ids of all consecutives.
+      if (consecutiveCount) {
+        RATiedReg* lead = consecutiveRegs[0];
+        if (lead->isOutConsecutive()) {
+          uint32_t bestScore = 0;
+          uint32_t bestLeadReg = 0xFFFFFFFF;
+          RegMask allocableRegs = _availableRegs[group] & ~(outRegs | avoidRegs);
+
+          Support::BitWordIterator<uint32_t> it(lead->outRegMask());
+          while (it.hasNext()) {
+            uint32_t regIndex = it.next();
+            if (Support::bitTest(lead->outRegMask(), regIndex)) {
+              uint32_t score = 15;
+
+              for (i = 0; i < consecutiveCount; i++) {
+                uint32_t consecutiveIndex = regIndex + i;
+                if (!Support::bitTest(allocableRegs, consecutiveIndex)) {
+                  score = 0;
+                  break;
+                }
+
+                RAWorkReg* workReg = workRegById(consecutiveRegs[i]->workId());
+                score += uint32_t(workReg->homeRegId() == consecutiveIndex);
+              }
+
+              if (score > bestScore) {
+                bestScore = score;
+                bestLeadReg = regIndex;
+              }
+            }
+          }
+
+          if (bestLeadReg == 0xFFFFFFFF)
+            return DebugUtils::errored(kErrorConsecutiveRegsAllocation);
+
+          for (i = 0; i < consecutiveCount; i++) {
+            uint32_t consecutiveIndex = bestLeadReg + i;
+            RATiedReg* tiedReg = consecutiveRegs[i];
+            tiedReg->setOutId(consecutiveIndex);
+          }
+        }
+      }
+
+      // Allocate OUT registers.
+      for (i = 0; i < outTiedCount; i++) {
+        RATiedReg* tiedReg = outTiedRegs[i];
+        if (!tiedReg->isOut())
+          continue;
+
+        uint32_t workId = tiedReg->workId();
+        uint32_t assignedId = _curAssignment.workToPhysId(group, workId);
+
+        if (assignedId != RAAssignment::kPhysNone)
+          ASMJIT_PROPAGATE(onKillReg(group, workId, assignedId));
+
+        uint32_t physId = tiedReg->outId();
+        if (physId == RAAssignment::kPhysNone) {
+          RegMask allocableRegs = tiedReg->outRegMask() & ~(outRegs | avoidRegs);
+
+          if (!(allocableRegs & ~liveRegs)) {
+            // There are no more registers, decide which one to spill.
+            uint32_t spillWorkId;
+            physId = decideOnSpillFor(group, workId, allocableRegs & liveRegs, &spillWorkId);
+            ASMJIT_PROPAGATE(onSpillReg(group, spillWorkId, physId));
+          }
+          else {
+            physId = decideOnAssignment(group, workId, RAAssignment::kPhysNone, allocableRegs & ~liveRegs);
+          }
+        }
+
+        // OUTs are CLOBBERed thus cannot be ASSIGNed right now.
+        ASMJIT_ASSERT(!_curAssignment.isPhysAssigned(group, physId));
+
+        if (!tiedReg->isKill())
+          ASMJIT_PROPAGATE(onAssignReg(group, workId, physId, true));
+
+        tiedReg->setOutId(physId);
+        tiedReg->markOutDone();
+
+        outRegs |= Support::bitMask(physId);
+        liveRegs &= ~Support::bitMask(physId);
+        outPending--;
+      }
+
+      clobberedByInst |= outRegs;
+      ASMJIT_ASSERT(outPending == 0);
+    }
+
+    _clobberedRegs[group] |= clobberedByInst;
+  }
+
+  return kErrorOk;
+}
+
+Error RALocalAllocator::spillAfterAllocation(InstNode* node) noexcept {
+  // This is an experimental feature that spills registers that don't have a home-id and are last in this basic block.
+  // This prevents saving these regs in other basic blocks and then restoring them (mostly relevant for loops).
+  RAInst* raInst = node->passData<RAInst>(); // RA data attached to `node`.
+  uint32_t count = raInst->tiedCount();
+
+  for (uint32_t i = 0; i < count; i++) {
+    RATiedReg* tiedReg = raInst->tiedAt(i);
+    if (tiedReg->isLast()) { // Only tied registers flagged as last are candidates.
+      uint32_t workId = tiedReg->workId();
+      RAWorkReg* workReg = workRegById(workId);
+      if (!workReg->hasHomeRegId()) { // Work registers that have a home register id are left alone.
+        RegGroup group = workReg->group();
+        uint32_t assignedId = _curAssignment.workToPhysId(group, workId);
+        if (assignedId != RAAssignment::kPhysNone) { // Nothing to spill if no physical register is held.
+          _cc->_setCursor(node); // NOTE(review): presumably makes the spill code land right after `node` - confirm.
+          ASMJIT_PROPAGATE(onSpillReg(group, workId, assignedId));
+        }
+      }
+    }
+  }
+
+  return kErrorOk;
+}
+
+Error RALocalAllocator::allocBranch(InstNode* node, RABlock* target, RABlock* cont) noexcept {
+  // TODO: `cont` is ignored for now; it should be used to make the branch allocation better.
+  DebugUtils::unused(cont);
+
+  // The cursor must point to the previous instruction for a possible instruction insertion.
+  _cc->_setCursor(node->prev());
+
+  // Use TryMode of `switchToAssignment()` if possible.
+  if (target->hasEntryAssignment()) {
+    ASMJIT_PROPAGATE(switchToAssignment(target->entryPhysToWorkMap(), target->liveIn(), target->isAllocated(), true));
+  }
+
+  ASMJIT_PROPAGATE(allocInst(node)); // Allocate the registers used by the branch instruction itself.
+  ASMJIT_PROPAGATE(spillRegsBeforeEntry(target));
+
+  if (target->hasEntryAssignment()) {
+    BaseNode* injectionPoint = _pass->extraBlock()->prev(); // Cursor position used for the switch code below.
+    BaseNode* prevCursor = _cc->setCursor(injectionPoint); // Kept so the cursor can be restored at the end.
+
+    _tmpAssignment.copyFrom(_curAssignment); // Snapshot of the current state; swapped back before leaving this branch.
+    ASMJIT_PROPAGATE(switchToAssignment(target->entryPhysToWorkMap(), target->liveIn(), target->isAllocated(), false));
+
+    BaseNode* curCursor = _cc->cursor();
+    if (curCursor != injectionPoint) { // The cursor moved, so the switch emitted something.
+      // Additional instructions emitted to switch from the current state to the `target` state. This means
+      // that we have to move these instructions into an independent code block and patch the jump location.
+      Operand& targetOp = node->op(node->opCount() - 1); // The last operand is expected to be the jump target.
+      if (ASMJIT_UNLIKELY(!targetOp.isLabel()))
+        return DebugUtils::errored(kErrorInvalidState);
+
+      Label trampoline = _cc->newLabel();
+      Label savedTarget = targetOp.as<Label>();
+
+      // Patch `target` to point to the `trampoline` we just created.
+      targetOp = trampoline;
+
+      // Clear a possible SHORT form as we have no clue now if the SHORT form would be encodable after patching
+      // the target to `trampoline` (X86 specific).
+      node->clearOptions(InstOptions::kShortForm);
+
+      // Finalize the switch assignment sequence.
+      ASMJIT_PROPAGATE(_pass->emitJump(savedTarget)); // Jump from the end of the switch code to the original target.
+      _cc->_setCursor(injectionPoint);
+      _cc->bind(trampoline); // Bound at the injection point, i.e. in front of the switch code emitted above.
+    }
+
+    _cc->_setCursor(prevCursor);
+    _curAssignment.swap(_tmpAssignment); // Restore the assignment that was current before the switch.
+  }
+  else {
+    ASMJIT_PROPAGATE(_pass->setBlockEntryAssignment(target, block(), _curAssignment)); // No entry assignment yet: use ours.
+  }
+
+  return kErrorOk;
+}
+
+Error RALocalAllocator::allocJumpTable(InstNode* node, const RABlocks& targets, RABlock* cont) noexcept {
+  // TODO: Do we really need to use `cont`? It is ignored for now.
+  DebugUtils::unused(cont);
+
+  if (targets.empty()) // A jump table without targets is reported as an invalid state.
+    return DebugUtils::errored(kErrorInvalidState);
+
+  // The cursor must point to the previous instruction for a possible instruction insertion.
+  _cc->_setCursor(node->prev());
+
+  // All `targets` should have the same sharedAssignmentId, we just read the first.
+  RABlock* anyTarget = targets[0];
+  if (!anyTarget->hasSharedAssignmentId()) // Targets without a shared assignment id are an invalid state.
+    return DebugUtils::errored(kErrorInvalidState);
+
+  RASharedAssignment& sharedAssignment = _pass->_sharedAssignments[anyTarget->sharedAssignmentId()];
+
+  ASMJIT_PROPAGATE(allocInst(node)); // Allocate the registers used by the jump instruction itself.
+
+  if (!sharedAssignment.empty()) { // A shared assignment already exists, so switch to it exactly (no try-mode).
+    ASMJIT_PROPAGATE(switchToAssignment(
+      sharedAssignment.physToWorkMap(),
+      sharedAssignment.liveIn(),
+      true,  // Read-only.
+      false  // Try-mode.
+    ));
+  }
+
+  ASMJIT_PROPAGATE(spillRegsBeforeEntry(anyTarget));
+
+  if (sharedAssignment.empty()) { // No shared assignment yet: the current assignment becomes the entry assignment.
+    ASMJIT_PROPAGATE(_pass->setBlockEntryAssignment(anyTarget, block(), _curAssignment));
+  }
+
+  return kErrorOk;
+}
+
+// RALocalAllocator - Decision Making
+// ==================================
+
+uint32_t RALocalAllocator::decideOnAssignment(RegGroup group, uint32_t workId, uint32_t physId, RegMask allocableRegs) const noexcept {
+  ASMJIT_ASSERT(allocableRegs != 0); // The caller must offer at least one candidate register.
+  DebugUtils::unused(group, physId); // Neither parameter takes part in the decision at the moment.
+
+  RAWorkReg* workReg = workRegById(workId);
+
+  // Prefer home register id, if possible (i.e. when it is one of `allocableRegs`).
+  if (workReg->hasHomeRegId()) {
+    uint32_t homeId = workReg->homeRegId();
+    if (Support::bitTest(allocableRegs, homeId))
+      return homeId;
+  }
+
+  // Prefer registers used upon block entries (NOTE(review): assumes `allocatedMask()` records those - confirm).
+  RegMask previouslyAssignedRegs = workReg->allocatedMask();
+  if (allocableRegs & previouslyAssignedRegs) // Narrow the candidates only if that keeps at least one of them.
+    allocableRegs &= previouslyAssignedRegs;
+
+  return Support::ctz(allocableRegs); // Lowest-numbered remaining candidate.
+}
+
+uint32_t RALocalAllocator::decideOnReassignment(RegGroup group, uint32_t workId, uint32_t physId, RegMask allocableRegs) const noexcept {
+  ASMJIT_ASSERT(allocableRegs != 0); // The caller must offer at least one candidate register.
+  DebugUtils::unused(group, physId); // Neither parameter takes part in the decision at the moment.
+
+  RAWorkReg* workReg = workRegById(workId);
+
+  // Prefer allocating back to HomeId, if possible (i.e. when it is one of `allocableRegs`).
+  if (workReg->hasHomeRegId()) {
+    if (Support::bitTest(allocableRegs, workReg->homeRegId()))
+      return workReg->homeRegId();
+  }
+
+  // TODO: [Register Allocator] This could be improved - a MOVE is only chosen when the home register is free.
+
+  // Decided to SPILL, which is signalled to the caller by returning `kPhysNone`.
+  return RAAssignment::kPhysNone;
+}
+
+uint32_t RALocalAllocator::decideOnSpillFor(RegGroup group, uint32_t workId, RegMask spillableRegs, uint32_t* spillWorkId) const noexcept {
+  // May be used in the future to decide which register would be best to spill so `workId` can be assigned.
+  DebugUtils::unused(workId);
+  ASMJIT_ASSERT(spillableRegs != 0); // `it.next()` below is called unconditionally, so the mask must not be empty.
+
+  Support::BitWordIterator<RegMask> it(spillableRegs);
+  uint32_t bestPhysId = it.next(); // The first candidate is the initial best choice.
+  uint32_t bestWorkId = _curAssignment.physToWorkId(group, bestPhysId);
+
+  // Avoid calculating the cost model if there is only one spillable register.
+  if (it.hasNext()) {
+    uint32_t bestCost = calculateSpillCost(group, bestWorkId, bestPhysId);
+    do {
+      uint32_t localPhysId = it.next();
+      uint32_t localWorkId = _curAssignment.physToWorkId(group, localPhysId);
+      uint32_t localCost = calculateSpillCost(group, localWorkId, localPhysId);
+
+      if (localCost < bestCost) { // Strict comparison: on equal cost the earlier candidate is kept.
+        bestCost = localCost;
+        bestPhysId = localPhysId;
+        bestWorkId = localWorkId;
+      }
+    } while (it.hasNext());
+  }
+
+  *spillWorkId = bestWorkId; // Output: the work register currently held by the returned physical register.
+  return bestPhysId;
+}
+
+ASMJIT_END_NAMESPACE
+
+#endif // !ASMJIT_NO_COMPILER
diff --git a/lib/lepton/asmjit/core/ralocal_p.h b/lib/lepton/asmjit/core/ralocal_p.h
new file mode 100644
index 0000000000..b40e867427
--- /dev/null
+++ b/lib/lepton/asmjit/core/ralocal_p.h
@@ -0,0 +1,254 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_RALOCAL_P_H_INCLUDED
+#define ASMJIT_CORE_RALOCAL_P_H_INCLUDED
+
+#include "../core/api-config.h"
+#ifndef ASMJIT_NO_COMPILER
+
+#include "../core/raassignment_p.h"
+#include "../core/radefs_p.h"
+#include "../core/rapass_p.h"
+#include "../core/support.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \cond INTERNAL
+//! \addtogroup asmjit_ra
+//! \{
+
+//! Local register allocator.
+class RALocalAllocator {
+public:
+  ASMJIT_NONCOPYABLE(RALocalAllocator)
+
+  typedef RAAssignment::PhysToWorkMap PhysToWorkMap;
+  typedef RAAssignment::WorkToPhysMap WorkToPhysMap;
+
+  //! Link to `BaseRAPass`.
+  BaseRAPass* _pass;
+  //! Link to `BaseCompiler`.
+  BaseCompiler* _cc;
+
+  //! Architecture traits.
+  const ArchTraits* _archTraits;
+  //! Registers available to the allocator.
+  RARegMask _availableRegs;
+  //! Registers clobbered by the allocator.
+  RARegMask _clobberedRegs;
+
+  //! Register assignment (current).
+  RAAssignment _curAssignment;
+  //! Register assignment used temporarily during assignment switches.
+  RAAssignment _tmpAssignment;
+
+  //! Link to the current `RABlock`.
+  RABlock* _block;
+  //! The currently processed `InstNode`.
+  InstNode* _node;
+  //! RA data of the currently processed instruction.
+  RAInst* _raInst;
+
+  //! Count of all TiedReg's.
+  uint32_t _tiedTotal;
+  //! Count of TiedReg's per register group.
+  RARegCount _tiedCount;
+
+  //! Temporary workToPhysMap that can be used freely by the allocator (null until something assigns it).
+  WorkToPhysMap* _tmpWorkToPhysMap = nullptr;
+
+  //! \name Construction & Destruction
+  //! \{
+
+  inline RALocalAllocator(BaseRAPass* pass) noexcept // Copies links and available registers from `pass`.
+    : _pass(pass),
+      _cc(pass->cc()),
+      _archTraits(pass->_archTraits),
+      _availableRegs(pass->_availableRegs),
+      _clobberedRegs(),
+      _curAssignment(),
+      _block(nullptr),
+      _node(nullptr),
+      _raInst(nullptr),
+      _tiedTotal(),
+      _tiedCount() {}
+
+  Error init() noexcept;
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  inline RAWorkReg* workRegById(uint32_t workId) const noexcept { return _pass->workRegById(workId); }
+  inline PhysToWorkMap* physToWorkMap() const noexcept { return _curAssignment.physToWorkMap(); }
+  inline WorkToPhysMap* workToPhysMap() const noexcept { return _curAssignment.workToPhysMap(); }
+
+  //! Returns the currently processed block.
+  inline RABlock* block() const noexcept { return _block; }
+  //! Sets the currently processed block.
+  inline void setBlock(RABlock* block) noexcept { _block = block; }
+
+  //! Returns the currently processed `InstNode`.
+  inline InstNode* node() const noexcept { return _node; }
+  //! Returns the currently processed `RAInst`.
+  inline RAInst* raInst() const noexcept { return _raInst; }
+
+  //! Returns all tied regs as `RATiedReg` array.
+  inline RATiedReg* tiedRegs() const noexcept { return _raInst->tiedRegs(); }
+  //! Returns tied registers grouped by the given `group`.
+  inline RATiedReg* tiedRegs(RegGroup group) const noexcept { return _raInst->tiedRegs(group); }
+
+  //! Returns count of all TiedRegs used by the instruction.
+  inline uint32_t tiedCount() const noexcept { return _tiedTotal; }
+  //! Returns count of TiedRegs used by the given register `group`.
+  inline uint32_t tiedCount(RegGroup group) const noexcept { return _tiedCount.get(group); }
+
+  inline bool isGroupUsed(RegGroup group) const noexcept { return _tiedCount[group] != 0; } // Any TiedReg in `group`?
+
+  //! \}
+
+  //! \name Assignment
+  //! \{
+
+  Error makeInitialAssignment() noexcept;
+
+  Error replaceAssignment(const PhysToWorkMap* physToWorkMap) noexcept;
+
+  //! Switch to the given assignment by reassigning all registers and emitting code that reassigns them.
+  //! This is always used to switch to a previously stored assignment.
+  //!
+  //! If `tryMode` is true then the final assignment doesn't have to be exactly same as specified by `dstPhysToWorkMap`
+  //! and `dstWorkToPhysMap`. This mode is only used before conditional jumps that already have assignment to generate
+  //! a code sequence that is always executed regardless of the flow.
+  Error switchToAssignment(PhysToWorkMap* dstPhysToWorkMap, const ZoneBitVector& liveIn, bool dstReadOnly, bool tryMode) noexcept;
+
+  inline Error spillRegsBeforeEntry(RABlock* block) noexcept { // Forwards the block's entry scratch GP registers.
+    return spillScratchGpRegsBeforeEntry(block->entryScratchGpRegs());
+  }
+
+  Error spillScratchGpRegsBeforeEntry(uint32_t scratchRegs) noexcept;
+
+  //! \}
+
+  //! \name Allocation
+  //! \{
+
+  Error allocInst(InstNode* node) noexcept;
+  Error spillAfterAllocation(InstNode* node) noexcept;
+
+  Error allocBranch(InstNode* node, RABlock* target, RABlock* cont) noexcept;
+  Error allocJumpTable(InstNode* node, const RABlocks& targets, RABlock* cont) noexcept;
+
+  //! \}
+
+  //! \name Decision Making
+  //! \{
+
+  enum CostModel : uint32_t { // Weights used by `calculateSpillCost()`.
+    kCostOfFrequency = 1048576,
+    kCostOfDirtyFlag = kCostOfFrequency / 4
+  };
+
+  inline uint32_t costByFrequency(float freq) const noexcept { // Scales `freq` by `kCostOfFrequency`, as an integer.
+    return uint32_t(int32_t(freq * float(kCostOfFrequency)));
+  }
+
+  inline uint32_t calculateSpillCost(RegGroup group, uint32_t workId, uint32_t assignedId) const noexcept {
+    RAWorkReg* workReg = workRegById(workId);
+    uint32_t cost = costByFrequency(workReg->liveStats().freq());
+
+    if (_curAssignment.isPhysDirty(group, assignedId)) // A dirty register costs extra to spill.
+      cost += kCostOfDirtyFlag;
+
+    return cost;
+  }
+
+  //! Decides on register assignment.
+  uint32_t decideOnAssignment(RegGroup group, uint32_t workId, uint32_t assignedId, RegMask allocableRegs) const noexcept;
+
+  //! Decides on whether to MOVE or SPILL the given WorkReg, because it's allocated in a physical register that has
+  //! to be used by another WorkReg.
+  //!
+  //! The function must return either `RAAssignment::kPhysNone`, which means that the WorkReg of `workId` should be
+  //! spilled, or a valid physical register ID, which means that the register should be moved to that physical register
+  //! instead.
+  uint32_t decideOnReassignment(RegGroup group, uint32_t workId, uint32_t assignedId, RegMask allocableRegs) const noexcept;
+
+  //! Decides on the best spill given a register mask `spillableRegs`.
+  uint32_t decideOnSpillFor(RegGroup group, uint32_t workId, RegMask spillableRegs, uint32_t* spillWorkId) const noexcept;
+
+  //! \}
+
+  //! \name Emit
+  //! \{
+
+  //! Emits a move between a destination and source register, and fixes the
+  //! register assignment.
+  inline Error onMoveReg(RegGroup group, uint32_t workId, uint32_t dstPhysId, uint32_t srcPhysId) noexcept {
+    if (dstPhysId == srcPhysId) return kErrorOk; // Moving a register onto itself is a no-op.
+    _curAssignment.reassign(group, workId, dstPhysId, srcPhysId);
+    return _pass->emitMove(workId, dstPhysId, srcPhysId);
+  }
+
+  //! Emits a swap between two physical registers and fixes their assignment.
+  //!
+  //! \note Target must support this operation otherwise this would ASSERT.
+  inline Error onSwapReg(RegGroup group, uint32_t aWorkId, uint32_t aPhysId, uint32_t bWorkId, uint32_t bPhysId) noexcept {
+    _curAssignment.swap(group, aWorkId, aPhysId, bWorkId, bPhysId);
+    return _pass->emitSwap(aWorkId, aPhysId, bWorkId, bPhysId);
+  }
+
+  //! Emits a load from [VirtReg/WorkReg]'s spill slot to a physical register
+  //! and makes it assigned and clean.
+  inline Error onLoadReg(RegGroup group, uint32_t workId, uint32_t physId) noexcept {
+    _curAssignment.assign(group, workId, physId, RAAssignment::kClean);
+    return _pass->emitLoad(workId, physId);
+  }
+
+  //! Emits a save of a physical register to a [VirtReg/WorkReg]'s spill slot,
+  //! keeps it assigned, and makes it clean.
+  inline Error onSaveReg(RegGroup group, uint32_t workId, uint32_t physId) noexcept {
+    ASMJIT_ASSERT(_curAssignment.workToPhysId(group, workId) == physId);
+    ASMJIT_ASSERT(_curAssignment.physToWorkId(group, physId) == workId);
+
+    _curAssignment.makeClean(group, workId, physId);
+    return _pass->emitSave(workId, physId);
+  }
+
+  //! Assigns a register, the content of it is undefined at this point.
+  inline Error onAssignReg(RegGroup group, uint32_t workId, uint32_t physId, bool dirty) noexcept {
+    _curAssignment.assign(group, workId, physId, dirty);
+    return kErrorOk;
+  }
+
+  //! Spills a variable/register, saves the content to the memory-home if modified.
+  inline Error onSpillReg(RegGroup group, uint32_t workId, uint32_t physId) noexcept {
+    if (_curAssignment.isPhysDirty(group, physId)) // Only a dirty register needs to be saved first.
+      ASMJIT_PROPAGATE(onSaveReg(group, workId, physId));
+    return onKillReg(group, workId, physId);
+  }
+
+  inline Error onDirtyReg(RegGroup group, uint32_t workId, uint32_t physId) noexcept { // Marks dirty; emits no code.
+    _curAssignment.makeDirty(group, workId, physId);
+    return kErrorOk;
+  }
+
+  inline Error onKillReg(RegGroup group, uint32_t workId, uint32_t physId) noexcept { // Unassigns; emits no code.
+    _curAssignment.unassign(group, workId, physId);
+    return kErrorOk;
+  }
+
+  //! \}
+};
+
+//! \}
+//! \endcond
+
+ASMJIT_END_NAMESPACE
+
+#endif // !ASMJIT_NO_COMPILER
+#endif // ASMJIT_CORE_RALOCAL_P_H_INCLUDED
diff --git a/lib/lepton/asmjit/core/rapass.cpp b/lib/lepton/asmjit/core/rapass.cpp
new file mode 100644
index 0000000000..0e87ab6a25
--- /dev/null
+++ b/lib/lepton/asmjit/core/rapass.cpp
@@ -0,0 +1,1969 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#ifndef ASMJIT_NO_COMPILER
+
+#include "../core/formatter.h"
+#include "../core/ralocal_p.h"
+#include "../core/rapass_p.h"
+#include "../core/support.h"
+#include "../core/type.h"
+#include "../core/zonestack.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+// RABlock - Control Flow
+// ======================
+
+Error RABlock::appendSuccessor(RABlock* successor) noexcept {
+  RABlock* predecessor = this;
+  // Adds the edge `this -> successor` at the end of both edge lists; an already existing edge is a no-op.
+  if (predecessor->hasSuccessor(successor))
+    return kErrorOk;
+  // Make room in both vectors first so that neither list is modified if growing one of them fails.
+  ASMJIT_PROPAGATE(successor->_predecessors.willGrow(allocator()));
+  ASMJIT_PROPAGATE(predecessor->_successors.willGrow(allocator()));
+
+  predecessor->_successors.appendUnsafe(successor);
+  successor->_predecessors.appendUnsafe(predecessor);
+
+  return kErrorOk;
+}
+
+Error RABlock::prependSuccessor(RABlock* successor) noexcept {
+  RABlock* predecessor = this;
+  // Adds the edge `this -> successor` at the front of both edge lists; an already existing edge is a no-op.
+  if (predecessor->hasSuccessor(successor))
+    return kErrorOk;
+  // Make room in both vectors first so that neither list is modified if growing one of them fails.
+  ASMJIT_PROPAGATE(successor->_predecessors.willGrow(allocator()));
+  ASMJIT_PROPAGATE(predecessor->_successors.willGrow(allocator()));
+
+  predecessor->_successors.prependUnsafe(successor);
+  successor->_predecessors.prependUnsafe(predecessor);
+
+  return kErrorOk;
+}
+
+// BaseRAPass - Construction & Destruction
+// =======================================
+
+BaseRAPass::BaseRAPass() noexcept : FuncPass("BaseRAPass") {} // Hands the pass name to the FuncPass base.
+BaseRAPass::~BaseRAPass() noexcept {} // Empty body - no explicit cleanup is done here.
+
+// BaseRAPass - RunOnFunction
+// ==========================
+
+static void BaseRAPass_reset(BaseRAPass* self, FuncDetail* funcDetail) noexcept {
+  ZoneAllocator* allocator = self->allocator();
+  // Returns all per-function state of `self` to its empty state; `funcDetail` is null when called after a run.
+  self->_blocks.reset();
+  self->_exits.reset();
+  self->_pov.reset();
+  self->_workRegs.reset();
+  self->_instructionCount = 0;
+  self->_createdBlockCount = 0;
+
+  self->_sharedAssignments.reset();
+  self->_lastTimestamp = 0;
+  // Architecture-dependent data; it is set up again after this reset (see `onInit()` in `runOnFunction()`).
+  self->_archTraits = nullptr;
+  self->_physRegIndex.reset();
+  self->_physRegCount.reset();
+  self->_physRegTotal = 0;
+  self->_scratchRegIndexes.fill(BaseReg::kIdBad);
+
+  self->_availableRegs.reset();
+  self->_availableRegCount.reset();
+  self->_clobberedRegs.reset();
+  // NOTE(review): `_workRegs` was already reset above - this second reset looks redundant.
+  self->_workRegs.reset();
+  self->_workRegsOfGroup.forEach([](RAWorkRegs& regs) { regs.reset(); });
+  self->_strategy.forEach([](RAStrategy& strategy) { strategy.reset(); });
+  self->_globalLiveSpans.fill(nullptr);
+  self->_globalMaxLiveCount.reset();
+  self->_temporaryMem.reset();
+  // Stack and argument bookkeeping are re-bound to the current allocator and `funcDetail`.
+  self->_stackAllocator.reset(allocator);
+  self->_argsAssignment.reset(funcDetail);
+  self->_numStackArgsToStackSlots = 0;
+  self->_maxWorkRegNameSize = 0;
+}
+
+static void BaseRAPass_resetVirtRegData(BaseRAPass* self) noexcept {
+  for (RAWorkReg* wReg : self->_workRegs) {
+    VirtReg* vReg = wReg->virtReg();
+    // For every work register: copy the stack offset back to its VirtReg, then detach the two.
+    // Update the information regarding the stack of the virtual register.
+    if (wReg->hasStackSlot()) {
+      RAStackSlot* slot = wReg->stackSlot();
+      vReg->assignStackSlot(slot->offset());
+    }
+
+    // Reset work reg association so it cannot be used by accident (RAWorkReg data will be destroyed).
+    vReg->_workReg = nullptr;
+  }
+}
+
+Error BaseRAPass::runOnFunction(Zone* zone, Logger* logger, FuncNode* func) {
+  _allocator.reset(zone);
+  // Runs all register allocation steps on `func`; `zone` backs the temporary data and is reset before returning.
+#ifndef ASMJIT_NO_LOGGING
+  _logger = logger;
+  _formatOptions.reset();
+  _diagnosticOptions = DiagnosticOptions::kNone;
+  // Format and diagnostic options are only picked up when a logger is attached.
+  if (logger) {
+    _formatOptions = logger->options();
+    _diagnosticOptions = _cb->diagnosticOptions();
+  }
+#else
+  DebugUtils::unused(logger);
+#endif
+
+  // Initialize all core structures to use `zone` and `func`.
+  BaseNode* end = func->endNode();
+  _func = func;
+  _stop = end->next();
+  _extraBlock = end;
+
+  BaseRAPass_reset(this, &_func->_funcDetail);
+
+  // Initialize architecture-specific members.
+  onInit();
+
+  // Perform all allocation steps required.
+  Error err = onPerformAllSteps();
+
+  // Must be called regardless of the allocation status.
+  onDone();
+
+  // Reset possible connections introduced by the register allocator.
+  BaseRAPass_resetVirtRegData(this);
+
+  // Reset all core structures and everything that depends on the passed `Zone`.
+  BaseRAPass_reset(this, nullptr);
+  _allocator.reset(nullptr);
+
+#ifndef ASMJIT_NO_LOGGING
+  _logger = nullptr;
+  _formatOptions.reset();
+  _diagnosticOptions = DiagnosticOptions::kNone;
+#endif
+
+  _func = nullptr;
+  _stop = nullptr;
+  _extraBlock = nullptr;
+
+  // Reset `Zone` as nothing should persist between `runOnFunction()` calls.
+  zone->reset();
+
+  // We alter the compiler cursor, because it doesn't make sense to reference it after the compilation - some
+  // nodes may disappear, so the old cursor may no longer be valid anyway.
+  cc()->_setCursor(cc()->lastNode());
+  // The status of the allocation steps is reported only after all of the cleanup above has run.
+  return err;
+}
+
+Error BaseRAPass::onPerformAllSteps() noexcept {
+  ASMJIT_PROPAGATE(buildCFG());
+  ASMJIT_PROPAGATE(buildCFGViews());
+  ASMJIT_PROPAGATE(removeUnreachableCode());
+  // The steps run in this fixed order; the first one that fails ends the sequence and its error is returned.
+  ASMJIT_PROPAGATE(buildCFGDominators());
+  ASMJIT_PROPAGATE(buildLiveness());
+  ASMJIT_PROPAGATE(assignArgIndexToWorkRegs());
+
+#ifndef ASMJIT_NO_LOGGING
+  if (hasDiagnosticOption(DiagnosticOptions::kRAAnnotate))
+    ASMJIT_PROPAGATE(annotateCode());
+#endif
+  // Allocation itself: the global allocator runs first, then the local one.
+  ASMJIT_PROPAGATE(runGlobalAllocator());
+  ASMJIT_PROPAGATE(runLocalAllocator());
+
+  ASMJIT_PROPAGATE(updateStackFrame());
+  ASMJIT_PROPAGATE(insertPrologEpilog());
+
+  ASMJIT_PROPAGATE(rewrite());
+
+  return kErrorOk;
+}
+
+// BaseRAPass - CFG - Basic Block Management
+// =========================================
+
+RABlock* BaseRAPass::newBlock(BaseNode* initialNode) noexcept {
+  RABlock* block = zone()->newT<RABlock>(this);
+  if (ASMJIT_UNLIKELY(!block))
+    return nullptr;
+  // The new block spans exactly `initialNode`; it is only counted here, not added to `_blocks` (see `addBlock()`).
+  block->setFirst(initialNode);
+  block->setLast(initialNode);
+
+  _createdBlockCount++;
+  return block;
+}
+
+RABlock* BaseRAPass::newBlockOrExistingAt(LabelNode* cbLabel, BaseNode** stoppedAt) noexcept {
+  if (cbLabel->hasPassData())
+    return cbLabel->passData<RABlock>();
+  // No block is associated with `cbLabel` yet - look backwards for one to share, otherwise create a new one.
+  FuncNode* func = this->func();
+  BaseNode* node = cbLabel->prev();
+  RABlock* block = nullptr;
+
+  // Try to find some label, but terminate the loop on any code. We try hard to coalesce code that contains two
+  // consecutive labels or a combination of non-code nodes between 2 or more labels.
+  //
+  // Possible cases that would share the same basic block:
+  //
+  //   1. Two or more consecutive labels:
+  //     Label1:
+  //     Label2:
+  //
+  //   2. Two or more labels separated by non-code nodes:
+  //     Label1:
+  //     ; Some comment...
+  //     .align 16
+  //     Label2:
+  size_t nPendingLabels = 0;
+  // Counts the labels seen on the way back that have no block yet; they get one assigned further below.
+  while (node) {
+    if (node->type() == NodeType::kLabel) {
+      // Function has a different NodeType, just make sure this was not messed up as we must never associate
+      // BasicBlock with a `func` itself.
+      ASMJIT_ASSERT(node != func);
+
+      block = node->passData<RABlock>();
+      if (block) {
+        // Exit node has always a block associated with it. If we went here it means that `cbLabel` passed here
+        // is after the end of the function and cannot be merged with the function exit block.
+        if (node == func->exitNode())
+          block = nullptr;
+        break;
+      }
+
+      nPendingLabels++;
+    }
+    else if (node->type() == NodeType::kAlign) {
+      // Align node is fine.
+    }
+    else {
+      break;
+    }
+
+    node = node->prev();
+  }
+  // Optionally report the node at which the backward scan stopped (null if it ran off the node list).
+  if (stoppedAt)
+    *stoppedAt = node;
+
+  if (!block) {
+    block = newBlock();
+    if (ASMJIT_UNLIKELY(!block))
+      return nullptr;
+  }
+
+  cbLabel->setPassData<RABlock>(block);
+  node = cbLabel;
+  // Walk back again and attach the same block to every pending label counted by the scan above.
+  while (nPendingLabels) {
+    node = node->prev();
+    for (;;) {
+      if (node->type() == NodeType::kLabel) {
+        node->setPassData<RABlock>(block);
+        nPendingLabels--;
+        break;
+      }
+
+      node = node->prev();
+      ASMJIT_ASSERT(node != nullptr);
+    }
+  }
+  // A block without a first node gets its range here: from the earliest label attached above to `cbLabel`.
+  if (!block->first()) {
+    block->setFirst(node);
+    block->setLast(cbLabel);
+  }
+
+  return block;
+}
+
+Error BaseRAPass::addBlock(RABlock* block) noexcept {
+  ASMJIT_PROPAGATE(_blocks.willGrow(allocator()));
+  // The block id is the block count before the append, taken only after `_blocks` is known to have room.
+  block->_blockId = blockCount();
+  _blocks.appendUnsafe(block);
+  return kErrorOk;
+}
+
+// BaseRAPass - CFG - Build
+// ========================
+
+Error BaseRAPass::initSharedAssignments(const ZoneVector<uint32_t>& sharedAssignmentsMap) noexcept {
+  if (sharedAssignmentsMap.empty())
+    return kErrorOk;
+  // Pass 1: remap each block's shared assignment id through the map and find the highest remapped id (+1).
+  uint32_t count = 0;
+  for (RABlock* block : _blocks) {
+    if (block->hasSharedAssignmentId()) {
+      uint32_t sharedAssignmentId = sharedAssignmentsMap[block->sharedAssignmentId()];
+      block->setSharedAssignmentId(sharedAssignmentId);
+      count = Support::max(count, sharedAssignmentId + 1);
+    }
+  }
+  // `_sharedAssignments` is indexed by the remapped ids, so it needs `count` entries.
+  ASMJIT_PROPAGATE(_sharedAssignments.resize(allocator(), count));
+
+  // Aggregate all entry scratch GP regs from blocks of the same assignment to the assignment itself. It will then be
+  // used instead of RABlock's own scratch regs mask, as shared assignments have precedence.
+  for (RABlock* block : _blocks) {
+    if (block->hasJumpTable()) {
+      const RABlocks& successors = block->successors();
+      if (!successors.empty()) {
+        RABlock* firstSuccessor = successors[0];
+        // NOTE: Shared assignments connect all possible successors so we only need the first to propagate exit scratch
+        // GP registers.
+        ASMJIT_ASSERT(firstSuccessor->hasSharedAssignmentId());
+        RASharedAssignment& sa = _sharedAssignments[firstSuccessor->sharedAssignmentId()];
+        sa.addEntryScratchGpRegs(block->exitScratchGpRegs());
+      }
+    }
+    if (block->hasSharedAssignmentId()) {
+      RASharedAssignment& sa = _sharedAssignments[block->sharedAssignmentId()];
+      sa.addEntryScratchGpRegs(block->_entryScratchGpRegs);
+    }
+  }
+
+  return kErrorOk;
+}
+
+// BaseRAPass - CFG - Views Order
+// ==============================
+
+class RABlockVisitItem { // Stack entry of the block walk in `buildCFGViews()`: a block plus the successor index to resume at.
+public:
+  inline RABlockVisitItem(RABlock* block, uint32_t index) noexcept
+    : _block(block),
+      _index(index) {}
+
+  inline RABlockVisitItem(const RABlockVisitItem& other) noexcept
+    : _block(other._block),
+      _index(other._index) {}
+
+  inline RABlockVisitItem& operator=(const RABlockVisitItem& other) noexcept = default;
+
+  inline RABlock* block() const noexcept { return _block; }
+  inline uint32_t index() const noexcept { return _index; }
+
+  RABlock* _block; // Block to return to.
+  uint32_t _index; // Index of the next successor of `_block` to look at.
+};
+
+Error BaseRAPass::buildCFGViews() noexcept {
+#ifndef ASMJIT_NO_LOGGING
+  Logger* logger = getLoggerIf(DiagnosticOptions::kRADebugCFG);
+  ASMJIT_RA_LOG_FORMAT("[BuildCFGViews]\n");
+#endif
+  // Fills `_pov` by an iterative depth-first walk from `_blocks[0]`; a block is appended once its successors are done.
+  uint32_t count = blockCount();
+  if (ASMJIT_UNLIKELY(!count)) return kErrorOk;
+  // Reserve room for all blocks up front so they can be appended below without further capacity checks.
+  ASMJIT_PROPAGATE(_pov.reserve(allocator(), count));
+
+  ZoneStack<RABlockVisitItem> stack;
+  ASMJIT_PROPAGATE(stack.init(allocator()));
+
+  ZoneBitVector visited;
+  ASMJIT_PROPAGATE(visited.resize(allocator(), count));
+  // NOTE(review): `_blocks[0]` itself is never marked visited - presumably it has no predecessors; confirm.
+  RABlock* current = _blocks[0];
+  uint32_t i = 0;
+
+  for (;;) {
+    for (;;) {
+      if (i >= current->successors().size())
+        break;
+
+      // Skip if already visited.
+      RABlock* child = current->successors()[i++];
+      if (visited.bitAt(child->blockId()))
+        continue;
+
+      // Mark as visited to prevent visiting the same block multiple times.
+      visited.setBit(child->blockId(), true);
+
+      // Add the current block on the stack, we will get back to it later.
+      ASMJIT_PROPAGATE(stack.append(RABlockVisitItem(current, i)));
+      current = child;
+      i = 0;
+    }
+    // All successors of `current` have been handled - it is reachable and gets the next post-order index.
+    current->makeReachable();
+    current->_povOrder = _pov.size();
+    _pov.appendUnsafe(current);
+
+    if (stack.empty())
+      break;
+    // Resume the parent block at the successor index where it was left.
+    RABlockVisitItem top = stack.pop();
+    current = top.block();
+    i = top.index();
+  }
+
+  ASMJIT_RA_LOG_COMPLEX({
+    StringTmp<1024> sb;
+    for (RABlock* block : blocks()) {
+      sb.clear();
+      if (block->hasSuccessors()) {
+        sb.appendFormat("  #%u -> {", block->blockId());
+        _dumpBlockIds(sb, block->successors());
+        sb.append("}\n");
+      }
+      else {
+        sb.appendFormat("  #%u -> {Exit}\n", block->blockId());
+      }
+      logger->log(sb);
+    }
+  });
+
+  visited.release(allocator());
+  return kErrorOk;
+}
+
+// BaseRAPass - CFG - Dominators
+// =============================
+
+static ASMJIT_FORCE_INLINE RABlock* intersectBlocks(RABlock* b1, RABlock* b2) noexcept {
+  while (b1 != b2) { // Climb the iDom chain of whichever block has the lower post-order index until both meet.
+    while (b2->povOrder() > b1->povOrder()) b1 = b1->iDom();
+    while (b1->povOrder() > b2->povOrder()) b2 = b2->iDom();
+  }
+  return b1;
+}
+
+// Based on "A Simple, Fast Dominance Algorithm".
+Error BaseRAPass::buildCFGDominators() noexcept {
+#ifndef ASMJIT_NO_LOGGING
+  Logger* logger = getLoggerIf(DiagnosticOptions::kRADebugCFG);
+  ASMJIT_RA_LOG_FORMAT("[BuildCFGDominators]\n");
+#endif
+  // Computes the immediate dominator (iDom) of every block in `_pov` by iterating until nothing changes.
+  if (_blocks.empty())
+    return kErrorOk;
+  // The entry block is its own immediate dominator; the walks over iDom chains stop there.
+  RABlock* entryBlock = this->entryBlock();
+  entryBlock->setIDom(entryBlock);
+
+  bool changed = true;
+  uint32_t nIters = 0;
+
+  while (changed) {
+    nIters++;
+    changed = false;
+    // `_pov` is walked from its last element to its first, i.e. in reverse post-order.
+    uint32_t i = _pov.size();
+    while (i) {
+      RABlock* block = _pov[--i];
+      if (block == entryBlock)
+        continue;
+
+      RABlock* iDom = nullptr;
+      const RABlocks& preds = block->predecessors();
+      // Intersect all predecessors that already have an iDom; the others are skipped for this iteration.
+      uint32_t j = preds.size();
+      while (j) {
+        RABlock* p = preds[--j];
+        if (!p->iDom())
+          continue;
+        iDom = !iDom ? p : intersectBlocks(iDom, p);
+      }
+
+      if (block->iDom() != iDom) {
+        ASMJIT_ASSUME(iDom != nullptr);
+        ASMJIT_RA_LOG_FORMAT("  IDom of #%u -> #%u\n", block->blockId(), iDom->blockId());
+        block->setIDom(iDom);
+        changed = true;
+      }
+    }
+  }
+
+  ASMJIT_RA_LOG_FORMAT("  Done (%u iterations)\n", nIters);
+  return kErrorOk;
+}
+
+bool BaseRAPass::_strictlyDominates(const RABlock* a, const RABlock* b) const noexcept {
+  ASMJIT_ASSERT(a != nullptr); // There must be at least one block if this function is
+  ASMJIT_ASSERT(b != nullptr); // called, as both `a` and `b` must be valid blocks.
+  ASMJIT_ASSERT(a != b);       // Checked by `dominates()` and `strictlyDominates()`.
+  // NOTE(review): the check below returns false when `a` IS the entry block, which differs from the comment - confirm.
+  // Nothing strictly dominates the entry block.
+  const RABlock* entryBlock = this->entryBlock();
+  if (a == entryBlock)
+    return false;
+  // Follow the iDom chain of `b` upwards until it reaches either `a` or the entry block.
+  const RABlock* iDom = b->iDom();
+  while (iDom != a && iDom != entryBlock)
+    iDom = iDom->iDom();
+  // `a` is not the entry block here, so stopping anywhere but at the entry block means `a` was reached.
+  return iDom != entryBlock;
+}
+
+const RABlock* BaseRAPass::_nearestCommonDominator(const RABlock* a, const RABlock* b) const noexcept {
+  ASMJIT_ASSERT(a != nullptr); // There must be at least one block if this function is
+  ASMJIT_ASSERT(b != nullptr); // called, as both `a` and `b` must be valid blocks.
+  ASMJIT_ASSERT(a != b);       // Checked by `dominates()` and `properlyDominates()`.
+  // Runtime guard for the same condition as the last assertion above.
+  if (a == b)
+    return a;
+
+  // If `a` strictly dominates `b` then `a` is the nearest common dominator.
+  if (_strictlyDominates(a, b))
+    return a;
+
+  // If `b` strictly dominates `a` then `b` is the nearest common dominator.
+  if (_strictlyDominates(b, a))
+    return b;
+  // Otherwise stamp A's iDom chain with a fresh timestamp and return the first stamped block on B's chain.
+  const RABlock* entryBlock = this->entryBlock();
+  uint64_t timestamp = nextTimestamp();
+
+  // Mark all A's dominators.
+  const RABlock* block = a->iDom();
+  while (block != entryBlock) {
+    block->setTimestamp(timestamp);
+    block = block->iDom();
+  }
+
+  // Check all B's dominators against marked dominators of A.
+  block = b->iDom();
+  while (block != entryBlock) {
+    if (block->hasTimestamp(timestamp))
+      return block;
+    block = block->iDom();
+  }
+  // Neither loop visits the entry block itself - it is the result when nothing else matched.
+  return entryBlock;
+}
+
+// BaseRAPass - CFG - Utilities
+// ============================
+
+Error BaseRAPass::removeUnreachableCode() noexcept {
+  uint32_t numAllBlocks = blockCount();
+  uint32_t numReachableBlocks = reachableBlockCount();
+  // Removes code / removable nodes of every block that is not flagged reachable and fixes up its first/last nodes.
+  // All reachable -> nothing to do.
+  if (numAllBlocks == numReachableBlocks)
+    return kErrorOk;
+
+#ifndef ASMJIT_NO_LOGGING
+  StringTmp<256> sb;
+  Logger* logger = getLoggerIf(DiagnosticOptions::kRADebugUnreachable);
+  ASMJIT_RA_LOG_FORMAT("[RemoveUnreachableCode - detected %u of %u unreachable blocks]\n", numAllBlocks - numReachableBlocks, numAllBlocks);
+#endif
+
+  for (uint32_t i = 0; i < numAllBlocks; i++) {
+    RABlock* block = _blocks[i];
+    if (block->isReachable())
+      continue;
+
+    ASMJIT_RA_LOG_FORMAT("  Removing code from unreachable block {%u}\n", i);
+    BaseNode* first = block->first();
+    BaseNode* last = block->last();
+    // Remember the neighbours outside of the block - `first` and `last` themselves may get removed below.
+    BaseNode* beforeFirst = first->prev();
+    BaseNode* afterLast = last->next();
+
+    BaseNode* node = first;
+    while (node != afterLast) {
+      BaseNode* next = node->next();
+      // `next` is fetched before a possible removal so the iteration can continue afterwards.
+      if (node->isCode() || node->isRemovable()) {
+#ifndef ASMJIT_NO_LOGGING
+        if (logger) {
+          sb.clear();
+          Formatter::formatNode(sb, _formatOptions, cc(), node);
+          logger->logf("    %s\n", sb.data());
+        }
+#endif
+        cc()->removeNode(node);
+      }
+      node = next;
+    }
+    // If nothing is left between the neighbours the block is empty, otherwise it spans what remained.
+    if (beforeFirst->next() == afterLast) {
+      block->setFirst(nullptr);
+      block->setLast(nullptr);
+    }
+    else {
+      block->setFirst(beforeFirst->next());
+      block->setLast(afterLast->prev());
+    }
+  }
+
+  return kErrorOk;
+}
+
+BaseNode* BaseRAPass::findSuccessorStartingAt(BaseNode* node) noexcept {
+  while (node && (node->isInformative() || node->hasNoEffect())) // Skip informative nodes and nodes without effect.
+    node = node->next();
+  return node; // First node that was not skipped, or null if the list ended (or `node` was null).
+}
+
+bool BaseRAPass::isNextTo(BaseNode* node, BaseNode* target) noexcept {
+  for (;;) { // True if `target` follows `node` with no code or data node in between.
+    node = node->next();
+    if (node == target)
+      return true;
+    // End of the node list reached without finding `target`.
+    if (!node)
+      return false;
+    // Any code or data node before `target` means the two are not adjacent.
+    if (node->isCode() || node->isData())
+      return false;
+  }
+}
+
+// BaseRAPass - Registers - VirtReg / WorkReg Mapping
+// ==================================================
+
+Error BaseRAPass::_asWorkReg(VirtReg* vReg, RAWorkReg** out) noexcept {
+  // Checked by `asWorkReg()` - must be true.
+  ASMJIT_ASSERT(vReg->_workReg == nullptr);
+  // Creates the RAWorkReg for `vReg`, registers it globally and in its group, and returns it through `out`.
+  RegGroup group = vReg->group();
+  ASMJIT_ASSERT(group <= RegGroup::kMaxVirt);
+
+  RAWorkRegs& wRegs = workRegs();
+  RAWorkRegs& wRegsByGroup = workRegs(group);
+  // Make room in both vectors before allocating so the unchecked appends below cannot fail.
+  ASMJIT_PROPAGATE(wRegs.willGrow(allocator()));
+  ASMJIT_PROPAGATE(wRegsByGroup.willGrow(allocator()));
+  // The second constructor argument is the current size of the global vector, i.e. the index it is appended at.
+  RAWorkReg* wReg = zone()->newT<RAWorkReg>(vReg, wRegs.size());
+  if (ASMJIT_UNLIKELY(!wReg))
+    return DebugUtils::errored(kErrorOutOfMemory);
+
+  vReg->setWorkReg(wReg);
+  if (!vReg->isStack())
+    wReg->setRegByteMask(Support::lsbMask<uint64_t>(vReg->virtSize()));
+  wRegs.appendUnsafe(wReg);
+  wRegsByGroup.appendUnsafe(wReg);
+
+  // Only used by RA logging.
+  _maxWorkRegNameSize = Support::max(_maxWorkRegNameSize, vReg->nameSize());
+
+  *out = wReg;
+  return kErrorOk;
+}
+
+RAAssignment::WorkToPhysMap* BaseRAPass::newWorkToPhysMap() noexcept {
+  uint32_t count = workRegCount();
+  size_t size = WorkToPhysMap::sizeOf(count);
+  // Returns a zone-allocated map reset for `count` work registers, or null when the allocation fails.
+  // If no registers are used it could be zero, in that case return a dummy
+  // map instead of NULL.
+  if (ASMJIT_UNLIKELY(!size)) {
+    static const RAAssignment::WorkToPhysMap nullMap = {{ 0 }};
+    return const_cast<RAAssignment::WorkToPhysMap*>(&nullMap);
+  }
+  // NOTE(review): the dummy map above is a shared `static const` object - callers presumably never write to it.
+  WorkToPhysMap* map = zone()->allocT<WorkToPhysMap>(size);
+  if (ASMJIT_UNLIKELY(!map))
+    return nullptr;
+
+  map->reset(count);
+  return map;
+}
+
+RAAssignment::PhysToWorkMap* BaseRAPass::newPhysToWorkMap() noexcept {
+  uint32_t count = physRegTotal();
+  size_t size = PhysToWorkMap::sizeOf(count);
+  // Returns a zone-allocated map reset for `count` physical registers, or null when the allocation fails.
+  PhysToWorkMap* map = zone()->allocT<PhysToWorkMap>(size);
+  if (ASMJIT_UNLIKELY(!map))
+    return nullptr;
+
+  map->reset(count);
+  return map;
+}
+
+// BaseRAPass - Registers - Liveness Analysis and Statistics
+// =========================================================
+
+namespace LiveOps { // Word-wise bit-set helpers used by the liveness analysis in `buildLiveness()`.
+  typedef ZoneBitVector::BitWord BitWord;
+
+  struct In { // Operator computing one word of a block's live-in set: `(out | gen) & ~kill`.
+    static ASMJIT_FORCE_INLINE BitWord op(BitWord dst, BitWord out, BitWord gen, BitWord kill) noexcept {
+      DebugUtils::unused(dst);
+      return (out | gen) & ~kill;
+    }
+  };
+  // `dst[i] = Operator::op(dst[i], a[i])` for `n` words; returns true if any word of `dst` changed.
+  template<typename Operator>
+  static ASMJIT_FORCE_INLINE bool op(BitWord* dst, const BitWord* a, uint32_t n) noexcept {
+    BitWord changed = 0;
+
+    for (uint32_t i = 0; i < n; i++) {
+      BitWord before = dst[i];
+      BitWord after = Operator::op(before, a[i]);
+
+      dst[i] = after;
+      changed |= (before ^ after);
+    }
+
+    return changed != 0;
+  }
+  // Same as above with two source operands: `dst[i] = Operator::op(dst[i], a[i], b[i])`.
+  template<typename Operator>
+  static ASMJIT_FORCE_INLINE bool op(BitWord* dst, const BitWord* a, const BitWord* b, uint32_t n) noexcept {
+    BitWord changed = 0;
+
+    for (uint32_t i = 0; i < n; i++) {
+      BitWord before = dst[i];
+      BitWord after = Operator::op(before, a[i], b[i]);
+
+      dst[i] = after;
+      changed |= (before ^ after);
+    }
+
+    return changed != 0;
+  }
+  // Same as above with three source operands: `dst[i] = Operator::op(dst[i], a[i], b[i], c[i])`.
+  template<typename Operator>
+  static ASMJIT_FORCE_INLINE bool op(BitWord* dst, const BitWord* a, const BitWord* b, const BitWord* c, uint32_t n) noexcept {
+    BitWord changed = 0;
+
+    for (uint32_t i = 0; i < n; i++) {
+      BitWord before = dst[i];
+      BitWord after = Operator::op(before, a[i], b[i], c[i]);
+
+      dst[i] = after;
+      changed |= (before ^ after);
+    }
+
+    return changed != 0;
+  }
+  // Recomputes OUT and then IN of `block`; returns whether IN changed. `initial` forces the IN recalculation.
+  static ASMJIT_FORCE_INLINE bool recalcInOut(RABlock* block, uint32_t numBitWords, bool initial = false) noexcept {
+    bool changed = initial;
+
+    const RABlocks& successors = block->successors();
+    uint32_t numSuccessors = successors.size();
+
+    // Calculate `OUT` based on `IN` of all successors.
+    for (uint32_t i = 0; i < numSuccessors; i++)
+      changed |= op<Support::Or>(block->liveOut().data(), successors[i]->liveIn().data(), numBitWords);
+
+    // Calculate `IN` based on `OUT`, `GEN`, and `KILL` bits.
+    if (changed)
+      changed = op<In>(block->liveIn().data(), block->liveOut().data(), block->gen().data(), block->kill().data(), numBitWords);
+
+    return changed;
+  }
+}
+
+ASMJIT_FAVOR_SPEED Error BaseRAPass::buildLiveness() noexcept {
+#ifndef ASMJIT_NO_LOGGING
+  Logger* logger = getLoggerIf(DiagnosticOptions::kRADebugLiveness);
+  StringTmp<512> sb;
+#endif
+  // Computes GEN/KILL and IN/OUT sets of all reachable blocks, then builds live spans and WorkReg statistics.
+  ASMJIT_RA_LOG_FORMAT("[BuildLiveness]\n");
+
+  uint32_t i;
+
+  uint32_t numAllBlocks = blockCount();
+  uint32_t numReachableBlocks = reachableBlockCount();
+
+  uint32_t numVisits = numReachableBlocks;
+  uint32_t numWorkRegs = workRegCount();
+  uint32_t numBitWords = ZoneBitVector::_wordsPerBits(numWorkRegs);
+
+  if (!numWorkRegs) {
+    ASMJIT_RA_LOG_FORMAT("  Done (no virtual registers)\n");
+    return kErrorOk;
+  }
+
+  ZoneVector<uint32_t> nUsesPerWorkReg; // Number of USEs of each RAWorkReg.
+  ZoneVector<uint32_t> nOutsPerWorkReg; // Number of OUTs of each RAWorkReg.
+  ZoneVector<uint32_t> nInstsPerBlock;  // Number of instructions of each RABlock.
+
+  ASMJIT_PROPAGATE(nUsesPerWorkReg.resize(allocator(), numWorkRegs));
+  ASMJIT_PROPAGATE(nOutsPerWorkReg.resize(allocator(), numWorkRegs));
+  ASMJIT_PROPAGATE(nInstsPerBlock.resize(allocator(), numAllBlocks));
+
+  // Calculate GEN/KILL of Each Block
+  // --------------------------------
+
+  for (i = 0; i < numReachableBlocks; i++) {
+    RABlock* block = _pov[i];
+    ASMJIT_PROPAGATE(block->resizeLiveBits(numWorkRegs));
+    // Instructions of the block are visited backwards, from its last node to its first node.
+    BaseNode* node = block->last();
+    BaseNode* stop = block->first();
+
+    uint32_t nInsts = 0;
+    for (;;) {
+      if (node->isInst()) {
+        InstNode* inst = node->as<InstNode>();
+        RAInst* raInst = inst->passData<RAInst>();
+        ASMJIT_ASSERT(raInst != nullptr);
+
+        RATiedReg* tiedRegs = raInst->tiedRegs();
+        uint32_t count = raInst->tiedCount();
+
+        for (uint32_t j = 0; j < count; j++) {
+          RATiedReg* tiedReg = &tiedRegs[j];
+          uint32_t workId = tiedReg->workId();
+
+          // Update `nUses` and `nOuts`.
+          nUsesPerWorkReg[workId] += 1u;
+          nOutsPerWorkReg[workId] += uint32_t(tiedReg->isWrite());
+
+          // Mark as:
+          //   KILL - if this VirtReg is killed afterwards.
+          //   LAST - if this VirtReg is last in this basic block.
+          if (block->kill().bitAt(workId))
+            tiedReg->addFlags(RATiedFlags::kKill);
+          else if (!block->gen().bitAt(workId))
+            tiedReg->addFlags(RATiedFlags::kLast);
+
+          if (tiedReg->isWriteOnly()) {
+            // KILL.
+            block->kill().setBit(workId, true);
+          }
+          else {
+            // GEN.
+            block->kill().setBit(workId, false);
+            block->gen().setBit(workId, true);
+          }
+
+          if (tiedReg->isLeadConsecutive()) {
+            RAWorkReg* workReg = workRegById(workId);
+            workReg->markLeadConsecutive();
+          }
+
+          if (tiedReg->hasConsecutiveParent()) {
+            RAWorkReg* consecutiveParentReg = workRegById(tiedReg->consecutiveParent());
+            ASMJIT_PROPAGATE(consecutiveParentReg->addImmediateConsecutive(allocator(), workId)); // Don't drop a failure.
+          }
+        }
+
+        nInsts++;
+      }
+
+      if (node == stop)
+        break;
+
+      node = node->prev();
+      ASMJIT_ASSERT(node != nullptr);
+    }
+
+    nInstsPerBlock[block->blockId()] = nInsts;
+  }
+
+  // Calculate IN/OUT of Each Block
+  // ------------------------------
+
+  {
+    ZoneStack<RABlock*> workList;
+    ZoneBitVector workBits;
+    // `workBits` has one bit per block, all initially set; a set bit stops the block from being queued below.
+    ASMJIT_PROPAGATE(workList.init(allocator()));
+    ASMJIT_PROPAGATE(workBits.resize(allocator(), blockCount(), true));
+
+    for (i = 0; i < numReachableBlocks; i++) {
+      RABlock* block = _pov[i];
+      LiveOps::recalcInOut(block, numBitWords, true);
+      ASMJIT_PROPAGATE(workList.append(block));
+    }
+    // Iterate until the work list drains: a block whose IN changed re-queues its predecessors.
+    while (!workList.empty()) {
+      RABlock* block = workList.popFirst();
+      uint32_t blockId = block->blockId();
+
+      workBits.setBit(blockId, false);
+      if (LiveOps::recalcInOut(block, numBitWords)) {
+        const RABlocks& predecessors = block->predecessors();
+        uint32_t numPredecessors = predecessors.size();
+
+        for (uint32_t j = 0; j < numPredecessors; j++) {
+          RABlock* pred = predecessors[j];
+          if (!workBits.bitAt(pred->blockId())) {
+            workBits.setBit(pred->blockId(), true);
+            ASMJIT_PROPAGATE(workList.append(pred));
+          }
+        }
+      }
+      numVisits++;
+    }
+
+    workList.reset();
+    workBits.release(allocator());
+  }
+
+  ASMJIT_RA_LOG_COMPLEX({
+    logger->logf("  LiveIn/Out Done (%u visits)\n", numVisits);
+    for (i = 0; i < numAllBlocks; i++) {
+      RABlock* block = _blocks[i];
+
+      ASMJIT_PROPAGATE(sb.assignFormat("  {#%u}\n", block->blockId()));
+      ASMJIT_PROPAGATE(_dumpBlockLiveness(sb, block));
+
+      logger->log(sb);
+    }
+  });
+
+  // Reserve the space in each `RAWorkReg` for references
+  // ----------------------------------------------------
+
+  for (i = 0; i < numWorkRegs; i++) {
+    RAWorkReg* workReg = workRegById(i);
+    ASMJIT_PROPAGATE(workReg->_refs.reserve(allocator(), nUsesPerWorkReg[i]));
+    ASMJIT_PROPAGATE(workReg->_writes.reserve(allocator(), nOutsPerWorkReg[i]));
+  }
+
+  // Assign block and instruction positions, build LiveCount and LiveSpans
+  // ---------------------------------------------------------------------
+
+  uint32_t position = 2;
+  for (i = 0; i < numAllBlocks; i++) {
+    RABlock* block = _blocks[i];
+    if (!block->isReachable())
+      continue;
+
+    BaseNode* node = block->first();
+    BaseNode* stop = block->last();
+    // Each instruction advances `position` by 2, so the block ends at `position + 2 * nInsts`.
+    uint32_t endPosition = position + nInstsPerBlock[i] * 2;
+    block->setFirstPosition(position);
+    block->setEndPosition(endPosition);
+
+    RALiveCount curLiveCount;
+    RALiveCount maxLiveCount;
+
+    // Process LIVE-IN.
+    ZoneBitVector::ForEachBitSet it(block->liveIn());
+    while (it.hasNext()) {
+      RAWorkReg* workReg = _workRegs[uint32_t(it.next())];
+      curLiveCount[workReg->group()]++;
+      ASMJIT_PROPAGATE(workReg->liveSpans().openAt(allocator(), position, endPosition));
+    }
+
+    for (;;) {
+      if (node->isInst()) {
+        InstNode* inst = node->as<InstNode>();
+        RAInst* raInst = inst->passData<RAInst>();
+        ASMJIT_ASSERT(raInst != nullptr);
+
+        RATiedReg* tiedRegs = raInst->tiedRegs();
+        uint32_t count = raInst->tiedCount();
+
+        inst->setPosition(position);
+        raInst->_liveCount = curLiveCount;
+
+        for (uint32_t j = 0; j < count; j++) {
+          RATiedReg* tiedReg = &tiedRegs[j];
+          uint32_t workId = tiedReg->workId();
+
+          // Create refs and writes.
+          RAWorkReg* workReg = workRegById(workId);
+          workReg->_refs.appendUnsafe(node);
+          if (tiedReg->isWrite())
+            workReg->_writes.appendUnsafe(node);
+
+          // We couldn't calculate this in previous steps, but since we know all LIVE-OUT at this point it becomes
+          // trivial. If this is the last instruction that uses this `workReg` and it's not LIVE-OUT then it is
+          // KILLed here.
+          if (tiedReg->isLast() && !block->liveOut().bitAt(workId))
+            tiedReg->addFlags(RATiedFlags::kKill);
+
+          LiveRegSpans& liveSpans = workReg->liveSpans();
+          bool wasOpen;
+          ASMJIT_PROPAGATE(liveSpans.openAt(allocator(), position + !tiedReg->isRead(), endPosition, wasOpen));
+
+          RegGroup group = workReg->group();
+          if (!wasOpen) {
+            curLiveCount[group]++;
+            raInst->_liveCount[group]++;
+          }
+
+          if (tiedReg->isKill()) {
+            liveSpans.closeAt(position + !tiedReg->isRead() + 1);
+            curLiveCount[group]--;
+          }
+
+          // Update `RAWorkReg::useIdMask` and `RAWorkReg::hintRegId`.
+          if (tiedReg->hasUseId()) {
+            uint32_t useId = tiedReg->useId();
+            workReg->addUseIdMask(Support::bitMask(useId));
+            if (!workReg->hasHintRegId() && !Support::bitTest(raInst->_clobberedRegs[group], useId))
+              workReg->setHintRegId(useId);
+          }
+
+          if (tiedReg->useRegMask()) {
+            workReg->restrictPreferredMask(tiedReg->useRegMask());
+            if (workReg->isLeadConsecutive())
+              workReg->restrictConsecutiveMask(tiedReg->useRegMask());
+          }
+
+          if (tiedReg->outRegMask()) {
+            workReg->restrictPreferredMask(tiedReg->outRegMask());
+            if (workReg->isLeadConsecutive())
+              workReg->restrictConsecutiveMask(tiedReg->outRegMask());
+          }
+
+          // Update `RAWorkReg::clobberedSurvivalMask`.
+          if (raInst->_clobberedRegs[group] && !tiedReg->isOutOrKill()) {
+            workReg->addClobberSurvivalMask(raInst->_clobberedRegs[group]);
+          }
+        }
+
+        position += 2;
+        maxLiveCount.op<Support::Max>(raInst->_liveCount);
+      }
+
+      if (node == stop)
+        break;
+
+      node = node->next();
+      ASMJIT_ASSERT(node != nullptr);
+    }
+
+    block->_maxLiveCount = maxLiveCount;
+    _globalMaxLiveCount.op<Support::Max>(maxLiveCount);
+    ASMJIT_ASSERT(position == block->endPosition());
+  }
+
+  // Calculate WorkReg statistics
+  // ----------------------------
+
+  for (i = 0; i < numWorkRegs; i++) {
+    RAWorkReg* workReg = _workRegs[i];
+    // `freq` is the number of references per unit of live-span width (zero for an empty span).
+    LiveRegSpans& spans = workReg->liveSpans();
+    uint32_t width = spans.width();
+    float freq = width ? float(double(workReg->_refs.size()) / double(width)) : float(0);
+
+    RALiveStats& stats = workReg->liveStats();
+    stats._width = width;
+    stats._freq = freq;
+    stats._priority = freq + float(int(workReg->virtReg()->weight())) * 0.01f;
+  }
+
+  ASMJIT_RA_LOG_COMPLEX({
+    sb.clear();
+    _dumpLiveSpans(sb);
+    logger->log(sb);
+  });
+
+  nUsesPerWorkReg.release(allocator());
+  nOutsPerWorkReg.release(allocator());
+  nInstsPerBlock.release(allocator());
+
+  return kErrorOk;
+}
+
+Error BaseRAPass::assignArgIndexToWorkRegs() noexcept {  // Tags work registers that back function arguments.
+  ZoneBitVector& liveIn = entryBlock()->liveIn();
+  uint32_t argCount = func()->argCount();
+
+  for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) {
+    for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
+      // Skip unassigned arguments: the slot holds no register or its virtual id is not valid.
+      const RegOnly& regArg = func()->argPack(argIndex)[valueIndex];
+      if (!regArg.isReg() || !cc()->isVirtIdValid(regArg.id()))
+        continue;
+
+      VirtReg* virtReg = cc()->virtRegById(regArg.id());
+      if (!virtReg)
+        continue;
+
+      // Skip unreferenced arguments: the virtual register has no work register.
+      RAWorkReg* workReg = virtReg->workReg();
+      if (!workReg)
+        continue;
+
+      // Skip arguments that are not live-in at the entry block (presumably overwritten before being read).
+      uint32_t workId = workReg->workId();
+      if (!liveIn.bitAt(workId))
+        continue;
+
+      workReg->setArgIndex(argIndex, valueIndex);
+      const FuncValue& arg = func()->detail().arg(argIndex, valueIndex);
+
+      if (arg.isReg() && _archTraits->regTypeToGroup(arg.regType()) == workReg->group()) {  // Same register group.
+        workReg->setHintRegId(arg.regId());  // Hint the allocator to the register the argument is passed in.
+      }
+    }
+  }
+
+  return kErrorOk;
+}
+
+// BaseRAPass - Allocation - Global
+// ================================
+
+#ifndef ASMJIT_NO_LOGGING
+static void RAPass_dumpSpans(String& sb, uint32_t index, const LiveRegSpans& liveSpans) noexcept {  // Logging helper.
+  sb.appendFormat("  %02u: ", index);  // Line prefix: the caller-supplied index (binPack() passes a physical register id).
+
+  for (uint32_t i = 0; i < liveSpans.size(); i++) {
+    const LiveRegSpan& liveSpan = liveSpans[i];
+    if (i) sb.append(", ");  // Separator between spans, not before the first one.
+    sb.appendFormat("[%u:%u@%u]", liveSpan.a, liveSpan.b, liveSpan.id);  // Printed as [a:b@id].
+  }
+
+  sb.append('\n');
+}
+#endif
+
+Error BaseRAPass::runGlobalAllocator() noexcept {  // Global allocation: bin-packs every virtual register group.
+  ASMJIT_PROPAGATE(initGlobalLiveSpans());  // Per-physical-register span containers must exist before packing.
+
+  for (RegGroup group : RegGroupVirtValues{}) {
+    ASMJIT_PROPAGATE(binPack(group));  // The first failing group aborts the whole pass with its error.
+  }
+
+  return kErrorOk;
+}
+
+ASMJIT_FAVOR_SPEED Error BaseRAPass::initGlobalLiveSpans() noexcept {  // Creates one LiveRegSpans per physical register.
+  for (RegGroup group : RegGroupVirtValues{}) {
+    size_t physCount = _physRegCount[group];
+    LiveRegSpans* liveSpans = nullptr;  // Stays nullptr for groups that have no physical registers.
+
+    if (physCount) {
+      liveSpans = allocator()->allocT<LiveRegSpans>(physCount * sizeof(LiveRegSpans));  // Size is given in bytes.
+      if (ASMJIT_UNLIKELY(!liveSpans))
+        return DebugUtils::errored(kErrorOutOfMemory);
+
+      for (size_t physId = 0; physId < physCount; physId++)
+        new(&liveSpans[physId]) LiveRegSpans();  // Placement-new: construct each element in the allocated storage.
+    }
+
+    _globalLiveSpans[group] = liveSpans;
+  }
+
+  return kErrorOk;
+}
+
+struct RAConsecutiveReg {  // Entry of the consecutive-register work list built by binPack().
+  RAWorkReg* workReg;  // Register to allocate.
+  RAWorkReg* parentReg;  // Register this one must directly follow; nullptr for a lead register.
+};
+
+ASMJIT_FAVOR_SPEED Error BaseRAPass::binPack(RegGroup group) noexcept {  // Assigns home registers for one group.
+  if (workRegCount(group) == 0)  // Nothing to allocate in this group.
+    return kErrorOk;
+
+#ifndef ASMJIT_NO_LOGGING
+  Logger* logger = getLoggerIf(DiagnosticOptions::kRADebugAssignment);
+  StringTmp<512> sb;
+
+  ASMJIT_RA_LOG_FORMAT("[BinPack] Available=%u (0x%08X) Count=%u RegGroup=%u\n",
+    Support::popcnt(_availableRegs[group]),
+    _availableRegs[group],
+    workRegCount(group),
+    uint32_t(group));
+#endif
+
+  uint32_t i;
+  uint32_t physCount = _physRegCount[group];
+
+  RAWorkRegs workRegs;  // Local copy of the group's work registers; compacted as registers get allocated.
+  ZoneVector<RAConsecutiveReg> consecutiveRegs;
+  LiveRegSpans tmpSpans;  // Scratch result of each union attempt; swapped into the target on success.
+
+  ASMJIT_PROPAGATE(workRegs.concat(allocator(), this->workRegs(group)));
+  workRegs.sort([](const RAWorkReg* a, const RAWorkReg* b) noexcept {  // Presumably descending priority - confirm.
+    return b->liveStats().priority() - a->liveStats().priority();
+  });
+
+  uint32_t numWorkRegs = workRegs.size();
+  RegMask availableRegs = _availableRegs[group];
+
+  // First try to pack everything that provides register-id hint as these are most likely function arguments and fixed
+  // (precolored) virtual registers.
+  if (!workRegs.empty()) {
+    uint32_t dstIndex = 0;
+
+    for (i = 0; i < numWorkRegs; i++) {
+      RAWorkReg* workReg = workRegs[i];
+
+      if (workReg->isLeadConsecutive()) {  // Leads are collected here; followers are appended in the next phase.
+        ASMJIT_PROPAGATE(consecutiveRegs.append(allocator(), RAConsecutiveReg{workReg, nullptr}));
+        workReg->markProcessedConsecutive();
+      }
+
+      if (workReg->hasHintRegId()) {
+        uint32_t physId = workReg->hintRegId();
+        if (Support::bitTest(availableRegs, physId)) {
+          LiveRegSpans& live = _globalLiveSpans[group][physId];
+          Error err = tmpSpans.nonOverlappingUnionOf(allocator(), live, workReg->liveSpans(), LiveRegData(workReg->virtId()));
+
+          if (err == kErrorOk) {  // Union succeeded: commit the merged spans to this physical register.
+            live.swap(tmpSpans);
+            workReg->setHomeRegId(physId);
+            workReg->markAllocated();
+            continue;
+          }
+
+          if (err != 0xFFFFFFFFu)  // 0xFFFFFFFFu is treated as "does not fit" (non-fatal); other errors propagate.
+            return err;
+        }
+      }
+
+      workRegs[dstIndex++] = workReg;  // Not allocated by hint: compact it to the front for the later phases.
+    }
+
+    workRegs._setSize(dstIndex);
+    numWorkRegs = dstIndex;
+  }
+
+  // Allocate consecutive registers - both leads and all consecutives. This is important and prioritized over the rest,
+  // because once a lead is allocated we really need to allocate its consecutives, otherwise we may bin pack other
+  // registers into their places, which would result in wrong hints to the local allocator, and then into many moves
+  // or spills.
+  if (!consecutiveRegs.empty()) {
+    // This loop appends all other consecutive registers into `consecutiveRegs` array. Leads are at the beginning,
+    // non-leads follow.
+    i = 0;
+    for (;;) {
+      uint32_t stop = consecutiveRegs.size();
+      if (i == stop)  // No entries were appended in the previous round: the list is complete.
+        break;
+
+      while (i < stop) {
+        RAWorkReg* workReg = consecutiveRegs[i].workReg;
+        if (workReg->hasImmediateConsecutives()) {
+          ZoneBitVector::ForEachBitSet it(workReg->immediateConsecutives());
+          while (it.hasNext()) {
+            uint32_t consecutiveWorkId = uint32_t(it.next());
+            RAWorkReg* consecutiveReg = workRegById(consecutiveWorkId);
+            if (!consecutiveReg->isProcessedConsecutive()) {  // Append each register at most once.
+              ASMJIT_PROPAGATE(consecutiveRegs.append(allocator(), RAConsecutiveReg{consecutiveReg, workReg}));
+              consecutiveReg->markProcessedConsecutive();
+            }
+          }
+        }
+        i++;
+      }
+    }
+
+    uint32_t numConsecutiveRegs = consecutiveRegs.size();
+    for (i = 0; i < numConsecutiveRegs; i++) {
+      RAWorkReg* workReg = consecutiveRegs[i].workReg;
+      if (workReg->isAllocated())  // Already placed, e.g. by the hint phase above.
+        continue;
+
+      RAWorkReg* parentReg = consecutiveRegs[i].parentReg;
+      RegMask physRegs = 0;
+
+      if (!parentReg) {  // Lead register: choose from preferred registers, falling back to the consecutive mask.
+        physRegs = availableRegs & workReg->preferredMask();
+        if (!physRegs) {
+          physRegs = availableRegs & workReg->consecutiveMask();
+
+          // NOTE: This should never be true as it would mean we would never allocate this virtual register
+          // (not here, and not later when local register allocator processes RATiedReg sets).
+          if (ASMJIT_UNLIKELY(!physRegs))
+            return DebugUtils::errored(kErrorConsecutiveRegsAllocation);
+        }
+      }
+      else if (parentReg->hasHomeRegId()) {  // Follower: must take the register right after its parent's home.
+        uint32_t consecutiveId = parentReg->homeRegId() + 1;
+
+        // NOTE: We don't support wrapping. If this goes beyond all allocable registers there is something wrong.
+        if (consecutiveId > 31 || !Support::bitTest(availableRegs, consecutiveId))
+          return DebugUtils::errored(kErrorConsecutiveRegsAllocation);
+
+        workReg->setHintRegId(consecutiveId);
+        physRegs = Support::bitMask(consecutiveId);
+      }
+
+      while (physRegs) {  // Not entered when physRegs is 0 (a follower whose parent has no home register).
+        uint32_t physId = Support::bitSizeOf<RegMask>() - 1 - Support::clz(physRegs);  // Highest set bit first.
+
+        LiveRegSpans& live = _globalLiveSpans[group][physId];
+        Error err = tmpSpans.nonOverlappingUnionOf(allocator(), live, workReg->liveSpans(), LiveRegData(workReg->virtId()));
+
+        if (err == kErrorOk) {
+          workReg->setHomeRegId(physId);
+          workReg->markAllocated();
+          live.swap(tmpSpans);
+          break;
+        }
+
+        if (ASMJIT_UNLIKELY(err != 0xFFFFFFFFu))  // Only the "does not fit" value lets the search continue.
+          return err;
+
+        physRegs ^= Support::bitMask(physId);  // Drop the candidate that did not fit.
+      }
+    }
+  }
+
+  // Try to pack the rest.
+  if (!workRegs.empty()) {
+    uint32_t dstIndex = 0;
+
+    for (i = 0; i < numWorkRegs; i++) {
+      RAWorkReg* workReg = workRegs[i];
+
+      if (workReg->isAllocated())  // Placed by the consecutive phase; it is dropped from `workRegs`.
+        continue;
+
+      RegMask physRegs = availableRegs;
+      if (physRegs & workReg->preferredMask())  // Restrict to preferred registers only if one is available.
+        physRegs &= workReg->preferredMask();
+
+      while (physRegs) {
+        RegMask preferredMask = physRegs;
+        uint32_t physId = Support::ctz(preferredMask);  // Default candidate: lowest set bit.
+
+        if (workReg->clobberSurvivalMask()) {  // Prefer a candidate that is also in the clobber-survival mask.
+          preferredMask &= workReg->clobberSurvivalMask();
+          if (preferredMask)
+            physId = Support::ctz(preferredMask);
+        }
+
+        LiveRegSpans& live = _globalLiveSpans[group][physId];
+        Error err = tmpSpans.nonOverlappingUnionOf(allocator(), live, workReg->liveSpans(), LiveRegData(workReg->virtId()));
+
+        if (err == kErrorOk) {
+          workReg->setHomeRegId(physId);
+          workReg->markAllocated();
+          live.swap(tmpSpans);
+          break;
+        }
+
+        if (ASMJIT_UNLIKELY(err != 0xFFFFFFFFu))
+          return err;
+
+        physRegs ^= Support::bitMask(physId);  // Drop the candidate that did not fit.
+      }
+
+      // Keep it in `workRegs` if it was not allocated (all candidate registers were exhausted).
+      if (!physRegs)
+        workRegs[dstIndex++] = workReg;
+    }
+
+    workRegs._setSize(dstIndex);
+    numWorkRegs = dstIndex;
+  }
+
+  ASMJIT_RA_LOG_COMPLEX({
+    for (uint32_t physId = 0; physId < physCount; physId++) {
+      LiveRegSpans& live = _globalLiveSpans[group][physId];
+      if (live.empty())
+        continue;
+
+      sb.clear();
+      RAPass_dumpSpans(sb, physId, live);
+      logger->log(sb);
+    }
+  });
+
+  // Maybe unused if logging is disabled.
+  DebugUtils::unused(physCount);
+
+  if (workRegs.empty()) {  // Every work register of this group received a home register.
+    ASMJIT_RA_LOG_FORMAT("  Completed.\n");
+  }
+  else {
+    _strategy[group].setType(RAStrategyType::kComplex);  // Some registers have no home register.
+    for (RAWorkReg* workReg : workRegs)
+      workReg->markStackPreferred();
+
+    ASMJIT_RA_LOG_COMPLEX({
+      uint32_t count = workRegs.size();
+      sb.clear();
+      sb.appendFormat("  Unassigned (%u): ", count);
+      for (i = 0; i < numWorkRegs; i++) {
+        RAWorkReg* workReg = workRegs[i];
+        if (i) sb.append(", ");
+        sb.append(workReg->name());
+      }
+      sb.append('\n');
+      logger->log(sb);
+    });
+  }
+
+  return kErrorOk;
+}
+
+// BaseRAPass - Allocation - Local
+// ===============================
+
+Error BaseRAPass::runLocalAllocator() noexcept {  // Local allocation: processes every reachable block once.
+  RALocalAllocator lra(this);
+  ASMJIT_PROPAGATE(lra.init());
+
+  if (!blockCount())  // No blocks, nothing to allocate.
+    return kErrorOk;
+
+  // The allocation is done when this reaches zero.
+  uint32_t blocksRemaining = reachableBlockCount();
+
+  // Current block.
+  uint32_t blockId = 0;
+  RABlock* block = _blocks[blockId];
+
+  // The first block (entry) must always be reachable.
+  ASMJIT_ASSERT(block->isReachable());
+
+  // Assign function arguments for the initial block. The `lra` is valid now.
+  lra.makeInitialAssignment();
+  ASMJIT_PROPAGATE(setBlockEntryAssignment(block, block, lra._curAssignment));
+
+  // The loop starts from the first block and iterates blocks in order, however, the algorithm also allows to jump to
+  // any other block when finished if it's a jump target. In-order iteration just makes sure that all blocks are visited.
+  for (;;) {
+    BaseNode* first = block->first();
+    BaseNode* last = block->last();
+    BaseNode* terminator = block->hasTerminator() ? last : nullptr;
+
+    BaseNode* beforeFirst = first->prev();  // Neighbours are remembered so the block bounds can be recomputed below.
+    BaseNode* afterLast = last->next();
+
+    bool unconditionalJump = false;
+    RABlock* consecutive = nullptr;
+
+    if (block->hasSuccessors())
+      consecutive = block->successors()[0];  // First successor; allocation continues with it when possible.
+
+    lra.setBlock(block);
+    block->makeAllocated();
+
+    BaseNode* node = first;
+    while (node != afterLast) {
+      BaseNode* next = node->next();  // Fetched up front; the allocator may insert nodes around `node`.
+      if (node->isInst()) {
+        InstNode* inst = node->as<InstNode>();
+
+        if (ASMJIT_UNLIKELY(inst == terminator)) {
+          const RABlocks& successors = block->successors();
+          if (block->hasConsecutive()) {  // Terminator of a block that also flows to the next block.
+            ASMJIT_PROPAGATE(lra.allocBranch(inst, successors.last(), successors.first()));
+
+            node = next;
+            continue;
+          }
+          else if (successors.size() > 1) {
+            RABlock* cont = block->hasConsecutive() ? successors.first() : nullptr;  // NOTE(review): always nullptr here.
+            ASMJIT_PROPAGATE(lra.allocJumpTable(inst, successors, cont));
+
+            node = next;
+            continue;
+          }
+          else {
+            // Otherwise this is an unconditional jump, special handling isn't required.
+            unconditionalJump = true;
+          }
+        }
+
+        ASMJIT_PROPAGATE(lra.allocInst(inst));
+        if (inst->type() == NodeType::kInvoke)  // Invoke nodes get emitPreCall() instead of spillAfterAllocation().
+          ASMJIT_PROPAGATE(emitPreCall(inst->as<InvokeNode>()));
+        else
+          ASMJIT_PROPAGATE(lra.spillAfterAllocation(inst));
+      }
+      node = next;
+    }
+
+    if (consecutive) {
+      BaseNode* prev = afterLast ? afterLast->prev() : cc()->lastNode();  // Current last node of this block.
+      cc()->_setCursor(unconditionalJump ? prev->prev() : prev);  // Cursor goes before an unconditional jump.
+
+      if (consecutive->hasEntryAssignment()) {  // Successor already has an entry assignment: switch to it.
+        ASMJIT_PROPAGATE(lra.switchToAssignment(consecutive->entryPhysToWorkMap(), consecutive->liveIn(), consecutive->isAllocated(), false));
+      }
+      else {  // No entry assignment yet: derive it from the current assignment.
+        ASMJIT_PROPAGATE(lra.spillRegsBeforeEntry(consecutive));
+        ASMJIT_PROPAGATE(setBlockEntryAssignment(consecutive, block, lra._curAssignment));
+        lra._curAssignment.copyFrom(consecutive->entryPhysToWorkMap());
+      }
+    }
+
+    // Important as the local allocator can insert instructions before
+    // and after any instruction within the basic block.
+    block->setFirst(beforeFirst->next());
+    block->setLast(afterLast ? afterLast->prev() : cc()->lastNode());
+
+    if (--blocksRemaining == 0)
+      break;
+
+    // Switch to the next consecutive block, if any.
+    if (consecutive) {
+      block = consecutive;
+      if (!block->isAllocated())  // Not processed yet: continue with it directly.
+        continue;
+    }
+
+    // Get the next block.
+    for (;;) {
+      if (++blockId >= blockCount())  // Wrap around to the first block.
+        blockId = 0;
+
+      block = _blocks[blockId];
+      if (!block->isReachable() || block->isAllocated() || !block->hasEntryAssignment())
+        continue;  // Skip blocks that are unreachable, already allocated, or have no entry assignment yet.
+
+      break;
+    }
+
+    // If we switched to some block we have to update the local allocator.
+    lra.replaceAssignment(block->entryPhysToWorkMap());
+  }
+
+  _clobberedRegs.op<Support::Or>(lra._clobberedRegs);  // Merge registers clobbered by the local allocator.
+  return kErrorOk;
+}
+
+Error BaseRAPass::setBlockEntryAssignment(RABlock* block, const RABlock* fromBlock, const RAAssignment& fromAssignment) noexcept {
+  if (block->hasSharedAssignmentId()) {  // Blocks with a shared assignment are delegated to setSharedAssignment().
+    uint32_t sharedAssignmentId = block->sharedAssignmentId();
+
+    // Shouldn't happen. Entry assignment of a block that has a shared-state will assign to all blocks
+    // with the same sharedAssignmentId. It's a bug if the shared state has been already assigned.
+    if (!_sharedAssignments[sharedAssignmentId].empty())
+      return DebugUtils::errored(kErrorInvalidState);
+
+    return setSharedAssignment(sharedAssignmentId, fromAssignment);
+  }
+
+  PhysToWorkMap* physToWorkMap = clonePhysToWorkMap(fromAssignment.physToWorkMap());  // The block gets its own copy.
+  if (ASMJIT_UNLIKELY(!physToWorkMap))
+    return DebugUtils::errored(kErrorOutOfMemory);
+
+  block->setEntryAssignment(physToWorkMap);
+
+  // True if this is the first (entry) block, nothing to do in this case.
+  if (block == fromBlock) {
+    // Entry block should never have a shared state.
+    if (block->hasSharedAssignmentId())  // NOTE(review): unreachable, this case already returned above.
+      return DebugUtils::errored(kErrorInvalidState);
+
+    return kErrorOk;
+  }
+
+  const ZoneBitVector& liveOut = fromBlock->liveOut();
+  const ZoneBitVector& liveIn = block->liveIn();
+
+  // It's possible that `fromBlock` has LIVE-OUT regs that `block` doesn't
+  // have in LIVE-IN, these have to be unassigned.
+  {
+    ZoneBitVector::ForEachBitOp<Support::AndNot> it(liveOut, liveIn);  // Iterates bits of `liveOut & ~liveIn`.
+    while (it.hasNext()) {
+      uint32_t workId = uint32_t(it.next());
+      RAWorkReg* workReg = workRegById(workId);
+
+      RegGroup group = workReg->group();
+      uint32_t physId = fromAssignment.workToPhysId(group, workId);
+
+      if (physId != RAAssignment::kPhysNone)  // Only registers that currently hold a physical register.
+        physToWorkMap->unassign(group, physId, _physRegIndex.get(group) + physId);
+    }
+  }
+
+  return blockEntryAssigned(physToWorkMap);
+}
+
+Error BaseRAPass::setSharedAssignment(uint32_t sharedAssignmentId, const RAAssignment& fromAssignment) noexcept {
+  ASMJIT_ASSERT(_sharedAssignments[sharedAssignmentId].empty());  // A shared assignment may only be set once.
+
+  PhysToWorkMap* physToWorkMap = clonePhysToWorkMap(fromAssignment.physToWorkMap());
+  if (ASMJIT_UNLIKELY(!physToWorkMap))
+    return DebugUtils::errored(kErrorOutOfMemory);
+
+  _sharedAssignments[sharedAssignmentId].assignPhysToWorkMap(physToWorkMap);
+
+  ZoneBitVector& sharedLiveIn = _sharedAssignments[sharedAssignmentId]._liveIn;
+  ASMJIT_PROPAGATE(sharedLiveIn.resize(allocator(), workRegCount()));
+
+  Support::Array<uint32_t, Globals::kNumVirtGroups> sharedAssigned {};  // Per-group union of `assigned` masks.
+  for (RABlock* block : blocks()) {
+    if (block->sharedAssignmentId() == sharedAssignmentId) {  // Every block that uses this shared assignment.
+      ASMJIT_ASSERT(!block->hasEntryAssignment());
+
+      PhysToWorkMap* entryPhysToWorkMap = clonePhysToWorkMap(fromAssignment.physToWorkMap());  // Per-block copy.
+      if (ASMJIT_UNLIKELY(!entryPhysToWorkMap))
+        return DebugUtils::errored(kErrorOutOfMemory);
+
+      block->setEntryAssignment(entryPhysToWorkMap);
+
+      const ZoneBitVector& liveIn = block->liveIn();
+      sharedLiveIn.or_(liveIn);  // Shared live-in is the union of live-in sets of all sharing blocks.
+
+      for (RegGroup group : RegGroupVirtValues{}) {
+        sharedAssigned[group] |= entryPhysToWorkMap->assigned[group];  // NOTE(review): taken before the pruning below.
+
+        uint32_t physBaseIndex = _physRegIndex.get(group);
+        Support::BitWordIterator<RegMask> it(entryPhysToWorkMap->assigned[group]);
+
+        while (it.hasNext()) {
+          uint32_t physId = it.next();
+          uint32_t workId = entryPhysToWorkMap->workIds[physBaseIndex + physId];
+
+          if (!liveIn.bitAt(workId))  // Registers not live-in for this block are removed from its own copy.
+            entryPhysToWorkMap->unassign(group, physId, physBaseIndex + physId);
+        }
+      }
+    }
+  }
+
+  for (RegGroup group : RegGroupVirtValues{}) {
+    uint32_t physBaseIndex = _physRegIndex.get(group);
+    Support::BitWordIterator<RegMask> it(_availableRegs[group] & ~sharedAssigned[group]);  // Not in any block's mask.
+
+    while (it.hasNext()) {
+      uint32_t physId = it.next();
+      if (Support::bitTest(physToWorkMap->assigned[group], physId))  // Still assigned in the shared map: remove it.
+        physToWorkMap->unassign(group, physId, physBaseIndex + physId);
+    }
+  }
+
+  return blockEntryAssigned(physToWorkMap);
+}
+
+Error BaseRAPass::blockEntryAssigned(const PhysToWorkMap* physToWorkMap) noexcept {
+  // Complex allocation strategy requires to record register assignments upon block entry (or per shared state).
+  for (RegGroup group : RegGroupVirtValues{}) {
+    if (!_strategy[group].isComplex())  // Groups that do not use the complex strategy record nothing.
+      continue;
+
+    uint32_t physBaseIndex = _physRegIndex[group];
+    Support::BitWordIterator<RegMask> it(physToWorkMap->assigned[group]);  // Every assigned physical register.
+
+    while (it.hasNext()) {
+      uint32_t physId = it.next();
+      uint32_t workId = physToWorkMap->workIds[physBaseIndex + physId];
+
+      RAWorkReg* workReg = workRegById(workId);
+      workReg->addAllocatedMask(Support::bitMask(physId));  // Record that this work register was held in `physId`.
+    }
+  }
+
+  return kErrorOk;
+}
+
+// BaseRAPass - Allocation - Utilities
+// ===================================
+
+Error BaseRAPass::useTemporaryMem(BaseMem& out, uint32_t size, uint32_t alignment) noexcept {
+  ASMJIT_ASSERT(alignment <= 64);
+
+  if (_temporaryMem.isNone()) {  // First request: create the temporary stack memory.
+    ASMJIT_PROPAGATE(cc()->_newStack(&_temporaryMem.as<BaseMem>(), size, alignment));
+  }
+  else {  // Later requests reuse it, growing size and alignment to the maximum requested so far.
+    ASMJIT_ASSERT(_temporaryMem.as<BaseMem>().isRegHome());
+
+    uint32_t virtId = _temporaryMem.as<BaseMem>().baseId();
+    VirtReg* virtReg = cc()->virtRegById(virtId);
+
+    cc()->setStackSize(virtId, Support::max(virtReg->virtSize(), size),
+                               Support::max(virtReg->alignment(), alignment));
+  }
+
+  out = _temporaryMem.as<BaseMem>();  // All callers receive the same memory operand.
+  return kErrorOk;
+}
+
+// BaseRAPass - Allocation - Prolog & Epilog
+// =========================================
+
+Error BaseRAPass::updateStackFrame() noexcept {
+  // Update some StackFrame information that we updated during allocation. The only information we don't have at the
+  // moment is final local stack size, which is calculated last.
+  FuncFrame& frame = func()->frame();
+  for (RegGroup group : RegGroupVirtValues{})
+    frame.addDirtyRegs(group, _clobberedRegs[group]);  // Registers clobbered during allocation become dirty.
+  frame.setLocalStackAlignment(_stackAllocator.alignment());
+
+  // If there are stack arguments that are not assigned to registers upon entry and the function doesn't require
+  // dynamic stack alignment we keep these arguments where they are. This will also mark all stack slots that match
+  // these arguments as allocated.
+  if (_numStackArgsToStackSlots)
+    ASMJIT_PROPAGATE(_markStackArgsToKeep());
+
+  // Calculate offsets of all stack slots and update StackSize to reflect the calculated local stack size.
+  ASMJIT_PROPAGATE(_stackAllocator.calculateStackFrame());
+  frame.setLocalStackSize(_stackAllocator.stackSize());
+
+  // Update the stack frame based on `_argsAssignment` and finalize it. Finalization means to apply final calculation
+  // to the stack layout.
+  ASMJIT_PROPAGATE(_argsAssignment.updateFuncFrame(frame));
+  ASMJIT_PROPAGATE(frame.finalize());
+
+  // StackAllocator allocates all slots starting from [0], adjust them when necessary.
+  if (frame.localStackOffset() != 0)
+    ASMJIT_PROPAGATE(_stackAllocator.adjustSlotOffsets(int32_t(frame.localStackOffset())));
+
+  // Again, if there are stack arguments allocated in function's stack we have to handle them. This handles all cases
+  // (either regular or dynamic stack alignment).
+  if (_numStackArgsToStackSlots)
+    ASMJIT_PROPAGATE(_updateStackArgs());
+
+  return kErrorOk;
+}
+
+Error BaseRAPass::_markStackArgsToKeep() noexcept {
+  FuncFrame& frame = func()->frame();
+  bool hasSAReg = frame.hasPreservedFP() || !frame.hasDynamicAlignment();  // Presumably: stack args are addressable.
+
+  RAWorkRegs& workRegs = _workRegs;
+  uint32_t numWorkRegs = workRegCount();
+
+  for (uint32_t workId = 0; workId < numWorkRegs; workId++) {
+    RAWorkReg* workReg = workRegs[workId];
+    if (workReg->hasFlag(RAWorkRegFlags::kStackArgToStack)) {
+      ASMJIT_ASSERT(workReg->hasArgIndex());
+      const FuncValue& srcArg = _func->detail().arg(workReg->argIndex());  // NOTE(review): no argValueIndex() here.
+
+      // If the register doesn't have stack slot then we failed. It doesn't make much sense as it was marked as
+      // `RAWorkRegFlags::kStackArgToStack`, which requires the WorkReg was live-in upon function entry.
+      RAStackSlot* slot = workReg->stackSlot();
+      if (ASMJIT_UNLIKELY(!slot))
+        return DebugUtils::errored(kErrorInvalidState);
+
+      if (hasSAReg && srcArg.isStack() && !srcArg.isIndirect()) {
+        uint32_t typeSize = TypeUtils::sizeOf(srcArg.typeId());
+        if (typeSize == slot->size()) {  // Same size: the slot can alias the incoming stack argument in place.
+          slot->addFlags(RAStackSlot::kFlagStackArg);
+          continue;
+        }
+      }
+
+      // NOTE: Update StackOffset here so when `_argsAssignment.updateFuncFrame()` is called it will take into
+      // consideration moving to stack slots. Without this we may miss some scratch registers later.
+      FuncValue& dstArg = _argsAssignment.arg(workReg->argIndex(), workReg->argValueIndex());
+      dstArg.assignStackOffset(0);  // Placeholder offset; _updateStackArgs() sets the slot's real offset later.
+    }
+  }
+
+  return kErrorOk;
+}
+
+Error BaseRAPass::_updateStackArgs() noexcept {
+  FuncFrame& frame = func()->frame();
+  RAWorkRegs& workRegs = _workRegs;
+  uint32_t numWorkRegs = workRegCount();
+
+  for (uint32_t workId = 0; workId < numWorkRegs; workId++) {
+    RAWorkReg* workReg = workRegs[workId];
+    if (workReg->hasFlag(RAWorkRegFlags::kStackArgToStack)) {
+      ASMJIT_ASSERT(workReg->hasArgIndex());
+      RAStackSlot* slot = workReg->stackSlot();
+
+      if (ASMJIT_UNLIKELY(!slot))
+        return DebugUtils::errored(kErrorInvalidState);
+
+      if (slot->isStackArg()) {  // Slot aliases the incoming stack argument (flag set by _markStackArgsToKeep()).
+        const FuncValue& srcArg = _func->detail().arg(workReg->argIndex());  // NOTE(review): no argValueIndex() here.
+        if (frame.hasPreservedFP()) {  // Address the argument through the frame pointer.
+          slot->setBaseRegId(_fp.id());
+          slot->setOffset(int32_t(frame.saOffsetFromSA()) + srcArg.stackOffset());
+        }
+        else {  // No preserved frame pointer: only the offset changes (computed from saOffsetFromSP()).
+          slot->setOffset(int32_t(frame.saOffsetFromSP()) + srcArg.stackOffset());
+        }
+      }
+      else {  // Slot is separate from the argument: the argument's destination becomes the slot offset.
+        FuncValue& dstArg = _argsAssignment.arg(workReg->argIndex(), workReg->argValueIndex());
+        dstArg.setStackOffset(slot->offset());
+      }
+    }
+  }
+
+  return kErrorOk;
+}
+
+Error BaseRAPass::insertPrologEpilog() noexcept {
+  FuncFrame& frame = _func->frame();
+
+  cc()->_setCursor(func());  // Prolog and argument assignment are emitted at the function node.
+  ASMJIT_PROPAGATE(cc()->emitProlog(frame));
+  ASMJIT_PROPAGATE(_iEmitHelper->emitArgsAssignment(frame, _argsAssignment));
+
+  cc()->_setCursor(func()->exitNode());  // Epilog is emitted at the function's exit node.
+  ASMJIT_PROPAGATE(cc()->emitEpilog(frame));
+
+  return kErrorOk;
+}
+
+// BaseRAPass - Rewriter
+// =====================
+
+Error BaseRAPass::rewrite() noexcept {
+  return _rewrite(_func, _stop);  // Delegates to _rewrite() with the pass's `_func` and `_stop` nodes.
+}
+
+// BaseRAPass - Logging
+// ====================
+
+#ifndef ASMJIT_NO_LOGGING
+static void RAPass_formatLiveness(BaseRAPass* pass, String& sb, const RAInst* raInst) noexcept {
+  const RATiedReg* tiedRegs = raInst->tiedRegs();
+  uint32_t tiedCount = raInst->tiedCount();
+
+  for (uint32_t i = 0; i < tiedCount; i++) {  // One "name{...}" entry per tied register, separated by spaces.
+    const RATiedReg& tiedReg = tiedRegs[i];
+
+    if (i != 0)
+      sb.append(' ');
+
+    sb.appendFormat("%s{", pass->workRegById(tiedReg.workId())->name());
+    sb.append(tiedReg.isReadWrite() ? 'X' :
+              tiedReg.isRead()      ? 'R' :
+              tiedReg.isWrite()     ? 'W' : '?');
+
+    if (tiedReg.isLeadConsecutive())
+      sb.appendFormat("|Lead[%u]", tiedReg.consecutiveData() + 1u);  // Printed as consecutiveData() + 1.
+
+    if (tiedReg.hasUseId())  // Use with a fixed id prints the id, a plain use prints only the tag.
+      sb.appendFormat("|Use=%u", tiedReg.useId());
+    else if (tiedReg.isUse())
+      sb.append("|Use");
+
+    if (tiedReg.isUseConsecutive() && !tiedReg.isLeadConsecutive())  // Non-lead consecutive use: "+<data>" suffix.
+      sb.appendFormat("+%u", tiedReg.consecutiveData());
+
+    if (tiedReg.hasOutId())  // Same scheme for the output side.
+      sb.appendFormat("|Out=%u", tiedReg.outId());
+    else if (tiedReg.isOut())
+      sb.append("|Out");
+
+    if (tiedReg.isOutConsecutive() && !tiedReg.isLeadConsecutive())
+      sb.appendFormat("+%u", tiedReg.consecutiveData());
+
+    if (tiedReg.isLast())
+      sb.append("|Last");
+
+    if (tiedReg.isKill())
+      sb.append("|Kill");
+
+    sb.append("}");
+  }
+}
+
+ASMJIT_FAVOR_SIZE Error BaseRAPass::annotateCode() noexcept {  // Sets an inline comment on every node of every block.
+  StringTmp<1024> sb;
+
+  for (const RABlock* block : _blocks) {
+    BaseNode* node = block->first();
+    if (!node) continue;  // Skip blocks without a first node.
+
+    BaseNode* last = block->last();
+    for (;;) {
+      sb.clear();
+      Formatter::formatNode(sb, _formatOptions, cc(), node);  // The comment starts with the formatted node itself.
+
+      if (hasDiagnosticOption(DiagnosticOptions::kRADebugLiveness) && node->isInst() && node->hasPassData()) {
+        const RAInst* raInst = node->passData<RAInst>();
+        if (raInst->tiedCount() > 0) {
+          sb.padEnd(40);  // Pad to 40 characters so the liveness column lines up.
+          sb.append(" | ");
+          RAPass_formatLiveness(this, sb, raInst);
+        }
+      }
+
+      node->setInlineComment(static_cast<char*>(cc()->_dataZone.dup(sb.data(), sb.size(), true)));  // Zone-owned copy.
+      if (node == last)
+        break;
+      node = node->next();
+    }
+  }
+
+  return kErrorOk;
+}
+
+ASMJIT_FAVOR_SIZE Error BaseRAPass::_dumpBlockIds(String& sb, const RABlocks& blocks) noexcept {
+  for (uint32_t i = 0, size = blocks.size(); i < size; i++) {  // Appends block ids as "#a, #b, #c".
+    const RABlock* block = blocks[i];
+    if (i != 0)
+      ASMJIT_PROPAGATE(sb.appendFormat(", #%u", block->blockId()));  // Separator precedes all but the first id.
+    else
+      ASMJIT_PROPAGATE(sb.appendFormat("#%u", block->blockId()));
+  }
+  return kErrorOk;
+}
+
+ASMJIT_FAVOR_SIZE Error BaseRAPass::_dumpBlockLiveness(String& sb, const RABlock* block) noexcept {
+  for (uint32_t liveType = 0; liveType < RABlock::kLiveCount; liveType++) {  // One output line per liveness set.
+    const char* bitsName = liveType == RABlock::kLiveIn  ? "IN  " :
+                           liveType == RABlock::kLiveOut ? "OUT " :
+                           liveType == RABlock::kLiveGen ? "GEN " : "KILL";
+
+    const ZoneBitVector& bits = block->_liveBits[liveType];
+    uint32_t size = bits.size();
+    ASMJIT_ASSERT(size <= workRegCount());
+
+    uint32_t n = 0;  // Number of registers printed for this set so far.
+    for (uint32_t workId = 0; workId < size; workId++) {
+      if (bits.bitAt(workId)) {
+        RAWorkReg* wReg = workRegById(workId);
+
+        if (!n)
+          sb.appendFormat("    %s [", bitsName);  // The header is emitted lazily, with the first set bit.
+        else
+          sb.append(", ");
+
+        sb.append(wReg->name());
+        n++;
+      }
+    }
+
+    if (n)  // Empty sets produce no output at all.
+      sb.append("]\n");
+  }
+
+  return kErrorOk;
+}
+
+ASMJIT_FAVOR_SIZE Error BaseRAPass::_dumpLiveSpans(String& sb) noexcept {  // One line per work register.
+  uint32_t numWorkRegs = _workRegs.size();
+  uint32_t maxSize = _maxWorkRegNameSize;
+
+  for (uint32_t workId = 0; workId < numWorkRegs; workId++) {
+    RAWorkReg* workReg = _workRegs[workId];
+
+    sb.append("  ");
+
+    size_t oldSize = sb.size();
+    sb.append(workReg->name());
+    sb.padEnd(oldSize + maxSize);  // Pad the name column to `_maxWorkRegNameSize` characters.
+
+    RALiveStats& stats = workReg->liveStats();
+    sb.appendFormat(" {id:%04u width: %-4u freq: %0.4f priority=%0.4f}",
+      workReg->virtId(),
+      stats.width(),
+      stats.freq(),
+      stats.priority());
+    sb.append(": ");
+
+    LiveRegSpans& liveSpans = workReg->liveSpans();
+    for (uint32_t x = 0; x < liveSpans.size(); x++) {
+      const LiveRegSpan& liveSpan = liveSpans[x];
+      if (x)
+        sb.append(", ");
+      sb.appendFormat("[%u:%u]", liveSpan.a, liveSpan.b);  // Printed as [a:b].
+    }
+
+    sb.append('\n');
+  }
+
+  return kErrorOk;
+}
+#endif
+
+ASMJIT_END_NAMESPACE
+
+#endif // !ASMJIT_NO_COMPILER
diff --git a/lib/lepton/asmjit/core/rapass_p.h b/lib/lepton/asmjit/core/rapass_p.h
new file mode 100644
index 0000000000..9473829366
--- /dev/null
+++ b/lib/lepton/asmjit/core/rapass_p.h
@@ -0,0 +1,1183 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_RAPASS_P_H_INCLUDED
+#define ASMJIT_CORE_RAPASS_P_H_INCLUDED
+
+#include "../core/api-config.h"
+#ifndef ASMJIT_NO_COMPILER
+
+#include "../core/compiler.h"
+#include "../core/emithelper_p.h"
+#include "../core/raassignment_p.h"
+#include "../core/radefs_p.h"
+#include "../core/rastack_p.h"
+#include "../core/support.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \cond INTERNAL
+//! \addtogroup asmjit_ra
+//! \{
+
+//! Flags used by \ref RABlock (combinable, see `ASMJIT_DEFINE_ENUM_FLAGS` below).
+enum class RABlockFlags : uint32_t {
+  //! No flags.
+  kNone = 0,
+
+  //! Block has been constructed from nodes (set by `RABlock::makeConstructed()`).
+  kIsConstructed = 0x00000001u,
+  //! Block is reachable (set by `buildCFGViews()`).
+  kIsReachable = 0x00000002u,
+  //! Block is a target (has an associated label or multiple labels).
+  kIsTargetable = 0x00000004u,
+  //! Block has been allocated (set by `RABlock::makeAllocated()`).
+  kIsAllocated = 0x00000008u,
+  //! Block is a function-exit (set by `BaseRAPass::addExitBlock()`).
+  kIsFuncExit = 0x00000010u,
+
+  //! Block has a terminator (jump, conditional jump, ret).
+  kHasTerminator = 0x00000100u,
+  //! Block naturally flows to the next block (see `RABlock::consecutive()`).
+  kHasConsecutive = 0x00000200u,
+  //! Block has a jump to a jump-table at the end.
+  kHasJumpTable = 0x00000400u,
+  //! Block contains fixed (precolored) registers, i.e. a tied register with a fixed use or out id.
+  kHasFixedRegs = 0x00000800u,
+  //! Block contains function calls.
+  kHasFuncCalls = 0x00001000u
+};
+ASMJIT_DEFINE_ENUM_FLAGS(RABlockFlags)
+
+//! Basic block used by the register allocator pass.
+class RABlock {
+public:
+  ASMJIT_NONCOPYABLE(RABlock)
+
+  typedef RAAssignment::PhysToWorkMap PhysToWorkMap;
+  typedef RAAssignment::WorkToPhysMap WorkToPhysMap;
+
+  //! \name Constants
+  //! \{
+
+  enum : uint32_t {
+    //! Unassigned block id.
+    kUnassignedId = 0xFFFFFFFFu
+  };
+
+  enum LiveType : uint32_t { // Index into `_liveBits`.
+    kLiveIn = 0,
+    kLiveOut = 1,
+    kLiveGen = 2,
+    kLiveKill = 3,
+    kLiveCount = 4
+  };
+
+  //! \}
+
+  //! \name Members
+  //! \{
+
+  //! Register allocator pass that owns this block.
+  BaseRAPass* _ra;
+
+  //! Block id (indexed from zero), `kUnassignedId` until one is assigned.
+  uint32_t _blockId = kUnassignedId;
+  //! Block flags, see \ref RABlockFlags.
+  RABlockFlags _flags = RABlockFlags::kNone;
+
+  //! First `BaseNode` of this block (inclusive).
+  BaseNode* _first = nullptr;
+  //! Last `BaseNode` of this block (inclusive).
+  BaseNode* _last = nullptr;
+
+  //! Initial position of this block (inclusive).
+  uint32_t _firstPosition = 0;
+  //! End position of this block (exclusive).
+  uint32_t _endPosition = 0;
+
+  //! Weight of this block (default 0, each loop adds one).
+  uint32_t _weight = 0;
+  //! Post-order view order, used during POV construction.
+  uint32_t _povOrder = 0;
+
+  //! Basic statistics about registers.
+  RARegsStats _regsStats = RARegsStats();
+  //! Maximum live-count per register group.
+  RALiveCount _maxLiveCount = RALiveCount();
+
+  //! Timestamp (used by block visitors), mutable so that const blocks can be marked.
+  mutable uint64_t _timestamp = 0;
+  //! Immediate dominator of this block.
+  RABlock* _idom = nullptr;
+
+  //! Block predecessors.
+  RABlocks _predecessors {};
+  //! Block successors.
+  RABlocks _successors {};
+
+  //! Liveness bit-vectors (in/out/gen/kill), indexed by \ref LiveType.
+  ZoneBitVector _liveBits[kLiveCount] {};
+
+  //! Shared assignment id or `Globals::kInvalidId` if this block doesn't have a shared assignment.
+  //! See \ref RASharedAssignment for more details.
+  uint32_t _sharedAssignmentId = Globals::kInvalidId;
+  //! Scratch registers that cannot be allocated upon block entry.
+  RegMask _entryScratchGpRegs = 0;
+  //! Scratch registers used at exit, by a terminator instruction.
+  RegMask _exitScratchGpRegs = 0;
+
+  //! Register assignment on entry.
+  PhysToWorkMap* _entryPhysToWorkMap = nullptr;
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  inline RABlock(BaseRAPass* ra) noexcept
+    : _ra(ra) {}
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  inline BaseRAPass* pass() const noexcept { return _ra; }
+  inline ZoneAllocator* allocator() const noexcept; // Declared only; defined outside the class body.
+
+  inline uint32_t blockId() const noexcept { return _blockId; }
+  inline RABlockFlags flags() const noexcept { return _flags; }
+
+  inline bool hasFlag(RABlockFlags flag) const noexcept { return Support::test(_flags, flag); }
+  inline void addFlags(RABlockFlags flags) noexcept { _flags |= flags; }
+
+  inline bool isAssigned() const noexcept { return _blockId != kUnassignedId; }
+
+  inline bool isConstructed() const noexcept { return hasFlag(RABlockFlags::kIsConstructed); }
+  inline bool isReachable() const noexcept { return hasFlag(RABlockFlags::kIsReachable); }
+  inline bool isTargetable() const noexcept { return hasFlag(RABlockFlags::kIsTargetable); }
+  inline bool isAllocated() const noexcept { return hasFlag(RABlockFlags::kIsAllocated); }
+  inline bool isFuncExit() const noexcept { return hasFlag(RABlockFlags::kIsFuncExit); }
+  inline bool hasTerminator() const noexcept { return hasFlag(RABlockFlags::kHasTerminator); }
+  inline bool hasConsecutive() const noexcept { return hasFlag(RABlockFlags::kHasConsecutive); }
+  inline bool hasJumpTable() const noexcept { return hasFlag(RABlockFlags::kHasJumpTable); }
+
+  inline void makeConstructed(const RARegsStats& regStats) noexcept { // Marks the block constructed and merges `regStats`.
+    _flags |= RABlockFlags::kIsConstructed;
+    _regsStats.combineWith(regStats);
+  }
+
+  inline void makeReachable() noexcept { _flags |= RABlockFlags::kIsReachable; }
+  inline void makeTargetable() noexcept { _flags |= RABlockFlags::kIsTargetable; }
+  inline void makeAllocated() noexcept { _flags |= RABlockFlags::kIsAllocated; }
+
+  inline const RARegsStats& regsStats() const noexcept { return _regsStats; }
+
+  inline bool hasPredecessors() const noexcept { return !_predecessors.empty(); }
+  inline bool hasSuccessors() const noexcept { return !_successors.empty(); }
+
+  inline bool hasSuccessor(RABlock* block) noexcept {
+    if (block->_predecessors.size() < _successors.size()) // Search whichever of the two edge lists is shorter.
+      return block->_predecessors.contains(this);
+    else
+      return _successors.contains(block);
+  }
+
+  inline const RABlocks& predecessors() const noexcept { return _predecessors; }
+  inline const RABlocks& successors() const noexcept { return _successors; }
+
+  inline BaseNode* first() const noexcept { return _first; }
+  inline BaseNode* last() const noexcept { return _last; }
+
+  inline void setFirst(BaseNode* node) noexcept { _first = node; }
+  inline void setLast(BaseNode* node) noexcept { _last = node; }
+
+  inline uint32_t firstPosition() const noexcept { return _firstPosition; }
+  inline void setFirstPosition(uint32_t position) noexcept { _firstPosition = position; }
+
+  inline uint32_t endPosition() const noexcept { return _endPosition; }
+  inline void setEndPosition(uint32_t position) noexcept { _endPosition = position; }
+
+  inline uint32_t povOrder() const noexcept { return _povOrder; }
+
+  inline RegMask entryScratchGpRegs() const noexcept; // Declared only; defined outside the class body.
+  inline RegMask exitScratchGpRegs() const noexcept { return _exitScratchGpRegs; }
+
+  inline void addEntryScratchGpRegs(RegMask regMask) noexcept { _entryScratchGpRegs |= regMask; }
+  inline void addExitScratchGpRegs(RegMask regMask) noexcept { _exitScratchGpRegs |= regMask; }
+
+  inline bool hasSharedAssignmentId() const noexcept { return _sharedAssignmentId != Globals::kInvalidId; }
+  inline uint32_t sharedAssignmentId() const noexcept { return _sharedAssignmentId; }
+  inline void setSharedAssignmentId(uint32_t id) noexcept { _sharedAssignmentId = id; }
+
+  inline uint64_t timestamp() const noexcept { return _timestamp; }
+  inline bool hasTimestamp(uint64_t ts) const noexcept { return _timestamp == ts; }
+  inline void setTimestamp(uint64_t ts) const noexcept { _timestamp = ts; }
+  inline void resetTimestamp() const noexcept { _timestamp = 0; }
+
+  inline RABlock* consecutive() const noexcept { return hasConsecutive() ? _successors[0] : nullptr; }
+
+  inline RABlock* iDom() noexcept { return _idom; }
+  inline const RABlock* iDom() const noexcept { return _idom; }
+  inline void setIDom(RABlock* block) noexcept { _idom = block; }
+
+  inline ZoneBitVector& liveIn() noexcept { return _liveBits[kLiveIn]; }
+  inline const ZoneBitVector& liveIn() const noexcept { return _liveBits[kLiveIn]; }
+
+  inline ZoneBitVector& liveOut() noexcept { return _liveBits[kLiveOut]; }
+  inline const ZoneBitVector& liveOut() const noexcept { return _liveBits[kLiveOut]; }
+
+  inline ZoneBitVector& gen() noexcept { return _liveBits[kLiveGen]; }
+  inline const ZoneBitVector& gen() const noexcept { return _liveBits[kLiveGen]; }
+
+  inline ZoneBitVector& kill() noexcept { return _liveBits[kLiveKill]; }
+  inline const ZoneBitVector& kill() const noexcept { return _liveBits[kLiveKill]; }
+
+  inline Error resizeLiveBits(uint32_t size) noexcept { // Resizes all four liveness vectors; stops at the first failure.
+    ASMJIT_PROPAGATE(_liveBits[kLiveIn  ].resize(allocator(), size));
+    ASMJIT_PROPAGATE(_liveBits[kLiveOut ].resize(allocator(), size));
+    ASMJIT_PROPAGATE(_liveBits[kLiveGen ].resize(allocator(), size));
+    ASMJIT_PROPAGATE(_liveBits[kLiveKill].resize(allocator(), size));
+    return kErrorOk;
+  }
+
+  inline bool hasEntryAssignment() const noexcept { return _entryPhysToWorkMap != nullptr; }
+  inline PhysToWorkMap* entryPhysToWorkMap() const noexcept { return _entryPhysToWorkMap; }
+  inline void setEntryAssignment(PhysToWorkMap* physToWorkMap) noexcept { _entryPhysToWorkMap = physToWorkMap; }
+
+  //! \}
+
+  //! \name Utilities
+  //! \{
+
+  //! Adds a successor to this block, and a predecessor to `successor`, connecting both sides.
+  //!
+  //! This API must be used to manage successors and predecessors, never manage them manually.
+  Error appendSuccessor(RABlock* successor) noexcept;
+
+  //! Similar to `appendSuccessor()`, but prepends instead of appending.
+  //!
+  //! This function is used to add a natural flow (always first) to the block.
+  Error prependSuccessor(RABlock* successor) noexcept;
+
+  //! \}
+};
+
+//! Register allocator's data associated with each `InstNode`.
+class RAInst {
+public:
+  ASMJIT_NONCOPYABLE(RAInst)
+
+  //! \name Members
+  //! \{
+
+  //! Parent block.
+  RABlock* _block;
+  //! Instruction RW flags.
+  InstRWFlags _instRWFlags;
+  //! Aggregated RATiedFlags from all operands & instruction specific flags.
+  RATiedFlags _flags;
+  //! Total count of RATiedReg's.
+  uint32_t _tiedTotal;
+  //! Index of RATiedReg's per register group.
+  RARegIndex _tiedIndex;
+  //! Count of RATiedReg's per register group.
+  RARegCount _tiedCount;
+  //! Number of live, and thus interfering VirtReg's at this point.
+  RALiveCount _liveCount;
+  //! Fixed physical registers used.
+  RARegMask _usedRegs;
+  //! Clobbered registers (by a function call).
+  RARegMask _clobberedRegs;
+  //! Tied registers (variable-length tail, the storage actually reserved is given by \ref sizeOf()).
+  RATiedReg _tiedRegs[1];
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  inline RAInst(RABlock* block, InstRWFlags instRWFlags, RATiedFlags tiedFlags, uint32_t tiedTotal, const RARegMask& clobberedRegs) noexcept {
+    _block = block;
+    _instRWFlags = instRWFlags;
+    _flags = tiedFlags;
+    _tiedTotal = tiedTotal;
+    _tiedIndex.reset();
+    _tiedCount.reset();
+    _liveCount.reset();
+    _usedRegs.reset();
+    _clobberedRegs = clobberedRegs;
+  }
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns instruction RW flags.
+  inline InstRWFlags instRWFlags() const noexcept { return _instRWFlags; }
+  //! Tests whether the given `flag` is present in instruction RW flags.
+  inline bool hasInstRWFlag(InstRWFlags flag) const noexcept { return Support::test(_instRWFlags, flag); }
+  //! Adds `flags` to instruction RW flags.
+  inline void addInstRWFlags(InstRWFlags flags) noexcept { _instRWFlags |= flags; }
+
+  //! Returns the instruction flags.
+  inline RATiedFlags flags() const noexcept { return _flags; }
+  //! Tests whether the instruction has flag `flag`.
+  inline bool hasFlag(RATiedFlags flag) const noexcept { return Support::test(_flags, flag); }
+  //! Replaces the existing instruction flags with `flags`.
+  inline void setFlags(RATiedFlags flags) noexcept { _flags = flags; }
+  //! Adds instruction `flags` to this RAInst.
+  inline void addFlags(RATiedFlags flags) noexcept { _flags |= flags; }
+  //! Clears instruction `flags` from this RAInst.
+  inline void clearFlags(RATiedFlags flags) noexcept { _flags &= ~flags; }
+
+  //! Tests whether this instruction can be transformed to another instruction if necessary.
+  inline bool isTransformable() const noexcept { return hasFlag(RATiedFlags::kInst_IsTransformable); }
+
+  //! Returns the associated block with this RAInst.
+  inline RABlock* block() const noexcept { return _block; }
+
+  //! Returns tied registers (all).
+  inline RATiedReg* tiedRegs() const noexcept { return const_cast<RATiedReg*>(_tiedRegs); }
+  //! Returns tied registers for a given `group`.
+  inline RATiedReg* tiedRegs(RegGroup group) const noexcept { return const_cast<RATiedReg*>(_tiedRegs) + _tiedIndex.get(group); }
+
+  //! Returns count of all tied registers.
+  inline uint32_t tiedCount() const noexcept { return _tiedTotal; }
+  //! Returns count of tied registers of a given `group`.
+  inline uint32_t tiedCount(RegGroup group) const noexcept { return _tiedCount[group]; }
+
+  //! Returns `RATiedReg` at the given `index`.
+  inline RATiedReg* tiedAt(uint32_t index) const noexcept {
+    ASMJIT_ASSERT(index < _tiedTotal);
+    return tiedRegs() + index;
+  }
+
+  //! Returns `RATiedReg` at the given `index` of the given register `group`.
+  inline RATiedReg* tiedOf(RegGroup group, uint32_t index) const noexcept {
+    ASMJIT_ASSERT(index < _tiedCount.get(group));
+    return tiedRegs(group) + index;
+  }
+
+  inline void setTiedAt(uint32_t index, RATiedReg& tied) noexcept { // Copies `tied` into the slot at `index`.
+    ASMJIT_ASSERT(index < _tiedTotal);
+    _tiedRegs[index] = tied;
+  }
+
+  //! \name Static Functions
+  //! \{
+
+  static inline size_t sizeOf(uint32_t tiedRegCount) noexcept { // Bytes needed for an RAInst with `tiedRegCount` tied registers.
+    return sizeof(RAInst) - sizeof(RATiedReg) + tiedRegCount * sizeof(RATiedReg);
+  }
+
+  //! \}
+};
+
+//! A helper class that is used to build an array of RATiedReg items that are then copied to `RAInst`.
+class RAInstBuilder {
+public:
+  ASMJIT_NONCOPYABLE(RAInstBuilder)
+
+  //! \name Members
+  //! \{
+
+  //! Instruction RW flags.
+  InstRWFlags _instRWFlags;
+
+  //! Flags combined from all RATiedReg's.
+  RATiedFlags _aggregatedFlags;
+  //! Flags that will be cleared before storing the aggregated flags to `RAInst`.
+  RATiedFlags _forbiddenFlags;
+  RARegCount _count; //!< Number of tied registers added so far, per register group.
+  RARegsStats _stats; //!< Register statistics (used / fixed, per group) collected while adding.
+
+  RARegMask _used; //!< Physical registers requested as fixed ids (use ids and call return ids), per group.
+  RARegMask _clobbered; //!< Physical registers requested as fixed out ids by `add()`, per group.
+
+  //! Next free slot in `_tiedRegs` (one past the last tied register added).
+  RATiedReg* _cur;
+  //! Array of temporary tied registers.
+  RATiedReg _tiedRegs[128];
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  inline RAInstBuilder() noexcept { reset(); }
+
+  inline void init() noexcept { reset(); }
+  inline void reset() noexcept { // Clears all flags, counters and masks, and rewinds `_cur` to the first slot.
+    _instRWFlags = InstRWFlags::kNone;
+    _aggregatedFlags = RATiedFlags::kNone;
+    _forbiddenFlags = RATiedFlags::kNone;
+    _count.reset();
+    _stats.reset();
+    _used.reset();
+    _clobbered.reset();
+    _cur = _tiedRegs;
+  }
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  inline InstRWFlags instRWFlags() const noexcept { return _instRWFlags; }
+  inline bool hasInstRWFlag(InstRWFlags flag) const noexcept { return Support::test(_instRWFlags, flag); }
+  inline void addInstRWFlags(InstRWFlags flags) noexcept { _instRWFlags |= flags; }
+  inline void clearInstRWFlags(InstRWFlags flags) noexcept { _instRWFlags &= ~flags; }
+
+  inline RATiedFlags aggregatedFlags() const noexcept { return _aggregatedFlags; }
+  inline void addAggregatedFlags(RATiedFlags flags) noexcept { _aggregatedFlags |= flags; }
+
+  inline RATiedFlags forbiddenFlags() const noexcept { return _forbiddenFlags; }
+  inline void addForbiddenFlags(RATiedFlags flags) noexcept { _forbiddenFlags |= flags; }
+
+  //! Returns the number of tied registers added to the builder.
+  inline uint32_t tiedRegCount() const noexcept { return uint32_t((size_t)(_cur - _tiedRegs)); }
+
+  inline RATiedReg* begin() noexcept { return _tiedRegs; }
+  inline RATiedReg* end() noexcept { return _cur; }
+
+  inline const RATiedReg* begin() const noexcept { return _tiedRegs; }
+  inline const RATiedReg* end() const noexcept { return _cur; }
+
+  //! Returns `RATiedReg` at the given `index`.
+  inline RATiedReg* operator[](uint32_t index) noexcept {
+    ASMJIT_ASSERT(index < tiedRegCount());
+    return &_tiedRegs[index];
+  }
+
+  //! Returns `RATiedReg` at the given `index` (const).
+  inline const RATiedReg* operator[](uint32_t index) const noexcept {
+    ASMJIT_ASSERT(index < tiedRegCount());
+    return &_tiedRegs[index];
+  }
+
+  //! \}
+
+  //! \name Utilities
+  //! \{
+
+  Error add( // Adds a tied register for `workReg`, or merges into the one it already has.
+    RAWorkReg* workReg,
+    RATiedFlags flags,
+    RegMask useRegMask, uint32_t useId, uint32_t useRewriteMask,
+    RegMask outRegMask, uint32_t outId, uint32_t outRewriteMask,
+    uint32_t rmSize = 0,
+    uint32_t consecutiveParent = Globals::kInvalidId) noexcept {
+
+    RegGroup group = workReg->group();
+    RATiedReg* tiedReg = workReg->tiedReg();
+
+    if (useId != BaseReg::kIdBad) { // A fixed use register was requested.
+      _stats.makeFixed(group);
+      _used[group] |= Support::bitMask(useId);
+      flags |= RATiedFlags::kUseFixed;
+    }
+
+    if (outId != BaseReg::kIdBad) { // A fixed out register was requested.
+      _clobbered[group] |= Support::bitMask(outId);
+      flags |= RATiedFlags::kOutFixed;
+    }
+
+    _aggregatedFlags |= flags;
+    _stats.makeUsed(group);
+
+    if (!tiedReg) {
+      // Overflow could happen when the builder is not reset properly after each instruction.
+      ASMJIT_ASSERT(tiedRegCount() < ASMJIT_ARRAY_SIZE(_tiedRegs));
+
+      tiedReg = _cur++;
+      tiedReg->init(workReg->workId(), flags, useRegMask, useId, useRewriteMask, outRegMask, outId, outRewriteMask, rmSize, consecutiveParent);
+      workReg->setTiedReg(tiedReg);
+
+      _count.add(group);
+      return kErrorOk;
+    }
+    else {
+      if (consecutiveParent != tiedReg->consecutiveParent()) { // Only an unset consecutive parent may be replaced.
+        if (tiedReg->consecutiveParent() != Globals::kInvalidId)
+          return DebugUtils::errored(kErrorInvalidState);
+        tiedReg->_consecutiveParent = consecutiveParent;
+      }
+
+      if (useId != BaseReg::kIdBad) {
+        if (ASMJIT_UNLIKELY(tiedReg->hasUseId())) // A second fixed use id for the same register is rejected.
+          return DebugUtils::errored(kErrorOverlappedRegs);
+        tiedReg->setUseId(useId);
+      }
+
+      if (outId != BaseReg::kIdBad) {
+        if (ASMJIT_UNLIKELY(tiedReg->hasOutId())) // A second fixed out id for the same register is rejected.
+          return DebugUtils::errored(kErrorOverlappedRegs);
+        tiedReg->setOutId(outId);
+      }
+
+      tiedReg->addRefCount();
+      tiedReg->addFlags(flags);
+      tiedReg->_useRegMask &= useRegMask; // Register masks are intersected, rewrite masks are unioned.
+      tiedReg->_useRewriteMask |= useRewriteMask;
+      tiedReg->_outRegMask &= outRegMask;
+      tiedReg->_outRewriteMask |= outRewriteMask;
+      tiedReg->_rmSize = uint8_t(Support::max<uint32_t>(tiedReg->rmSize(), rmSize));
+      return kErrorOk;
+    }
+  }
+
+  Error addCallArg(RAWorkReg* workReg, uint32_t useId) noexcept { // Ties `workReg` as a read-only use fixed to `useId`.
+    ASMJIT_ASSERT(useId != BaseReg::kIdBad);
+
+    RATiedFlags flags = RATiedFlags::kUse | RATiedFlags::kRead | RATiedFlags::kUseFixed;
+    RegGroup group = workReg->group();
+    RegMask allocable = Support::bitMask(useId);
+
+    _aggregatedFlags |= flags;
+    _used[group] |= allocable;
+    _stats.makeFixed(group);
+    _stats.makeUsed(group);
+
+    RATiedReg* tiedReg = workReg->tiedReg();
+    if (!tiedReg) {
+      // Overflow could happen when the builder is not reset properly after each instruction.
+      ASMJIT_ASSERT(tiedRegCount() < ASMJIT_ARRAY_SIZE(_tiedRegs));
+
+      tiedReg = _cur++;
+      tiedReg->init(workReg->workId(), flags, allocable, useId, 0, allocable, BaseReg::kIdBad, 0);
+      workReg->setTiedReg(tiedReg);
+
+      _count.add(group);
+      return kErrorOk;
+    }
+    else {
+      if (tiedReg->hasUseId()) { // Already fixed to a use id: flag as duplicate and widen the use mask.
+        flags |= RATiedFlags::kDuplicate;
+        tiedReg->_useRegMask |= allocable;
+      }
+      else {
+        tiedReg->setUseId(useId);
+        tiedReg->_useRegMask &= allocable;
+      }
+
+      tiedReg->addRefCount();
+      tiedReg->addFlags(flags);
+      return kErrorOk;
+    }
+  }
+
+  Error addCallRet(RAWorkReg* workReg, uint32_t outId) noexcept { // Ties `workReg` as a write-only output fixed to `outId`.
+    ASMJIT_ASSERT(outId != BaseReg::kIdBad);
+
+    RATiedFlags flags = RATiedFlags::kOut | RATiedFlags::kWrite | RATiedFlags::kOutFixed;
+    RegGroup group = workReg->group();
+    RegMask outRegs = Support::bitMask(outId);
+
+    _aggregatedFlags |= flags;
+    _used[group] |= outRegs;
+    _stats.makeFixed(group);
+    _stats.makeUsed(group);
+
+    RATiedReg* tiedReg = workReg->tiedReg();
+    if (!tiedReg) {
+      // Overflow could happen when the builder is not reset properly after each instruction.
+      ASMJIT_ASSERT(tiedRegCount() < ASMJIT_ARRAY_SIZE(_tiedRegs));
+
+      tiedReg = _cur++;
+      tiedReg->init(workReg->workId(), flags, Support::allOnes<RegMask>(), BaseReg::kIdBad, 0, outRegs, outId, 0);
+      workReg->setTiedReg(tiedReg);
+
+      _count.add(group);
+      return kErrorOk;
+    }
+    else {
+      if (tiedReg->hasOutId()) // A second fixed out id for the same register is rejected.
+        return DebugUtils::errored(kErrorOverlappedRegs);
+
+      tiedReg->addRefCount();
+      tiedReg->addFlags(flags);
+      tiedReg->setOutId(outId);
+      return kErrorOk;
+    }
+  }
+
+  //! \}
+};
+
+//! Intersection of multiple register assignments.
+//!
+//! See \ref RAAssignment for more information about register assignments.
+class RASharedAssignment {
+public:
+  typedef RAAssignment::PhysToWorkMap PhysToWorkMap;
+  typedef RAAssignment::WorkToPhysMap WorkToPhysMap;
+
+  //! \name Members
+  //! \{
+
+  //! Bit-mask of registers that cannot be used upon a block entry, for each block that has this shared assignment.
+  //! Scratch registers can come from ISA limits (like jecx/loop instructions on x86) or because the registers are
+  //! used by a jump/branch instruction that uses registers to perform an indirect jump.
+  RegMask _entryScratchGpRegs = 0;
+  //! Union of all live-in registers.
+  ZoneBitVector _liveIn {};
+  //! Register assignment (PhysToWork), null until one is assigned.
+  PhysToWorkMap* _physToWorkMap = nullptr;
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  inline bool empty() const noexcept { return _physToWorkMap == nullptr; } // True while no PhysToWorkMap is assigned.
+
+  inline RegMask entryScratchGpRegs() const noexcept { return _entryScratchGpRegs; }
+  inline void addEntryScratchGpRegs(RegMask mask) noexcept { _entryScratchGpRegs |= mask; }
+
+  inline const ZoneBitVector& liveIn() const noexcept { return _liveIn; }
+
+  inline PhysToWorkMap* physToWorkMap() const noexcept { return _physToWorkMap; }
+  inline void assignPhysToWorkMap(PhysToWorkMap* physToWorkMap) noexcept { _physToWorkMap = physToWorkMap; }
+
+  //! \}
+};
+
+//! Register allocation pass used by `BaseCompiler`.
+class BaseRAPass : public FuncPass {
+public:
+  ASMJIT_NONCOPYABLE(BaseRAPass)
+  typedef FuncPass Base;
+
+  enum : uint32_t {
+    kCallArgWeight = 80
+  };
+
+  typedef RAAssignment::PhysToWorkMap PhysToWorkMap;
+  typedef RAAssignment::WorkToPhysMap WorkToPhysMap;
+
+  //! \name Members
+  //! \{
+
+  //! Allocator that uses zone passed to `runOnFunction()`.
+  ZoneAllocator _allocator {};
+  //! Emit helper.
+  BaseEmitHelper* _iEmitHelper = nullptr;
+
+  //! Logger, disabled if null.
+  Logger* _logger = nullptr;
+  //! Format options, copied from Logger, or zeroed if there is no logger.
+  FormatOptions _formatOptions {};
+  //! Diagnostic options, copied from Emitter, or zeroed if there is no logger.
+  DiagnosticOptions _diagnosticOptions {};
+
+  //! Function being processed.
+  FuncNode* _func = nullptr;
+  //! Stop node.
+  BaseNode* _stop = nullptr;
+  //! Node that is used to insert extra code after the function body.
+  BaseNode* _extraBlock = nullptr;
+
+  //! Blocks (first block is the entry, always exists).
+  RABlocks _blocks {};
+  //! Function exit blocks (usually one, but can contain more).
+  RABlocks _exits {};
+  //! Post order view (POV).
+  RABlocks _pov {};
+
+  //! Number of instruction nodes.
+  uint32_t _instructionCount = 0;
+  //! Number of created blocks (internal).
+  uint32_t _createdBlockCount = 0;
+
+  //! Shared assignment blocks.
+  ZoneVector<RASharedAssignment> _sharedAssignments {};
+
+  //! Timestamp generator (incremental).
+  mutable uint64_t _lastTimestamp = 0;
+
+  //! Architecture traits.
+  const ArchTraits* _archTraits = nullptr;
+  //! Index to physical registers in `RAAssignment::PhysToWorkMap`.
+  RARegIndex _physRegIndex = RARegIndex();
+  //! Count of physical registers in `RAAssignment::PhysToWorkMap`.
+  RARegCount _physRegCount = RARegCount();
+  //! Total number of physical registers.
+  uint32_t _physRegTotal = 0;
+  //! Indexes of possible scratch registers that can be selected if necessary.
+  Support::Array<uint8_t, 2> _scratchRegIndexes {};
+
+  //! Registers available for allocation.
+  RARegMask _availableRegs = RARegMask();
+  //! Count of physical registers per group.
+  RARegCount _availableRegCount = RARegCount();
+  //! Registers clobbered by the function.
+  RARegMask _clobberedRegs = RARegMask();
+
+  //! Work registers (registers used by the function).
+  RAWorkRegs _workRegs;
+  //! Work registers per register group.
+  Support::Array<RAWorkRegs, Globals::kNumVirtGroups> _workRegsOfGroup;
+
+  //! Register allocation strategy per register group.
+  Support::Array<RAStrategy, Globals::kNumVirtGroups> _strategy;
+  //! Global max live-count (from all blocks) per register group.
+  RALiveCount _globalMaxLiveCount = RALiveCount();
+  //! Global live spans per register group.
+  Support::Array<LiveRegSpans*, Globals::kNumVirtGroups> _globalLiveSpans {};
+  //! Temporary stack slot.
+  Operand _temporaryMem = Operand();
+
+  //! Stack pointer.
+  BaseReg _sp = BaseReg();
+  //! Frame pointer.
+  BaseReg _fp = BaseReg();
+  //! Stack manager.
+  RAStackAllocator _stackAllocator {};
+  //! Function arguments assignment.
+  FuncArgsAssignment _argsAssignment {};
+  //! Some StackArgs have to be assigned to StackSlots.
+  uint32_t _numStackArgsToStackSlots = 0;
+
+  //! Maximum name-size computed from all WorkRegs.
+  uint32_t _maxWorkRegNameSize = 0;
+  //! Temporary string builder used to format comments.
+  StringTmp<80> _tmpString;
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  BaseRAPass() noexcept;
+  virtual ~BaseRAPass() noexcept;
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns \ref Logger passed to \ref runOnFunction().
+  inline Logger* logger() const noexcept { return _logger; }
+
+  //! Returns either a valid logger if the given `option` is set and logging is enabled, or nullptr.
+  inline Logger* getLoggerIf(DiagnosticOptions option) const noexcept { return Support::test(_diagnosticOptions, option) ? _logger : nullptr; }
+
+  //! Returns whether the diagnostic `option` is enabled.
+  //!
+  //! \note Returns false if there is no logger (as diagnostics without logging make no sense).
+  inline bool hasDiagnosticOption(DiagnosticOptions option) const noexcept { return Support::test(_diagnosticOptions, option); }
+
+  //! Returns \ref Zone passed to \ref runOnFunction().
+  inline Zone* zone() const noexcept { return _allocator.zone(); }
+  //! Returns \ref ZoneAllocator used by the register allocator.
+  inline ZoneAllocator* allocator() const noexcept { return const_cast<ZoneAllocator*>(&_allocator); }
+
+  inline const ZoneVector<RASharedAssignment>& sharedAssignments() const { return _sharedAssignments; }
+  inline uint32_t sharedAssignmentCount() const noexcept { return _sharedAssignments.size(); }
+
+  //! Returns the current function node.
+  inline FuncNode* func() const noexcept { return _func; }
+  //! Returns the stop of the current function.
+  inline BaseNode* stop() const noexcept { return _stop; }
+
+  //! Returns an extra block used by the current function being processed.
+  inline BaseNode* extraBlock() const noexcept { return _extraBlock; }
+  //! Sets an extra block, see `extraBlock()`.
+  inline void setExtraBlock(BaseNode* node) noexcept { _extraBlock = node; }
+
+  inline uint32_t endPosition() const noexcept { return _instructionCount * 2; }
+
+  inline const RARegMask& availableRegs() const noexcept { return _availableRegs; }
+  inline const RARegMask& cloberredRegs() const noexcept { return _clobberedRegs; }
+
+  //! \}
+
+  //! \name Utilities
+  //! \{
+
+  inline void makeUnavailable(RegGroup group, uint32_t regId) noexcept {
+    _availableRegs[group] &= ~Support::bitMask(regId);
+    _availableRegCount[group]--;
+  }
+
+  //! Runs the register allocator for the given `func`.
+  Error runOnFunction(Zone* zone, Logger* logger, FuncNode* func) override;
+
+  //! Performs all allocation steps sequentially, called by `runOnFunction()`.
+  Error onPerformAllSteps() noexcept;
+
+  //! \}
+
+  //! \name Events
+  //! \{
+
+  //! Called by \ref runOnFunction() before the register allocation to initialize
+  //! architecture-specific data and constraints.
+  virtual void onInit() noexcept = 0;
+
+  //! Called by \ref runOnFunction() after register allocation to clean everything
+  //! up. Called even if the register allocation failed.
+  virtual void onDone() noexcept = 0;
+
+  //! \}
+
+  //! \name CFG - Basic-Block Management
+  //! \{
+
+  //! Returns the function's entry block.
+  inline RABlock* entryBlock() noexcept {
+    ASMJIT_ASSERT(!_blocks.empty());
+    return _blocks[0];
+  }
+
+  //! \overload
+  inline const RABlock* entryBlock() const noexcept {
+    ASMJIT_ASSERT(!_blocks.empty());
+    return _blocks[0];
+  }
+
+  //! Returns all basic blocks of this function.
+  inline RABlocks& blocks() noexcept { return _blocks; }
+  //! \overload
+  inline const RABlocks& blocks() const noexcept { return _blocks; }
+
+  //! Returns the count of basic blocks (returns size of `_blocks` array).
+  inline uint32_t blockCount() const noexcept { return _blocks.size(); }
+  //! Returns the count of reachable basic blocks (returns size of `_pov` array).
+  inline uint32_t reachableBlockCount() const noexcept { return _pov.size(); }
+
+  //! Tests whether the CFG has dangling blocks - these were created by `newBlock()`, but not added to CFG through
+  //! `addBlock()`. If `true` is returned and the CFG is constructed it means that something is missing and it's
+  //! incomplete.
+  //!
+  //! \note This is only used to check if the number of created blocks matches the number of added blocks.
+  inline bool hasDanglingBlocks() const noexcept { return _createdBlockCount != blockCount(); }
+
+  //! Gets the next timestamp to be used to mark CFG blocks.
+  inline uint64_t nextTimestamp() const noexcept { return ++_lastTimestamp; }
+
+  //! Creates a new `RABlock` instance.
+  //!
+  //! \note New blocks don't have ID assigned until they are added to the block array by calling `addBlock()`.
+  RABlock* newBlock(BaseNode* initialNode = nullptr) noexcept;
+
+  //! Tries to find a neighboring LabelNode (without going through code) that is already connected with `RABlock`.
+  //! If no label is found then a new RABlock is created and assigned to all possible labels in a backward direction.
+  RABlock* newBlockOrExistingAt(LabelNode* cbLabel, BaseNode** stoppedAt = nullptr) noexcept;
+
+  //! Adds the given `block` to the block list and assigns it a unique block id.
+  Error addBlock(RABlock* block) noexcept;
+
+  inline Error addExitBlock(RABlock* block) noexcept {
+    block->addFlags(RABlockFlags::kIsFuncExit);
+    return _exits.append(allocator(), block);
+  }
+
+  ASMJIT_FORCE_INLINE RAInst* newRAInst(RABlock* block, InstRWFlags instRWFlags, RATiedFlags flags, uint32_t tiedRegCount, const RARegMask& clobberedRegs) noexcept {
+    // Storage sized by `RAInst::sizeOf(tiedRegCount)` comes from `zone()`; the object is constructed in place.
+    void* mem = zone()->alloc(RAInst::sizeOf(tiedRegCount));
+    return ASMJIT_UNLIKELY(!mem) ? nullptr
+                                 : new(mem) RAInst(block, instRWFlags, flags, tiedRegCount, clobberedRegs);
+  }
+
+  ASMJIT_FORCE_INLINE Error assignRAInst(BaseNode* node, RABlock* block, RAInstBuilder& ib) noexcept {
+    uint32_t tiedRegCount = ib.tiedRegCount();
+    RAInst* raInst = newRAInst(block, ib.instRWFlags(), ib.aggregatedFlags(), tiedRegCount, ib._clobbered);
+    // The rest of the function copies the builder's tied registers into `raInst` and attaches it to `node`.
+    if (ASMJIT_UNLIKELY(!raInst))
+      return DebugUtils::errored(kErrorOutOfMemory);
+    // Flags reported by `ib.forbiddenFlags()` are stripped from every tied register copied below.
+    RARegIndex index;
+    RATiedFlags flagsFilter = ~ib.forbiddenFlags();
+    // `index` is built from `ib._count`; `index[group]++` below yields the next free position of that group.
+    index.buildIndexes(ib._count);
+    raInst->_tiedIndex = index;
+    raInst->_tiedCount = ib._count;
+
+    for (uint32_t i = 0; i < tiedRegCount; i++) {
+      RATiedReg* tiedReg = ib[i];
+      RAWorkReg* workReg = workRegById(tiedReg->workId());
+
+      workReg->resetTiedReg();
+      RegGroup group = workReg->group();
+      // A tied register with a use-id flags the block and is also recorded in the per-group `_usedRegs` mask.
+      if (tiedReg->hasUseId()) {
+        block->addFlags(RABlockFlags::kHasFixedRegs);
+        raInst->_usedRegs[group] |= Support::bitMask(tiedReg->useId());
+      }
+      // A tied register with an out-id only flags the block; no register mask is updated for it here.
+      if (tiedReg->hasOutId()) {
+        block->addFlags(RABlockFlags::kHasFixedRegs);
+      }
+
+      RATiedReg& dst = raInst->_tiedRegs[index[group]++];
+      dst = *tiedReg;
+      dst._flags &= flagsFilter;
+      // Unless the tied register is a duplicate, registers in `ib._used[group]` are removed from its use mask.
+      if (!tiedReg->isDuplicate())
+        dst._useRegMask &= ~ib._used[group];
+    }
+
+    node->setPassData<RAInst>(raInst);
+    return kErrorOk;
+  }
+
+  //! \}
+
+  //! \name CFG - Build CFG
+  //! \{
+
+  //! Traverse the whole function and do the following:
+  //!
+  //!   1. Construct CFG (represented by `RABlock`) by populating `_blocks` and `_exits`. Blocks describe the control
+  //!      flow of the function and contain some additional information that is used by the register allocator.
+  //!
+  //!   2. Remove unreachable code immediately. This is not strictly necessary for BaseCompiler itself as the register
+  //!      allocator cannot reach such nodes, but keeping instructions that use virtual registers would fail during
+  //!      instruction encoding phase (Assembler).
+  //!
+  //!   3. `RAInst` is created for each `InstNode` or compatible. It contains information that is essential for further
+  //!      analysis and register allocation.
+  //!
+  //! Use `RACFGBuilderT` template that provides the necessary boilerplate.
+  virtual Error buildCFG() noexcept = 0;
+
+  //! Called after the CFG is built.
+  Error initSharedAssignments(const ZoneVector<uint32_t>& sharedAssignmentsMap) noexcept;
+
+  //! \}
+
+  //! \name CFG - Views Order
+  //! \{
+
+  //! Constructs CFG views (only POV at the moment).
+  Error buildCFGViews() noexcept;
+
+  //! \}
+
+  //! \name CFG - Dominators
+  //! \{
+
+  // Terminology:
+  //   - A node `X` dominates a node `Z` if any path from the entry point to `Z` has to go through `X`.
+  //   - A node `Z` post-dominates a node `X` if any path from `X` to the end of the graph has to go through `Z`.
+
+  //! Constructs a dominator-tree from CFG.
+  Error buildCFGDominators() noexcept;
+
+  bool _strictlyDominates(const RABlock* a, const RABlock* b) const noexcept;
+  const RABlock* _nearestCommonDominator(const RABlock* a, const RABlock* b) const noexcept;
+
+  //! Tests whether the basic block `a` dominates `b` - non-strict, returns true when `a == b`.
+  inline bool dominates(const RABlock* a, const RABlock* b) const noexcept { return a == b ? true : _strictlyDominates(a, b); }
+  //! Tests whether the basic block `a` dominates `b` - strict dominance check, returns false when `a == b`.
+  inline bool strictlyDominates(const RABlock* a, const RABlock* b) const noexcept { return a == b ? false : _strictlyDominates(a, b); }
+
+  //! Returns a nearest common dominator of `a` and `b`.
+  inline RABlock* nearestCommonDominator(RABlock* a, RABlock* b) const noexcept { return const_cast<RABlock*>(_nearestCommonDominator(a, b)); }
+  //! Returns a nearest common dominator of `a` and `b` (const).
+  inline const RABlock* nearestCommonDominator(const RABlock* a, const RABlock* b) const noexcept { return _nearestCommonDominator(a, b); }
+
+  //! \}
+
+  //! \name CFG - Utilities
+  //! \{
+
+  Error removeUnreachableCode() noexcept;
+
+  //! Returns `node` or some node after that is ideal for beginning a new block. This function is mostly used after
+  //! a conditional or unconditional jump to select the successor node. In some cases the next node could be a label,
+  //! which means it could have assigned some block already.
+  BaseNode* findSuccessorStartingAt(BaseNode* node) noexcept;
+
+  //! Returns `true` if the `node` can flow to `target` without reaching code nor data. It's used to eliminate jumps
+  //! to labels that immediately follow them.
+  bool isNextTo(BaseNode* node, BaseNode* target) noexcept;
+
+  //! \}
+
+  //! \name Virtual Register Management
+  //! \{
+
+  //! Returns a native size of the general-purpose register of the target architecture.
+  inline uint32_t registerSize() const noexcept { return _sp.size(); }
+  inline uint32_t availableRegCount(RegGroup group) const noexcept { return _availableRegCount[group]; }
+
+  inline RAWorkReg* workRegById(uint32_t workId) const noexcept { return _workRegs[workId]; }
+
+  inline RAWorkRegs& workRegs() noexcept { return _workRegs; }
+  inline RAWorkRegs& workRegs(RegGroup group) noexcept { return _workRegsOfGroup[group]; }
+
+  inline const RAWorkRegs& workRegs() const noexcept { return _workRegs; }
+  inline const RAWorkRegs& workRegs(RegGroup group) const noexcept { return _workRegsOfGroup[group]; }
+
+  inline uint32_t workRegCount() const noexcept { return _workRegs.size(); }
+  inline uint32_t workRegCount(RegGroup group) const noexcept { return _workRegsOfGroup[group].size(); }
+
+  inline void _buildPhysIndex() noexcept {
+    _physRegIndex.buildIndexes(_physRegCount);
+    _physRegTotal = uint32_t(_physRegIndex[RegGroup::kMaxVirt]) +
+                    uint32_t(_physRegCount[RegGroup::kMaxVirt]) ;
+  }
+  inline uint32_t physRegIndex(RegGroup group) const noexcept { return _physRegIndex[group]; }
+  inline uint32_t physRegTotal() const noexcept { return _physRegTotal; }
+
+  Error _asWorkReg(VirtReg* vReg, RAWorkReg** out) noexcept;
+
+  //! Creates `RAWorkReg` data for the given `vReg`. The function does nothing
+  //! if `vReg` already contains link to `RAWorkReg`. Called by `constructBlocks()`.
+  inline Error asWorkReg(VirtReg* vReg, RAWorkReg** out) noexcept {
+    *out = vReg->workReg();
+    return *out ? kErrorOk : _asWorkReg(vReg, out);
+  }
+
+  ASMJIT_FORCE_INLINE Error virtIndexAsWorkReg(uint32_t vIndex, RAWorkReg** out) noexcept {
+    // An index outside of the compiler's virtual register array is rejected with `kErrorInvalidVirtId`.
+    const ZoneVector<VirtReg*>& regs = cc()->virtRegs();
+    return ASMJIT_LIKELY(vIndex < regs.size()) ? asWorkReg(regs[vIndex], out)
+                                               : DebugUtils::errored(kErrorInvalidVirtId);
+  }
+
+  inline RAStackSlot* getOrCreateStackSlot(RAWorkReg* workReg) noexcept {
+    RAStackSlot* slot = workReg->stackSlot();
+    if (!slot) {
+      // No slot yet - request a register-home slot that uses the size and alignment of the virtual register.
+      const VirtReg* vReg = workReg->virtReg();
+      slot = _stackAllocator.newSlot(_sp.id(), vReg->virtSize(), vReg->alignment(), RAStackSlot::kFlagRegHome);
+      workReg->_stackSlot = slot;
+      workReg->markStackUsed();
+    }
+    return slot;
+  }
+
+  inline BaseMem workRegAsMem(RAWorkReg* workReg) noexcept {
+    getOrCreateStackSlot(workReg);
+    return BaseMem(OperandSignature::fromOpType(OperandType::kMem) |
+                   OperandSignature::fromMemBaseType(_sp.type()) |
+                   OperandSignature::fromBits(OperandSignature::kMemRegHomeFlag),
+                   workReg->virtId(), 0, 0);
+  }
+
+  WorkToPhysMap* newWorkToPhysMap() noexcept;
+  PhysToWorkMap* newPhysToWorkMap() noexcept;
+
+  inline PhysToWorkMap* clonePhysToWorkMap(const PhysToWorkMap* map) noexcept {
+    size_t size = PhysToWorkMap::sizeOf(_physRegTotal);
+    return static_cast<PhysToWorkMap*>(zone()->dupAligned(map, size, sizeof(uint32_t)));
+  }
+
+  //! \name Liveness Analysis & Statistics
+  //! \{
+
+  //! 1. Calculates GEN/KILL/IN/OUT of each block.
+  //! 2. Calculates live spans and basic statistics of each work register.
+  Error buildLiveness() noexcept;
+
+  //! Assigns argIndex to WorkRegs. Must be called after the liveness analysis
+  //! finishes as it checks whether the argument is live upon entry.
+  Error assignArgIndexToWorkRegs() noexcept;
+
+  //! \}
+
+  //! \name Register Allocation - Global
+  //! \{
+
+  //! Runs a global register allocator.
+  Error runGlobalAllocator() noexcept;
+
+  //! Initializes data structures used for global live spans.
+  Error initGlobalLiveSpans() noexcept;
+
+  Error binPack(RegGroup group) noexcept;
+
+  //! \}
+
+  //! \name Register Allocation - Local
+  //! \{
+
+  //! Runs a local register allocator.
+  Error runLocalAllocator() noexcept;
+  Error setBlockEntryAssignment(RABlock* block, const RABlock* fromBlock, const RAAssignment& fromAssignment) noexcept;
+  Error setSharedAssignment(uint32_t sharedAssignmentId, const RAAssignment& fromAssignment) noexcept;
+
+  //! Called after the RA assignment has been assigned to a block.
+  //!
+  //! This cannot change the assignment, but can examine it.
+  Error blockEntryAssigned(const PhysToWorkMap* physToWorkMap) noexcept;
+
+  //! \}
+
+  //! \name Register Allocation Utilities
+  //! \{
+
+  Error useTemporaryMem(BaseMem& out, uint32_t size, uint32_t alignment) noexcept;
+
+  //! \}
+
+  //! \name Function Prolog & Epilog
+  //! \{
+
+  virtual Error updateStackFrame() noexcept;
+  Error _markStackArgsToKeep() noexcept;
+  Error _updateStackArgs() noexcept;
+  Error insertPrologEpilog() noexcept;
+
+  //! \}
+
+  //! \name Instruction Rewriter
+  //! \{
+
+  Error rewrite() noexcept;
+  virtual Error _rewrite(BaseNode* first, BaseNode* stop) noexcept = 0;
+
+  //! \}
+
+#ifndef ASMJIT_NO_LOGGING
+  //! \name Logging
+  //! \{
+
+  Error annotateCode() noexcept;
+
+  Error _dumpBlockIds(String& sb, const RABlocks& blocks) noexcept;
+  Error _dumpBlockLiveness(String& sb, const RABlock* block) noexcept;
+  Error _dumpLiveSpans(String& sb) noexcept;
+
+  //! \}
+#endif
+
+  //! \name Emit
+  //! \{
+
+  virtual Error emitMove(uint32_t workId, uint32_t dstPhysId, uint32_t srcPhysId) noexcept = 0;
+  virtual Error emitSwap(uint32_t aWorkId, uint32_t aPhysId, uint32_t bWorkId, uint32_t bPhysId) noexcept = 0;
+
+  virtual Error emitLoad(uint32_t workId, uint32_t dstPhysId) noexcept = 0;
+  virtual Error emitSave(uint32_t workId, uint32_t srcPhysId) noexcept = 0;
+
+  virtual Error emitJump(const Label& label) noexcept = 0;
+  virtual Error emitPreCall(InvokeNode* invokeNode) noexcept = 0;
+
+  //! \}
+};
+
+inline ZoneAllocator* RABlock::allocator() const noexcept { return _ra->allocator(); }
+
+inline RegMask RABlock::entryScratchGpRegs() const noexcept {
+  // A block that has a shared assignment id reports the mask stored with that shared assignment instead.
+  if (hasSharedAssignmentId())
+    return _ra->_sharedAssignments[_sharedAssignmentId].entryScratchGpRegs();
+  return _entryScratchGpRegs;
+}
+
+//! \}
+//! \endcond
+
+ASMJIT_END_NAMESPACE
+
+#endif // !ASMJIT_NO_COMPILER
+#endif // ASMJIT_CORE_RAPASS_P_H_INCLUDED
diff --git a/lib/lepton/asmjit/core/rastack.cpp b/lib/lepton/asmjit/core/rastack.cpp
new file mode 100644
index 0000000000..2b7ed592df
--- /dev/null
+++ b/lib/lepton/asmjit/core/rastack.cpp
@@ -0,0 +1,184 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#ifndef ASMJIT_NO_COMPILER
+
+#include "../core/rastack_p.h"
+#include "../core/support.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+// RAStackAllocator - Slots
+// ========================
+
+RAStackSlot* RAStackAllocator::newSlot(uint32_t baseRegId, uint32_t size, uint32_t alignment, uint32_t flags) noexcept {
+  // Room in `_slots` is requested up front; the slot is appended with `appendUnsafe()` at the very end.
+  if (ASMJIT_UNLIKELY(_slots.willGrow(allocator(), 1) != kErrorOk))
+    return nullptr;
+
+  RAStackSlot* s = allocator()->allocT<RAStackSlot>();
+  if (ASMJIT_UNLIKELY(s == nullptr))
+    return nullptr;
+
+  // Caller-provided properties first (the stored alignment is at least 1), then zeroed statistics and offset.
+  s->_size = size;
+  s->_flags = uint16_t(flags);
+  s->_baseRegId = uint8_t(baseRegId);
+  s->_alignment = uint8_t(Support::max<uint32_t>(alignment, 1));
+  s->_useCount = s->_weight = 0;
+  s->_offset = 0;
+
+  _alignment = Support::max<uint32_t>(_alignment, alignment);
+  _slots.appendUnsafe(s);
+  return s;
+}
+
+// RAStackAllocator - Utilities
+// ============================
+
+//! Unused byte range of the stack frame (`size` bytes starting at `offset`) caused by aligning a slot.
+struct RAStackGap {
+  //! Creates an empty gap (zero offset, zero size).
+  inline RAStackGap() noexcept : offset(0), size(0) {}
+
+  //! Creates a gap of `size` bytes that starts at `offset`.
+  inline RAStackGap(uint32_t offset, uint32_t size) noexcept : offset(offset), size(size) {}
+
+  //! Copies both fields of `other`.
+  inline RAStackGap(const RAStackGap& other) noexcept : offset(other.offset), size(other.size) {}
+
+  //! Offset at which the gap starts.
+  uint32_t offset;
+  //! Size of the gap.
+  uint32_t size;
+};
+
+Error RAStackAllocator::calculateStackFrame() noexcept {
+  // Base weight added to all registers regardless of their size and alignment.
+  uint32_t kBaseRegWeight = 16;
+  // Overview: weigh every slot, sort the slots by weight, assign offsets, then derive `_stackSize`.
+  // STEP 1:
+  //
+  // Update usage based on the size of the slot. We boost smaller slots in a way that 32-bit register has higher
+  // priority than a 128-bit register, however, if one 128-bit register is used 4 times more than some other 32-bit
+  // register it will overweight it.
+  for (RAStackSlot* slot : _slots) {
+    uint32_t alignment = slot->alignment();
+    ASMJIT_ASSERT(alignment > 0);
+
+    uint32_t power = Support::min<uint32_t>(Support::ctz(alignment), 6);
+    uint64_t weight;
+
+    if (slot->isRegHome())
+      weight = kBaseRegWeight + (uint64_t(slot->useCount()) * (7 - power));
+    else
+      weight = power;
+
+    // If overflown, which has less chance of winning a lottery, just use max possible weight. In such case it
+    // probably doesn't matter at all.
+    if (weight > 0xFFFFFFFFu)
+      weight = 0xFFFFFFFFu;
+
+    slot->setWeight(uint32_t(weight));
+  }
+
+  // STEP 2:
+  //
+  // Sort stack slots based on their newly calculated weight (in descending order).
+  _slots.sort([](const RAStackSlot* a, const RAStackSlot* b) noexcept {
+    return a->weight() >  b->weight() ? 1 :
+           a->weight() == b->weight() ? 0 : -1;
+  });
+  // NOTE(review): the comparator yields 1 for the heavier `a`; confirm `sort()` turns that into descending order.
+  // STEP 3:
+  //
+  // Calculate offset of each slot. We start from the slot that has the highest weight and advance to slots with
+  // lower weight. It could look that offsets start from the first slot in our list and then simply increase, but
+  // it's not always the case as we also try to fill all gaps introduced by the fact that slots are sorted by
+  // weight and not by size & alignment, so when we need to align some slot we distribute the gap caused by the
+  // alignment to `gaps`.
+  uint32_t offset = 0;
+  ZoneVector<RAStackGap> gaps[kSizeCount - 1];
+
+  for (RAStackSlot* slot : _slots) {
+    if (slot->isStackArg())
+      continue;
+
+    uint32_t slotAlignment = slot->alignment();
+    uint32_t alignedOffset = Support::alignUp(offset, slotAlignment);
+
+    // Try to find a slot within gaps first, before advancing the `offset`.
+    bool foundGap = false;
+    uint32_t gapSize = 0;
+    uint32_t gapOffset = 0;
+
+    {
+      uint32_t slotSize = slot->size();
+      if (slotSize < (1u << uint32_t(ASMJIT_ARRAY_SIZE(gaps)))) {
+        // Iterate from the lowest to the highest possible.
+        uint32_t index = Support::ctz(slotSize);
+        do {
+          if (!gaps[index].empty()) {
+            RAStackGap gap = gaps[index].pop();
+
+            ASMJIT_ASSERT(Support::isAligned(gap.offset, slotAlignment));
+            slot->setOffset(int32_t(gap.offset));
+            // NOTE(review): the slot is put at `gap.offset`, yet the leftover starts at `gap.offset - slotSize` - confirm.
+            gapSize = gap.size - slotSize;
+            gapOffset = gap.offset - slotSize;
+
+            foundGap = true;
+            break;
+          }
+        } while (++index < uint32_t(ASMJIT_ARRAY_SIZE(gaps)));
+      }
+    }
+    // NOTE(review): `gapOffset` is set to the aligned end of the padding, not to its start - confirm this is intended.
+    // No gap found, we may create a new one(s) if the current offset is not aligned.
+    if (!foundGap && offset != alignedOffset) {
+      gapSize = alignedOffset - offset;
+      gapOffset = alignedOffset;
+
+      offset = alignedOffset;
+    }
+
+    // True if we have found a gap and not filled all of it or we aligned the current offset.
+    if (gapSize) {
+      uint32_t gapEnd = gapSize + gapOffset;
+      while (gapOffset < gapEnd) {
+        uint32_t index = Support::ctz(gapOffset);
+        uint32_t slotSize = 1u << index;
+
+        // Weird case, better to bail...
+        if (gapEnd - gapOffset < slotSize)
+          break;
+
+        ASMJIT_PROPAGATE(gaps[index].append(allocator(), RAStackGap(gapOffset, slotSize)));
+        gapOffset += slotSize;
+      }
+    }
+
+    if (!foundGap) {
+      ASMJIT_ASSERT(Support::isAligned(offset, slotAlignment));
+      slot->setOffset(int32_t(offset));
+      offset += slot->size();
+    }
+  }
+
+  _stackSize = Support::alignUp(offset, _alignment);
+  return kErrorOk;
+}
+
+Error RAStackAllocator::adjustSlotOffsets(int32_t offset) noexcept {
+  // Shifts the offset of every slot by `offset`, except for slots flagged as stack arguments.
+  for (RAStackSlot* s : _slots)
+    s->_offset += s->isStackArg() ? int32_t(0) : offset;
+  return kErrorOk;
+}
+
+ASMJIT_END_NAMESPACE
+
+#endif // !ASMJIT_NO_COMPILER
diff --git a/lib/lepton/asmjit/core/rastack_p.h b/lib/lepton/asmjit/core/rastack_p.h
new file mode 100644
index 0000000000..90640b4dea
--- /dev/null
+++ b/lib/lepton/asmjit/core/rastack_p.h
@@ -0,0 +1,171 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_RASTACK_P_H_INCLUDED
+#define ASMJIT_CORE_RASTACK_P_H_INCLUDED
+
+#include "../core/api-config.h"
+#ifndef ASMJIT_NO_COMPILER
+
+#include "../core/radefs_p.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \cond INTERNAL
+//! \addtogroup asmjit_ra
+//! \{
+
+//! Stack slot.
+struct RAStackSlot {
+  //! Stack slot flags.
+  //!
+  //! TODO: kFlagStackArg is not used by the current implementation, do we need to keep it?
+  enum Flags : uint16_t {
+    //! Stack slot is register home slot.
+    kFlagRegHome = 0x0001u,
+    //! Stack slot position matches argument passed via stack.
+    kFlagStackArg = 0x0002u
+  };
+
+  enum ArgIndex : uint32_t {
+    kNoArgIndex = 0xFF
+  };
+
+  //! \name Members
+  //! \{
+
+  //! Base register used to address the stack.
+  uint8_t _baseRegId;
+  //! Minimum alignment required by the slot.
+  uint8_t _alignment;
+  //! Stack slot flags, a combination of \ref Flags values.
+  uint16_t _flags;
+  //! Size of memory required by the slot.
+  uint32_t _size;
+
+  //! Usage counter (one unit equals one memory access).
+  uint32_t _useCount;
+  //! Weight of the slot, calculated by \ref RAStackAllocator::calculateStackFrame().
+  uint32_t _weight;
+  //! Stack offset, calculated by \ref RAStackAllocator::calculateStackFrame().
+  int32_t _offset;
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+  //! The base register id is stored in 8 bits - `setBaseRegId()` truncates larger values.
+  inline uint32_t baseRegId() const noexcept { return _baseRegId; }
+  inline void setBaseRegId(uint32_t id) noexcept { _baseRegId = uint8_t(id); }
+
+  inline uint32_t size() const noexcept { return _size; }
+  inline uint32_t alignment() const noexcept { return _alignment; }
+  //! Flags are stored in 16 bits - `addFlags()` truncates the combined value accordingly.
+  inline uint32_t flags() const noexcept { return _flags; }
+  inline bool hasFlag(uint32_t flag) const noexcept { return (_flags & flag) != 0; }
+  inline void addFlags(uint32_t flags) noexcept { _flags = uint16_t(_flags | flags); }
+
+  inline bool isRegHome() const noexcept { return hasFlag(kFlagRegHome); }
+  inline bool isStackArg() const noexcept { return hasFlag(kFlagStackArg); }
+
+  inline uint32_t useCount() const noexcept { return _useCount; }
+  inline void addUseCount(uint32_t n = 1) noexcept { _useCount += n; }
+
+  inline uint32_t weight() const noexcept { return _weight; }
+  inline void setWeight(uint32_t weight) noexcept { _weight = weight; }
+
+  inline int32_t offset() const noexcept { return _offset; }
+  inline void setOffset(int32_t offset) noexcept { _offset = offset; }
+
+  //! \}
+};
+
+typedef ZoneVector<RAStackSlot*> RAStackSlots;
+
+//! Stack allocator.
+class RAStackAllocator {
+public:
+  ASMJIT_NONCOPYABLE(RAStackAllocator)
+  //! Power-of-two size classes - each value is the log2 of the byte size named by the enumerator.
+  enum Size : uint32_t {
+    kSize1     = 0,
+    kSize2     = 1,
+    kSize4     = 2,
+    kSize8     = 3,
+    kSize16    = 4,
+    kSize32    = 5,
+    kSize64    = 6,
+    kSizeCount = 7
+  };
+
+  //! \name Members
+  //! \{
+
+  //! Allocator used to allocate internal data.
+  ZoneAllocator* _allocator;
+  //! Count of bytes used by all slots.
+  uint32_t _bytesUsed;
+  //! Calculated stack size (can be a bit greater than `_bytesUsed`).
+  uint32_t _stackSize;
+  //! Minimum stack alignment.
+  uint32_t _alignment;
+  //! Stack slots vector.
+  RAStackSlots _slots;
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+  //! Creates an allocator with no `ZoneAllocator` attached (presumably `reset()` provides one before use).
+  inline RAStackAllocator() noexcept
+    : _allocator(nullptr),
+      _bytesUsed(0),
+      _stackSize(0),
+      _alignment(1),
+      _slots() {}
+  //! Resets the slots, sizes and alignment to their initial values and switches to the given `allocator`.
+  inline void reset(ZoneAllocator* allocator) noexcept {
+    _allocator = allocator;
+    _bytesUsed = 0;
+    _stackSize = 0;
+    _alignment = 1;
+    _slots.reset();
+  }
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  inline ZoneAllocator* allocator() const noexcept { return _allocator; }
+
+  inline uint32_t bytesUsed() const noexcept { return _bytesUsed; }
+  inline uint32_t stackSize() const noexcept { return _stackSize; }
+  inline uint32_t alignment() const noexcept { return _alignment; }
+
+  inline RAStackSlots& slots() noexcept { return _slots; }
+  inline const RAStackSlots& slots() const noexcept { return _slots; }
+  inline uint32_t slotCount() const noexcept { return _slots.size(); }
+
+  //! \}
+
+  //! \name Utilities
+  //! \{
+  //! Allocates a new slot and appends it to the slot vector; returns null when an allocation fails.
+  RAStackSlot* newSlot(uint32_t baseRegId, uint32_t size, uint32_t alignment, uint32_t flags = 0) noexcept;
+  //! Calculates the weight and offset of each slot and the resulting stack size.
+  Error calculateStackFrame() noexcept;
+  Error adjustSlotOffsets(int32_t offset) noexcept;
+
+  //! \}
+};
+
+//! \}
+//! \endcond
+
+ASMJIT_END_NAMESPACE
+
+#endif // !ASMJIT_NO_COMPILER
+#endif // ASMJIT_CORE_RASTACK_P_H_INCLUDED
diff --git a/lib/lepton/asmjit/core/string.cpp b/lib/lepton/asmjit/core/string.cpp
new file mode 100644
index 0000000000..83dc6efdaa
--- /dev/null
+++ b/lib/lepton/asmjit/core/string.cpp
@@ -0,0 +1,559 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#include "../core/string.h"
+#include "../core/support.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+// String - Globals
+// ================
+
+static const char String_baseN[] = "0123456789ABCDEF";
+
+constexpr size_t kMinAllocSize = 64;
+constexpr size_t kMaxAllocSize = SIZE_MAX - Globals::kGrowThreshold;
+
+// String - Clear & Reset
+// ======================
+
+Error String::reset() noexcept {
+  // Memory is released only when the string type is `kTypeLarge`; other representations are left untouched.
+  if (_type == kTypeLarge) ::free(_large.data);
+
+  _resetInternal();
+  return kErrorOk;
+}
+
+Error String::clear() noexcept {
+  // A small string is emptied by zeroing the first `uptr` element of its raw storage.
+  if (!isLargeOrExternal()) {
+    _raw.uptr[0] = 0;
+    return kErrorOk;
+  }
+  // Large and external strings keep their buffer - only the terminator and the size are updated.
+  _large.data[0] = '\0';
+  _large.size = 0;
+  return kErrorOk;
+}
+
+// String - Prepare
+// ================
+
+char* String::prepare(ModifyOp op, size_t size) noexcept {
+  char* curData;
+  size_t curSize;
+  size_t curCapacity;
+  // Returns where the caller has to write `size` characters (terminator already stored), or null on failure.
+  if (isLargeOrExternal()) {
+    curData = this->_large.data;
+    curSize = this->_large.size;
+    curCapacity = this->_large.capacity;
+  }
+  else {
+    curData = this->_small.data;
+    curSize = this->_small.type;
+    curCapacity = kSSOCapacity;
+  }
+  // Assign replaces the content (old data is not copied), every other `op` extends the string by `size`.
+  if (op == ModifyOp::kAssign) {
+    if (size > curCapacity) {
+      // Prevent arithmetic overflow.
+      if (ASMJIT_UNLIKELY(size >= kMaxAllocSize))
+        return nullptr;
+
+      size_t newCapacity = Support::alignUp<size_t>(size + 1, kMinAllocSize);
+      char* newData = static_cast<char*>(::malloc(newCapacity));
+
+      if (ASMJIT_UNLIKELY(!newData))
+        return nullptr;
+      // The previous buffer is freed only for `kTypeLarge` (presumably the only type that owns its buffer).
+      if (_type == kTypeLarge)
+        ::free(curData);
+
+      _large.type = kTypeLarge;
+      _large.size = size;
+      _large.capacity = newCapacity - 1;
+      _large.data = newData;
+
+      newData[size] = '\0';
+      return newData;
+    }
+    else {
+      _setSize(size);
+      curData[size] = '\0';
+      return curData;
+    }
+  }
+  else {
+    // Prevent arithmetic overflow.
+    if (ASMJIT_UNLIKELY(size >= kMaxAllocSize - curSize))
+      return nullptr;
+
+    size_t newSize = size + curSize;
+    size_t newSizePlusOne = newSize + 1;
+
+    if (newSizePlusOne > curCapacity) {
+      size_t newCapacity = Support::max<size_t>(curCapacity + 1, kMinAllocSize);
+
+      if (newCapacity < newSizePlusOne && newCapacity < Globals::kGrowThreshold)
+        newCapacity = Support::alignUpPowerOf2(newCapacity);
+
+      if (newCapacity < newSizePlusOne)
+        newCapacity = Support::alignUp(newSizePlusOne, Globals::kGrowThreshold);
+
+      if (ASMJIT_UNLIKELY(newCapacity < newSizePlusOne))
+        return nullptr;
+
+      char* newData = static_cast<char*>(::malloc(newCapacity));
+      if (ASMJIT_UNLIKELY(!newData))
+        return nullptr;
+      // The existing characters are carried over before the old buffer is released.
+      memcpy(newData, curData, curSize);
+
+      if (_type == kTypeLarge)
+        ::free(curData);
+
+      _large.type = kTypeLarge;
+      _large.size = newSize;
+      _large.capacity = newCapacity - 1;
+      _large.data = newData;
+
+      newData[newSize] = '\0';
+      return newData + curSize;
+    }
+    else {
+      _setSize(newSize);
+      curData[newSize] = '\0';
+      return curData + curSize;
+    }
+  }
+}
+
+// String - Assign
+// ===============
+
+Error String::assign(const char* data, size_t size) noexcept {
+  char* dst = nullptr;
+  // Replaces the content with `size` characters of `data`; with a null `data` nothing is copied.
+  // Null terminated string without `size` specified.
+  if (size == SIZE_MAX)
+    size = data ? strlen(data) : size_t(0);
+  // First select or allocate the destination buffer and record the size; the copy happens at the very end.
+  if (isLargeOrExternal()) {
+    if (size <= _large.capacity) {
+      dst = _large.data;
+      _large.size = size;
+    }
+    else {
+      size_t capacityPlusOne = Support::alignUp(size + 1, 32);
+      if (ASMJIT_UNLIKELY(capacityPlusOne < size))
+        return DebugUtils::errored(kErrorOutOfMemory);
+
+      dst = static_cast<char*>(::malloc(capacityPlusOne));
+      if (ASMJIT_UNLIKELY(!dst))
+        return DebugUtils::errored(kErrorOutOfMemory);
+
+      if (_type == kTypeLarge)
+        ::free(_large.data);
+
+      _large.type = kTypeLarge;
+      _large.data = dst;
+      _large.size = size;
+      _large.capacity = capacityPlusOne - 1;
+    }
+  }
+  else {
+    if (size <= kSSOCapacity) {
+      ASMJIT_ASSERT(size < 0xFFu);
+      // A small string keeps its size in the `type` byte.
+      dst = _small.data;
+      _small.type = uint8_t(size);
+    }
+    else {
+      dst = static_cast<char*>(::malloc(size + 1));
+      if (ASMJIT_UNLIKELY(!dst))
+        return DebugUtils::errored(kErrorOutOfMemory);
+
+      _large.type = kTypeLarge;
+      _large.data = dst;
+      _large.size = size;
+      _large.capacity = size;
+    }
+  }
+
+  // Optionally copy data from `data` and null-terminate.
+  if (data && size) {
+    // NOTE: It's better to use `memmove()`. If, for any reason, somebody uses
+    // this function to substring the same string it would work as expected.
+    ::memmove(dst, data, size);
+  }
+
+  dst[size] = '\0';
+  return kErrorOk;
+}
+
+// String - Operations
+// ===================
+
+Error String::_opString(ModifyOp op, const char* str, size_t size) noexcept {
+  if (size == SIZE_MAX)
+    size = str ? strlen(str) : size_t(0);
+
+  // Appending nothing is a no-op, but assigning nothing still has to drop the previous content.
+  if (!size)
+    return op == ModifyOp::kAssign ? clear() : Error(kErrorOk);
+
+  char* p = prepare(op, size);
+  if (!p)
+    return DebugUtils::errored(kErrorOutOfMemory);
+  memcpy(p, str, size);
+  return kErrorOk;
+}
+
+Error String::_opChar(ModifyOp op, char c) noexcept {
+  // `prepare()` makes room for one character and returns the position it has to be written to.
+  if (char* dst = prepare(op, 1)) {
+    dst[0] = c;
+    return kErrorOk;
+  }
+  return DebugUtils::errored(kErrorOutOfMemory);
+}
+
+Error String::_opChars(ModifyOp op, char c, size_t n) noexcept {
+  // Appending zero characters is a no-op, but assigning zero characters must still empty the string.
+  if (!n)
+    return op == ModifyOp::kAssign ? clear() : Error(kErrorOk);
+
+  char* p = prepare(op, n);
+  if (!p)
+    return DebugUtils::errored(kErrorOutOfMemory);
+  memset(p, c, n);
+  return kErrorOk;
+}
+
+Error String::padEnd(size_t n, char c) noexcept {
+  // Appends `c` until the string is `n` characters long; a string that is already that long is left as is.
+  return n > size() ? appendChars(c, n - size()) : kErrorOk;
+}
+
+Error String::_opNumber(ModifyOp op, uint64_t i, uint32_t base, size_t width, StringFormatFlags flags) noexcept {
+  if (base == 0)
+    base = 10;
+  // Digits are generated right-to-left at the end of `buf`; the prefix and the sign are then put in front of them.
+  char buf[128];
+  char* p = buf + ASMJIT_ARRAY_SIZE(buf);
+
+  uint64_t orig = i;
+  char sign = '\0';
+
+  // Format Sign
+  // -----------
+
+  if (Support::test(flags, StringFormatFlags::kSigned) && int64_t(i) < 0) {
+    i = uint64_t(0) - i; // Negate in unsigned arithmetic - `-int64_t(i)` would overflow for INT64_MIN.
+    sign = '-';
+  }
+  else if (Support::test(flags, StringFormatFlags::kShowSign)) {
+    sign = '+';
+  }
+  else if (Support::test(flags, StringFormatFlags::kShowSpace)) {
+    sign = ' ';
+  }
+
+  // Format Number
+  // -------------
+
+  switch (base) {
+    case 2:
+    case 8:
+    case 16: {
+      uint32_t shift = Support::ctz(base);
+      uint32_t mask = base - 1;
+
+      do {
+        uint64_t d = i >> shift;
+        size_t r = size_t(i & mask);
+
+        *--p = String_baseN[r];
+        i = d;
+      } while (i);
+
+      break;
+    }
+
+    case 10: {
+      do {
+        uint64_t d = i / 10;
+        uint64_t r = i % 10;
+
+        *--p = char(uint32_t('0') + uint32_t(r));
+        i = d;
+      } while (i);
+
+      break;
+    }
+
+    default:
+      return DebugUtils::errored(kErrorInvalidArgument);
+  }
+
+  size_t numberSize = (size_t)(buf + ASMJIT_ARRAY_SIZE(buf) - p);
+
+  // Alternate Form
+  // --------------
+
+  if (Support::test(flags, StringFormatFlags::kAlternate)) {
+    if (base == 8) {
+      if (orig != 0)
+        *--p = '0';
+    }
+    if (base == 16) {
+      *--p = 'x';
+      *--p = '0';
+    }
+  }
+
+  // String Width
+  // ------------
+
+  if (sign != 0)
+    *--p = sign;
+
+  if (width > 256)
+    width = 256;
+
+  if (width <= numberSize)
+    width = 0;
+  else
+    width -= numberSize;
+
+  // Finalize
+  // --------
+  // Output layout: prefix (sign and alternate form), then `width` zeros, then the digits.
+  size_t prefixSize = (size_t)(buf + ASMJIT_ARRAY_SIZE(buf) - p) - numberSize;
+  char* data = prepare(op, prefixSize + width + numberSize);
+
+  if (!data)
+    return DebugUtils::errored(kErrorOutOfMemory);
+
+  memcpy(data, p, prefixSize);
+  data += prefixSize;
+
+  memset(data, '0', width);
+  data += width;
+
+  memcpy(data, p + prefixSize, numberSize);
+  return kErrorOk;
+}
+
+Error String::_opHex(ModifyOp op, const void* data, size_t size, char separator) noexcept {
+  char* dst;
+  const uint8_t* src = static_cast<const uint8_t*>(data);
+  // Appending no bytes is a no-op, but assigning no bytes must still empty the string.
+  if (!size)
+    return op == ModifyOp::kAssign ? clear() : Error(kErrorOk);
+
+  if (separator) {
+    if (ASMJIT_UNLIKELY(size >= SIZE_MAX / 3))
+      return DebugUtils::errored(kErrorOutOfMemory);
+    // Two digits per byte plus one separator between each pair of neighbouring bytes.
+    dst = prepare(op, size * 3 - 1);
+    if (ASMJIT_UNLIKELY(!dst))
+      return DebugUtils::errored(kErrorOutOfMemory);
+
+    size_t i = 0;
+    for (;;) {
+      dst[0] = String_baseN[(src[0] >> 4) & 0xF];
+      dst[1] = String_baseN[(src[0]     ) & 0xF];
+      if (++i == size)
+        break;
+      // This makes sure that the separator is only put between two hexadecimal bytes.
+      dst[2] = separator;
+      dst += 3;
+      src++;
+    }
+  }
+  else {
+    if (ASMJIT_UNLIKELY(size >= SIZE_MAX / 2))
+      return DebugUtils::errored(kErrorOutOfMemory);
+    // Two digits per byte, no separators.
+    dst = prepare(op, size * 2);
+    if (ASMJIT_UNLIKELY(!dst))
+      return DebugUtils::errored(kErrorOutOfMemory);
+
+    for (size_t i = 0; i < size; i++, dst += 2, src++) {
+      dst[0] = String_baseN[(src[0] >> 4) & 0xF];
+      dst[1] = String_baseN[(src[0]     ) & 0xF];
+    }
+  }
+
+  return kErrorOk;
+}
+
+Error String::_opFormat(ModifyOp op, const char* fmt, ...) noexcept {
+  // Thin variadic front-end - collects the arguments and forwards them to `_opVFormat()`.
+  va_list args;
+  va_start(args, fmt);
+
+  const Error result = _opVFormat(op, fmt, args);
+
+  va_end(args);
+  return result;
+}
+
+Error String::_opVFormat(ModifyOp op, const char* fmt, va_list ap) noexcept {
+  size_t startAt = (op == ModifyOp::kAssign) ? size_t(0) : size();
+  size_t remainingCapacity = capacity() - startAt;
+
+  // With enough free room the text is formatted straight into the string, otherwise into a local buffer.
+  char buf[1024];
+  bool direct = remainingCapacity >= 128;
+  char* dst = direct ? data() + startAt : buf;
+  size_t dstSize = direct ? remainingCapacity : size_t(ASMJIT_ARRAY_SIZE(buf));
+
+  // The first attempt consumes `ap`, so a copy is kept for the retry. It is released on every path below.
+  va_list apCopy;
+  va_copy(apCopy, ap);
+  Error err = kErrorOk;
+  int fmtResult = vsnprintf(dst, dstSize, fmt, ap);
+  size_t outputSize = size_t(fmtResult);
+
+  if (ASMJIT_UNLIKELY(fmtResult < 0)) {
+    err = DebugUtils::errored(kErrorInvalidState);
+  }
+  else if (ASMJIT_LIKELY(outputSize < dstSize)) {
+    // `vsnprintf()` stores at most `dstSize - 1` characters, so only a smaller result is complete output.
+    if (direct)
+      _setSize(startAt + outputSize);
+    else
+      err = _opString(op, buf, outputSize);
+  }
+  else {
+    // Truncated - make room for the exact size and format again from the untouched copy of the arguments.
+    char* p = prepare(op, outputSize);
+    if (ASMJIT_LIKELY(p != nullptr))
+      fmtResult = vsnprintf(p, outputSize + 1, fmt, apCopy);
+    else
+      err = DebugUtils::errored(kErrorOutOfMemory);
+    ASMJIT_ASSERT(!p || size_t(fmtResult) == outputSize);
+  }
+
+  va_end(apCopy);
+  return err;
+}
+
+Error String::truncate(size_t newSize) noexcept {
+  // Shrinks the string to `newSize` characters and re-terminates it. A `newSize` that is not smaller than
+  // the current size changes nothing; capacity is never touched. The result is always `kErrorOk`.
+  if (newSize >= size())
+    return kErrorOk;
+  if (isLargeOrExternal()) {
+    _large.size = newSize;
+    _large.data[newSize] = '\0';
+  }
+  else {
+    // A small string keeps its size in the type byte.
+    _small.type = uint8_t(newSize);
+    _small.data[newSize] = '\0';
+  }
+  return kErrorOk;
+}
+
+bool String::eq(const char* other, size_t size) const noexcept {
+  // Compares the content with `other`. With `size == SIZE_MAX` the argument is treated as null terminated
+  // and walked character by character; otherwise the sizes must match and the bytes are compared directly.
+  const char* lhs = data();
+  size_t lhsSize = this->size();
+
+  if (size != SIZE_MAX)
+    return lhsSize == size && ::memcmp(lhs, other, lhsSize) == 0;
+
+  const char* rhs = other;
+  const char* lhsEnd = lhs + lhsSize;
+  while (lhs != lhsEnd) {
+    // A terminator in `other` before our end means it is shorter, hence not equal.
+    if (*rhs == '\0' || *lhs != *rhs)
+      return false;
+    lhs++;
+    rhs++;
+  }
+  return *rhs == '\0';
+}
+
+// String - Tests
+// ==============
+
+#if defined(ASMJIT_TEST)
+UNIT(core_string) {
+  String s;
+  // A default-constructed string lives in the embedded (small) buffer.
+  EXPECT(s.isLargeOrExternal() == false);
+  EXPECT(s.isExternal() == false);
+  // Single character: stays small, is null terminated, and compares equal with and without an explicit size.
+  EXPECT(s.assign('a') == kErrorOk);
+  EXPECT(s.size() == 1);
+  EXPECT(s.capacity() == String::kSSOCapacity);
+  EXPECT(s.data()[0] == 'a');
+  EXPECT(s.data()[1] == '\0');
+  EXPECT(s.eq("a") == true);
+  EXPECT(s.eq("a", 1) == true);
+  // Repeated character: assignment replaces the previous content.
+  EXPECT(s.assignChars('b', 4) == kErrorOk);
+  EXPECT(s.size() == 4);
+  EXPECT(s.capacity() == String::kSSOCapacity);
+  EXPECT(s.data()[0] == 'b');
+  EXPECT(s.data()[1] == 'b');
+  EXPECT(s.data()[2] == 'b');
+  EXPECT(s.data()[3] == 'b');
+  EXPECT(s.data()[4] == '\0');
+  EXPECT(s.eq("bbbb") == true);
+  EXPECT(s.eq("bbbb", 4) == true);
+  // Short null terminated input: shrinking from 4 to 3 characters must move the terminator.
+  EXPECT(s.assign("abc") == kErrorOk);
+  EXPECT(s.size() == 3);
+  EXPECT(s.capacity() == String::kSSOCapacity);
+  EXPECT(s.data()[0] == 'a');
+  EXPECT(s.data()[1] == 'b');
+  EXPECT(s.data()[2] == 'c');
+  EXPECT(s.data()[3] == '\0');
+  EXPECT(s.eq("abc") == true);
+  EXPECT(s.eq("abc", 3) == true);
+  // Content longer than `kSSOCapacity` must switch the string to a large buffer.
+  const char* large = "Large string that will not fit into SSO buffer";
+  EXPECT(s.assign(large) == kErrorOk);
+  EXPECT(s.isLargeOrExternal() == true);
+  EXPECT(s.size() == strlen(large));
+  EXPECT(s.capacity() > String::kSSOCapacity);
+  EXPECT(s.eq(large) == true);
+  EXPECT(s.eq(large, strlen(large)) == true);
+  // Appending to an already large string extends it by the appended length.
+  const char* additional = " (additional content)";
+  EXPECT(s.isLargeOrExternal() == true);
+  EXPECT(s.append(additional) == kErrorOk);
+  EXPECT(s.size() == strlen(large) + strlen(additional));
+  // clear() empties the string but keeps the large buffer.
+  EXPECT(s.clear() == kErrorOk);
+  EXPECT(s.size() == 0);
+  EXPECT(s.empty() == true);
+  EXPECT(s.data()[0] == '\0');
+  EXPECT(s.isLargeOrExternal() == true); // Clear should never release the memory.
+  // Number formatting with default arguments.
+  EXPECT(s.appendUInt(1234) == kErrorOk);
+  EXPECT(s.eq("1234") == true);
+  // Base 16 with the alternate form adds the "0x" prefix.
+  EXPECT(s.assignUInt(0xFFFF, 16, 0, StringFormatFlags::kAlternate) == kErrorOk);
+  EXPECT(s.eq("0xFFFF"));
+  // StringTmp starts on its embedded buffer (reported as external) and must leave it once 1000 chars don't fit.
+  StringTmp<64> sTmp;
+  EXPECT(sTmp.isLargeOrExternal());
+  EXPECT(sTmp.isExternal());
+  EXPECT(sTmp.appendChars(' ', 1000) == kErrorOk);
+  EXPECT(!sTmp.isExternal());
+}
+#endif
+
+ASMJIT_END_NAMESPACE
diff --git a/lib/lepton/asmjit/core/string.h b/lib/lepton/asmjit/core/string.h
new file mode 100644
index 0000000000..2562e66194
--- /dev/null
+++ b/lib/lepton/asmjit/core/string.h
@@ -0,0 +1,372 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_STRING_H_INCLUDED
+#define ASMJIT_CORE_STRING_H_INCLUDED
+
+#include "../core/support.h"
+#include "../core/zone.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \addtogroup asmjit_utilities
+//! \{
+
+//! Format flags used by \ref String API (number formatting via `assignInt()`, `appendUInt()`, and friends).
+enum class StringFormatFlags : uint32_t {
+  //! No flags.
+  kNone = 0x00000000u,
+  //! Show sign.
+  kShowSign = 0x00000001u,
+  //! Show space.
+  kShowSpace = 0x00000002u,
+  //! Alternate form (use 0x when formatting HEX number).
+  kAlternate = 0x00000004u,
+  //! The input is signed (added by `assignInt()` / `appendInt()`, which pass the value cast to `uint64_t`).
+  kSigned = 0x80000000u
+};
+ASMJIT_DEFINE_ENUM_FLAGS(StringFormatFlags)
+
+//! Fixed string - only useful for strings that would never exceed `N - 1` characters; always null-terminated.
+template<size_t N>
+union FixedString {
+  //! \name Constants
+  //! \{
+  //! `kNumUInt32Words` is the number of 32-bit words needed to cover `N` bytes (rounded up).
+  // This cannot be constexpr as GCC 4.8 refuses constexpr members of unions.
+  enum : uint32_t {
+    kNumUInt32Words = uint32_t((N + sizeof(uint32_t) - 1) / sizeof(uint32_t))
+  };
+
+  //! \}
+
+  //! \name Members
+  //! \{
+  //! Character view and 32-bit word view of the same storage; both span `kNumUInt32Words * sizeof(uint32_t)` bytes.
+  char str[kNumUInt32Words * sizeof(uint32_t)];
+  uint32_t u32[kNumUInt32Words];
+
+  //! \}
+
+  //! \name Utilities
+  //! \{
+  //! Tests whether `str` equals the null terminated string `other` (compared with `strcmp`).
+  inline bool eq(const char* other) const noexcept {
+    return strcmp(str, other) == 0;
+  }
+
+  //! \}
+};
+
+//! A simple non-reference counted string that uses small string optimization (SSO).
+//!
+//! This string has 3 allocation possibilities:
+//!
+//!   1. Small    - embedded buffer is used for up to `kSSOCapacity` characters. This should handle most small
+//!                 strings and thus avoid dynamic memory allocation for most use-cases.
+//!
+//!   2. Large    - string that doesn't fit into an embedded buffer (or string that was truncated from a larger
+//!                 buffer) and is owned by AsmJit. When you destroy the string AsmJit would automatically
+//!                 release the large buffer.
+//!
+//!   3. External - like Large (2), however, the large buffer is not owned by AsmJit and won't be released when
+//!                 the string is destroyed or reallocated. This is mostly useful for working with larger temporary
+//!                 strings allocated on stack or with immutable strings.
+class String {
+public:
+  ASMJIT_NONCOPYABLE(String)
+
+  //! String operation.
+  enum class ModifyOp : uint32_t {
+    //! Assignment - a new content replaces the current one.
+    kAssign = 0,
+    //! Append - a new content is appended to the string.
+    kAppend = 1
+  };
+
+  //! \cond INTERNAL
+  enum : uint32_t {
+    kLayoutSize = 32,
+    kSSOCapacity = kLayoutSize - 2
+  };
+
+  //! String type - a type byte below `kTypeLarge` denotes a small string and is its size at the same time.
+  enum Type : uint8_t {
+    //! Large string (owned by String).
+    kTypeLarge = 0x1Fu,
+    //! External string (zone allocated or not owned by String).
+    kTypeExternal = 0x20u
+  };
+  //! Raw view of the whole layout - used to copy, swap, and zero the string as a unit.
+  union Raw {
+    uint8_t u8[kLayoutSize];
+    uint64_t u64[kLayoutSize / sizeof(uint64_t)];
+    uintptr_t uptr[kLayoutSize / sizeof(uintptr_t)];
+  };
+  //! Small (embedded) layout - the type byte followed by `kSSOCapacity` characters and the terminator.
+  struct Small {
+    uint8_t type;
+    char data[kSSOCapacity + 1u];
+  };
+  //! Large and external layout - type byte, padding, size, capacity, and a pointer to the characters.
+  struct Large {
+    uint8_t type;
+    uint8_t reserved[sizeof(uintptr_t) - 1];
+    size_t size;
+    size_t capacity;
+    char* data;
+  };
+  //! All layouts overlap; `_type` aliases the leading type byte of `_small` and `_large`.
+  union {
+    uint8_t _type;
+    Raw _raw;
+    Small _small;
+    Large _large;
+  };
+  //! \endcond
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Creates an empty string (zero length) that uses the embedded small buffer.
+  inline String() noexcept
+    : _small {} {}
+
+  //! Creates a string that takes ownership of the content of the `other` string.
+  inline String(String&& other) noexcept {
+    _raw = other._raw;
+    other._resetInternal();
+  }
+
+  inline ~String() noexcept {
+    reset();
+  }
+
+  //! Resets the string into a construction state.
+  ASMJIT_API Error reset() noexcept;
+
+  //! \}
+
+  //! \name Overloaded Operators
+  //! \{
+  //! Move-assigns by swapping with `other` and then resetting `other`, which by then holds the previous content.
+  inline String& operator=(String&& other) noexcept {
+    swap(other);
+    other.reset();
+    return *this;
+  }
+
+  inline bool operator==(const char* other) const noexcept { return  eq(other); }
+  inline bool operator!=(const char* other) const noexcept { return !eq(other); }
+
+  inline bool operator==(const String& other) const noexcept { return  eq(other); }
+  inline bool operator!=(const String& other) const noexcept { return !eq(other); }
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+  //! Storage tests - `isExternal()` matches external buffers only, `isLargeOrExternal()` any non-embedded one.
+  inline bool isExternal() const noexcept { return _type == kTypeExternal; }
+  inline bool isLargeOrExternal() const noexcept { return _type >= kTypeLarge; }
+
+  //! Tests whether the string is empty.
+  inline bool empty() const noexcept { return size() == 0; }
+  //! Returns the size of the string.
+  inline size_t size() const noexcept { return isLargeOrExternal() ? size_t(_large.size) : size_t(_type); }
+  //! Returns the capacity of the string.
+  inline size_t capacity() const noexcept { return isLargeOrExternal() ? _large.capacity : size_t(kSSOCapacity); }
+
+  //! Returns the data of the string.
+  inline char* data() noexcept { return isLargeOrExternal() ? _large.data : _small.data; }
+  //! \overload
+  inline const char* data() const noexcept { return isLargeOrExternal() ? _large.data : _small.data; }
+  //! Returns a pointer to the first character (same as `data()`).
+  inline char* start() noexcept { return data(); }
+  inline const char* start() const noexcept { return data(); }
+  //! Returns a pointer one past the last character (`data() + size()`).
+  inline char* end() noexcept { return data() + size(); }
+  inline const char* end() const noexcept { return data() + size(); }
+
+  //! \}
+
+  //! \name String Operations
+  //! \{
+
+  //! Swaps the content of this string with `other`.
+  inline void swap(String& other) noexcept {
+    std::swap(_raw, other._raw);
+  }
+
+  //! Clears the content of the string.
+  ASMJIT_API Error clear() noexcept;
+  //! Makes room for `size` characters according to `op`; returns where to write them, or null on failure.
+  ASMJIT_API char* prepare(ModifyOp op, size_t size) noexcept;
+  //! Low-level operations behind the assign/append functions below; `op` selects between assign and append.
+  ASMJIT_API Error _opString(ModifyOp op, const char* str, size_t size = SIZE_MAX) noexcept;
+  ASMJIT_API Error _opChar(ModifyOp op, char c) noexcept;
+  ASMJIT_API Error _opChars(ModifyOp op, char c, size_t n) noexcept;
+  ASMJIT_API Error _opNumber(ModifyOp op, uint64_t i, uint32_t base = 0, size_t width = 0, StringFormatFlags flags = StringFormatFlags::kNone) noexcept;
+  ASMJIT_API Error _opHex(ModifyOp op, const void* data, size_t size, char separator = '\0') noexcept;
+  ASMJIT_API Error _opFormat(ModifyOp op, const char* fmt, ...) noexcept;
+  ASMJIT_API Error _opVFormat(ModifyOp op, const char* fmt, va_list ap) noexcept;
+
+  //! Replaces the current content of the string with `data` of the given `size`.
+  //!
+  //! Null terminated strings can set `size` to `SIZE_MAX`.
+  ASMJIT_API Error assign(const char* data, size_t size = SIZE_MAX) noexcept;
+
+  //! Replaces the current content of the string with `other` string.
+  inline Error assign(const String& other) noexcept {
+    return assign(other.data(), other.size());
+  }
+
+  //! Replaces the current content of the string by a single `c` character.
+  inline Error assign(char c) noexcept {
+    return _opChar(ModifyOp::kAssign, c);
+  }
+
+  //! Replaces the current content of the string by a `c` character, repeated `n` times.
+  inline Error assignChars(char c, size_t n) noexcept {
+    return _opChars(ModifyOp::kAssign, c, n);
+  }
+
+  //! Replaces the current content of the string by a formatted integer `i` (signed).
+  inline Error assignInt(int64_t i, uint32_t base = 0, size_t width = 0, StringFormatFlags flags = StringFormatFlags::kNone) noexcept {
+    return _opNumber(ModifyOp::kAssign, uint64_t(i), base, width, flags | StringFormatFlags::kSigned);
+  }
+
+  //! Replaces the current content of the string by a formatted integer `i` (unsigned).
+  inline Error assignUInt(uint64_t i, uint32_t base = 0, size_t width = 0, StringFormatFlags flags = StringFormatFlags::kNone) noexcept {
+    return _opNumber(ModifyOp::kAssign, i, base, width, flags);
+  }
+
+  //! Replaces the current content of the string by the given `data` converted to a HEX string.
+  inline Error assignHex(const void* data, size_t size, char separator = '\0') noexcept {
+    return _opHex(ModifyOp::kAssign, data, size, separator);
+  }
+
+  //! Replaces the current content of the string by a formatted string `fmt`.
+  template<typename... Args>
+  inline Error assignFormat(const char* fmt, Args&&... args) noexcept {
+    return _opFormat(ModifyOp::kAssign, fmt, std::forward<Args>(args)...);
+  }
+
+  //! Replaces the current content of the string by a formatted string `fmt` (va_list version).
+  inline Error assignVFormat(const char* fmt, va_list ap) noexcept {
+    return _opVFormat(ModifyOp::kAssign, fmt, ap);
+  }
+
+  //! Appends `str` having the given size `size` to the string.
+  //!
+  //! Null terminated strings can set `size` to `SIZE_MAX`.
+  inline Error append(const char* str, size_t size = SIZE_MAX) noexcept {
+    return _opString(ModifyOp::kAppend, str, size);
+  }
+
+  //! Appends `other` string to this string.
+  inline Error append(const String& other) noexcept {
+    return append(other.data(), other.size());
+  }
+
+  //! Appends a single `c` character.
+  inline Error append(char c) noexcept {
+    return _opChar(ModifyOp::kAppend, c);
+  }
+
+  //! Appends `c` character repeated `n` times.
+  inline Error appendChars(char c, size_t n) noexcept {
+    return _opChars(ModifyOp::kAppend, c, n);
+  }
+
+  //! Appends a formatted integer `i` (signed).
+  inline Error appendInt(int64_t i, uint32_t base = 0, size_t width = 0, StringFormatFlags flags = StringFormatFlags::kNone) noexcept {
+    return _opNumber(ModifyOp::kAppend, uint64_t(i), base, width, flags | StringFormatFlags::kSigned);
+  }
+
+  //! Appends a formatted integer `i` (unsigned).
+  inline Error appendUInt(uint64_t i, uint32_t base = 0, size_t width = 0, StringFormatFlags flags = StringFormatFlags::kNone) noexcept {
+    return _opNumber(ModifyOp::kAppend, i, base, width, flags);
+  }
+
+  //! Appends the given `data` converted to a HEX string.
+  inline Error appendHex(const void* data, size_t size, char separator = '\0') noexcept {
+    return _opHex(ModifyOp::kAppend, data, size, separator);
+  }
+
+  //! Appends a formatted string `fmt` with `args`.
+  template<typename... Args>
+  inline Error appendFormat(const char* fmt, Args&&... args) noexcept {
+    return _opFormat(ModifyOp::kAppend, fmt, std::forward<Args>(args)...);
+  }
+
+  //! Appends a formatted string `fmt` (va_list version).
+  inline Error appendVFormat(const char* fmt, va_list ap) noexcept {
+    return _opVFormat(ModifyOp::kAppend, fmt, ap);
+  }
+  //! NOTE(review): implementation not visible here - presumably pads the end with `c` up to `n` characters; confirm.
+  ASMJIT_API Error padEnd(size_t n, char c = ' ') noexcept;
+
+  //! Truncates the string to `newSize` characters; does nothing if it is already that short or shorter.
+  ASMJIT_API Error truncate(size_t newSize) noexcept;
+  //! Tests whether the content equals `other`; `size == SIZE_MAX` means `other` is null terminated.
+  ASMJIT_API bool eq(const char* other, size_t size = SIZE_MAX) const noexcept;
+  inline bool eq(const String& other) const noexcept { return eq(other.data(), other.size()); }
+
+  //! \}
+
+  //! \name Internal Functions
+  //! \{
+
+  //! Resets string to embedded and makes it empty (zero length, zero first char)
+  //!
+  //! \note This is always called internally after an external buffer was released as it zeroes all bytes
+  //! used by String's embedded storage.
+  inline void _resetInternal() noexcept {
+    for (size_t i = 0; i < ASMJIT_ARRAY_SIZE(_raw.uptr); i++)
+      _raw.uptr[i] = 0;
+  }
+  //! Stores `newSize` in whichever layout is active; neither the characters nor the terminator are touched.
+  inline void _setSize(size_t newSize) noexcept {
+    if (isLargeOrExternal())
+      _large.size = newSize;
+    else
+      _small.type = uint8_t(newSize);
+  }
+
+  //! \}
+};
+
+//! A \ref String that starts out on an embedded buffer able to hold at least `N` characters.
+template<size_t N>
+class StringTmp : public String {
+public:
+  ASMJIT_NONCOPYABLE(StringTmp)
+
+  //! Embedded storage - `N` characters plus the terminator, rounded up to a multiple of `sizeof(size_t)`.
+  char _embeddedData[Support::alignUp(N + 1, sizeof(size_t))];
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Creates a string backed by `_embeddedData`.
+  inline StringTmp() noexcept { _resetToTemporary(); }
+
+  //! Points the string at `_embeddedData` (external, terminated at index 0); `_large.size` is left as is.
+  inline void _resetToTemporary() noexcept {
+    _embeddedData[0] = '\0';
+    _large.data = _embeddedData;
+    _large.capacity = sizeof(_embeddedData) - 1;
+    _large.type = kTypeExternal;
+  }
+
+  //! \}
+};
+
+//! \}
+
+ASMJIT_END_NAMESPACE
+
+#endif // ASMJIT_CORE_STRING_H_INCLUDED
diff --git a/lib/lepton/asmjit/core/support.cpp b/lib/lepton/asmjit/core/support.cpp
new file mode 100644
index 0000000000..245398fe73
--- /dev/null
+++ b/lib/lepton/asmjit/core/support.cpp
@@ -0,0 +1,494 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#include "../core/support.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+// Support - Tests
+// ===============
+
+#if defined(ASMJIT_TEST)
+template<typename T>
+static void testArrays(const T* a, const T* b, size_t size) noexcept {
+  for (size_t index = 0; index != size; ++index) // Element-wise check; the failure message carries the index.
+    EXPECT(a[index] == b[index], "Mismatch at %u", unsigned(index));
+}
+
+static void testAlignment() noexcept { // Checks the alignment helpers of `Support` against fixed values.
+  INFO("Support::isAligned()");
+  EXPECT(Support::isAligned<size_t>(0xFFFF,  4) == false);
+  EXPECT(Support::isAligned<size_t>(0xFFF4,  4) == true);
+  EXPECT(Support::isAligned<size_t>(0xFFF8,  8) == true);
+  EXPECT(Support::isAligned<size_t>(0xFFF0, 16) == true);
+  // alignUp() rounds up to the next multiple of the alignment; aligned values come back unchanged.
+  INFO("Support::alignUp()");
+  EXPECT(Support::alignUp<size_t>(0xFFFF,  4) == 0x10000);
+  EXPECT(Support::alignUp<size_t>(0xFFF4,  4) == 0x0FFF4);
+  EXPECT(Support::alignUp<size_t>(0xFFF8,  8) == 0x0FFF8);
+  EXPECT(Support::alignUp<size_t>(0xFFF0, 16) == 0x0FFF0);
+  EXPECT(Support::alignUp<size_t>(0xFFF0, 32) == 0x10000);
+  // alignUpDiff() is the distance to that next multiple (zero when already aligned).
+  INFO("Support::alignUpDiff()");
+  EXPECT(Support::alignUpDiff<size_t>(0xFFFF,  4) == 1);
+  EXPECT(Support::alignUpDiff<size_t>(0xFFF4,  4) == 0);
+  EXPECT(Support::alignUpDiff<size_t>(0xFFF8,  8) == 0);
+  EXPECT(Support::alignUpDiff<size_t>(0xFFF0, 16) == 0);
+  EXPECT(Support::alignUpDiff<size_t>(0xFFF0, 32) == 16);
+  // alignUpPowerOf2() rounds up to a power of two; zero and exact powers of two stay as they are.
+  INFO("Support::alignUpPowerOf2()");
+  EXPECT(Support::alignUpPowerOf2<size_t>(0x0000) == 0x00000);
+  EXPECT(Support::alignUpPowerOf2<size_t>(0xFFFF) == 0x10000);
+  EXPECT(Support::alignUpPowerOf2<size_t>(0xF123) == 0x10000);
+  EXPECT(Support::alignUpPowerOf2<size_t>(0x0F00) == 0x01000);
+  EXPECT(Support::alignUpPowerOf2<size_t>(0x0100) == 0x00100);
+  EXPECT(Support::alignUpPowerOf2<size_t>(0x1001) == 0x02000);
+}
+
+static void testBitUtils() noexcept {
+  uint32_t i;
+  // Exercises shifts, blsi, clz/ctz, masks, bitTest, popcnt, and isPowerOf2 on single-bit and fixed patterns.
+  INFO("Support::shl() / shr()");
+  EXPECT(Support::shl(int32_t(0x00001111), 16) == int32_t(0x11110000u));
+  EXPECT(Support::shl(uint32_t(0x00001111), 16) == uint32_t(0x11110000u));
+  EXPECT(Support::shr(int32_t(0x11110000u), 16) == int32_t(0x00001111u));
+  EXPECT(Support::shr(uint32_t(0x11110000u), 16) == uint32_t(0x00001111u));
+  EXPECT(Support::sar(int32_t(0xFFFF0000u), 16) == int32_t(0xFFFFFFFFu));
+  EXPECT(Support::sar(uint32_t(0xFFFF0000u), 16) == uint32_t(0xFFFFFFFFu));
+
+  INFO("Support::blsi()");
+  for (i = 0; i < 32; i++) EXPECT(Support::blsi(uint32_t(1) << i) == uint32_t(1) << i);
+  for (i = 0; i < 31; i++) EXPECT(Support::blsi(uint32_t(3) << i) == uint32_t(1) << i);
+  for (i = 0; i < 64; i++) EXPECT(Support::blsi(uint64_t(1) << i) == uint64_t(1) << i);
+  for (i = 0; i < 63; i++) EXPECT(Support::blsi(uint64_t(3) << i) == uint64_t(1) << i);
+  // Covers both the `Internal::*Fallback` variants and the public clz() / ctz() entry points.
+  INFO("Support::clz() / ctz()");
+  for (i = 0; i < 32; i++) EXPECT(Support::Internal::clzFallback(uint32_t(1) << i) == 31 - i);
+  for (i = 0; i < 64; i++) EXPECT(Support::Internal::clzFallback(uint64_t(1) << i) == 63 - i);
+  for (i = 0; i < 32; i++) EXPECT(Support::Internal::ctzFallback(uint32_t(1) << i) == i);
+  for (i = 0; i < 64; i++) EXPECT(Support::Internal::ctzFallback(uint64_t(1) << i) == i);
+  for (i = 0; i < 32; i++) EXPECT(Support::clz(uint32_t(1) << i) == 31 - i);
+  for (i = 0; i < 64; i++) EXPECT(Support::clz(uint64_t(1) << i) == 63 - i);
+  for (i = 0; i < 32; i++) EXPECT(Support::ctz(uint32_t(1) << i) == i);
+  for (i = 0; i < 64; i++) EXPECT(Support::ctz(uint64_t(1) << i) == i);
+
+  INFO("Support::bitMask()");
+  EXPECT(Support::bitMask(0, 1, 7) == 0x83u);
+  for (i = 0; i < 32; i++)
+    EXPECT(Support::bitMask(i) == (1u << i));
+  // Unsigned literal: `1 << 31` would shift into the sign bit of `int` and would not match the `%X` format.
+  INFO("Support::bitTest()");
+  for (i = 0; i < 32; i++) {
+    EXPECT(Support::bitTest((1u << i), i) == true, "Support::bitTest(%X, %u) should return true", (1u << i), i);
+  }
+
+  INFO("Support::lsbMask<uint32_t>()");
+  for (i = 0; i < 32; i++) {
+    uint32_t expectedBits = 0;
+    for (uint32_t b = 0; b < i; b++)
+      expectedBits |= uint32_t(1) << b;
+    EXPECT(Support::lsbMask<uint32_t>(i) == expectedBits);
+  }
+
+  INFO("Support::lsbMask<uint64_t>()");
+  for (i = 0; i < 64; i++) {
+    uint64_t expectedBits = 0;
+    for (uint32_t b = 0; b < i; b++)
+      expectedBits |= uint64_t(1) << b;
+    EXPECT(Support::lsbMask<uint64_t>(i) == expectedBits);
+  }
+
+  INFO("Support::popcnt()");
+  for (i = 0; i < 32; i++) EXPECT(Support::popcnt((uint32_t(1) << i)) == 1);
+  for (i = 0; i < 64; i++) EXPECT(Support::popcnt((uint64_t(1) << i)) == 1);
+  EXPECT(Support::popcnt(0x000000F0) ==  4);
+  EXPECT(Support::popcnt(0x10101010) ==  4);
+  EXPECT(Support::popcnt(0xFF000000) ==  8);
+  EXPECT(Support::popcnt(0xFFFFFFF7) == 31);
+  EXPECT(Support::popcnt(0x7FFFFFFF) == 31);
+
+  INFO("Support::isPowerOf2()");
+  for (i = 0; i < 64; i++) {
+    EXPECT(Support::isPowerOf2(uint64_t(1) << i) == true);
+    EXPECT(Support::isPowerOf2((uint64_t(1) << i) ^ 0x001101) == false);
+  }
+}
+
+static void testIntUtils() noexcept { // Checks byte swapping/packing and the integer range predicates.
+  INFO("Support::byteswap()");
+  EXPECT(Support::byteswap16(int32_t(0x0102)) == int32_t(0x0201));
+  EXPECT(Support::byteswap32(int32_t(0x01020304)) == int32_t(0x04030201));
+  EXPECT(Support::byteswap32(uint32_t(0x01020304)) == uint32_t(0x04030201));
+  EXPECT(Support::byteswap64(uint64_t(0x0102030405060708)) == uint64_t(0x0807060504030201));
+  // bytepack32_4x8() is checked through a union: its first argument must land in the lowest-addressed byte.
+  INFO("Support::bytepack()");
+  union BytePackData {
+    uint8_t bytes[4];
+    uint32_t u32;
+  } bpdata;
+
+  bpdata.u32 = Support::bytepack32_4x8(0x00, 0x11, 0x22, 0x33);
+  EXPECT(bpdata.bytes[0] == 0x00);
+  EXPECT(bpdata.bytes[1] == 0x11);
+  EXPECT(bpdata.bytes[2] == 0x22);
+  EXPECT(bpdata.bytes[3] == 0x33);
+  // isBetween() treats both bounds as inclusive.
+  INFO("Support::isBetween()");
+  EXPECT(Support::isBetween<int>(10 , 10, 20) == true);
+  EXPECT(Support::isBetween<int>(11 , 10, 20) == true);
+  EXPECT(Support::isBetween<int>(20 , 10, 20) == true);
+  EXPECT(Support::isBetween<int>(9  , 10, 20) == false);
+  EXPECT(Support::isBetween<int>(21 , 10, 20) == false);
+  EXPECT(Support::isBetween<int>(101, 10, 20) == false);
+
+  INFO("Support::isInt8()");
+  EXPECT(Support::isInt8(-128) == true);
+  EXPECT(Support::isInt8( 127) == true);
+  EXPECT(Support::isInt8(-129) == false);
+  EXPECT(Support::isInt8( 128) == false);
+
+  INFO("Support::isInt16()");
+  EXPECT(Support::isInt16(-32768) == true);
+  EXPECT(Support::isInt16( 32767) == true);
+  EXPECT(Support::isInt16(-32769) == false);
+  EXPECT(Support::isInt16( 32768) == false);
+  // isInt32() limits; unsigned 64-bit inputs above the signed 32-bit maximum must be rejected.
+  INFO("Support::isInt32()");
+  EXPECT(Support::isInt32( 2147483647    ) == true);
+  EXPECT(Support::isInt32(-2147483647 - 1) == true);
+  EXPECT(Support::isInt32(uint64_t(2147483648u)) == false);
+  EXPECT(Support::isInt32(uint64_t(0xFFFFFFFFu)) == false);
+  EXPECT(Support::isInt32(uint64_t(0xFFFFFFFFu) + 1) == false);
+  // The isUIntN() checks reject negative inputs as well as values past the N-bit maximum.
+  INFO("Support::isUInt8()");
+  EXPECT(Support::isUInt8(0)   == true);
+  EXPECT(Support::isUInt8(255) == true);
+  EXPECT(Support::isUInt8(256) == false);
+  EXPECT(Support::isUInt8(-1)  == false);
+
+  INFO("Support::isUInt12()");
+  EXPECT(Support::isUInt12(0)    == true);
+  EXPECT(Support::isUInt12(4095) == true);
+  EXPECT(Support::isUInt12(4096) == false);
+  EXPECT(Support::isUInt12(-1)   == false);
+
+  INFO("Support::isUInt16()");
+  EXPECT(Support::isUInt16(0)     == true);
+  EXPECT(Support::isUInt16(65535) == true);
+  EXPECT(Support::isUInt16(65536) == false);
+  EXPECT(Support::isUInt16(-1)    == false);
+
+  INFO("Support::isUInt32()");
+  EXPECT(Support::isUInt32(uint64_t(0xFFFFFFFF)) == true);
+  EXPECT(Support::isUInt32(uint64_t(0xFFFFFFFF) + 1) == false);
+  EXPECT(Support::isUInt32(-1) == false);
+}
+
+static void testReadWrite() noexcept { // Round-trips unaligned big/little-endian writes and reads on one buffer.
+  INFO("Support::readX() / writeX()");
+
+  uint8_t arr[32] = { 0 };
+  // Two big-endian 16-bit writes leave bytes 01 02 03 04 at arr[1..4]; arr[5] is still zero.
+  Support::writeU16uBE(arr + 1, 0x0102u);
+  Support::writeU16uBE(arr + 3, 0x0304u);
+  EXPECT(Support::readU32uBE(arr + 1) == 0x01020304u);
+  EXPECT(Support::readU32uLE(arr + 1) == 0x04030201u);
+  EXPECT(Support::readU32uBE(arr + 2) == 0x02030400u);
+  EXPECT(Support::readU32uLE(arr + 2) == 0x00040302u);
+  // The little-endian 32-bit write stores 08 07 06 05 at arr[5..8].
+  Support::writeU32uLE(arr + 5, 0x05060708u);
+  EXPECT(Support::readU64uBE(arr + 1) == 0x0102030408070605u);
+  EXPECT(Support::readU64uLE(arr + 1) == 0x0506070804030201u);
+  // arr[7..14] becomes 88 77 66 55 44 33 22 11, so the big-endian read at arr[8] yields 0x77665544.
+  Support::writeU64uLE(arr + 7, 0x1122334455667788u);
+  EXPECT(Support::readU32uBE(arr + 8) == 0x77665544u);
+}
+
+static void testBitVector() noexcept { // Covers bit-vector fill/clear/set, indexOf, and the bit iterators.
+  INFO("Support::bitVectorOp");
+  {
+    uint32_t vec[3] = { 0 };
+    Support::bitVectorFill(vec, 1, 64);
+    EXPECT(vec[0] == 0xFFFFFFFEu);
+    EXPECT(vec[1] == 0xFFFFFFFFu);
+    EXPECT(vec[2] == 0x00000001u);
+
+    Support::bitVectorClear(vec, 1, 1);
+    EXPECT(vec[0] == 0xFFFFFFFCu);
+    EXPECT(vec[1] == 0xFFFFFFFFu);
+    EXPECT(vec[2] == 0x00000001u);
+
+    Support::bitVectorFill(vec, 0, 32);
+    EXPECT(vec[0] == 0xFFFFFFFFu);
+    EXPECT(vec[1] == 0xFFFFFFFFu);
+    EXPECT(vec[2] == 0x00000001u);
+
+    Support::bitVectorClear(vec, 0, 32);
+    EXPECT(vec[0] == 0x00000000u);
+    EXPECT(vec[1] == 0xFFFFFFFFu);
+    EXPECT(vec[2] == 0x00000001u);
+
+    Support::bitVectorFill(vec, 1, 30);
+    EXPECT(vec[0] == 0x7FFFFFFEu);
+    EXPECT(vec[1] == 0xFFFFFFFFu);
+    EXPECT(vec[2] == 0x00000001u);
+
+    Support::bitVectorClear(vec, 1, 95);
+    EXPECT(vec[0] == 0x00000000u);
+    EXPECT(vec[1] == 0x00000000u);
+    EXPECT(vec[2] == 0x00000000u);
+
+    Support::bitVectorFill(vec, 32, 64);
+    EXPECT(vec[0] == 0x00000000u);
+    EXPECT(vec[1] == 0xFFFFFFFFu);
+    EXPECT(vec[2] == 0xFFFFFFFFu);
+
+    Support::bitVectorSetBit(vec, 1, true);
+    EXPECT(vec[0] == 0x00000002u);
+    EXPECT(vec[1] == 0xFFFFFFFFu);
+    EXPECT(vec[2] == 0xFFFFFFFFu);
+
+    Support::bitVectorSetBit(vec, 95, false);
+    EXPECT(vec[0] == 0x00000002u);
+    EXPECT(vec[1] == 0xFFFFFFFFu);
+    EXPECT(vec[2] == 0x7FFFFFFFu);
+
+    Support::bitVectorClear(vec, 33, 32);
+    EXPECT(vec[0] == 0x00000002u);
+    EXPECT(vec[1] == 0x00000001u);
+    EXPECT(vec[2] == 0x7FFFFFFEu);
+  }
+
+  INFO("Support::bitVectorIndexOf");
+  {
+    uint32_t vec1[1] = { 0x80000000 };
+    EXPECT(Support::bitVectorIndexOf(vec1, 0, true) == 31);
+    EXPECT(Support::bitVectorIndexOf(vec1, 1, true) == 31);
+    EXPECT(Support::bitVectorIndexOf(vec1, 31, true) == 31);
+
+    uint32_t vec2[2] = { 0x00000000, 0x80000000 };
+    EXPECT(Support::bitVectorIndexOf(vec2, 0, true) == 63);
+    EXPECT(Support::bitVectorIndexOf(vec2, 1, true) == 63);
+    EXPECT(Support::bitVectorIndexOf(vec2, 31, true) == 63);
+    EXPECT(Support::bitVectorIndexOf(vec2, 32, true) == 63);
+    EXPECT(Support::bitVectorIndexOf(vec2, 33, true) == 63);
+    EXPECT(Support::bitVectorIndexOf(vec2, 63, true) == 63);
+
+    uint32_t vec3[3] = { 0x00000001, 0x00000000, 0x80000000 };
+    EXPECT(Support::bitVectorIndexOf(vec3, 0, true) == 0);
+    EXPECT(Support::bitVectorIndexOf(vec3, 1, true) == 95);
+    EXPECT(Support::bitVectorIndexOf(vec3, 2, true) == 95);
+    EXPECT(Support::bitVectorIndexOf(vec3, 31, true) == 95);
+    EXPECT(Support::bitVectorIndexOf(vec3, 32, true) == 95);
+    EXPECT(Support::bitVectorIndexOf(vec3, 63, true) == 95);
+    EXPECT(Support::bitVectorIndexOf(vec3, 64, true) == 95);
+    EXPECT(Support::bitVectorIndexOf(vec3, 95, true) == 95);
+
+    uint32_t vec4[3] = { ~vec3[0], ~vec3[1], ~vec3[2] };
+    EXPECT(Support::bitVectorIndexOf(vec4, 0, false) == 0);
+    EXPECT(Support::bitVectorIndexOf(vec4, 1, false) == 95);
+    EXPECT(Support::bitVectorIndexOf(vec4, 2, false) == 95);
+    EXPECT(Support::bitVectorIndexOf(vec4, 31, false) == 95);
+    EXPECT(Support::bitVectorIndexOf(vec4, 32, false) == 95);
+    EXPECT(Support::bitVectorIndexOf(vec4, 63, false) == 95);
+    EXPECT(Support::bitVectorIndexOf(vec4, 64, false) == 95);
+    EXPECT(Support::bitVectorIndexOf(vec4, 95, false) == 95);
+  }
+
+  INFO("Support::BitWordIterator<uint32_t>");
+  {
+    Support::BitWordIterator<uint32_t> it(0x80000F01u);
+    EXPECT(it.hasNext());
+    EXPECT(it.next() == 0);
+    EXPECT(it.hasNext());
+    EXPECT(it.next() == 8);
+    EXPECT(it.hasNext());
+    EXPECT(it.next() == 9);
+    EXPECT(it.hasNext());
+    EXPECT(it.next() == 10);
+    EXPECT(it.hasNext());
+    EXPECT(it.next() == 11);
+    EXPECT(it.hasNext());
+    EXPECT(it.next() == 31);
+    EXPECT(!it.hasNext());
+
+    // No bits set. (EXPECT rather than ASMJIT_ASSERT so the checks still run when assertions are disabled.)
+    it.init(0x00000000u);
+    EXPECT(!it.hasNext());
+
+    // Only first bit set.
+    it.init(0x00000001u);
+    EXPECT(it.hasNext());
+    EXPECT(it.next() == 0);
+    EXPECT(!it.hasNext());
+
+    // Only last bit set (special case).
+    it.init(0x80000000u);
+    EXPECT(it.hasNext());
+    EXPECT(it.next() == 31);
+    EXPECT(!it.hasNext());
+  }
+
+  INFO("Support::BitWordIterator<uint64_t>");
+  {
+    Support::BitWordIterator<uint64_t> it(uint64_t(1) << 63);
+    EXPECT(it.hasNext());
+    EXPECT(it.next() == 63);
+    EXPECT(!it.hasNext());
+  }
+
+  INFO("Support::BitVectorIterator<uint32_t>");
+  {
+    // Border cases.
+    static const uint32_t bitsNone[] = { 0xFFFFFFFFu };
+    Support::BitVectorIterator<uint32_t> it(bitsNone, 0);
+
+    EXPECT(!it.hasNext());
+    it.init(bitsNone, 0, 1);
+    EXPECT(!it.hasNext());
+    it.init(bitsNone, 0, 128);
+    EXPECT(!it.hasNext());
+
+    static const uint32_t bits1[] = { 0x80000008u, 0x80000001u, 0x00000000u, 0x80000000u, 0x00000000u, 0x00000000u, 0x00003000u };
+    it.init(bits1, ASMJIT_ARRAY_SIZE(bits1));
+
+    EXPECT(it.hasNext());
+    EXPECT(it.next() == 3);
+    EXPECT(it.hasNext());
+    EXPECT(it.next() == 31);
+    EXPECT(it.hasNext());
+    EXPECT(it.next() == 32);
+    EXPECT(it.hasNext());
+    EXPECT(it.next() == 63);
+    EXPECT(it.hasNext());
+    EXPECT(it.next() == 127);
+    EXPECT(it.hasNext());
+    EXPECT(it.next() == 204);
+    EXPECT(it.hasNext());
+    EXPECT(it.next() == 205);
+    EXPECT(!it.hasNext());
+
+    it.init(bits1, ASMJIT_ARRAY_SIZE(bits1), 4);
+    EXPECT(it.hasNext());
+    EXPECT(it.next() == 31);
+
+    it.init(bits1, ASMJIT_ARRAY_SIZE(bits1), 64);
+    EXPECT(it.hasNext());
+    EXPECT(it.next() == 127);
+
+    it.init(bits1, ASMJIT_ARRAY_SIZE(bits1), 127);
+    EXPECT(it.hasNext());
+    EXPECT(it.next() == 127);
+
+    static const uint32_t bits2[] = { 0x80000000u, 0x80000000u, 0x00000000u, 0x80000000u };
+    it.init(bits2, ASMJIT_ARRAY_SIZE(bits2));
+
+    EXPECT(it.hasNext());
+    EXPECT(it.next() == 31);
+    EXPECT(it.hasNext());
+    EXPECT(it.next() == 63);
+    EXPECT(it.hasNext());
+    EXPECT(it.next() == 127);
+    EXPECT(!it.hasNext());
+
+    static const uint32_t bits3[] = { 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u };
+    it.init(bits3, ASMJIT_ARRAY_SIZE(bits3));
+    EXPECT(!it.hasNext());
+
+    static const uint32_t bits4[] = { 0x00000000u, 0x00000000u, 0x00000000u, 0x80000000u };
+    it.init(bits4, ASMJIT_ARRAY_SIZE(bits4));
+    EXPECT(it.hasNext());
+    EXPECT(it.next() == 127);
+    EXPECT(!it.hasNext());
+  }
+
+  INFO("Support::BitVectorIterator<uint64_t>");
+  {
+    static const uint64_t bits1[] = { 0x80000000u, 0x80000000u, 0x00000000u, 0x80000000u };
+    Support::BitVectorIterator<uint64_t> it(bits1, ASMJIT_ARRAY_SIZE(bits1));
+
+    EXPECT(it.hasNext());
+    EXPECT(it.next() == 31);
+    EXPECT(it.hasNext());
+    EXPECT(it.next() == 95);
+    EXPECT(it.hasNext());
+    EXPECT(it.next() == 223);
+    EXPECT(!it.hasNext());
+
+    static const uint64_t bits2[] = { 0x8000000000000000u, 0, 0, 0 };
+    it.init(bits2, ASMJIT_ARRAY_SIZE(bits2));
+
+    EXPECT(it.hasNext());
+    EXPECT(it.next() == 63);
+    EXPECT(!it.hasNext());
+  }
+}
+
+static void testSorting() noexcept { // Runs iSort() and qSort() on the same inputs and compares with references.
+  INFO("Support::qSort() - Testing qsort and isort of predefined arrays");
+  {
+    constexpr size_t kArraySize = 11;
+
+    int ref_[kArraySize] = { -4, -2, -1, 0, 1, 9, 12, 13, 14, 19, 22 };
+    int arr1[kArraySize] = { 0, 1, -1, 19, 22, 14, -4, 9, 12, 13, -2 };
+    int arr2[kArraySize];
+    // Both sorts start from the same unsorted input.
+    memcpy(arr2, arr1, kArraySize * sizeof(int));
+
+    Support::iSort(arr1, kArraySize);
+    Support::qSort(arr2, kArraySize);
+    testArrays(arr1, ref_, kArraySize);
+    testArrays(arr2, ref_, kArraySize);
+  }
+
+  INFO("Support::qSort() - Testing qsort and isort of artificial arrays");
+  {
+    constexpr size_t kArraySize = 200;
+
+    int arr1[kArraySize];
+    int arr2[kArraySize];
+    int ref_[kArraySize];
+    // For every size, sort a strictly descending sequence and expect 0..size-1.
+    for (size_t size = 2; size < kArraySize; size++) {
+      for (size_t i = 0; i < size; i++) {
+        arr1[i] = int(size - 1 - i);
+        arr2[i] = int(size - 1 - i);
+        ref_[i] = int(i);
+      }
+
+      Support::iSort(arr1, size);
+      Support::qSort(arr2, size);
+      testArrays(arr1, ref_, size);
+      testArrays(arr2, ref_, size);
+    }
+  }
+
+  INFO("Support::qSort() - Testing qsort and isort with an unstable compare function");
+  {
+    constexpr size_t kArraySize = 5;
+
+    float arr1[kArraySize] = { 1.0f, 0.0f, 3.0f, -1.0f, std::numeric_limits<float>::quiet_NaN() };
+    float arr2[kArraySize] = { };
+
+    memcpy(arr2, arr1, kArraySize * sizeof(float));
+
+    // The results are not compared as it's undefined where the NaN would end up; the sorts only have to finish.
+    Support::iSort(arr1, kArraySize);
+    Support::qSort(arr2, kArraySize);
+  }
+}
+
+UNIT(support) { // Runs every Support test group of this file, in a fixed order.
+  testAlignment();
+  testBitUtils();
+  testIntUtils();
+  testReadWrite();
+  testBitVector();
+  testSorting();
+}
+#endif
+
+ASMJIT_END_NAMESPACE
diff --git a/lib/lepton/asmjit/core/support.h b/lib/lepton/asmjit/core/support.h
new file mode 100644
index 0000000000..e55b8084db
--- /dev/null
+++ b/lib/lepton/asmjit/core/support.h
@@ -0,0 +1,1773 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_SUPPORT_H_INCLUDED
+#define ASMJIT_CORE_SUPPORT_H_INCLUDED
+
+#include "../core/globals.h"
+
+#if defined(_MSC_VER)
+  #include <intrin.h>
+#endif
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \addtogroup asmjit_utilities
+//! \{
+
+//! Contains support classes and functions that may be used by AsmJit source and header files. Anything defined
+//! here is considered internal and should not be used outside of AsmJit and related projects like AsmTK.
+namespace Support {
+
+// Support - Basic Traits
+// ======================
+
+#if ASMJIT_ARCH_X86
+typedef uint8_t FastUInt8;
+#else
+typedef uint32_t FastUInt8;
+#endif
+
+//! \cond INTERNAL
+namespace Internal {
+  template<typename T, size_t Alignment>
+  struct AliasedUInt {};
+
+  template<> struct AliasedUInt<uint16_t, 2> { typedef uint16_t ASMJIT_MAY_ALIAS T; };
+  template<> struct AliasedUInt<uint32_t, 4> { typedef uint32_t ASMJIT_MAY_ALIAS T; };
+  template<> struct AliasedUInt<uint64_t, 8> { typedef uint64_t ASMJIT_MAY_ALIAS T; };
+
+  template<> struct AliasedUInt<uint16_t, 1> { typedef uint16_t ASMJIT_MAY_ALIAS ASMJIT_ALIGN_TYPE(T, 1); };
+  template<> struct AliasedUInt<uint32_t, 1> { typedef uint32_t ASMJIT_MAY_ALIAS ASMJIT_ALIGN_TYPE(T, 1); };
+  template<> struct AliasedUInt<uint32_t, 2> { typedef uint32_t ASMJIT_MAY_ALIAS ASMJIT_ALIGN_TYPE(T, 2); };
+  template<> struct AliasedUInt<uint64_t, 1> { typedef uint64_t ASMJIT_MAY_ALIAS ASMJIT_ALIGN_TYPE(T, 1); };
+  template<> struct AliasedUInt<uint64_t, 2> { typedef uint64_t ASMJIT_MAY_ALIAS ASMJIT_ALIGN_TYPE(T, 2); };
+  template<> struct AliasedUInt<uint64_t, 4> { typedef uint64_t ASMJIT_MAY_ALIAS ASMJIT_ALIGN_TYPE(T, 4); };
+
+  // StdInt    - Make an int-type by size (signed or unsigned) that is the
+  //             same as types defined by <stdint.h>.
+  // Int32Or64 - Make an int-type that has at least 32 bits: [u]int[32|64]_t.
+
+  template<size_t Size, unsigned Unsigned>
+  struct StdInt {}; // Fail if not specialized.
+
+  template<> struct StdInt<1, 0> { typedef int8_t   Type; };
+  template<> struct StdInt<1, 1> { typedef uint8_t  Type; };
+  template<> struct StdInt<2, 0> { typedef int16_t  Type; };
+  template<> struct StdInt<2, 1> { typedef uint16_t Type; };
+  template<> struct StdInt<4, 0> { typedef int32_t  Type; };
+  template<> struct StdInt<4, 1> { typedef uint32_t Type; };
+  template<> struct StdInt<8, 0> { typedef int64_t  Type; };
+  template<> struct StdInt<8, 1> { typedef uint64_t Type; };
+
+  template<typename T, int Unsigned = std::is_unsigned<T>::value>
+  struct Int32Or64 : public StdInt<sizeof(T) <= 4 ? size_t(4) : sizeof(T), Unsigned> {};
+}
+//! \endcond
+
+template<typename T>
+static constexpr bool isUnsigned() noexcept { return std::is_unsigned<T>::value; }
+
+//! Casts an integer `x` to either `int32_t` or `int64_t` depending on `T`.
+template<typename T>
+static constexpr typename Internal::Int32Or64<T, 0>::Type asInt(const T& x) noexcept {
+  return (typename Internal::Int32Or64<T, 0>::Type)x;
+}
+
+//! Casts an integer `x` to either `uint32_t` or `uint64_t` depending on `T`.
+template<typename T>
+static constexpr typename Internal::Int32Or64<T, 1>::Type asUInt(const T& x) noexcept {
+  return (typename Internal::Int32Or64<T, 1>::Type)x;
+}
+
+//! Casts an integer `x` to either `int32_t`, `uint32_t`, `int64_t`, or `uint64_t` depending on `T`.
+template<typename T>
+static constexpr typename Internal::Int32Or64<T>::Type asNormalized(const T& x) noexcept {
+  return (typename Internal::Int32Or64<T>::Type)x;
+}
+
+//! Casts an integer `x` to the same type as defined by `<stdint.h>`.
+template<typename T>
+static constexpr typename Internal::StdInt<sizeof(T), isUnsigned<T>()>::Type asStdInt(const T& x) noexcept {
+  return (typename Internal::StdInt<sizeof(T), isUnsigned<T>()>::Type)x;
+}
+
+//! A helper class that can be used to iterate over enum values.
+template<typename T, T from = (T)0, T to = T::kMaxValue>
+struct EnumValues {
+  typedef typename std::underlying_type<T>::type ValueType;
+
+  struct Iterator {
+    ValueType value;
+
+    inline T operator*() const { return (T)value; }
+    inline void operator++() { ++value; }
+
+    inline bool operator==(const Iterator& other) const noexcept { return value == other.value; }
+    inline bool operator!=(const Iterator& other) const noexcept { return value != other.value; }
+  };
+
+  inline Iterator begin() const noexcept { return Iterator{ValueType(from)}; }
+  inline Iterator end() const noexcept { return Iterator{ValueType(to) + 1}; }
+};
+
+// Support - BitCast
+// =================
+
+//! \cond
+namespace Internal {
+  template<typename DstT, typename SrcT>
+  union BitCastUnion {
+    inline BitCastUnion(SrcT src) noexcept : src(src) {}
+    SrcT src;
+    DstT dst;
+  };
+}
+//! \endcond
+
+//! Bit-casts from `Src` type to `Dst` type.
+//!
+//! Useful to bit-cast between integers and floating points.
+template<typename Dst, typename Src>
+static inline Dst bitCast(const Src& x) noexcept { return Internal::BitCastUnion<Dst, Src>(x).dst; }
+
+// Support - BitOps
+// ================
+
+//! Storage used to store a pack of bits (should be compatible with a machine word).
+typedef Internal::StdInt<sizeof(uintptr_t), 1>::Type BitWord;
+
+template<typename T>
+static constexpr uint32_t bitSizeOf() noexcept { return uint32_t(sizeof(T) * 8u); }
+
+//! Number of bits stored in a single `BitWord`.
+static constexpr uint32_t kBitWordSizeInBits = bitSizeOf<BitWord>();
+
+//! Returns `0 - x` in a safe way (no undefined behavior), works for unsigned numbers as well.
+template<typename T>
+static constexpr T neg(const T& x) noexcept {
+  typedef typename std::make_unsigned<T>::type U;
+  return T(U(0) - U(x));
+}
+
+template<typename T>
+static constexpr T allOnes() noexcept { return neg<T>(T(1)); }
+
+//! Returns `x << y` (shift left logical) by explicitly casting `x` to an unsigned type and back.
+template<typename X, typename Y>
+static constexpr X shl(const X& x, const Y& y) noexcept {
+  typedef typename std::make_unsigned<X>::type U;
+  return X(U(x) << y);
+}
+
+//! Returns `x >> y` (shift right logical) by explicitly casting `x` to an unsigned type and back.
+template<typename X, typename Y>
+static constexpr X shr(const X& x, const Y& y) noexcept {
+  typedef typename std::make_unsigned<X>::type U;
+  return X(U(x) >> y);
+}
+
+//! Returns `x >> y` (shift right arithmetic) by explicitly casting `x` to a signed type and back.
+template<typename X, typename Y>
+static constexpr X sar(const X& x, const Y& y) noexcept {
+  typedef typename std::make_signed<X>::type S;
+  return X(S(x) >> y);
+}
+
+template<typename X, typename Y>
+static constexpr X ror(const X& x, const Y& y) noexcept {
+  typedef typename std::make_unsigned<X>::type U;
+  return X((U(x) >> y) | (U(x) << (bitSizeOf<U>() - y)));
+}
+
+//! Returns `x | (x >> y)` - helper used by some bit manipulation helpers.
+template<typename X, typename Y>
+static constexpr X or_shr(const X& x, const Y& y) noexcept { return X(x | shr(x, y)); }
+
+//! Returns `x & -x` - extracts lowest set isolated bit (like BLSI instruction).
+template<typename T>
+static constexpr T blsi(T x) noexcept {
+  typedef typename std::make_unsigned<T>::type U;
+  return T(U(x) & neg(U(x)));
+}
+
+//! Tests whether the given value `x` has `n`th bit set.
+template<typename T, typename IndexT>
+static constexpr bool bitTest(T x, IndexT n) noexcept {
+  typedef typename std::make_unsigned<T>::type U;
+  return (U(x) & (U(1) << asStdInt(n))) != 0;
+}
+
+// Tests whether the given `value` is a consecutive mask of bits that starts at
+// the least significant bit.
+template<typename T>
+static inline constexpr bool isLsbMask(const T& value) {
+  typedef typename std::make_unsigned<T>::type U;
+  return value && ((U(value) + 1u) & U(value)) == 0;
+}
+
+// Tests whether the given value contains at least one bit or whether it's a
+// bit-mask of consecutive bits.
+//
+// This function is similar to \ref isLsbMask(), but the mask doesn't have to
+// start at a least significant bit.
+template<typename T>
+static inline constexpr bool isConsecutiveMask(const T& value) {
+  typedef typename std::make_unsigned<T>::type U;
+  return value && isLsbMask((U(value) - 1u) | U(value));
+}
+
+//! Generates a trailing bit-mask that has `n` least significant (trailing) bits set.
+template<typename T, typename CountT>
+static constexpr T lsbMask(const CountT& n) noexcept {
+  typedef typename std::make_unsigned<T>::type U;
+  return (sizeof(U) < sizeof(uintptr_t))
+    // Prevent undefined behavior by using a larger type than T.
+    ? T(U((uintptr_t(1) << n) - uintptr_t(1)))
+    // Prevent undefined behavior by checking `n` before shift.
+    : n ? T(shr(allOnes<T>(), bitSizeOf<T>() - size_t(n))) : T(0);
+}
+
+//! Generates a leading bit-mask that has `n` most significant (leading) bits set.
+template<typename T, typename CountT>
+static constexpr T msbMask(const CountT& n) noexcept {
+  typedef typename std::make_unsigned<T>::type U;
+  return (sizeof(U) < sizeof(uintptr_t))
+    // Prevent undefined behavior by using a larger type than T.
+    ? T(allOnes<uintptr_t>() >> (bitSizeOf<uintptr_t>() - n))
+    // Prevent undefined behavior by setting the MSB only when `n != 0` and shifting by `n - 1` (never by `n`).
+    : T(sar(U(n != 0) << (bitSizeOf<U>() - 1), n ? uint32_t(n - 1) : uint32_t(0)));
+}
+
+//! Returns a bit-mask that has `x` bit set.
+template<typename Index>
+static constexpr uint32_t bitMask(const Index& x) noexcept { return (1u << asUInt(x)); }
+
+//! Returns a bit-mask that has `x` bit set (multiple arguments).
+template<typename Index, typename... Args>
+static constexpr uint32_t bitMask(const Index& x, Args... args) noexcept { return bitMask(x) | bitMask(args...); }
+
+//! Converts a boolean value `b` to zero or full mask (all bits set).
+template<typename DstT, typename SrcT>
+static constexpr DstT bitMaskFromBool(SrcT b) noexcept {
+  typedef typename std::make_unsigned<DstT>::type U;
+  return DstT(U(0) - U(b));
+}
+
+//! Tests whether `a & b` is non-zero.
+template<typename A, typename B>
+static inline constexpr bool test(A a, B b) noexcept { return (asUInt(a) & asUInt(b)) != 0; }
+
+//! \cond
+namespace Internal {
+  // Fills all trailing bits right from the first most significant bit set.
+  static constexpr uint8_t fillTrailingBitsImpl(uint8_t x) noexcept { return or_shr(or_shr(or_shr(x, 1), 2), 4); }
+  // Fills all trailing bits right from the first most significant bit set.
+  static constexpr uint16_t fillTrailingBitsImpl(uint16_t x) noexcept { return or_shr(or_shr(or_shr(or_shr(x, 1), 2), 4), 8); }
+  // Fills all trailing bits right from the first most significant bit set.
+  static constexpr uint32_t fillTrailingBitsImpl(uint32_t x) noexcept { return or_shr(or_shr(or_shr(or_shr(or_shr(x, 1), 2), 4), 8), 16); }
+  // Fills all trailing bits right from the first most significant bit set.
+  static constexpr uint64_t fillTrailingBitsImpl(uint64_t x) noexcept { return or_shr(or_shr(or_shr(or_shr(or_shr(or_shr(x, 1), 2), 4), 8), 16), 32); }
+}
+//! \endcond
+
+// Fills all trailing bits right from the first most significant bit set.
+template<typename T>
+static constexpr T fillTrailingBits(const T& x) noexcept {
+  typedef typename std::make_unsigned<T>::type U;
+  return T(Internal::fillTrailingBitsImpl(U(x)));
+}
+
+// Support - Count Leading/Trailing Zeros
+// ======================================
+
+//! \cond
+namespace Internal {
+namespace {
+
+template<typename T>
+struct BitScanData { T x; uint32_t n; };
+
+template<typename T, uint32_t N>
+struct BitScanCalc {
+  static constexpr BitScanData<T> advanceLeft(const BitScanData<T>& data, uint32_t n) noexcept {
+    return BitScanData<T> { data.x << n, data.n + n };
+  }
+
+  static constexpr BitScanData<T> advanceRight(const BitScanData<T>& data, uint32_t n) noexcept {
+    return BitScanData<T> { data.x >> n, data.n + n };
+  }
+
+  static constexpr BitScanData<T> clz(const BitScanData<T>& data) noexcept {
+    return BitScanCalc<T, N / 2>::clz(advanceLeft(data, data.x & (allOnes<T>() << (bitSizeOf<T>() - N)) ? uint32_t(0) : N));
+  }
+
+  static constexpr BitScanData<T> ctz(const BitScanData<T>& data) noexcept {
+    return BitScanCalc<T, N / 2>::ctz(advanceRight(data, data.x & (allOnes<T>() >> (bitSizeOf<T>() - N)) ? uint32_t(0) : N));
+  }
+};
+
+template<typename T>
+struct BitScanCalc<T, 0> {
+  static constexpr BitScanData<T> clz(const BitScanData<T>& ctx) noexcept {
+    return BitScanData<T> { 0, ctx.n - uint32_t(ctx.x >> (bitSizeOf<T>() - 1)) };
+  }
+
+  static constexpr BitScanData<T> ctz(const BitScanData<T>& ctx) noexcept {
+    return BitScanData<T> { 0, ctx.n - uint32_t(ctx.x & 0x1) };
+  }
+};
+
+template<typename T>
+constexpr uint32_t clzFallback(const T& x) noexcept {
+  return BitScanCalc<T, bitSizeOf<T>() / 2u>::clz(BitScanData<T>{x, 1}).n;
+}
+
+template<typename T>
+constexpr uint32_t ctzFallback(const T& x) noexcept {
+  return BitScanCalc<T, bitSizeOf<T>() / 2u>::ctz(BitScanData<T>{x, 1}).n;
+}
+
+template<typename T> inline uint32_t clzImpl(const T& x) noexcept { return clzFallback(asUInt(x)); }
+template<typename T> inline uint32_t ctzImpl(const T& x) noexcept { return ctzFallback(asUInt(x)); }
+
+#if !defined(ASMJIT_NO_INTRINSICS)
+# if defined(__GNUC__)
+template<> inline uint32_t clzImpl(const uint32_t& x) noexcept { return uint32_t(__builtin_clz(x)); }
+template<> inline uint32_t clzImpl(const uint64_t& x) noexcept { return uint32_t(__builtin_clzll(x)); }
+template<> inline uint32_t ctzImpl(const uint32_t& x) noexcept { return uint32_t(__builtin_ctz(x)); }
+template<> inline uint32_t ctzImpl(const uint64_t& x) noexcept { return uint32_t(__builtin_ctzll(x)); }
+# elif defined(_MSC_VER)
+template<> inline uint32_t clzImpl(const uint32_t& x) noexcept { unsigned long i; _BitScanReverse(&i, x); return uint32_t(i ^ 31); }
+template<> inline uint32_t ctzImpl(const uint32_t& x) noexcept { unsigned long i; _BitScanForward(&i, x); return uint32_t(i); }
+#  if ASMJIT_ARCH_X86 == 64 || ASMJIT_ARCH_ARM == 64
+template<> inline uint32_t clzImpl(const uint64_t& x) noexcept { unsigned long i; _BitScanReverse64(&i, x); return uint32_t(i ^ 63); }
+template<> inline uint32_t ctzImpl(const uint64_t& x) noexcept { unsigned long i; _BitScanForward64(&i, x); return uint32_t(i); }
+#  endif
+# endif
+#endif
+
+} // {anonymous}
+} // {Internal}
+//! \endcond
+
+//! Count leading zeros in `x` (returns a position of a first bit set in `x`).
+//!
+//! \note The input MUST NOT be zero, otherwise the result is undefined.
+template<typename T>
+static inline uint32_t clz(T x) noexcept { return Internal::clzImpl(asUInt(x)); }
+
+//! Count trailing zeros in `x` (returns a position of a first bit set in `x`).
+//!
+//! \note The input MUST NOT be zero, otherwise the result is undefined.
+template<typename T>
+static inline uint32_t ctz(T x) noexcept { return Internal::ctzImpl(asUInt(x)); }
+
+template<uint64_t kInput>
+struct ConstCTZ {
+  static constexpr uint32_t value =
+    (kInput & (uint64_t(1) <<  0)) ?  0 :
+    (kInput & (uint64_t(1) <<  1)) ?  1 :
+    (kInput & (uint64_t(1) <<  2)) ?  2 :
+    (kInput & (uint64_t(1) <<  3)) ?  3 :
+    (kInput & (uint64_t(1) <<  4)) ?  4 :
+    (kInput & (uint64_t(1) <<  5)) ?  5 :
+    (kInput & (uint64_t(1) <<  6)) ?  6 :
+    (kInput & (uint64_t(1) <<  7)) ?  7 :
+    (kInput & (uint64_t(1) <<  8)) ?  8 :
+    (kInput & (uint64_t(1) <<  9)) ?  9 :
+    (kInput & (uint64_t(1) << 10)) ? 10 :
+    (kInput & (uint64_t(1) << 11)) ? 11 :
+    (kInput & (uint64_t(1) << 12)) ? 12 :
+    (kInput & (uint64_t(1) << 13)) ? 13 :
+    (kInput & (uint64_t(1) << 14)) ? 14 :
+    (kInput & (uint64_t(1) << 15)) ? 15 :
+    (kInput & (uint64_t(1) << 16)) ? 16 :
+    (kInput & (uint64_t(1) << 17)) ? 17 :
+    (kInput & (uint64_t(1) << 18)) ? 18 :
+    (kInput & (uint64_t(1) << 19)) ? 19 :
+    (kInput & (uint64_t(1) << 20)) ? 20 :
+    (kInput & (uint64_t(1) << 21)) ? 21 :
+    (kInput & (uint64_t(1) << 22)) ? 22 :
+    (kInput & (uint64_t(1) << 23)) ? 23 :
+    (kInput & (uint64_t(1) << 24)) ? 24 :
+    (kInput & (uint64_t(1) << 25)) ? 25 :
+    (kInput & (uint64_t(1) << 26)) ? 26 :
+    (kInput & (uint64_t(1) << 27)) ? 27 :
+    (kInput & (uint64_t(1) << 28)) ? 28 :
+    (kInput & (uint64_t(1) << 29)) ? 29 :
+    (kInput & (uint64_t(1) << 30)) ? 30 :
+    (kInput & (uint64_t(1) << 31)) ? 31 :
+    (kInput & (uint64_t(1) << 32)) ? 32 :
+    (kInput & (uint64_t(1) << 33)) ? 33 :
+    (kInput & (uint64_t(1) << 34)) ? 34 :
+    (kInput & (uint64_t(1) << 35)) ? 35 :
+    (kInput & (uint64_t(1) << 36)) ? 36 :
+    (kInput & (uint64_t(1) << 37)) ? 37 :
+    (kInput & (uint64_t(1) << 38)) ? 38 :
+    (kInput & (uint64_t(1) << 39)) ? 39 :
+    (kInput & (uint64_t(1) << 40)) ? 40 :
+    (kInput & (uint64_t(1) << 41)) ? 41 :
+    (kInput & (uint64_t(1) << 42)) ? 42 :
+    (kInput & (uint64_t(1) << 43)) ? 43 :
+    (kInput & (uint64_t(1) << 44)) ? 44 :
+    (kInput & (uint64_t(1) << 45)) ? 45 :
+    (kInput & (uint64_t(1) << 46)) ? 46 :
+    (kInput & (uint64_t(1) << 47)) ? 47 :
+    (kInput & (uint64_t(1) << 48)) ? 48 :
+    (kInput & (uint64_t(1) << 49)) ? 49 :
+    (kInput & (uint64_t(1) << 50)) ? 50 :
+    (kInput & (uint64_t(1) << 51)) ? 51 :
+    (kInput & (uint64_t(1) << 52)) ? 52 :
+    (kInput & (uint64_t(1) << 53)) ? 53 :
+    (kInput & (uint64_t(1) << 54)) ? 54 :
+    (kInput & (uint64_t(1) << 55)) ? 55 :
+    (kInput & (uint64_t(1) << 56)) ? 56 :
+    (kInput & (uint64_t(1) << 57)) ? 57 :
+    (kInput & (uint64_t(1) << 58)) ? 58 :
+    (kInput & (uint64_t(1) << 59)) ? 59 :
+    (kInput & (uint64_t(1) << 60)) ? 60 :
+    (kInput & (uint64_t(1) << 61)) ? 61 :
+    (kInput & (uint64_t(1) << 62)) ? 62 :
+    (kInput & (uint64_t(1) << 63)) ? 63 : 64;
+};
+
+// Support - PopCnt
+// ================
+
+// Based on the following resource:
+//   http://graphics.stanford.edu/~seander/bithacks.html
+//
+// Alternatively, for a very small number of bits in `x`:
+//   uint32_t n = 0;
+//   while (x) {
+//     x &= x - 1;
+//     n++;
+//   }
+//   return n;
+
+//! \cond
+namespace Internal {
+  static inline uint32_t constPopcntImpl(uint32_t x) noexcept {
+    x = x - ((x >> 1) & 0x55555555u);
+    x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u);
+    return (((x + (x >> 4)) & 0x0F0F0F0Fu) * 0x01010101u) >> 24;
+  }
+
+  static inline uint32_t constPopcntImpl(uint64_t x) noexcept {
+    if (ASMJIT_ARCH_BITS >= 64) {
+      x = x - ((x >> 1) & 0x5555555555555555u);
+      x = (x & 0x3333333333333333u) + ((x >> 2) & 0x3333333333333333u);
+      return uint32_t((((x + (x >> 4)) & 0x0F0F0F0F0F0F0F0Fu) * 0x0101010101010101u) >> 56);
+    }
+    else {
+      return constPopcntImpl(uint32_t(x >> 32)) +
+             constPopcntImpl(uint32_t(x & 0xFFFFFFFFu));
+    }
+  }
+
+  static inline uint32_t popcntImpl(uint32_t x) noexcept {
+  #if defined(__GNUC__)
+    return uint32_t(__builtin_popcount(x));
+  #else
+    return constPopcntImpl(asUInt(x));
+  #endif
+  }
+
+  static inline uint32_t popcntImpl(uint64_t x) noexcept {
+  #if defined(__GNUC__)
+    return uint32_t(__builtin_popcountll(x));
+  #else
+    return constPopcntImpl(asUInt(x));
+  #endif
+  }
+}
+//! \endcond
+
+//! Calculates count of bits in `x`.
+template<typename T>
+static inline uint32_t popcnt(T x) noexcept { return Internal::popcntImpl(asUInt(x)); }
+
+//! Calculates count of bits in `x` (useful in constant expressions).
+template<typename T>
+static inline uint32_t constPopcnt(T x) noexcept { return Internal::constPopcntImpl(asUInt(x)); }
+
+// Support - Min/Max
+// =================
+
+// NOTE: These are constexpr `min()` and `max()` implementations that are not
+// exactly the same as `std::min()` and `std::max()`. The return value is not
+// a reference to `a` or `b` but it's a new value instead.
+
+template<typename T>
+static constexpr T min(const T& a, const T& b) noexcept { return b < a ? b : a; }
+
+template<typename T, typename... Args>
+static constexpr T min(const T& a, const T& b, Args&&... args) noexcept { return min(min(a, b), std::forward<Args>(args)...); }
+
+template<typename T>
+static constexpr T max(const T& a, const T& b) noexcept { return a < b ? b : a; }
+
+template<typename T, typename... Args>
+static constexpr T max(const T& a, const T& b, Args&&... args) noexcept { return max(max(a, b), std::forward<Args>(args)...); }
+
+// Support - Immediate Helpers
+// ===========================
+
+namespace Internal {
+  template<typename T, bool IsFloat>
+  struct ImmConv {
+    static inline int64_t fromT(const T& x) noexcept { return int64_t(x); }
+    static inline T toT(int64_t x) noexcept { return T(uint64_t(x) & Support::allOnes<typename std::make_unsigned<T>::type>()); }
+  };
+
+  template<typename T>
+  struct ImmConv<T, true> {
+    static inline int64_t fromT(const T& x) noexcept { return int64_t(bitCast<int64_t>(double(x))); }
+    static inline T toT(int64_t x) noexcept { return T(bitCast<double>(x)); }
+  };
+}
+
+template<typename T>
+static inline int64_t immediateFromT(const T& x) noexcept { return Internal::ImmConv<T, std::is_floating_point<T>::value>::fromT(x); }
+
+template<typename T>
+static inline T immediateToT(int64_t x) noexcept { return Internal::ImmConv<T, std::is_floating_point<T>::value>::toT(x); }
+
+// Support - Overflow Arithmetic
+// =============================
+
+//! \cond
+namespace Internal {
+  template<typename T>
+  inline T addOverflowFallback(T x, T y, FastUInt8* of) noexcept {
+    typedef typename std::make_unsigned<T>::type U;
+
+    U result = U(x) + U(y);
+    *of = FastUInt8(*of | FastUInt8(isUnsigned<T>() ? result < U(x) : T((U(x) ^ ~U(y)) & (U(x) ^ result)) < 0));
+    return T(result);
+  }
+
+  template<typename T>
+  inline T subOverflowFallback(T x, T y, FastUInt8* of) noexcept {
+    typedef typename std::make_unsigned<T>::type U;
+
+    U result = U(x) - U(y);
+    *of = FastUInt8(*of | FastUInt8(isUnsigned<T>() ? result > U(x) : T((U(x) ^ U(y)) & (U(x) ^ result)) < 0));
+    return T(result);
+  }
+
+  template<typename T>
+  inline T mulOverflowFallback(T x, T y, FastUInt8* of) noexcept {
+    typedef typename Internal::StdInt<sizeof(T) * 2, isUnsigned<T>()>::Type I;
+    typedef typename std::make_unsigned<I>::type U;
+
+    U mask = allOnes<U>();
+    if (std::is_signed<T>::value) {
+      U prod = U(I(x)) * U(I(y));
+      *of = FastUInt8(*of | FastUInt8(I(prod) < I(std::numeric_limits<T>::lowest()) || I(prod) > I(std::numeric_limits<T>::max())));
+      return T(I(prod & mask));
+    }
+    else {
+      U prod = U(x) * U(y);
+      *of = FastUInt8(*of | FastUInt8((prod & ~mask) != 0));
+      return T(prod & mask);
+    }
+  }
+
+  template<>
+  inline int64_t mulOverflowFallback(int64_t x, int64_t y, FastUInt8* of) noexcept {
+    int64_t result = int64_t(uint64_t(x) * uint64_t(y));
+    *of = FastUInt8(*of | FastUInt8(x && (result / x != y)));
+    return result;
+  }
+
+  template<>
+  inline uint64_t mulOverflowFallback(uint64_t x, uint64_t y, FastUInt8* of) noexcept {
+    uint64_t result = x * y;
+    *of = FastUInt8(*of | FastUInt8(y != 0 && allOnes<uint64_t>() / y < x));
+    return result;
+  }
+
+  // These can be specialized.
+  template<typename T> inline T addOverflowImpl(const T& x, const T& y, FastUInt8* of) noexcept { return addOverflowFallback(x, y, of); }
+  template<typename T> inline T subOverflowImpl(const T& x, const T& y, FastUInt8* of) noexcept { return subOverflowFallback(x, y, of); }
+  template<typename T> inline T mulOverflowImpl(const T& x, const T& y, FastUInt8* of) noexcept { return mulOverflowFallback(x, y, of); }
+
+  #if defined(__GNUC__) && !defined(ASMJIT_NO_INTRINSICS)
+  #if defined(__clang__) || __GNUC__ >= 5
+  #define ASMJIT_ARITH_OVERFLOW_SPECIALIZE(FUNC, T, RESULT_T, BUILTIN)     \
+    template<>                                                             \
+    inline T FUNC(const T& x, const T& y, FastUInt8* of) noexcept {        \
+      RESULT_T result;                                                     \
+      *of = FastUInt8(*of | (BUILTIN((RESULT_T)x, (RESULT_T)y, &result))); \
+      return T(result);                                                    \
+    }
+  ASMJIT_ARITH_OVERFLOW_SPECIALIZE(addOverflowImpl, int32_t , int               , __builtin_sadd_overflow  )
+  ASMJIT_ARITH_OVERFLOW_SPECIALIZE(addOverflowImpl, uint32_t, unsigned int      , __builtin_uadd_overflow  )
+  ASMJIT_ARITH_OVERFLOW_SPECIALIZE(addOverflowImpl, int64_t , long long         , __builtin_saddll_overflow)
+  ASMJIT_ARITH_OVERFLOW_SPECIALIZE(addOverflowImpl, uint64_t, unsigned long long, __builtin_uaddll_overflow)
+  ASMJIT_ARITH_OVERFLOW_SPECIALIZE(subOverflowImpl, int32_t , int               , __builtin_ssub_overflow  )
+  ASMJIT_ARITH_OVERFLOW_SPECIALIZE(subOverflowImpl, uint32_t, unsigned int      , __builtin_usub_overflow  )
+  ASMJIT_ARITH_OVERFLOW_SPECIALIZE(subOverflowImpl, int64_t , long long         , __builtin_ssubll_overflow)
+  ASMJIT_ARITH_OVERFLOW_SPECIALIZE(subOverflowImpl, uint64_t, unsigned long long, __builtin_usubll_overflow)
+  ASMJIT_ARITH_OVERFLOW_SPECIALIZE(mulOverflowImpl, int32_t , int               , __builtin_smul_overflow  )
+  ASMJIT_ARITH_OVERFLOW_SPECIALIZE(mulOverflowImpl, uint32_t, unsigned int      , __builtin_umul_overflow  )
+  ASMJIT_ARITH_OVERFLOW_SPECIALIZE(mulOverflowImpl, int64_t , long long         , __builtin_smulll_overflow)
+  ASMJIT_ARITH_OVERFLOW_SPECIALIZE(mulOverflowImpl, uint64_t, unsigned long long, __builtin_umulll_overflow)
+  #undef ASMJIT_ARITH_OVERFLOW_SPECIALIZE
+  #endif
+  #endif
+
+  // There is a bug in MSVC that makes these specializations unusable, maybe in the future...
+  #if defined(_MSC_VER) && 0
+  #define ASMJIT_ARITH_OVERFLOW_SPECIALIZE(FUNC, T, ALT_T, BUILTIN)        \
+    template<>                                                             \
+    inline T FUNC(T x, T y, FastUInt8* of) noexcept {                      \
+      ALT_T result;                                                        \
+      *of = FastUInt8(*of | BUILTIN(0, (ALT_T)x, (ALT_T)y, &result));      \
+      return T(result);                                                    \
+    }
+  ASMJIT_ARITH_OVERFLOW_SPECIALIZE(addOverflowImpl, uint32_t, unsigned int      , _addcarry_u32 )
+  ASMJIT_ARITH_OVERFLOW_SPECIALIZE(subOverflowImpl, uint32_t, unsigned int      , _subborrow_u32)
+  #if ARCH_BITS >= 64
+  ASMJIT_ARITH_OVERFLOW_SPECIALIZE(addOverflowImpl, uint64_t, unsigned __int64  , _addcarry_u64 )
+  ASMJIT_ARITH_OVERFLOW_SPECIALIZE(subOverflowImpl, uint64_t, unsigned __int64  , _subborrow_u64)
+  #endif
+  #undef ASMJIT_ARITH_OVERFLOW_SPECIALIZE
+  #endif
+} // {Internal}
+//! \endcond
+
+template<typename T>
+static inline T addOverflow(const T& x, const T& y, FastUInt8* of) noexcept { return T(Internal::addOverflowImpl(asStdInt(x), asStdInt(y), of)); }
+
+template<typename T>
+static inline T subOverflow(const T& x, const T& y, FastUInt8* of) noexcept { return T(Internal::subOverflowImpl(asStdInt(x), asStdInt(y), of)); }
+
+template<typename T>
+static inline T mulOverflow(const T& x, const T& y, FastUInt8* of) noexcept { return T(Internal::mulOverflowImpl(asStdInt(x), asStdInt(y), of)); }
+
+// Support - Alignment
+// ===================
+
+template<typename X, typename Y>
+static constexpr bool isAligned(X base, Y alignment) noexcept {
+  typedef typename Internal::StdInt<sizeof(X), 1>::Type U;
+  return ((U)base % (U)alignment) == 0;
+}
+
+//! Tests whether the `x` is a power of two (only one bit is set).
+template<typename T>
+static constexpr bool isPowerOf2(T x) noexcept {
+  typedef typename std::make_unsigned<T>::type U;
+  return x && !(U(x) & (U(x) - U(1)));
+}
+
+template<typename X, typename Y>
+static constexpr X alignUp(X x, Y alignment) noexcept {
+  typedef typename Internal::StdInt<sizeof(X), 1>::Type U;
+  return (X)( ((U)x + ((U)(alignment) - 1u)) & ~((U)(alignment) - 1u) );
+}
+
+template<typename T>
+static constexpr T alignUpPowerOf2(T x) noexcept {
+  typedef typename Internal::StdInt<sizeof(T), 1>::Type U;
+  return (T)(fillTrailingBits(U(x) - 1u) + 1u);
+}
+
+//! Returns the difference between `base` aligned up to `alignment` and `base`
+//! itself (zero when `base` is already aligned).
+template<typename X, typename Y>
+static constexpr typename Internal::StdInt<sizeof(X), 1>::Type alignUpDiff(X base, Y alignment) noexcept {
+  typedef typename Internal::StdInt<sizeof(X), 1>::Type U;
+  return alignUp(U(base), alignment) - U(base);
+}
+
+template<typename X, typename Y>
+static constexpr X alignDown(X x, Y alignment) noexcept {
+  typedef typename Internal::StdInt<sizeof(X), 1>::Type U;
+  return (X)( (U)x & ~((U)(alignment) - 1u) );
+}
+
+// Support - NumGranularized
+// =========================
+
+//! Calculates the number of elements that would be required if `base` is
+//! granularized by `granularity`. This function can be used to calculate
+//! the number of BitWords to represent N bits, for example.
+template<typename X, typename Y>
+static constexpr X numGranularized(X base, Y granularity) noexcept {
+  typedef typename Internal::StdInt<sizeof(X), 1>::Type U;
+  return X((U(base) + U(granularity) - 1) / U(granularity));
+}
+
+// Support - IsBetween
+// ===================
+
+//! Checks whether `x` is greater than or equal to `a` and less than or equal to `b`.
+template<typename T>
+static constexpr bool isBetween(const T& x, const T& a, const T& b) noexcept {
+  return x >= a && x <= b;
+}
+
+// Support - IsInt & IsUInt
+// ========================
+
+//! Checks whether the given integer `x` can be casted to a 4-bit signed integer.
+template<typename T>
+static constexpr bool isInt4(T x) noexcept {
+  typedef typename std::make_signed<T>::type S;
+  typedef typename std::make_unsigned<T>::type U;
+
+  return std::is_signed<T>::value ? isBetween<S>(S(x), -8, 7) : U(x) <= U(7u);
+}
+
+//! Checks whether the given integer `x` can be casted to a 7-bit signed integer.
+template<typename T>
+static constexpr bool isInt7(T x) noexcept {
+  typedef typename std::make_signed<T>::type S;
+  typedef typename std::make_unsigned<T>::type U;
+
+  return std::is_signed<T>::value ? isBetween<S>(S(x), -64, 63) : U(x) <= U(63u);
+}
+
+//! Checks whether the given integer `x` can be casted to an 8-bit signed integer.
+template<typename T>
+static constexpr bool isInt8(T x) noexcept {
+  typedef typename std::make_signed<T>::type S;
+  typedef typename std::make_unsigned<T>::type U;
+
+  return std::is_signed<T>::value ? sizeof(T) <= 1 || isBetween<S>(S(x), -128, 127) : U(x) <= U(127u);
+}
+
+//! Checks whether the given integer `x` can be casted to a 9-bit signed integer.
+template<typename T>
+static constexpr bool isInt9(T x) noexcept {
+  typedef typename std::make_signed<T>::type S;
+  typedef typename std::make_unsigned<T>::type U;
+
+  return std::is_signed<T>::value ? sizeof(T) <= 1 || isBetween<S>(S(x), -256, 255)
+                                  : sizeof(T) <= 1 || U(x) <= U(255u);
+}
+
+//! Checks whether the given integer `x` can be casted to a 10-bit signed integer.
+template<typename T>
+static constexpr bool isInt10(T x) noexcept {
+  typedef typename std::make_signed<T>::type S;
+  typedef typename std::make_unsigned<T>::type U;
+
+  return std::is_signed<T>::value ? sizeof(T) <= 1 || isBetween<S>(S(x), -512, 511)
+                                  : sizeof(T) <= 1 || U(x) <= U(511u);
+}
+
+//! Tests whether the integer `x` is representable as a 16-bit signed integer (range -32768 to 32767).
+template<typename T>
+static constexpr bool isInt16(T x) noexcept {
+  using SignedT = typename std::make_signed<T>::type;
+  using UnsignedT = typename std::make_unsigned<T>::type;
+
+  return std::is_signed<T>::value ? sizeof(T) <= 2 || isBetween<SignedT>(SignedT(x), -32768, 32767)
+                                  : sizeof(T) <= 1 || UnsignedT(x) <= UnsignedT(32767u);
+}
+
+//! Tests whether the integer `x` is representable as a 32-bit signed integer.
+template<typename T>
+static constexpr bool isInt32(T x) noexcept {
+  using SignedT = typename std::make_signed<T>::type;
+  using UnsignedT = typename std::make_unsigned<T>::type;
+
+  return std::is_signed<T>::value ? sizeof(T) <= 4 || isBetween<SignedT>(SignedT(x), -2147483647 - 1, 2147483647)
+                                  : sizeof(T) <= 2 || UnsignedT(x) <= UnsignedT(2147483647u);
+}
+
+//! Tests whether the integer `x` is representable as a 4-bit unsigned integer (range 0 to 15).
+template<typename T>
+static constexpr bool isUInt4(T x) noexcept {
+  using UnsignedT = typename std::make_unsigned<T>::type;
+
+  return std::is_signed<T>::value ? T(0) <= x && x <= T(15)
+                                  : UnsignedT(x) <= UnsignedT(15u);
+}
+
+//! Tests whether the integer `x` is representable as an 8-bit unsigned integer (range 0 to 255).
+template<typename T>
+static constexpr bool isUInt8(T x) noexcept {
+  using UnsignedT = typename std::make_unsigned<T>::type;
+
+  return std::is_signed<T>::value ? x >= T(0) && (sizeof(T) <= 1 || x <= T(255))
+                                  : (sizeof(T) <= 1 || UnsignedT(x) <= UnsignedT(255u));
+}
+
+//! Tests whether the integer `x` is representable as a 12-bit unsigned integer, range 0 to 4095 (ARM specific).
+template<typename T>
+static constexpr bool isUInt12(T x) noexcept {
+  using UnsignedT = typename std::make_unsigned<T>::type;
+
+  return std::is_signed<T>::value ? x >= T(0) && (sizeof(T) <= 1 || x <= T(4095))
+                                  : (sizeof(T) <= 1 || UnsignedT(x) <= UnsignedT(4095u));
+}
+
+//! Tests whether the integer `x` is representable as a 16-bit unsigned integer (range 0 to 65535).
+template<typename T>
+static constexpr bool isUInt16(T x) noexcept {
+  using UnsignedT = typename std::make_unsigned<T>::type;
+
+  return std::is_signed<T>::value ? x >= T(0) && (sizeof(T) <= 2 || x <= T(65535))
+                                  : (sizeof(T) <= 2 || UnsignedT(x) <= UnsignedT(65535u));
+}
+
+//! Tests whether the integer `x` is representable as a 32-bit unsigned integer (range 0 to 4294967295).
+template<typename T>
+static constexpr bool isUInt32(T x) noexcept {
+  using UnsignedT = typename std::make_unsigned<T>::type;
+
+  return std::is_signed<T>::value ? x >= T(0) && (sizeof(T) <= 4 || x <= T(4294967295u))
+                                  : (sizeof(T) <= 4 || UnsignedT(x) <= UnsignedT(4294967295u));
+}
+
+//! Tests whether the upper 32 bits of `x` are either all zeros (a 32-bit unsigned value) or all ones.
+template<typename T>
+static constexpr bool isIntOrUInt32(T x) noexcept {
+  return sizeof(T) <= 4 || (uint32_t(uint64_t(x) >> 32) + 1u) <= 1u;
+}
+
+static bool inline isEncodableOffset32(int32_t offset, uint32_t nBits) noexcept {
+  const uint32_t unusedBits = 32 - nBits; // Bits of the 32-bit value that lie above the `nBits` wide field.
+  return offset == Support::sar(Support::shl(offset, unusedBits), unusedBits); // Unchanged after shl + sar round trip?
+}
+
+static bool inline isEncodableOffset64(int64_t offset, uint32_t nBits) noexcept {
+  const uint32_t unusedBits = 64 - nBits; // Bits of the 64-bit value that lie above the `nBits` wide field.
+  return offset == Support::sar(Support::shl(offset, unusedBits), unusedBits); // Unchanged after shl + sar round trip?
+}
+
+// Support - ByteSwap
+// ==================
+
+static inline uint16_t byteswap16(uint16_t x) noexcept {
+  return uint16_t((x << 8) | (x >> 8)); // Exchange both bytes; the cast drops the bits shifted above bit 15.
+}
+
+static inline uint32_t byteswap32(uint32_t x) noexcept {
+  return (x >> 24) | ((x >> 8) & 0x0000FF00u) | ((x << 8) & 0x00FF0000u) | (x << 24); // Reverse the order of all four bytes.
+}
+
+static inline uint64_t byteswap64(uint64_t x) noexcept {
+#if (defined(__GNUC__) || defined(__clang__)) && !defined(ASMJIT_NO_INTRINSICS)
+  return uint64_t(__builtin_bswap64(x));
+#elif defined(_MSC_VER) && !defined(ASMJIT_NO_INTRINSICS)
+  return uint64_t(_byteswap_uint64(x));
+#else
+  // Portable fallback: byte-swap each 32-bit half and exchange the positions of the two halves.
+  return (uint64_t(byteswap32(uint32_t(x))) << 32) | uint64_t(byteswap32(uint32_t(x >> 32)));
+#endif
+}
+
+// Support - BytePack & Unpack
+// ===========================
+
+//! Packs four 8-bit integers into a 32-bit integer as if it were an array of `{b0,b1,b2,b3}`.
+static constexpr uint32_t bytepack32_4x8(uint32_t a, uint32_t b, uint32_t c, uint32_t d) noexcept {
+  return ASMJIT_ARCH_LE ? (a | (b << 8) | (c << 16) | (d << 24))
+                        : (d | (c << 8) | (b << 16) | (a << 24));
+}
+
+template<typename T>
+static constexpr uint32_t unpackU32At0(T x) noexcept { return ASMJIT_ARCH_LE ? uint32_t(uint64_t(x) & 0xFFFFFFFFu) : uint32_t(uint64_t(x) >> 32); }
+template<typename T>
+static constexpr uint32_t unpackU32At1(T x) noexcept { return ASMJIT_ARCH_BE ? uint32_t(uint64_t(x) & 0xFFFFFFFFu) : uint32_t(uint64_t(x) >> 32); }
+
+// Support - Position of byte (in bit-shift)
+// =========================================
+
+static inline uint32_t byteShiftOfDWordStruct(uint32_t index) noexcept {
+  return 8u * (ASMJIT_ARCH_LE ? index : uint32_t(sizeof(uint32_t)) - 1u - index); // Bit shift of byte `index` within a 32-bit word.
+}
+
+// Support - String Utilities
+// ==========================
+
+template<typename T> // Maps ASCII 'A'..'Z' to 'a'..'z' by toggling bit 5 (0x20); every other value is returned unchanged.
+static constexpr T asciiToLower(T c) noexcept { return T(c >= T('A') && c <= T('Z') ? c ^ T(0x20) : c); }
+
+template<typename T> // Maps ASCII 'a'..'z' to 'A'..'Z' by toggling bit 5 (0x20); every other value is returned unchanged.
+static constexpr T asciiToUpper(T c) noexcept { return T(c >= T('a') && c <= T('z') ? c ^ T(0x20) : c); }
+
+static ASMJIT_FORCE_INLINE size_t strLen(const char* s, size_t maxSize) noexcept {
+  // Count characters up to the first NUL, but never examine more than `maxSize` of them.
+  size_t len = 0;
+  for (; len < maxSize && s[len] != '\0'; len++) continue;
+  return len;
+}
+
+static constexpr uint32_t hashRound(uint32_t hash, uint32_t c) noexcept { return hash * 65599 + c; }
+
+// Gets a hash of the given string `data` of size `size`. Size must be valid as this function doesn't check
+// for a null terminator and allows it in the middle of the string. Each byte is folded in via `hashRound()`
+// as an unsigned 8-bit value, so the result doesn't depend on whether `char` is signed.
+static inline uint32_t hashString(const char* data, size_t size) noexcept {
+  uint32_t hashCode = 0;
+  for (size_t i = 0; i < size; i++) // `size_t` index: a 32-bit counter would wrap and never reach a `size` above 4GiB.
+    hashCode = hashRound(hashCode, uint8_t(data[i]));
+  return hashCode;
+}
+
+static ASMJIT_FORCE_INLINE const char* findPackedString(const char* p, uint32_t id) noexcept {
+  // `p` addresses NUL terminated strings stored back to back; skip `id` of them so the
+  // returned pointer addresses the first character of the string at index `id`.
+  for (uint32_t skipped = 0; skipped < id; skipped++) {
+    while (*p != '\0')
+      p++;
+    p++; // Step over the terminator.
+  }
+  return p;
+}
+
+//! Compares two instruction names.
+//!
+//! `a` is a null terminated instruction name from arch-specific `nameData[]` table. `b` is a possibly non-null
+//! terminated instruction name of `size` characters passed to `InstAPI::stringToInstId()`. Returns the difference
+//! of the first mismatching bytes, otherwise `a[size]`, which is zero only if `a` ends right after `size` bytes.
+static ASMJIT_FORCE_INLINE int cmpInstName(const char* a, const char* b, size_t size) noexcept {
+  for (size_t i = 0; i != size; i++) {
+    const int diff = int(uint8_t(a[i])) - int(uint8_t(b[i]));
+    if (diff) return diff;
+  }
+  return int(uint8_t(a[size]));
+}
+
+// Support - Memory Read Access - 8 Bits
+// =====================================
+
+static inline uint8_t readU8(const void* p) noexcept { return static_cast<const uint8_t*>(p)[0]; }
+static inline int8_t readI8(const void* p) noexcept { return static_cast<const int8_t*>(p)[0]; }
+
+// Support - Memory Read Access - 16 Bits
+// ======================================
+
+template<ByteOrder BO, size_t Alignment>
+static inline uint16_t readU16x(const void* p) noexcept {
+  using AliasedU16 = typename Internal::AliasedUInt<uint16_t, Alignment>::T;
+  const uint16_t value = *static_cast<const AliasedU16*>(p); // Load 16 bits through the `AliasedUInt` type.
+  return BO != ByteOrder::kNative ? byteswap16(value) : value; // Swap bytes only when `BO` is not the native order.
+}
+
+template<size_t Alignment = 1>
+static inline uint16_t readU16u(const void* p) noexcept { return readU16x<ByteOrder::kNative, Alignment>(p); }
+template<size_t Alignment = 1>
+static inline uint16_t readU16uLE(const void* p) noexcept { return readU16x<ByteOrder::kLE, Alignment>(p); }
+template<size_t Alignment = 1>
+static inline uint16_t readU16uBE(const void* p) noexcept { return readU16x<ByteOrder::kBE, Alignment>(p); }
+
+static inline uint16_t readU16a(const void* p) noexcept { return readU16x<ByteOrder::kNative, 2>(p); }
+static inline uint16_t readU16aLE(const void* p) noexcept { return readU16x<ByteOrder::kLE, 2>(p); }
+static inline uint16_t readU16aBE(const void* p) noexcept { return readU16x<ByteOrder::kBE, 2>(p); }
+
+template<ByteOrder BO, size_t Alignment>
+static inline int16_t readI16x(const void* p) noexcept { return int16_t(readU16x<BO, Alignment>(p)); }
+
+template<size_t Alignment = 1>
+static inline int16_t readI16u(const void* p) noexcept { return int16_t(readU16x<ByteOrder::kNative, Alignment>(p)); }
+template<size_t Alignment = 1>
+static inline int16_t readI16uLE(const void* p) noexcept { return int16_t(readU16x<ByteOrder::kLE, Alignment>(p)); }
+template<size_t Alignment = 1>
+static inline int16_t readI16uBE(const void* p) noexcept { return int16_t(readU16x<ByteOrder::kBE, Alignment>(p)); }
+
+static inline int16_t readI16a(const void* p) noexcept { return int16_t(readU16x<ByteOrder::kNative, 2>(p)); }
+static inline int16_t readI16aLE(const void* p) noexcept { return int16_t(readU16x<ByteOrder::kLE, 2>(p)); }
+static inline int16_t readI16aBE(const void* p) noexcept { return int16_t(readU16x<ByteOrder::kBE, 2>(p)); }
+
+// Support - Memory Read Access - 24 Bits
+// ======================================
+
+template<ByteOrder BO = ByteOrder::kNative>
+static inline uint32_t readU24u(const void* p) noexcept {
+  const uint8_t* bytes = static_cast<const uint8_t*>(p);
+  // Offsets of the most/least significant bytes depend on `BO`; the middle byte is at offset 1 either way.
+  uint32_t hi = bytes[BO == ByteOrder::kLE ? 2 : 0];
+  uint32_t lo = bytes[BO == ByteOrder::kLE ? 0 : 2];
+  return (hi << 16) | (uint32_t(bytes[1]) << 8) | lo;
+}
+
+static inline uint32_t readU24uLE(const void* p) noexcept { return readU24u<ByteOrder::kLE>(p); }
+static inline uint32_t readU24uBE(const void* p) noexcept { return readU24u<ByteOrder::kBE>(p); }
+
+// Support - Memory Read Access - 32 Bits
+// ======================================
+
+template<ByteOrder BO, size_t Alignment>
+static inline uint32_t readU32x(const void* p) noexcept {
+  using AliasedU32 = typename Internal::AliasedUInt<uint32_t, Alignment>::T;
+  const uint32_t value = *static_cast<const AliasedU32*>(p); // Load 32 bits through the `AliasedUInt` type.
+  return BO != ByteOrder::kNative ? byteswap32(value) : value; // Swap bytes only when `BO` is not the native order.
+}
+
+template<size_t Alignment = 1>
+static inline uint32_t readU32u(const void* p) noexcept { return readU32x<ByteOrder::kNative, Alignment>(p); }
+template<size_t Alignment = 1>
+static inline uint32_t readU32uLE(const void* p) noexcept { return readU32x<ByteOrder::kLE, Alignment>(p); }
+template<size_t Alignment = 1>
+static inline uint32_t readU32uBE(const void* p) noexcept { return readU32x<ByteOrder::kBE, Alignment>(p); }
+
+static inline uint32_t readU32a(const void* p) noexcept { return readU32x<ByteOrder::kNative, 4>(p); }
+static inline uint32_t readU32aLE(const void* p) noexcept { return readU32x<ByteOrder::kLE, 4>(p); }
+static inline uint32_t readU32aBE(const void* p) noexcept { return readU32x<ByteOrder::kBE, 4>(p); }
+
+template<ByteOrder BO, size_t Alignment> // Signed counterpart of `readU32x()`; returns `int32_t` so that widening keeps the sign.
+static inline int32_t readI32x(const void* p) noexcept { return int32_t(readU32x<BO, Alignment>(p)); }
+
+template<size_t Alignment = 1>
+static inline int32_t readI32u(const void* p) noexcept { return int32_t(readU32x<ByteOrder::kNative, Alignment>(p)); }
+template<size_t Alignment = 1>
+static inline int32_t readI32uLE(const void* p) noexcept { return int32_t(readU32x<ByteOrder::kLE, Alignment>(p)); }
+template<size_t Alignment = 1>
+static inline int32_t readI32uBE(const void* p) noexcept { return int32_t(readU32x<ByteOrder::kBE, Alignment>(p)); }
+
+static inline int32_t readI32a(const void* p) noexcept { return int32_t(readU32x<ByteOrder::kNative, 4>(p)); }
+static inline int32_t readI32aLE(const void* p) noexcept { return int32_t(readU32x<ByteOrder::kLE, 4>(p)); }
+static inline int32_t readI32aBE(const void* p) noexcept { return int32_t(readU32x<ByteOrder::kBE, 4>(p)); }
+
+// Support - Memory Read Access - 64 Bits
+// ======================================
+
+template<ByteOrder BO, size_t Alignment>
+static inline uint64_t readU64x(const void* p) noexcept {
+  using AliasedU64 = typename Internal::AliasedUInt<uint64_t, Alignment>::T;
+  const uint64_t value = *static_cast<const AliasedU64*>(p); // Load 64 bits through the `AliasedUInt` type.
+  return BO != ByteOrder::kNative ? byteswap64(value) : value; // Swap bytes only when `BO` is not the native order.
+}
+
+template<size_t Alignment = 1>
+static inline uint64_t readU64u(const void* p) noexcept { return readU64x<ByteOrder::kNative, Alignment>(p); }
+template<size_t Alignment = 1>
+static inline uint64_t readU64uLE(const void* p) noexcept { return readU64x<ByteOrder::kLE, Alignment>(p); }
+template<size_t Alignment = 1>
+static inline uint64_t readU64uBE(const void* p) noexcept { return readU64x<ByteOrder::kBE, Alignment>(p); }
+
+static inline uint64_t readU64a(const void* p) noexcept { return readU64x<ByteOrder::kNative, 8>(p); }
+static inline uint64_t readU64aLE(const void* p) noexcept { return readU64x<ByteOrder::kLE, 8>(p); }
+static inline uint64_t readU64aBE(const void* p) noexcept { return readU64x<ByteOrder::kBE, 8>(p); }
+
+template<ByteOrder BO, size_t Alignment>
+static inline int64_t readI64x(const void* p) noexcept { return int64_t(readU64x<BO, Alignment>(p)); }
+
+template<size_t Alignment = 1>
+static inline int64_t readI64u(const void* p) noexcept { return int64_t(readU64x<ByteOrder::kNative, Alignment>(p)); }
+template<size_t Alignment = 1>
+static inline int64_t readI64uLE(const void* p) noexcept { return int64_t(readU64x<ByteOrder::kLE, Alignment>(p)); }
+template<size_t Alignment = 1>
+static inline int64_t readI64uBE(const void* p) noexcept { return int64_t(readU64x<ByteOrder::kBE, Alignment>(p)); }
+
+static inline int64_t readI64a(const void* p) noexcept { return int64_t(readU64x<ByteOrder::kNative, 8>(p)); }
+static inline int64_t readI64aLE(const void* p) noexcept { return int64_t(readU64x<ByteOrder::kLE, 8>(p)); }
+static inline int64_t readI64aBE(const void* p) noexcept { return int64_t(readU64x<ByteOrder::kBE, 8>(p)); }
+
+// Support - Memory Write Access - 8 Bits
+// ======================================
+
+static inline void writeU8(void* p, uint8_t x) noexcept { static_cast<uint8_t*>(p)[0] = x; }
+static inline void writeI8(void* p, int8_t x) noexcept { static_cast<int8_t*>(p)[0] = x; }
+
+// Support - Memory Write Access - 16 Bits
+// =======================================
+
+template<ByteOrder BO = ByteOrder::kNative, size_t Alignment = 1>
+static inline void writeU16x(void* p, uint16_t x) noexcept {
+  using AliasedU16 = typename Internal::AliasedUInt<uint16_t, Alignment>::T;
+  *static_cast<AliasedU16*>(p) = BO != ByteOrder::kNative ? byteswap16(x) : x; // Store byte-swapped unless `BO` is native.
+}
+
+template<size_t Alignment = 1>
+static inline void writeU16uLE(void* p, uint16_t x) noexcept { writeU16x<ByteOrder::kLE, Alignment>(p, x); }
+template<size_t Alignment = 1>
+static inline void writeU16uBE(void* p, uint16_t x) noexcept { writeU16x<ByteOrder::kBE, Alignment>(p, x); }
+
+static inline void writeU16a(void* p, uint16_t x) noexcept { writeU16x<ByteOrder::kNative, 2>(p, x); }
+static inline void writeU16aLE(void* p, uint16_t x) noexcept { writeU16x<ByteOrder::kLE, 2>(p, x); }
+static inline void writeU16aBE(void* p, uint16_t x) noexcept { writeU16x<ByteOrder::kBE, 2>(p, x); }
+
+
+template<ByteOrder BO = ByteOrder::kNative, size_t Alignment = 1>
+static inline void writeI16x(void* p, int16_t x) noexcept { writeU16x<BO, Alignment>(p, uint16_t(x)); }
+
+template<size_t Alignment = 1>
+static inline void writeI16uLE(void* p, int16_t x) noexcept { writeU16x<ByteOrder::kLE, Alignment>(p, uint16_t(x)); }
+template<size_t Alignment = 1>
+static inline void writeI16uBE(void* p, int16_t x) noexcept { writeU16x<ByteOrder::kBE, Alignment>(p, uint16_t(x)); }
+
+static inline void writeI16a(void* p, int16_t x) noexcept { writeU16x<ByteOrder::kNative, 2>(p, uint16_t(x)); }
+static inline void writeI16aLE(void* p, int16_t x) noexcept { writeU16x<ByteOrder::kLE, 2>(p, uint16_t(x)); }
+static inline void writeI16aBE(void* p, int16_t x) noexcept { writeU16x<ByteOrder::kBE, 2>(p, uint16_t(x)); }
+
+// Support - Memory Write Access - 24 Bits
+// =======================================
+
+template<ByteOrder BO = ByteOrder::kNative>
+static inline void writeU24u(void* p, uint32_t v) noexcept {
+  uint8_t* bytes = static_cast<uint8_t*>(p); // Only the low 24 bits of `v` are stored, one byte at a time.
+  bytes[BO == ByteOrder::kLE ? 0 : 2] = uint8_t(v & 0xFFu);
+  bytes[1] = uint8_t((v >> 8) & 0xFFu);
+  bytes[BO == ByteOrder::kLE ? 2 : 0] = uint8_t((v >> 16) & 0xFFu);
+}
+
+static inline void writeU24uLE(void* p, uint32_t v) noexcept { writeU24u<ByteOrder::kLE>(p, v); }
+static inline void writeU24uBE(void* p, uint32_t v) noexcept { writeU24u<ByteOrder::kBE>(p, v); }
+
+// Support - Memory Write Access - 32 Bits
+// =======================================
+
+template<ByteOrder BO = ByteOrder::kNative, size_t Alignment = 1>
+static inline void writeU32x(void* p, uint32_t x) noexcept {
+  using AliasedU32 = typename Internal::AliasedUInt<uint32_t, Alignment>::T;
+  *static_cast<AliasedU32*>(p) = (BO != ByteOrder::kNative) ? Support::byteswap32(x) : x; // Store byte-swapped unless `BO` is native.
+}
+
+template<size_t Alignment = 1>
+static inline void writeU32u(void* p, uint32_t x) noexcept { writeU32x<ByteOrder::kNative, Alignment>(p, x); }
+template<size_t Alignment = 1>
+static inline void writeU32uLE(void* p, uint32_t x) noexcept { writeU32x<ByteOrder::kLE, Alignment>(p, x); }
+template<size_t Alignment = 1>
+static inline void writeU32uBE(void* p, uint32_t x) noexcept { writeU32x<ByteOrder::kBE, Alignment>(p, x); }
+
+static inline void writeU32a(void* p, uint32_t x) noexcept { writeU32x<ByteOrder::kNative, 4>(p, x); }
+static inline void writeU32aLE(void* p, uint32_t x) noexcept { writeU32x<ByteOrder::kLE, 4>(p, x); }
+static inline void writeU32aBE(void* p, uint32_t x) noexcept { writeU32x<ByteOrder::kBE, 4>(p, x); }
+
+template<ByteOrder BO = ByteOrder::kNative, size_t Alignment = 1>
+static inline void writeI32x(void* p, int32_t x) noexcept { writeU32x<BO, Alignment>(p, uint32_t(x)); }
+
+template<size_t Alignment = 1>
+static inline void writeI32u(void* p, int32_t x) noexcept { writeU32x<ByteOrder::kNative, Alignment>(p, uint32_t(x)); }
+template<size_t Alignment = 1>
+static inline void writeI32uLE(void* p, int32_t x) noexcept { writeU32x<ByteOrder::kLE, Alignment>(p, uint32_t(x)); }
+template<size_t Alignment = 1>
+static inline void writeI32uBE(void* p, int32_t x) noexcept { writeU32x<ByteOrder::kBE, Alignment>(p, uint32_t(x)); }
+
+static inline void writeI32a(void* p, int32_t x) noexcept { writeU32x<ByteOrder::kNative, 4>(p, uint32_t(x)); }
+static inline void writeI32aLE(void* p, int32_t x) noexcept { writeU32x<ByteOrder::kLE, 4>(p, uint32_t(x)); }
+static inline void writeI32aBE(void* p, int32_t x) noexcept { writeU32x<ByteOrder::kBE, 4>(p, uint32_t(x)); }
+
+// Support - Memory Write Access - 64 Bits
+// =======================================
+
+template<ByteOrder BO = ByteOrder::kNative, size_t Alignment = 1>
+static inline void writeU64x(void* p, uint64_t x) noexcept {
+  using AliasedU64 = typename Internal::AliasedUInt<uint64_t, Alignment>::T;
+  *static_cast<AliasedU64*>(p) = BO != ByteOrder::kNative ? byteswap64(x) : x; // Store byte-swapped unless `BO` is native.
+}
+
+template<size_t Alignment = 1>
+static inline void writeU64u(void* p, uint64_t x) noexcept { writeU64x<ByteOrder::kNative, Alignment>(p, x); }
+template<size_t Alignment = 1>
+static inline void writeU64uLE(void* p, uint64_t x) noexcept { writeU64x<ByteOrder::kLE, Alignment>(p, x); }
+template<size_t Alignment = 1>
+static inline void writeU64uBE(void* p, uint64_t x) noexcept { writeU64x<ByteOrder::kBE, Alignment>(p, x); }
+
+static inline void writeU64a(void* p, uint64_t x) noexcept { writeU64x<ByteOrder::kNative, 8>(p, x); }
+static inline void writeU64aLE(void* p, uint64_t x) noexcept { writeU64x<ByteOrder::kLE, 8>(p, x); }
+static inline void writeU64aBE(void* p, uint64_t x) noexcept { writeU64x<ByteOrder::kBE, 8>(p, x); }
+
+template<ByteOrder BO = ByteOrder::kNative, size_t Alignment = 1>
+static inline void writeI64x(void* p, int64_t x) noexcept { writeU64x<BO, Alignment>(p, uint64_t(x)); }
+
+template<size_t Alignment = 1>
+static inline void writeI64u(void* p, int64_t x) noexcept { writeU64x<ByteOrder::kNative, Alignment>(p, uint64_t(x)); }
+template<size_t Alignment = 1>
+static inline void writeI64uLE(void* p, int64_t x) noexcept { writeU64x<ByteOrder::kLE, Alignment>(p, uint64_t(x)); }
+template<size_t Alignment = 1>
+static inline void writeI64uBE(void* p, int64_t x) noexcept { writeU64x<ByteOrder::kBE, Alignment>(p, uint64_t(x)); }
+
+static inline void writeI64a(void* p, int64_t x) noexcept { writeU64x<ByteOrder::kNative, 8>(p, uint64_t(x)); }
+static inline void writeI64aLE(void* p, int64_t x) noexcept { writeU64x<ByteOrder::kLE, 8>(p, uint64_t(x)); }
+static inline void writeI64aBE(void* p, int64_t x) noexcept { writeU64x<ByteOrder::kBE, 8>(p, uint64_t(x)); }
+
+// Support - Operators
+// ===================
+
+//! \cond INTERNAL
+struct Set    { template<typename T> static inline T op(T x, T y) noexcept { DebugUtils::unused(x); return  y; } };
+struct SetNot { template<typename T> static inline T op(T x, T y) noexcept { DebugUtils::unused(x); return ~y; } };
+struct And    { template<typename T> static inline T op(T x, T y) noexcept { return  x &  y; } };
+struct AndNot { template<typename T> static inline T op(T x, T y) noexcept { return  x & ~y; } };
+struct NotAnd { template<typename T> static inline T op(T x, T y) noexcept { return ~x &  y; } };
+struct Or     { template<typename T> static inline T op(T x, T y) noexcept { return  x |  y; } };
+struct Xor    { template<typename T> static inline T op(T x, T y) noexcept { return  x ^  y; } };
+struct Add    { template<typename T> static inline T op(T x, T y) noexcept { return  x +  y; } };
+struct Sub    { template<typename T> static inline T op(T x, T y) noexcept { return  x -  y; } };
+struct Min    { template<typename T> static inline T op(T x, T y) noexcept { return min<T>(x, y); } };
+struct Max    { template<typename T> static inline T op(T x, T y) noexcept { return max<T>(x, y); } };
+//! \endcond
+
+// Support - BitWordIterator
+// =========================
+
+//! Iterates over the indexes of all bits that are set to 1 in a single bit-word, lowest index first.
+//!
+//! Usage example:
+//!
+//! ```
+//! uint32_t bitsToIterate = 0x110F;
+//! Support::BitWordIterator<uint32_t> it(bitsToIterate);
+//!
+//! while (it.hasNext()) {
+//!   uint32_t bitIndex = it.next();
+//!   std::printf("Bit at %u is set\n", unsigned(bitIndex));
+//! }
+//! ```
+template<typename T>
+class BitWordIterator {
+public:
+  ASMJIT_FORCE_INLINE explicit BitWordIterator(T bitWord) noexcept
+    : _bitWord(bitWord) {}
+
+  ASMJIT_FORCE_INLINE bool hasNext() const noexcept { return !(_bitWord == 0); }
+  ASMJIT_FORCE_INLINE void init(T bitWord) noexcept { _bitWord = bitWord; }
+
+  ASMJIT_FORCE_INLINE uint32_t next() noexcept {
+    ASMJIT_ASSERT(_bitWord != 0);
+    const uint32_t lowestSet = ctz(_bitWord);
+    _bitWord ^= T(1u) << lowestSet; // The bit is known to be set, so XOR removes it from the pending bits.
+    return lowestSet;
+  }
+
+  T _bitWord;
+};
+
+// Support - BitWordFlipIterator
+// =============================
+
+//! Bit-word iterator that reports set bits through `nextAndFlip()`, lowest index first.
+template<typename T>
+class BitWordFlipIterator {
+public:
+  ASMJIT_FORCE_INLINE explicit BitWordFlipIterator(T bitWord) noexcept : _bitWord(bitWord), _xorMask(T(0)) {}
+
+  ASMJIT_FORCE_INLINE void init(T bitWord) noexcept { _bitWord = bitWord; }
+  ASMJIT_FORCE_INLINE bool hasNext() const noexcept { return _bitWord != 0; }
+
+  ASMJIT_FORCE_INLINE uint32_t nextAndFlip() noexcept {
+    ASMJIT_ASSERT(_bitWord != 0);
+    uint32_t index = ctz(_bitWord);
+    _bitWord ^= T(1u) << index; // Removes the reported bit from the pending bits.
+    return index;
+  }
+
+  T _bitWord;
+  T _xorMask; // Not read by any member of this class; zero-initialized so copies never read an indeterminate value.
+};
+
+// Support - BitVectorOps
+// ======================
+
+//! \cond
+namespace Internal {
+  template<typename T, class OperatorT, class FullWordOpT> // Applies an operator to `count` bits of `buf` starting at bit `index`.
+  static inline void bitVectorOp(T* buf, size_t index, size_t count) noexcept {
+    if (count == 0)
+      return;
+
+    const size_t kTSizeInBits = bitSizeOf<T>();
+    size_t vecIndex = index / kTSizeInBits; // T[]    - index of the first affected BitWord.
+    size_t bitIndex = index % kTSizeInBits; // T[][]  - index of the first affected bit within that BitWord.
+
+    buf += vecIndex;
+
+    // The first BitWord requires special handling to preserve bits outside the fill region (uses `OperatorT`).
+    const T kFillMask = allOnes<T>();
+    size_t firstNBits = min<size_t>(kTSizeInBits - bitIndex, count); // At least 1 as `count` is non-zero here.
+
+    buf[0] = OperatorT::op(buf[0], (kFillMask >> (kTSizeInBits - firstNBits)) << bitIndex);
+    buf++;
+    count -= firstNBits;
+
+    // All bits between the first and last affected BitWords can be just filled (uses `FullWordOpT`).
+    while (count >= kTSizeInBits) {
+      buf[0] = FullWordOpT::op(buf[0], kFillMask);
+      buf++;
+      count -= kTSizeInBits;
+    }
+
+    // The last BitWord requires special handling as well - only its `count` lowest bits are affected.
+    if (count)
+      buf[0] = OperatorT::op(buf[0], kFillMask >> (kTSizeInBits - count));
+  }
+}
+//! \endcond
+
+//! Returns the bit stored in bit-vector `buf` at `index`.
+template<typename T>
+static inline bool bitVectorGetBit(T* buf, size_t index) noexcept {
+  const size_t kBitsPerWord = bitSizeOf<T>();
+
+  const T word = buf[index / kBitsPerWord];
+  const size_t shift = index % kBitsPerWord;
+
+  return ((word >> shift) & 0x1u) != 0;
+}
+
+//! Writes `value` to the bit of bit-vector `buf` at `index`.
+template<typename T>
+static inline void bitVectorSetBit(T* buf, size_t index, bool value) noexcept {
+  const size_t kBitsPerWord = bitSizeOf<T>();
+
+  T& word = buf[index / kBitsPerWord];
+  const T mask = T(1u) << (index % kBitsPerWord);
+
+  // Set the selected bit when `value` is true, clear it otherwise.
+  if (!value)
+    word &= ~mask;
+  else
+    word |= mask;
+}
+
+//! Inverts the bit of bit-vector `buf` at `index`.
+template<typename T>
+static inline void bitVectorFlipBit(T* buf, size_t index) noexcept {
+  const size_t kBitsPerWord = bitSizeOf<T>();
+
+  T& word = buf[index / kBitsPerWord];
+  const T mask = T(1u) << (index % kBitsPerWord);
+
+  // XOR with a single-bit mask toggles exactly that bit.
+  word ^= mask;
+}
+
+//! Fills `count` bits in bit-vector `buf` starting at bit-index `index`.
+template<typename T>
+static inline void bitVectorFill(T* buf, size_t index, size_t count) noexcept { Internal::bitVectorOp<T, Or, Set>(buf, index, count); }
+
+//! Clears `count` bits in bit-vector `buf` starting at bit-index `index`.
+template<typename T>
+static inline void bitVectorClear(T* buf, size_t index, size_t count) noexcept { Internal::bitVectorOp<T, AndNot, SetNot>(buf, index, count); }
+
+template<typename T> // Returns the index of the first bit equal to `value` in `buf`, searching from bit `start` upwards.
+static inline size_t bitVectorIndexOf(T* buf, size_t start, bool value) noexcept {
+  const size_t kTSizeInBits = bitSizeOf<T>();
+  size_t vecIndex = start / kTSizeInBits; // T[]
+  size_t bitIndex = start % kTSizeInBits; // T[][]
+
+  T* p = buf + vecIndex;
+
+  // We always look for ones, if value is `false` we have to flip all bits before the search.
+  const T kFillMask = allOnes<T>();
+  const T kFlipMask = value ? T(0) : kFillMask;
+
+  // The first BitWord requires special handling as there are some bits we want to ignore.
+  T bits = (*p ^ kFlipMask) & (kFillMask << bitIndex);
+  for (;;) { // NOTE: No end bound is checked - the loop only ends once a BitWord with a matching bit is found.
+    if (bits)
+      return (size_t)(p - buf) * kTSizeInBits + ctz(bits);
+    bits = *++p ^ kFlipMask;
+  }
+}
+
+// Support - BitVectorIterator
+// ===========================
+
+template<typename T> // Iterates over the indexes of all set bits in an array of BitWords of type `T`.
+class BitVectorIterator {
+public:
+  const T* _ptr;  // Next BitWord to load (one past the BitWord `_current` was taken from).
+  size_t _idx;    // Bit index of the first bit of the BitWord held in `_current`.
+  size_t _end;    // Total number of bits (`numBitWords * bitSizeOf<T>()`).
+  T _current;     // Bits of the current BitWord that were not reported yet.
+
+  ASMJIT_FORCE_INLINE BitVectorIterator(const BitVectorIterator& other) noexcept = default;
+
+  ASMJIT_FORCE_INLINE BitVectorIterator(const T* data, size_t numBitWords, size_t start = 0) noexcept {
+    init(data, numBitWords, start);
+  }
+
+  ASMJIT_FORCE_INLINE void init(const T* data, size_t numBitWords, size_t start = 0) noexcept {
+    const T* ptr = data + (start / bitSizeOf<T>());
+    size_t idx = alignDown(start, bitSizeOf<T>());
+    size_t end = numBitWords * bitSizeOf<T>();
+
+    T bitWord = T(0);
+    if (idx < end) {
+      bitWord = *ptr++ & (allOnes<T>() << (start % bitSizeOf<T>())); // Ignore bits below `start` in the first BitWord.
+      while (!bitWord && (idx += bitSizeOf<T>()) < end) // Skip BitWords that have no bit set.
+        bitWord = *ptr++;
+    }
+
+    _ptr = ptr;
+    _idx = idx;
+    _end = end;
+    _current = bitWord;
+  }
+
+  ASMJIT_FORCE_INLINE bool hasNext() const noexcept {
+    return _current != T(0);
+  }
+
+  ASMJIT_FORCE_INLINE size_t next() noexcept {
+    T bitWord = _current;
+    ASMJIT_ASSERT(bitWord != T(0));
+
+    uint32_t bit = ctz(bitWord);
+    bitWord ^= T(1u) << bit; // Remove the bit that is about to be returned.
+
+    size_t n = _idx + bit;
+    while (!bitWord && (_idx += bitSizeOf<T>()) < _end) // Pre-load the next non-empty BitWord, if any.
+      bitWord = *_ptr++;
+
+    _current = bitWord;
+    return n;
+  }
+
+  ASMJIT_FORCE_INLINE size_t peekNext() const noexcept { // Like `next()`, but leaves the iterator unchanged.
+    ASMJIT_ASSERT(_current != T(0));
+    return _idx + ctz(_current);
+  }
+};
+
+// Support - BitVectorOpIterator
+// =============================
+
+template<typename T, class OperatorT> // Iterates over set bits of `OperatorT::op(a, b)` computed BitWord by BitWord.
+class BitVectorOpIterator {
+public:
+  enum : uint32_t {
+    kTSizeInBits = bitSizeOf<T>()
+  };
+
+  const T* _aPtr; // Next BitWord of the first operand to load.
+  const T* _bPtr; // Next BitWord of the second operand to load.
+  size_t _idx;    // Bit index of the first bit of the BitWord held in `_current`.
+  size_t _end;    // Total number of bits (`numBitWords * bitSizeOf<T>()`).
+  T _current;     // Bits of the current combined BitWord that were not reported yet.
+
+  ASMJIT_FORCE_INLINE BitVectorOpIterator(const T* aData, const T* bData, size_t numBitWords, size_t start = 0) noexcept {
+    init(aData, bData, numBitWords, start);
+  }
+
+  ASMJIT_FORCE_INLINE void init(const T* aData, const T* bData, size_t numBitWords, size_t start = 0) noexcept {
+    const T* aPtr = aData + (start / bitSizeOf<T>());
+    const T* bPtr = bData + (start / bitSizeOf<T>());
+    size_t idx = alignDown(start, bitSizeOf<T>());
+    size_t end = numBitWords * bitSizeOf<T>();
+
+    T bitWord = T(0);
+    if (idx < end) {
+      bitWord = OperatorT::op(*aPtr++, *bPtr++) & (allOnes<T>() << (start % bitSizeOf<T>())); // Ignore bits below `start`.
+      while (!bitWord && (idx += kTSizeInBits) < end) // Skip combined BitWords that have no bit set.
+        bitWord = OperatorT::op(*aPtr++, *bPtr++);
+    }
+
+    _aPtr = aPtr;
+    _bPtr = bPtr;
+    _idx = idx;
+    _end = end;
+    _current = bitWord;
+  }
+
+  ASMJIT_FORCE_INLINE bool hasNext() const noexcept { // `const` so it can be queried through a const iterator.
+    return _current != T(0);
+  }
+
+  ASMJIT_FORCE_INLINE size_t next() noexcept {
+    T bitWord = _current;
+    ASMJIT_ASSERT(bitWord != T(0));
+
+    uint32_t bit = ctz(bitWord);
+    bitWord ^= T(1u) << bit; // Remove the bit that is about to be returned.
+
+    size_t n = _idx + bit;
+    while (!bitWord && (_idx += kTSizeInBits) < _end) // Pre-load the next non-empty combined BitWord, if any.
+      bitWord = OperatorT::op(*_aPtr++, *_bPtr++);
+
+    _current = bitWord;
+    return n;
+  }
+};
+
+// Support - Sorting
+// =================
+
+//! Sort order.
+enum class SortOrder : uint32_t {
+  //! Ascending order.
+  kAscending  = 0,
+  //! Descending order.
+  kDescending = 1
+};
+
+//! Three-way comparison functor usable with any type that implements `<` and `>` operators (primitive types
+//! included). Returns a negative, zero, or positive value; for descending order the result is negated.
+template<SortOrder kOrder = SortOrder::kAscending>
+struct Compare {
+  template<typename A, typename B>
+  inline int operator()(const A& a, const B& b) const noexcept {
+    return kOrder != SortOrder::kAscending ? int(a < b) - int(a > b) : int(a > b) - int(a < b);
+  }
+};
+
+//! Insertion sort of `size` elements at `base`; fewer than two elements (including a null `base`) is a no-op.
+template<typename T, typename CompareT = Compare<SortOrder::kAscending>>
+static inline void iSort(T* base, size_t size, const CompareT& cmp = CompareT()) noexcept {
+  for (size_t i = 1; i < size; i++) // Index based so that no pointer is formed from `base` when `size < 2`.
+    for (T* pl = base + i; pl > base && cmp(pl[-1], pl[0]) > 0; pl--)
+      std::swap(pl[-1], pl[0]);
+}
+
+//! \cond
+namespace Internal {
+  //! Quick-sort implementation that keeps pending partitions on a fixed-size local stack (no heap allocation).
+  template<typename T, class CompareT>
+  struct QSortImpl {
+    enum : size_t {
+      kStackSize = 64 * 2, // Two pointers (`base` and `end`) are pushed per deferred partition.
+      kISortThreshold = 7  // Partitions of at most this many elements are finished by `iSort()`.
+    };
+
+    // Based on "PDCLib - Public Domain C Library" and rewritten to C++.
+    static void sort(T* base, size_t size, const CompareT& cmp) noexcept {
+      T* end = base + size;
+      T* stack[kStackSize];
+      T** stackptr = stack;
+
+      for (;;) {
+        if ((size_t)(end - base) > kISortThreshold) {
+          // We work from second to last - first will be pivot element.
+          T* pi = base + 1;
+          T* pj = end - 1;
+          std::swap(base[(size_t)(end - base) / 2], base[0]);
+
+          if (cmp(*pi  , *pj  ) > 0) std::swap(*pi  , *pj  );
+          if (cmp(*base, *pj  ) > 0) std::swap(*base, *pj  );
+          if (cmp(*pi  , *base) > 0) std::swap(*pi  , *base);
+
+          // Now we have the median for pivot element, entering main loop.
+          for (;;) {
+            while (pi < pj   && cmp(*++pi, *base) < 0) continue; // Move `i` right until `*i >= pivot`.
+            while (pj > base && cmp(*--pj, *base) > 0) continue; // Move `j` left  until `*j <= pivot`.
+
+            if (pi > pj) break;
+            std::swap(*pi, *pj);
+          }
+
+          // Move pivot into correct place.
+          std::swap(*base, *pj);
+
+          // Larger subfile base / end to stack, sort smaller.
+          if (pj - base > end - pi) {
+            // Left is larger - defer [base, pj) and continue with [pi, end).
+            *stackptr++ = base;
+            *stackptr++ = pj;
+            base = pi;
+          }
+          else {
+            // Right is larger - defer [pi, end) and continue with [base, pj).
+            *stackptr++ = pi;
+            *stackptr++ = end;
+            end = pj;
+          }
+          ASMJIT_ASSERT(stackptr <= stack + kStackSize);
+        }
+        else {
+          // UB sanitizer doesn't like applying offset to a nullptr base.
+          if (base != end)
+            iSort(base, (size_t)(end - base), cmp);
+
+          if (stackptr == stack) // No deferred partition left - everything is sorted.
+            break;
+
+          end = *--stackptr;  // Pop in reverse push order: `end` first,
+          base = *--stackptr; // then `base`.
+        }
+      }
+    }
+  };
+}
+//! \endcond
+
+//! Quick sort implementation - sorts `size` elements starting at `base` in-place, ordered by `cmp`.
+//!
+//! The main reason to provide a custom qsort implementation is that we needed something that will
+//! never throw a `bad_alloc` exception. This implementation doesn't use dynamic memory allocation.
+template<typename T, class CompareT = Compare<SortOrder::kAscending>>
+static inline void qSort(T* base, size_t size, const CompareT& cmp = CompareT()) noexcept {
+  Internal::QSortImpl<T, CompareT>::sort(base, size, cmp);
+}
+
+// Support - Array
+// ===============
+
+//! Array type, similar to std::array<T, N>, with the possibility to use enums in operator[].
+//!
+//! \note The array has C semantics - the elements in the array are not initialized.
+template<typename T, size_t N>
+struct Array {
+  //! \name Members
+  //! \{
+
+  //! The underlying array data, use \ref data() to access it.
+  T _data[N];
+
+  //! \}
+
+  //! \cond
+  // std compatibility.
+  typedef T value_type;
+  typedef size_t size_type;
+  typedef ptrdiff_t difference_type;
+
+  typedef value_type& reference;
+  typedef const value_type& const_reference;
+
+  typedef value_type* pointer;
+  typedef const value_type* const_pointer;
+
+  typedef pointer iterator;
+  typedef const_pointer const_iterator;
+  //! \endcond
+
+  //! \name Overloaded Operators
+  //! \{
+
+  template<typename Index>
+  inline T& operator[](const Index& index) noexcept {
+    typedef typename Internal::StdInt<sizeof(Index), 1>::Type U; // Integer type selected by `sizeof(Index)`.
+    ASMJIT_ASSERT(U(index) < N);
+    return _data[U(index)];
+  }
+
+  template<typename Index>
+  inline const T& operator[](const Index& index) const noexcept {
+    typedef typename Internal::StdInt<sizeof(Index), 1>::Type U; // Integer type selected by `sizeof(Index)`.
+    ASMJIT_ASSERT(U(index) < N);
+    return _data[U(index)];
+  }
+
+  inline bool operator==(const Array& other) const noexcept {
+    for (size_t i = 0; i < N; i++)
+      if (_data[i] != other._data[i])
+        return false;
+    return true;
+  }
+
+  inline bool operator!=(const Array& other) const noexcept {
+    return !operator==(other);
+  }
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  inline bool empty() const noexcept { return false; } // Always false - the element count is fixed to `N`.
+  inline size_t size() const noexcept { return N; }
+
+  inline T* data() noexcept { return _data; }
+  inline const T* data() const noexcept { return _data; }
+
+  inline T& front() noexcept { return _data[0]; }
+  inline const T& front() const noexcept { return _data[0]; }
+
+  inline T& back() noexcept { return _data[N - 1]; }
+  inline const T& back() const noexcept { return _data[N - 1]; }
+
+  inline T* begin() noexcept { return _data; }
+  inline T* end() noexcept { return _data + N; }
+
+  inline const T* begin() const noexcept { return _data; }
+  inline const T* end() const noexcept { return _data + N; }
+
+  inline const T* cbegin() const noexcept { return _data; }
+  inline const T* cend() const noexcept { return _data + N; }
+
+  //! \}
+
+  //! \name Utilities
+  //! \{
+
+  inline void swap(Array& other) noexcept {
+    for (size_t i = 0; i < N; i++)
+      std::swap(_data[i], other._data[i]);
+  }
+
+  inline void fill(const T& value) noexcept {
+    for (size_t i = 0; i < N; i++)
+      _data[i] = value;
+  }
+
+  inline void copyFrom(const Array& other) noexcept {
+    for (size_t i = 0; i < N; i++)
+      _data[i] = other._data[i];
+  }
+
+  template<typename Operator>
+  inline void combine(const Array& other) noexcept {
+    for (size_t i = 0; i < N; i++)
+      _data[i] = Operator::op(_data[i], other._data[i]); // Element-wise, the result is stored in this array.
+  }
+
+  template<typename Operator>
+  inline T aggregate(T initialValue = T()) const noexcept {
+    T value = initialValue;
+    for (size_t i = 0; i < N; i++)
+      value = Operator::op(value, _data[i]); // Folds every element into `value`, seeded by `initialValue`.
+    return value;
+  }
+
+  template<typename Fn>
+  inline void forEach(Fn&& fn) noexcept {
+    for (size_t i = 0; i < N; i++)
+      fn(_data[i]); // `fn` receives each element as a mutable reference, in index order.
+  }
+  //! \}
+};
+
+// Support::Temporary
+// ==================
+
+//! Used to pass a temporary buffer to:
+//!
+//!   - Containers that use user-passed buffer as an initial storage (still can grow).
+//!   - Zone allocator that would use the temporary buffer as a first block.
+struct Temporary {
+  //! \name Members
+  //! \{
+
+  void* _data; //!< Start of the buffer passed to the constructor.
+  size_t _size; //!< Size of the buffer in bytes.
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  inline constexpr Temporary(const Temporary& other) noexcept = default;
+  inline constexpr Temporary(void* data, size_t size) noexcept
+    : _data(data),
+      _size(size) {}
+
+  //! \}
+
+  //! \name Overloaded Operators
+  //! \{
+
+  inline Temporary& operator=(const Temporary& other) noexcept = default;
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns the data storage casted to `T*` (`void*` by default).
+  template<typename T = void>
+  inline constexpr T* data() const noexcept { return static_cast<T*>(_data); }
+  //! Returns the data storage size in bytes.
+  inline constexpr size_t size() const noexcept { return _size; }
+
+  //! \}
+};
+
+} // {Support}
+
+//! \}
+
+ASMJIT_END_NAMESPACE
+
+#endif // ASMJIT_CORE_SUPPORT_H_INCLUDED
diff --git a/lib/lepton/asmjit/core/target.cpp b/lib/lepton/asmjit/core/target.cpp
new file mode 100644
index 0000000000..fef025d709
--- /dev/null
+++ b/lib/lepton/asmjit/core/target.cpp
@@ -0,0 +1,14 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#include "../core/target.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+Target::Target() noexcept : _environment() {} // Value-initializes the environment.
+Target::~Target() noexcept {} // Out-of-line definition of the destructor, nothing to release here.
+
+ASMJIT_END_NAMESPACE
diff --git a/lib/lepton/asmjit/core/target.h b/lib/lepton/asmjit/core/target.h
new file mode 100644
index 0000000000..23b0c6294c
--- /dev/null
+++ b/lib/lepton/asmjit/core/target.h
@@ -0,0 +1,53 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_TARGET_H_INCLUDED
+#define ASMJIT_CORE_TARGET_H_INCLUDED
+
+#include "../core/archtraits.h"
+#include "../core/func.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \addtogroup asmjit_core
+//! \{
+
+//! Target is an abstract class that describes a machine code target, it stores the target's \ref Environment.
+class ASMJIT_VIRTAPI Target {
+public:
+  ASMJIT_BASE_CLASS(Target)
+  ASMJIT_NONCOPYABLE(Target)
+
+  //! Target environment information.
+  Environment _environment;
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Creates a `Target` instance.
+  ASMJIT_API Target() noexcept;
+  //! Destroys the `Target` instance.
+  ASMJIT_API virtual ~Target() noexcept;
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns target's environment.
+  inline const Environment& environment() const noexcept { return _environment; }
+  //! Returns the target architecture (forwarded from the environment).
+  inline Arch arch() const noexcept { return _environment.arch(); }
+  //! Returns the target sub-architecture (forwarded from the environment).
+  inline SubArch subArch() const noexcept { return _environment.subArch(); }
+
+  //! \}
+};
+
+//! \}
+
+ASMJIT_END_NAMESPACE
+
+#endif // ASMJIT_CORE_TARGET_H_INCLUDED
diff --git a/lib/lepton/asmjit/core/type.cpp b/lib/lepton/asmjit/core/type.cpp
new file mode 100644
index 0000000000..536fb8818f
--- /dev/null
+++ b/lib/lepton/asmjit/core/type.cpp
@@ -0,0 +1,74 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#include "../core/misc_p.h"
+#include "../core/type.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+namespace TypeUtils {
+
+template<uint32_t Index>
+struct ScalarOfTypeId { // Compile-time TypeId of the scalar element of TypeId `Index` (kVoid when no test matches).
+  enum : uint32_t {
+    kTypeId = uint32_t( // Vector ids are rebased from their `_kVecXXXStart` to the scalar ids starting at kInt8.
+      isScalar(TypeId(Index)) ? TypeId(Index) :
+      isMask8 (TypeId(Index)) ? TypeId::kUInt8 :
+      isMask16(TypeId(Index)) ? TypeId::kUInt16 :
+      isMask32(TypeId(Index)) ? TypeId::kUInt32 :
+      isMask64(TypeId(Index)) ? TypeId::kUInt64 :
+      isMmx32 (TypeId(Index)) ? TypeId::kUInt32 :
+      isMmx64 (TypeId(Index)) ? TypeId::kUInt64 :
+      isVec32 (TypeId(Index)) ? TypeId((Index - uint32_t(TypeId::_kVec32Start ) + uint32_t(TypeId::kInt8)) & 0xFF) :
+      isVec64 (TypeId(Index)) ? TypeId((Index - uint32_t(TypeId::_kVec64Start ) + uint32_t(TypeId::kInt8)) & 0xFF) :
+      isVec128(TypeId(Index)) ? TypeId((Index - uint32_t(TypeId::_kVec128Start) + uint32_t(TypeId::kInt8)) & 0xFF) :
+      isVec256(TypeId(Index)) ? TypeId((Index - uint32_t(TypeId::_kVec256Start) + uint32_t(TypeId::kInt8)) & 0xFF) :
+      isVec512(TypeId(Index)) ? TypeId((Index - uint32_t(TypeId::_kVec512Start) + uint32_t(TypeId::kInt8)) & 0xFF) : TypeId::kVoid)
+  };
+};
+
+template<uint32_t Index>
+struct SizeOfTypeId { // Compile-time size in bytes of TypeId `Index`.
+  enum : uint32_t {
+    kTypeSize =
+      isInt8   (TypeId(Index)) ?  1 :
+      isUInt8  (TypeId(Index)) ?  1 :
+      isInt16  (TypeId(Index)) ?  2 :
+      isUInt16 (TypeId(Index)) ?  2 :
+      isInt32  (TypeId(Index)) ?  4 :
+      isUInt32 (TypeId(Index)) ?  4 :
+      isInt64  (TypeId(Index)) ?  8 :
+      isUInt64 (TypeId(Index)) ?  8 :
+      isFloat32(TypeId(Index)) ?  4 :
+      isFloat64(TypeId(Index)) ?  8 :
+      isFloat80(TypeId(Index)) ? 10 :
+      isMask8  (TypeId(Index)) ?  1 :
+      isMask16 (TypeId(Index)) ?  2 :
+      isMask32 (TypeId(Index)) ?  4 :
+      isMask64 (TypeId(Index)) ?  8 :
+      isMmx32  (TypeId(Index)) ?  4 :
+      isMmx64  (TypeId(Index)) ?  8 :
+      isVec32  (TypeId(Index)) ?  4 :
+      isVec64  (TypeId(Index)) ?  8 :
+      isVec128 (TypeId(Index)) ? 16 :
+      isVec256 (TypeId(Index)) ? 32 :
+      isVec512 (TypeId(Index)) ? 64 : 0 // 0 for ids matched by no test above (kVoid, kIntPtr, kUIntPtr, ...).
+  };
+};
+
+const TypeData _typeData = {
+  #define VALUE(x) TypeId(ScalarOfTypeId<x>::kTypeId)
+  { ASMJIT_LOOKUP_TABLE_256(VALUE, 0) }, // Initializer of TypeData::scalarOf.
+  #undef VALUE
+
+  #define VALUE(x) SizeOfTypeId<x>::kTypeSize
+  { ASMJIT_LOOKUP_TABLE_256(VALUE, 0) } // Initializer of TypeData::sizeOf.
+  #undef VALUE
+};
+
+} // {TypeUtils}
+
+ASMJIT_END_NAMESPACE
diff --git a/lib/lepton/asmjit/core/type.h b/lib/lepton/asmjit/core/type.h
new file mode 100644
index 0000000000..3754959e4c
--- /dev/null
+++ b/lib/lepton/asmjit/core/type.h
@@ -0,0 +1,419 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_TYPE_H_INCLUDED
+#define ASMJIT_CORE_TYPE_H_INCLUDED
+
+#include "../core/globals.h"
+#include "../core/support.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \addtogroup asmjit_core
+//! \{
+
+//! Type identifier provides a minimalist type system used across AsmJit library.
+//!
+//! This is an additional information that can be used to describe a value-type of physical or virtual register. It's
+//! used mostly by BaseCompiler to describe register representation (the group of data stored in the register and the
+//! width used) and it's also used by APIs that allow to describe and work with function signatures.
+enum class TypeId : uint8_t {
+  //! Void type.
+  kVoid = 0,
+
+  _kBaseStart = 32,
+  _kBaseEnd = 44,
+
+  _kIntStart = 32,
+  _kIntEnd = 41,
+
+  //! Abstract signed integer type that has a native size.
+  kIntPtr = 32,
+  //! Abstract unsigned integer type that has a native size.
+  kUIntPtr = 33,
+
+  //! 8-bit signed integer type.
+  kInt8 = 34,
+  //! 8-bit unsigned integer type.
+  kUInt8 = 35,
+  //! 16-bit signed integer type.
+  kInt16 = 36,
+  //! 16-bit unsigned integer type.
+  kUInt16 = 37,
+  //! 32-bit signed integer type.
+  kInt32 = 38,
+  //! 32-bit unsigned integer type.
+  kUInt32 = 39,
+  //! 64-bit signed integer type.
+  kInt64 = 40,
+  //! 64-bit unsigned integer type.
+  kUInt64 = 41,
+
+  _kFloatStart  = 42,
+  _kFloatEnd = 44,
+
+  //! 32-bit floating point type.
+  kFloat32 = 42,
+  //! 64-bit floating point type.
+  kFloat64 = 43,
+  //! 80-bit floating point type.
+  kFloat80 = 44,
+
+  _kMaskStart = 45,
+  _kMaskEnd = 48,
+
+  //! 8-bit opmask register (K).
+  kMask8 = 45,
+  //! 16-bit opmask register (K).
+  kMask16 = 46,
+  //! 32-bit opmask register (K).
+  kMask32 = 47,
+  //! 64-bit opmask register (K).
+  kMask64 = 48,
+
+  _kMmxStart = 49,
+  _kMmxEnd = 50,
+
+  //! 64-bit MMX register only used for 32 bits.
+  kMmx32 = 49,
+  //! 64-bit MMX register.
+  kMmx64 = 50,
+
+  _kVec32Start  = 51,
+  _kVec32End = 60,
+
+  kInt8x4 = 51,
+  kUInt8x4 = 52,
+  kInt16x2 = 53,
+  kUInt16x2 = 54,
+  kInt32x1 = 55,
+  kUInt32x1 = 56,
+  kFloat32x1 = 59,
+
+  _kVec64Start  = 61,
+  _kVec64End = 70,
+
+  kInt8x8 = 61,
+  kUInt8x8 = 62,
+  kInt16x4 = 63,
+  kUInt16x4 = 64,
+  kInt32x2 = 65,
+  kUInt32x2 = 66,
+  kInt64x1 = 67,
+  kUInt64x1 = 68,
+  kFloat32x2 = 69,
+  kFloat64x1 = 70,
+
+  _kVec128Start = 71,
+  _kVec128End = 80,
+
+  kInt8x16 = 71,
+  kUInt8x16 = 72,
+  kInt16x8 = 73,
+  kUInt16x8 = 74,
+  kInt32x4 = 75,
+  kUInt32x4 = 76,
+  kInt64x2 = 77,
+  kUInt64x2 = 78,
+  kFloat32x4 = 79,
+  kFloat64x2 = 80,
+
+  _kVec256Start = 81,
+  _kVec256End = 90,
+
+  kInt8x32 = 81,
+  kUInt8x32 = 82,
+  kInt16x16 = 83,
+  kUInt16x16 = 84,
+  kInt32x8 = 85,
+  kUInt32x8 = 86,
+  kInt64x4 = 87,
+  kUInt64x4 = 88,
+  kFloat32x8 = 89,
+  kFloat64x4 = 90,
+
+  _kVec512Start = 91,
+  _kVec512End = 100,
+
+  kInt8x64 = 91,
+  kUInt8x64 = 92,
+  kInt16x32 = 93,
+  kUInt16x32 = 94,
+  kInt32x16 = 95,
+  kUInt32x16 = 96,
+  kInt64x8 = 97,
+  kUInt64x8 = 98,
+  kFloat32x16 = 99,
+  kFloat64x8 = 100,
+
+  kLastAssigned = kFloat64x8,
+
+  kMaxValue = 255
+};
+ASMJIT_DEFINE_ENUM_COMPARE(TypeId)
+
+//! Type identifier utilities.
+namespace TypeUtils {
+
+struct TypeData {
+  TypeId scalarOf[uint32_t(TypeId::kMaxValue) + 1]; //!< Scalar type of each TypeId, indexed by the TypeId value.
+  uint8_t sizeOf[uint32_t(TypeId::kMaxValue) + 1]; //!< Size in bytes of each TypeId, indexed by the TypeId value.
+};
+ASMJIT_VARAPI const TypeData _typeData;
+
+//! Returns the scalar type of `typeId`.
+static inline TypeId scalarOf(TypeId typeId) noexcept { return _typeData.scalarOf[uint32_t(typeId)]; }
+
+//! Returns the size [in bytes] of `typeId`.
+static inline uint32_t sizeOf(TypeId typeId) noexcept { return _typeData.sizeOf[uint32_t(typeId)]; }
+
+//! Tests whether a given type `typeId` is between `a` and `b`.
+static inline constexpr bool isBetween(TypeId typeId, TypeId a, TypeId b) noexcept {
+  return Support::isBetween(uint32_t(typeId), uint32_t(a), uint32_t(b));
+}
+
+//! Tests whether a given type `typeId` is \ref TypeId::kVoid.
+static inline constexpr bool isVoid(TypeId typeId) noexcept { return typeId == TypeId::kVoid; }
+//! Tests whether a given type `typeId` is a valid non-void type.
+static inline constexpr bool isValid(TypeId typeId) noexcept { return isBetween(typeId, TypeId::_kIntStart, TypeId::_kVec512End); }
+//! Tests whether a given type `typeId` is scalar (has no vector part).
+static inline constexpr bool isScalar(TypeId typeId) noexcept { return isBetween(typeId, TypeId::_kBaseStart, TypeId::_kBaseEnd); }
+//! Tests whether a given type `typeId` is abstract, which means that its size depends on register size.
+static inline constexpr bool isAbstract(TypeId typeId) noexcept { return isBetween(typeId, TypeId::kIntPtr, TypeId::kUIntPtr); }
+
+//! Tests whether a given type is a scalar integer (signed or unsigned) of any size.
+static inline constexpr bool isInt(TypeId typeId) noexcept { return isBetween(typeId, TypeId::_kIntStart, TypeId::_kIntEnd); }
+//! Tests whether a given type is a scalar 8-bit integer (signed).
+static inline constexpr bool isInt8(TypeId typeId) noexcept { return typeId == TypeId::kInt8; }
+//! Tests whether a given type is a scalar 8-bit integer (unsigned).
+static inline constexpr bool isUInt8(TypeId typeId) noexcept { return typeId == TypeId::kUInt8; }
+//! Tests whether a given type is a scalar 16-bit integer (signed).
+static inline constexpr bool isInt16(TypeId typeId) noexcept { return typeId == TypeId::kInt16; }
+//! Tests whether a given type is a scalar 16-bit integer (unsigned).
+static inline constexpr bool isUInt16(TypeId typeId) noexcept { return typeId == TypeId::kUInt16; }
+//! Tests whether a given type is a scalar 32-bit integer (signed).
+static inline constexpr bool isInt32(TypeId typeId) noexcept { return typeId == TypeId::kInt32; }
+//! Tests whether a given type is a scalar 32-bit integer (unsigned).
+static inline constexpr bool isUInt32(TypeId typeId) noexcept { return typeId == TypeId::kUInt32; }
+//! Tests whether a given type is a scalar 64-bit integer (signed).
+static inline constexpr bool isInt64(TypeId typeId) noexcept { return typeId == TypeId::kInt64; }
+//! Tests whether a given type is a scalar 64-bit integer (unsigned).
+static inline constexpr bool isUInt64(TypeId typeId) noexcept { return typeId == TypeId::kUInt64; }
+
+static inline constexpr bool isGp8(TypeId typeId) noexcept { return isBetween(typeId, TypeId::kInt8, TypeId::kUInt8); }
+static inline constexpr bool isGp16(TypeId typeId) noexcept { return isBetween(typeId, TypeId::kInt16, TypeId::kUInt16); }
+static inline constexpr bool isGp32(TypeId typeId) noexcept { return isBetween(typeId, TypeId::kInt32, TypeId::kUInt32); }
+static inline constexpr bool isGp64(TypeId typeId) noexcept { return isBetween(typeId, TypeId::kInt64, TypeId::kUInt64); }
+
+//! Tests whether a given type is a scalar floating point of any size.
+static inline constexpr bool isFloat(TypeId typeId) noexcept { return isBetween(typeId, TypeId::_kFloatStart, TypeId::_kFloatEnd); }
+//! Tests whether a given type is a scalar 32-bit float.
+static inline constexpr bool isFloat32(TypeId typeId) noexcept { return typeId == TypeId::kFloat32; }
+//! Tests whether a given type is a scalar 64-bit float.
+static inline constexpr bool isFloat64(TypeId typeId) noexcept { return typeId == TypeId::kFloat64; }
+//! Tests whether a given type is a scalar 80-bit float.
+static inline constexpr bool isFloat80(TypeId typeId) noexcept { return typeId == TypeId::kFloat80; }
+
+static inline constexpr bool isMask(TypeId typeId) noexcept { return isBetween(typeId, TypeId::_kMaskStart, TypeId::_kMaskEnd); }
+static inline constexpr bool isMask8(TypeId typeId) noexcept { return typeId == TypeId::kMask8; }
+static inline constexpr bool isMask16(TypeId typeId) noexcept { return typeId == TypeId::kMask16; }
+static inline constexpr bool isMask32(TypeId typeId) noexcept { return typeId == TypeId::kMask32; }
+static inline constexpr bool isMask64(TypeId typeId) noexcept { return typeId == TypeId::kMask64; }
+
+static inline constexpr bool isMmx(TypeId typeId) noexcept { return isBetween(typeId, TypeId::_kMmxStart, TypeId::_kMmxEnd); }
+static inline constexpr bool isMmx32(TypeId typeId) noexcept { return typeId == TypeId::kMmx32; }
+static inline constexpr bool isMmx64(TypeId typeId) noexcept { return typeId == TypeId::kMmx64; }
+
+static inline constexpr bool isVec(TypeId typeId) noexcept { return isBetween(typeId, TypeId::_kVec32Start, TypeId::_kVec512End); }
+static inline constexpr bool isVec32(TypeId typeId) noexcept { return isBetween(typeId, TypeId::_kVec32Start, TypeId::_kVec32End); }
+static inline constexpr bool isVec64(TypeId typeId) noexcept { return isBetween(typeId, TypeId::_kVec64Start, TypeId::_kVec64End); }
+static inline constexpr bool isVec128(TypeId typeId) noexcept { return isBetween(typeId, TypeId::_kVec128Start, TypeId::_kVec128End); }
+static inline constexpr bool isVec256(TypeId typeId) noexcept { return isBetween(typeId, TypeId::_kVec256Start, TypeId::_kVec256End); }
+static inline constexpr bool isVec512(TypeId typeId) noexcept { return isBetween(typeId, TypeId::_kVec512Start, TypeId::_kVec512End); }
+
+//! \cond
+enum TypeCategory : uint32_t {
+  kTypeCategoryUnknown = 0,
+  kTypeCategoryEnum = 1,
+  kTypeCategoryIntegral = 2,
+  kTypeCategoryFloatingPoint = 3,
+  kTypeCategoryFunction = 4
+};
+
+template<typename T, TypeCategory kCategory>
+struct TypeIdOfT_ByCategory {}; // Fails if not specialized.
+
+template<typename T>
+struct TypeIdOfT_ByCategory<T, kTypeCategoryIntegral> {
+  enum : uint32_t {
+    kTypeId = uint32_t(
+      (sizeof(T) == 1 &&  std::is_signed<T>::value) ? TypeId::kInt8 :
+      (sizeof(T) == 1 && !std::is_signed<T>::value) ? TypeId::kUInt8 :
+      (sizeof(T) == 2 &&  std::is_signed<T>::value) ? TypeId::kInt16 :
+      (sizeof(T) == 2 && !std::is_signed<T>::value) ? TypeId::kUInt16 :
+      (sizeof(T) == 4 &&  std::is_signed<T>::value) ? TypeId::kInt32 :
+      (sizeof(T) == 4 && !std::is_signed<T>::value) ? TypeId::kUInt32 :
+      (sizeof(T) == 8 &&  std::is_signed<T>::value) ? TypeId::kInt64 :
+      (sizeof(T) == 8 && !std::is_signed<T>::value) ? TypeId::kUInt64 : TypeId::kVoid)
+  };
+};
+
+template<typename T>
+struct TypeIdOfT_ByCategory<T, kTypeCategoryFloatingPoint> {
+  enum : uint32_t {
+    kTypeId = uint32_t(
+      (sizeof(T) == 4 ) ? TypeId::kFloat32 :
+      (sizeof(T) == 8 ) ? TypeId::kFloat64 :
+      (sizeof(T) >= 10) ? TypeId::kFloat80 : TypeId::kVoid)
+  };
+};
+
+template<typename T>
+struct TypeIdOfT_ByCategory<T, kTypeCategoryEnum>
+  : public TypeIdOfT_ByCategory<typename std::underlying_type<T>::type, kTypeCategoryIntegral> {};
+
+template<typename T>
+struct TypeIdOfT_ByCategory<T, kTypeCategoryFunction> {
+  enum : uint32_t {
+    kTypeId = uint32_t(TypeId::kUIntPtr)
+  };
+};
+//! \endcond
+
+//! TypeIdOfT<> template allows to get a TypeId from a C++ type `T`.
+#ifdef _DOXYGEN
+template<typename T>
+struct TypeIdOfT {
+  //! TypeId of C++ type `T`.
+  static constexpr TypeId kTypeId = _TypeIdDeducedAtCompileTime_;
+};
+#else
+template<typename T>
+struct TypeIdOfT
+  : public TypeIdOfT_ByCategory<T,
+    std::is_enum<T>::value           ? kTypeCategoryEnum          :
+    std::is_integral<T>::value       ? kTypeCategoryIntegral      :
+    std::is_floating_point<T>::value ? kTypeCategoryFloatingPoint :
+    std::is_function<T>::value       ? kTypeCategoryFunction      : kTypeCategoryUnknown> {};
+#endif
+
+//! \cond
+template<typename T>
+struct TypeIdOfT<T*> {
+  enum : uint32_t {
+    kTypeId = uint32_t(TypeId::kUIntPtr)
+  };
+};
+
+template<typename T>
+struct TypeIdOfT<T&> {
+  enum : uint32_t {
+    kTypeId = uint32_t(TypeId::kUIntPtr)
+  };
+};
+//! \endcond
+
+//! Returns a corresponding \ref TypeId of `T` type.
+template<typename T>
+static inline constexpr TypeId typeIdOfT() noexcept { return TypeId(TypeIdOfT<T>::kTypeId); }
+
+//! Returns offset needed to convert a `kIntPtr` and `kUIntPtr` TypeId into a type that matches `registerSize`
+//! (general-purpose register size). If you find such TypeId it's then only about adding the offset to it.
+//!
+//! For example:
+//!
+//! ```
+//! uint32_t registerSize = /* 4 or 8 */;
+//! uint32_t deabstractDelta = TypeUtils::deabstractDeltaOfSize(registerSize);
+//!
+//! TypeId typeId = 'some type-id';
+//!
+//! // Normalize some typeId into a non-abstract typeId.
+//! if (TypeUtils::isAbstract(typeId)) typeId = TypeId(uint32_t(typeId) + deabstractDelta);
+//!
+//! // The same, but by using TypeUtils::deabstract() function.
+//! typeId = TypeUtils::deabstract(typeId, deabstractDelta);
+//! ```
+static inline constexpr uint32_t deabstractDeltaOfSize(uint32_t registerSize) noexcept {
+  return registerSize >= 8 ? uint32_t(TypeId::kInt64) - uint32_t(TypeId::kIntPtr)
+                           : uint32_t(TypeId::kInt32) - uint32_t(TypeId::kIntPtr);
+}
+
+//! Deabstracts a given `typeId` into a native type by using `deabstractDelta`, which was previously
+//! calculated by calling \ref deabstractDeltaOfSize() with a target native register size.
+static inline constexpr TypeId deabstract(TypeId typeId, uint32_t deabstractDelta) noexcept {
+  return isAbstract(typeId) ? TypeId(uint32_t(typeId) + deabstractDelta) : typeId;
+}
+
+static inline constexpr TypeId scalarToVector(TypeId scalarTypeId, TypeId vecStartId) noexcept {
+  return TypeId(uint32_t(vecStartId) + uint32_t(scalarTypeId) - uint32_t(TypeId::kInt8)); // Offset from kInt8.
+}
+
+} // {TypeUtils}
+
+//! Provides type identifiers that can be used in templates instead of native types.
+namespace Type {
+
+//! bool as C++ type-name.
+struct Bool {};
+//! int8_t as C++ type-name.
+struct Int8 {};
+//! uint8_t as C++ type-name.
+struct UInt8 {};
+//! int16_t as C++ type-name.
+struct Int16 {};
+//! uint16_t as C++ type-name.
+struct UInt16 {};
+//! int32_t as C++ type-name.
+struct Int32 {};
+//! uint32_t as C++ type-name.
+struct UInt32 {};
+//! int64_t as C++ type-name.
+struct Int64 {};
+//! uint64_t as C++ type-name.
+struct UInt64 {};
+//! intptr_t as C++ type-name.
+struct IntPtr {};
+//! uintptr_t as C++ type-name.
+struct UIntPtr {};
+//! float as C++ type-name.
+struct Float32 {};
+//! double as C++ type-name.
+struct Float64 {};
+
+} // {Type}
+
+//! \cond
+#define ASMJIT_DEFINE_TYPE_ID(T, TYPE_ID) \
+namespace TypeUtils {                     \
+  template<>                              \
+  struct TypeIdOfT<T> {                   \
+    enum : uint32_t {                     \
+      kTypeId = uint32_t(TYPE_ID)         \
+    };                                    \
+  };                                      \
+}
+
+ASMJIT_DEFINE_TYPE_ID(void         , TypeId::kVoid);
+ASMJIT_DEFINE_TYPE_ID(Type::Bool   , TypeId::kUInt8);
+ASMJIT_DEFINE_TYPE_ID(Type::Int8   , TypeId::kInt8);
+ASMJIT_DEFINE_TYPE_ID(Type::UInt8  , TypeId::kUInt8);
+ASMJIT_DEFINE_TYPE_ID(Type::Int16  , TypeId::kInt16);
+ASMJIT_DEFINE_TYPE_ID(Type::UInt16 , TypeId::kUInt16);
+ASMJIT_DEFINE_TYPE_ID(Type::Int32  , TypeId::kInt32);
+ASMJIT_DEFINE_TYPE_ID(Type::UInt32 , TypeId::kUInt32);
+ASMJIT_DEFINE_TYPE_ID(Type::Int64  , TypeId::kInt64);
+ASMJIT_DEFINE_TYPE_ID(Type::UInt64 , TypeId::kUInt64);
+ASMJIT_DEFINE_TYPE_ID(Type::IntPtr , TypeId::kIntPtr);
+ASMJIT_DEFINE_TYPE_ID(Type::UIntPtr, TypeId::kUIntPtr);
+ASMJIT_DEFINE_TYPE_ID(Type::Float32, TypeId::kFloat32);
+ASMJIT_DEFINE_TYPE_ID(Type::Float64, TypeId::kFloat64);
+//! \endcond
+
+//! \}
+
+ASMJIT_END_NAMESPACE
+
+#endif // ASMJIT_CORE_TYPE_H_INCLUDED
diff --git a/lib/lepton/asmjit/core/virtmem.cpp b/lib/lepton/asmjit/core/virtmem.cpp
new file mode 100644
index 0000000000..43766ef2cd
--- /dev/null
+++ b/lib/lepton/asmjit/core/virtmem.cpp
@@ -0,0 +1,722 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#ifndef ASMJIT_NO_JIT
+
+#include "../core/osutils.h"
+#include "../core/string.h"
+#include "../core/support.h"
+#include "../core/virtmem.h"
+
+#if !defined(_WIN32)
+  #include <errno.h>
+  #include <fcntl.h>
+  #include <sys/mman.h>
+  #include <sys/stat.h>
+  #include <sys/types.h>
+  #include <unistd.h>
+
+  // Linux has a `memfd_create` syscall that we would like to use, if available.
+  #if defined(__linux__)
+    #include <sys/syscall.h>
+  #endif
+
+  // Apple recently introduced MAP_JIT flag, which we want to use.
+  #if defined(__APPLE__)
+    #include <pthread.h>
+    #include <TargetConditionals.h>
+    #if TARGET_OS_OSX
+      #include <sys/utsname.h>
+      #include <libkern/OSCacheControl.h> // sys_icache_invalidate().
+    #endif
+    // Older SDK doesn't define `MAP_JIT`.
+    #ifndef MAP_JIT
+      #define MAP_JIT 0x800
+    #endif
+  #endif
+
+  // BSD/MAC: `MAP_ANONYMOUS` is not defined, `MAP_ANON` is.
+  #if !defined(MAP_ANONYMOUS)
+    #define MAP_ANONYMOUS MAP_ANON
+  #endif
+#endif
+
+#include <atomic>
+
+#if defined(__APPLE__) || defined(__BIONIC__)
+  #define ASMJIT_VM_SHM_DETECT 0
+#else
+  #define ASMJIT_VM_SHM_DETECT 1
+#endif
+
+// Android NDK doesn't provide `shm_open()` and `shm_unlink()`.
+#if defined(__BIONIC__)
+  #define ASMJIT_VM_SHM_AVAILABLE 0
+#else
+  #define ASMJIT_VM_SHM_AVAILABLE 1
+#endif
+
+#if defined(__APPLE__) && ASMJIT_ARCH_ARM >= 64
+  #define ASMJIT_HAS_PTHREAD_JIT_WRITE_PROTECT_NP
+#endif
+
+ASMJIT_BEGIN_SUB_NAMESPACE(VirtMem)
+
+// Virtual Memory Utilities
+// ========================
+
+static const MemoryFlags dualMappingFilter[2] = {
+  MemoryFlags::kAccessWrite | MemoryFlags::kMMapMaxAccessWrite, // [0] Flags masked out of the first mapping.
+  MemoryFlags::kAccessExecute | MemoryFlags::kMMapMaxAccessExecute // [1] Flags masked out of the second mapping.
+};
+
+// Virtual Memory [Windows]
+// ========================
+
+#if defined(_WIN32)
+
+struct ScopedHandle { // Closes the stored handle (when not null) at the end of the scope.
+  inline ScopedHandle() noexcept
+    : value(nullptr) {}
+
+  inline ~ScopedHandle() noexcept {
+    if (value != nullptr)
+      ::CloseHandle(value);
+  }
+
+  HANDLE value; // Null means there is nothing to close.
+};
+
+static void getVMInfo(Info& vmInfo) noexcept {
+  SYSTEM_INFO systemInfo;
+  // Fills `vmInfo` from `GetSystemInfo()` - page size and allocation granularity.
+  ::GetSystemInfo(&systemInfo);
+  vmInfo.pageSize = Support::alignUpPowerOf2<uint32_t>(systemInfo.dwPageSize);
+  vmInfo.pageGranularity = systemInfo.dwAllocationGranularity;
+}
+
+// Returns windows-specific protectFlags from \ref MemoryFlags.
+static DWORD protectFlagsFromMemoryFlags(MemoryFlags memoryFlags) noexcept {
+  const bool canWrite = Support::test(memoryFlags, MemoryFlags::kAccessWrite);
+
+  // Executable pages - writable only when write access was requested as well.
+  if (Support::test(memoryFlags, MemoryFlags::kAccessExecute))
+    return canWrite ? PAGE_EXECUTE_READWRITE : PAGE_EXECUTE_READ;
+
+  // Non-executable pages - either read-write or read-only.
+  if (Support::test(memoryFlags, MemoryFlags::kAccessRW))
+    return canWrite ? PAGE_READWRITE : PAGE_READONLY;
+
+  // None of the tested access flags is set.
+  return PAGE_NOACCESS;
+}
+
+static DWORD desiredAccessFromMemoryFlags(MemoryFlags memoryFlags) noexcept {
+  // Translates \ref MemoryFlags to a `FILE_MAP_...` access mask (write or read, optionally with execute).
+  const bool wantsWrite = Support::test(memoryFlags, MemoryFlags::kAccessWrite);
+  const bool wantsExecute = Support::test(memoryFlags, MemoryFlags::kAccessExecute);
+  return DWORD(wantsWrite ? FILE_MAP_WRITE : FILE_MAP_READ) | DWORD(wantsExecute ? FILE_MAP_EXECUTE : 0);
+}
+
+static HardenedRuntimeFlags getHardenedRuntimeFlags() noexcept {
+  return HardenedRuntimeFlags::kNone; // This (Windows) implementation never reports any hardened runtime flag.
+}
+
+Error alloc(void** p, size_t size, MemoryFlags memoryFlags) noexcept {
+  *p = nullptr; // The output stays null on every failure path.
+  if (size == 0)
+    return DebugUtils::errored(kErrorInvalidArgument);
+
+  DWORD protectFlags = protectFlagsFromMemoryFlags(memoryFlags);
+  void* result = ::VirtualAlloc(nullptr, size, MEM_COMMIT | MEM_RESERVE, protectFlags);
+
+  if (!result)
+    return DebugUtils::errored(kErrorOutOfMemory);
+
+  *p = result;
+  return kErrorOk;
+}
+
+Error release(void* p, size_t size) noexcept {
+  DebugUtils::unused(size); // Not needed - `VirtualFree()` is called with a zero size and `MEM_RELEASE`.
+  if (ASMJIT_UNLIKELY(!::VirtualFree(p, 0, MEM_RELEASE)))
+    return DebugUtils::errored(kErrorInvalidArgument);
+  return kErrorOk;
+}
+
+Error protect(void* p, size_t size, MemoryFlags memoryFlags) noexcept {
+  // The previous protection is written by `VirtualProtect()`, but it's not used afterwards.
+  DWORD previousProtection;
+  const DWORD newProtection = protectFlagsFromMemoryFlags(memoryFlags);
+
+  if (!::VirtualProtect(p, size, newProtection, &previousProtection))
+    return DebugUtils::errored(kErrorInvalidArgument);
+  return kErrorOk;
+}
+
+Error allocDualMapping(DualMapping* dm, size_t size, MemoryFlags memoryFlags) noexcept {
+  dm->rx = nullptr;
+  dm->rw = nullptr;
+  // Creates one file mapping and maps it twice; `dm` stays null on failure and no view is left mapped.
+  if (size == 0)
+    return DebugUtils::errored(kErrorInvalidArgument);
+
+  ScopedHandle handle;
+  handle.value = ::CreateFileMappingW(
+    INVALID_HANDLE_VALUE,
+    nullptr,
+    PAGE_EXECUTE_READWRITE,
+    (DWORD)(uint64_t(size) >> 32),
+    (DWORD)(size & 0xFFFFFFFFu),
+    nullptr);
+
+  if (ASMJIT_UNLIKELY(!handle.value))
+    return DebugUtils::errored(kErrorOutOfMemory);
+
+  void* ptr[2];
+  for (uint32_t i = 0; i < 2; i++) {
+    MemoryFlags accessFlags = memoryFlags & ~dualMappingFilter[i]; // [0] drops write, [1] drops execute access.
+    DWORD desiredAccess = desiredAccessFromMemoryFlags(accessFlags);
+    ptr[i] = ::MapViewOfFile(handle.value, desiredAccess, 0, 0, size);
+
+    if (ptr[i] == nullptr) {
+      if (i == 1) // Only a failure of the second view leaves a first view that must be unmapped.
+        ::UnmapViewOfFile(ptr[0]);
+      return DebugUtils::errored(kErrorOutOfMemory); // The mapping handle is closed by `~ScopedHandle()`.
+    }
+  }
+
+  dm->rx = ptr[0];
+  dm->rw = ptr[1];
+  return kErrorOk;
+}
+
+Error releaseDualMapping(DualMapping* dm, size_t size) noexcept {
+  // Views are unmapped by address only, so `size` is unused. Both views are always attempted and the pointers
+  // stored in `dm` are cleared only when every unmap succeeded.
+  DebugUtils::unused(size);
+  bool rxOk = ::UnmapViewOfFile(dm->rx) != 0;
+  bool rwOk = true;
+
+  if (dm->rx != dm->rw)
+    rwOk = UnmapViewOfFile(dm->rw) != 0;
+
+  if (!(rxOk && rwOk))
+    return DebugUtils::errored(kErrorInvalidArgument);
+
+  dm->rx = nullptr;
+  dm->rw = nullptr;
+  return kErrorOk;
+}
+
+#endif
+
+// Virtual Memory [Posix]
+// ======================
+
+#if !defined(_WIN32)
+
+static void getVMInfo(Info& vmInfo) noexcept {
+  // The reported granularity is never smaller than 64 KiB, even when pages are smaller.
+  const uint32_t kMinGranularity = 65536;
+  vmInfo.pageSize = uint32_t(::getpagesize());
+  vmInfo.pageGranularity = Support::max<uint32_t>(vmInfo.pageSize, kMinGranularity);
+}
+
+#if !defined(SHM_ANON)
+static const char* getTmpDir() noexcept {
+  const char* fromEnv = getenv("TMPDIR"); // A TMPDIR environment variable wins over the "/tmp" fallback.
+  return fromEnv != nullptr ? fromEnv : "/tmp";
+}
+#endif
+
+// Translates libc errors specific to VirtualMemory mapping to `asmjit::Error`.
+static Error asmjitErrorFromErrno(int e) noexcept {
+  // Classify `e` first, then return the matching error code. The groups are mutually exclusive, so the order
+  // of the checks below doesn't matter.
+  // Access, availability, and permission problems.
+  bool isInvalidState = e == EACCES || e == EAGAIN || e == ENODEV || e == EPERM;
+  // Size or memory limits were exceeded.
+  bool isOutOfMemory = e == EFBIG || e == ENOMEM || e == EOVERFLOW;
+  // The per-process or system-wide limit of open files was reached.
+  bool isTooManyHandles = e == EMFILE || e == ENFILE;
+
+  if (isInvalidState)
+    return kErrorInvalidState;
+
+  if (isOutOfMemory)
+    return kErrorOutOfMemory;
+
+  if (isTooManyHandles)
+    return kErrorTooManyHandles;
+  // Any other value is reported as an invalid argument.
+  return kErrorInvalidArgument;
+}
+
+// Some operating systems don't allow /dev/shm to be executable. On Linux this happens when /dev/shm is mounted with
+// 'noexec', which is enforced by systemd. Other operating systems like MacOS also restrict executable permissions
+// regarding /dev/shm, so we use a runtime detection before attempting to allocate executable memory. Sometimes we
+// don't need the detection as we know it would always result in `ShmStrategy::kTmpDir`.
+enum class ShmStrategy : uint32_t {
+  kUnknown = 0, // Not detected yet.
+  kDevShm = 1,  // Mapping the anonymous memory object as executable worked during detection.
+  kTmpDir = 2   // Use a file created in the temporary directory instead.
+};
+
+class AnonymousMemory {
+public:
+  enum FileType : uint32_t {
+    kFileTypeNone, // Nothing to unlink.
+    kFileTypeShm,  // Created by `shm_open()` with a generated name, removed by `shm_unlink()`.
+    kFileTypeTmp   // Created by `open()` in the temporary directory, removed by `unlink()`.
+  };
+
+  int _fd;                 // File descriptor, -1 when not open.
+  FileType _fileType;      // Selects how `unlink()` removes `_tmpName`.
+  StringTmp<128> _tmpName; // Name generated by `open()` (only set by the tmp/shm code paths).
+
+  inline AnonymousMemory() noexcept
+    : _fd(-1),
+      _fileType(kFileTypeNone),
+      _tmpName() {}
+
+  inline ~AnonymousMemory() noexcept {
+    unlink();
+    close();
+  }
+
+  inline int fd() const noexcept { return _fd; }
+
+  // Opens the memory object - tries `memfd_create` first (Linux), then either `shm_open(SHM_ANON)` or a uniquely
+  // named file created by `shm_open()` / `open()`; `preferTmpOverDevShm` only affects the named-file path.
+  Error open(bool preferTmpOverDevShm) noexcept {
+#if defined(__linux__) && defined(__NR_memfd_create)
+    // Linux specific 'memfd_create' - if the syscall returns `ENOSYS` it means
+    // it's not available and we will never call it again (would be pointless).
+
+    // Zero initialized, if ever changed to '1' that would mean the syscall is not
+    // available and we must use `shm_open()` and `shm_unlink()`.
+    static volatile uint32_t memfd_create_not_supported;
+
+    if (!memfd_create_not_supported) {
+      _fd = (int)syscall(__NR_memfd_create, "vmem", 0);
+      if (ASMJIT_LIKELY(_fd >= 0))
+        return kErrorOk;
+
+      int e = errno;
+      if (e == ENOSYS)
+        memfd_create_not_supported = 1;
+      else
+        return DebugUtils::errored(asmjitErrorFromErrno(e));
+    }
+#endif
+
+#if defined(SHM_ANON)
+    // Originally FreeBSD extension, apparently works in other BSDs too.
+    DebugUtils::unused(preferTmpOverDevShm);
+    _fd = ::shm_open(SHM_ANON, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
+
+    if (ASMJIT_LIKELY(_fd >= 0))
+      return kErrorOk;
+    else
+      return DebugUtils::errored(asmjitErrorFromErrno(errno));
+#else
+    // POSIX API. We have to generate somehow a unique name. This is nothing cryptographic, just using a bit from
+    // the stack address to always have a different base for different threads (as threads have their own stack)
+    // and retries for avoiding collisions. We use `shm_open()` with flags that require creation of the file so we
+    // never open an existing shared memory.
+    static std::atomic<uint32_t> internalCounter;
+    const char* kShmFormat = "/shm-id-%016llX";
+
+    uint32_t kRetryCount = 100;
+    uint64_t bits = ((uintptr_t)(void*)this) & 0x55555555u;
+
+    for (uint32_t i = 0; i < kRetryCount; i++) {
+      bits -= uint64_t(OSUtils::getTickCount()) * 773703683;
+      bits = ((bits >> 14) ^ (bits << 6)) + uint64_t(++internalCounter) * 10619863;
+
+      bool useTmp = !ASMJIT_VM_SHM_DETECT || preferTmpOverDevShm;
+
+      if (useTmp) {
+        _tmpName.assign(getTmpDir());
+        _tmpName.appendFormat(kShmFormat, (unsigned long long)bits);
+        _fd = ::open(_tmpName.data(), O_RDWR | O_CREAT | O_EXCL, 0); // Created exclusively, no permission bits.
+        if (ASMJIT_LIKELY(_fd >= 0)) {
+          _fileType = kFileTypeTmp;
+          return kErrorOk;
+        }
+      }
+#if ASMJIT_VM_SHM_AVAILABLE
+      else {
+        _tmpName.assignFormat(kShmFormat, (unsigned long long)bits);
+        _fd = ::shm_open(_tmpName.data(), O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
+        if (ASMJIT_LIKELY(_fd >= 0)) {
+          _fileType = kFileTypeShm;
+          return kErrorOk;
+        }
+      }
+#endif
+      // Only a name collision (EEXIST) is retried, any other error is reported immediately.
+      int e = errno;
+      if (e != EEXIST)
+        return DebugUtils::errored(asmjitErrorFromErrno(e));
+    }
+
+    return DebugUtils::errored(kErrorFailedToOpenAnonymousMemory);
+#endif
+  }
+
+  // Removes the name created by `open()`, if any; `_fileType` is reset first so a repeated call does nothing.
+  void unlink() noexcept {
+    FileType type = _fileType;
+    _fileType = kFileTypeNone;
+
+#if ASMJIT_VM_SHM_AVAILABLE
+    if (type == kFileTypeShm) {
+      ::shm_unlink(_tmpName.data());
+      return;
+    }
+#endif
+
+    if (type == kFileTypeTmp) {
+      ::unlink(_tmpName.data());
+      return;
+    }
+  }
+
+  // Closes the file descriptor if it's open and marks it as closed.
+  void close() noexcept {
+    if (_fd >= 0) {
+      ::close(_fd);
+      _fd = -1;
+    }
+  }
+
+  // Sets the size of the underlying file to `size` via `ftruncate()`.
+  Error allocate(size_t size) noexcept {
+    // TODO: Improve this by using `posix_fallocate()` when available.
+    if (ftruncate(_fd, off_t(size)) != 0)
+      return DebugUtils::errored(asmjitErrorFromErrno(errno));
+
+    return kErrorOk;
+  }
+};
+
+// Returns `mmap()` protection flags from \ref MemoryFlags.
+static int mmProtFromMemoryFlags(MemoryFlags memoryFlags) noexcept {
+  // Write and execute access both imply read access.
+  bool r = Support::test(memoryFlags, MemoryFlags::kAccessRead);
+  bool w = Support::test(memoryFlags, MemoryFlags::kAccessWrite);
+  bool x = Support::test(memoryFlags, MemoryFlags::kAccessExecute);
+  return (r || w || x ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | (x ? PROT_EXEC : 0);
+}
+
+#if defined(__APPLE__)
+// Detects whether the current process is hardened, which means that pages that have WRITE and EXECUTABLE flags cannot
+// be allocated without MAP_JIT flag.
+static inline bool hasHardenedRuntimeMacOS() noexcept {
+#if TARGET_OS_OSX && ASMJIT_ARCH_ARM >= 64
+  // MacOS on AArch64 has always hardened runtime enabled.
+  return true;
+#else
+  // The probe result is cached process-wide; zero means the probe hasn't run yet.
+  enum HardenedFlag : uint32_t {
+    kHardenedFlagUnknown  = 0,
+    kHardenedFlagDisabled = 1,
+    kHardenedFlagEnabled  = 2
+  };
+
+  static std::atomic<uint32_t> globalHardenedFlag;
+  uint32_t cached = globalHardenedFlag.load();
+
+  if (cached != kHardenedFlagUnknown)
+    return cached == kHardenedFlagEnabled;
+
+  // Probe by mapping a single page that is both writable and executable - a failed mapping is interpreted as
+  // a hardened runtime, a successful one is unmapped again right away.
+  size_t probeSize = ::getpagesize();
+  void* probe = mmap(nullptr, probeSize, PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+  bool hardened = (probe == MAP_FAILED);
+  if (!hardened)
+    munmap(probe, probeSize);
+
+  globalHardenedFlag.store(hardened ? kHardenedFlagEnabled : kHardenedFlagDisabled);
+  return hardened;
+#endif
+}
+
+static inline bool hasMapJitSupportMacOS() noexcept {
+#if TARGET_OS_OSX && ASMJIT_ARCH_ARM >= 64
+  // MacOS for 64-bit AArch architecture always uses hardened runtime. Some documentation can be found here:
+  //   - https://developer.apple.com/documentation/apple_silicon/porting_just-in-time_compilers_to_apple_silicon
+  return true;
+#elif TARGET_OS_OSX
+  // MAP_JIT flag required to run unsigned JIT code is only supported by kernel version 10.14+ (Mojave) and iOS.
+  // The release number parsed from `uname()` is cached process-wide; zero means it hasn't been queried yet.
+  static std::atomic<uint32_t> globalVersion;
+  int majorRelease = globalVersion.load();
+  if (majorRelease == 0) {
+    struct utsname sysName {};
+    uname(&sysName);
+    majorRelease = atoi(sysName.release);
+    globalVersion.store(majorRelease);
+  }
+  return majorRelease >= 18;
+#else
+  // Assume it's available.
+  return true;
+#endif
+}
+#endif // __APPLE__
+
+// Detects whether the current process is hardened, which means that pages that have WRITE and EXECUTABLE flags
+// cannot be normally allocated. On MacOS such allocation requires MAP_JIT flag.
+static inline bool hasHardenedRuntime() noexcept {
+#if !defined(__APPLE__)
+  return false; // Only probed on Apple platforms.
+#else
+  return hasHardenedRuntimeMacOS();
+#endif
+}
+
+// Detects whether MAP_JIT is available.
+static inline bool hasMapJitSupport() noexcept {
+#if !defined(__APPLE__)
+  return false; // MAP_JIT is only considered on Apple platforms.
+#else
+  return hasMapJitSupportMacOS();
+#endif
+}
+
+// Returns either MAP_JIT or 0 based on `flags` and the host operating system.
+static inline int mmMapJitFromMemoryFlags(MemoryFlags memoryFlags) noexcept {
+#if defined(__APPLE__)
+  // MAP_JIT is used when the caller explicitly asks for it (could be used for testing on non-hardened processes)
+  // or when the process is hardened, in which case the user-provided `memoryFlags` don't matter. The flag is
+  // only returned when the operating system is known to support it.
+  bool requested = Support::test(memoryFlags, MemoryFlags::kMMapEnableMapJit);
+  if (!requested && !hasHardenedRuntime())
+    return 0;
+
+  return hasMapJitSupport() ? int(MAP_JIT) : 0;
+#else
+  DebugUtils::unused(memoryFlags);
+  return 0;
+#endif
+}
+
+// Returns BSD-specific `PROT_MAX()` flags.
+static inline int mmMaxProtFromMemoryFlags(MemoryFlags memoryFlags) noexcept {
+#if defined(PROT_MAX)
+  // The kMMapMaxAccess* bits are shifted down by the bit position of `kMMapMaxAccessRead` so they can be
+  // translated by `mmProtFromMemoryFlags()` like regular access flags.
+  static constexpr uint32_t kMaxProtShift = Support::ConstCTZ<uint32_t(MemoryFlags::kMMapMaxAccessRead)>::value;
+  if (!Support::test(memoryFlags, MemoryFlags::kMMapMaxAccessReadWrite | MemoryFlags::kMMapMaxAccessExecute))
+    return 0;
+  return PROT_MAX(mmProtFromMemoryFlags((MemoryFlags)(uint32_t(memoryFlags) >> kMaxProtShift)));
+#else
+  DebugUtils::unused(memoryFlags);
+  return 0;
+#endif
+}
+
+#if ASMJIT_VM_SHM_DETECT
+static Error detectShmStrategy(ShmStrategy* strategyOut) noexcept {
+  // Creates a page-sized anonymous memory object and tries to map it as executable. `EINVAL` is taken as a sign
+  // that such mappings are refused, in which case the temporary directory has to be used instead.
+  AnonymousMemory anonMem;
+  Info vmInfo = info();
+  ASMJIT_PROPAGATE(anonMem.open(false));
+  ASMJIT_PROPAGATE(anonMem.allocate(vmInfo.pageSize));
+
+  void* probe = mmap(nullptr, vmInfo.pageSize, PROT_READ | PROT_EXEC, MAP_SHARED, anonMem.fd(), 0);
+  if (probe != MAP_FAILED) {
+    munmap(probe, vmInfo.pageSize);
+    *strategyOut = ShmStrategy::kDevShm;
+    return kErrorOk;
+  }
+
+  int e = errno;
+  if (e != EINVAL)
+    return DebugUtils::errored(asmjitErrorFromErrno(e));
+
+  *strategyOut = ShmStrategy::kTmpDir;
+  return kErrorOk;
+}
+#endif
+
+static Error getShmStrategy(ShmStrategy* strategyOut) noexcept {
+#if !ASMJIT_VM_SHM_DETECT
+  *strategyOut = ShmStrategy::kTmpDir;
+  return kErrorOk;
+#else
+  // Nothing is assumed up front - whether '/dev/shm' was mounted with 'noexec' flag has to be tested. The
+  // strategy is cached process-wide and only a successful detection is stored.
+  static std::atomic<uint32_t> globalShmStrategy;
+  ShmStrategy detected = static_cast<ShmStrategy>(globalShmStrategy.load());
+  if (detected == ShmStrategy::kUnknown) {
+    ASMJIT_PROPAGATE(detectShmStrategy(&detected));
+    globalShmStrategy.store(static_cast<uint32_t>(detected));
+  }
+
+  *strategyOut = detected;
+  return kErrorOk;
+#endif
+}
+
+static HardenedRuntimeFlags getHardenedRuntimeFlags() noexcept {
+  // Both properties are queried independently (hardened runtime first) and reported as separate bits.
+  const bool hardened = hasHardenedRuntime();
+  const bool mapJit = hasMapJitSupport();
+  HardenedRuntimeFlags flags = HardenedRuntimeFlags::kNone;
+  if (hardened)
+    flags |= HardenedRuntimeFlags::kEnabled;
+  if (mapJit)
+    flags |= HardenedRuntimeFlags::kMapJit;
+  return flags;
+}
+
+Error alloc(void** p, size_t size, MemoryFlags memoryFlags) noexcept {
+  // `*p` is cleared first so it stays null on every failure path.
+  *p = nullptr;
+  if (size == 0)
+    return DebugUtils::errored(kErrorInvalidArgument);
+
+  // Private anonymous mapping; the MAP_JIT and PROT_MAX() helpers contribute zero where they don't apply.
+  int prot = mmProtFromMemoryFlags(memoryFlags) | mmMaxProtFromMemoryFlags(memoryFlags);
+  int flags = MAP_PRIVATE | MAP_ANONYMOUS | mmMapJitFromMemoryFlags(memoryFlags);
+  void* mem = mmap(nullptr, size, prot, flags, -1, 0);
+  if (mem == MAP_FAILED)
+    return DebugUtils::errored(kErrorOutOfMemory);
+  *p = mem;
+  return kErrorOk;
+}
+
+Error release(void* p, size_t size) noexcept {
+  // `munmap()` operates on an address range, so `size` has to describe the mapping being released.
+  const int rc = munmap(p, size);
+
+  return ASMJIT_UNLIKELY(rc != 0) ? DebugUtils::errored(kErrorInvalidArgument) : Error(kErrorOk);
+}
+
+
+Error protect(void* p, size_t size, MemoryFlags memoryFlags) noexcept {
+  // Only the access bits are translated here - `PROT_MAX()` is not involved when changing protection.
+  const int prot = mmProtFromMemoryFlags(memoryFlags);
+  if (mprotect(p, size, prot) != 0)
+    return DebugUtils::errored(kErrorInvalidArgument);
+  return kErrorOk;
+}
+
+Error allocDualMapping(DualMapping* dm, size_t size, MemoryFlags memoryFlags) noexcept {
+  dm->rx = nullptr;
+  dm->rw = nullptr;
+  // `size` ends up in `ftruncate()`, so it has to be positive when represented as `off_t`.
+  if (off_t(size) <= 0)
+    return DebugUtils::errored(size == 0 ? kErrorInvalidArgument : kErrorTooLarge);
+
+  // A temporary file is used if the caller asked for it or if the detected strategy requires it.
+  bool useTmp = Support::test(memoryFlags, MemoryFlags::kMappingPreferTmp);
+  if (!useTmp) {
+    ShmStrategy detected;
+    ASMJIT_PROPAGATE(getShmStrategy(&detected));
+    useTmp = (detected == ShmStrategy::kTmpDir);
+  }
+
+  AnonymousMemory backing;
+  ASMJIT_PROPAGATE(backing.open(useTmp));
+  ASMJIT_PROPAGATE(backing.allocate(size));
+
+  void* views[2];
+  for (uint32_t i = 0; i < 2; i++) {
+    MemoryFlags viewFlags = memoryFlags & ~dualMappingFilter[i];
+    int prot = mmProtFromMemoryFlags(viewFlags) | mmMaxProtFromMemoryFlags(viewFlags);
+    views[i] = mmap(nullptr, size, prot, MAP_SHARED, backing.fd(), 0);
+    if (views[i] != MAP_FAILED)
+      continue;
+    // Read `errno` now, before `munmap()` has a chance to clobber it.
+    int mapErrno = errno;
+    if (i != 0)
+      munmap(views[0], size);
+    return DebugUtils::errored(asmjitErrorFromErrno(mapErrno));
+  }
+
+  dm->rx = views[0];
+  dm->rw = views[1];
+  return kErrorOk;
+}
+
+Error releaseDualMapping(DualMapping* dm, size_t size) noexcept {
+  // Both views are always released; the pointers in `dm` are cleared only if neither release failed.
+  Error rxErr = release(dm->rx, size);
+  Error rwErr = (dm->rx != dm->rw) ? release(dm->rw, size) : Error(kErrorOk);
+
+  if ((rxErr | rwErr) != kErrorOk)
+    return DebugUtils::errored(kErrorInvalidArgument);
+
+  dm->rx = nullptr;
+  dm->rw = nullptr;
+  return kErrorOk;
+}
+#endif
+
+// Virtual Memory - Flush Instruction Cache
+// ========================================
+
+void flushInstructionCache(void* p, size_t size) noexcept {
+#if ASMJIT_ARCH_X86
+  // X86/X86_64 architecture doesn't require any action to flush the instruction cache.
+  DebugUtils::unused(p, size);
+#elif defined(__APPLE__)
+  sys_icache_invalidate(p, size);
+#elif defined(_WIN32)
+  // Windows has a built-in support in `kernel32.dll`.
+  FlushInstructionCache(GetCurrentProcess(), p, size);
+#elif defined(__GNUC__)
+  char* start = static_cast<char*>(p);
+  char* end = start + size;
+  __builtin___clear_cache(start, end); // Compiler builtin that takes the begin and end address of the region.
+#else
+  #pragma message("asmjit::VirtMem::flushInstructionCache() doesn't have implementation for the target OS and compiler")
+  DebugUtils::unused(p, size);
+#endif
+}
+
+// Virtual Memory - Memory Info
+// ============================
+
+Info info() noexcept {
+  // The information is queried once and cached; it's gathered into a local first and the flag is set last.
+  static std::atomic<uint32_t> vmInfoInitialized;
+  static Info vmInfo;
+
+  if (vmInfoInitialized.load())
+    return vmInfo;
+
+  Info queried;
+  getVMInfo(queried);
+  vmInfo = queried;
+  vmInfoInitialized.store(1u);
+  return vmInfo;
+}
+
+// Virtual Memory - Hardened Runtime Info
+// ======================================
+
+HardenedRuntimeInfo hardenedRuntimeInfo() noexcept {
+  return HardenedRuntimeInfo { getHardenedRuntimeFlags() }; // Flags come from the platform-specific helper.
+}
+
+// Virtual Memory - Protect JIT Memory
+// ===================================
+
+void protectJitMemory(ProtectJitAccess access) noexcept {
+#if !defined(ASMJIT_HAS_PTHREAD_JIT_WRITE_PROTECT_NP)
+  DebugUtils::unused(access); // No-op when `pthread_jit_write_protect_np()` is not available.
+#else
+  pthread_jit_write_protect_np(static_cast<uint32_t>(access));
+#endif
+}
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif
diff --git a/lib/lepton/asmjit/core/virtmem.h b/lib/lepton/asmjit/core/virtmem.h
new file mode 100644
index 0000000000..50f09457eb
--- /dev/null
+++ b/lib/lepton/asmjit/core/virtmem.h
@@ -0,0 +1,242 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_VIRTMEM_H_INCLUDED
+#define ASMJIT_CORE_VIRTMEM_H_INCLUDED
+
+#include "../core/api-config.h"
+#ifndef ASMJIT_NO_JIT
+
+#include "../core/globals.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \addtogroup asmjit_virtual_memory
+//! \{
+
+//! Virtual memory management.
+namespace VirtMem {
+
+//! Flushes instruction cache in the given region.
+//!
+//! Only useful on non-x86 architectures, however, it's a good practice to call it on any platform to make your
+//! code more portable.
+ASMJIT_API void flushInstructionCache(void* p, size_t size) noexcept;
+
+//! Virtual memory information.
+struct Info {
+  //! Virtual memory page size.
+  uint32_t pageSize;
+  //! Virtual memory page granularity.
+  uint32_t pageGranularity;
+};
+
+//! Returns virtual memory information, see `VirtMem::Info` for more details.
+ASMJIT_API Info info() noexcept;
+
+//! Virtual memory access and mmap-specific flags.
+enum class MemoryFlags : uint32_t {
+  //! No flags.
+  kNone = 0,
+
+  //! Memory is readable.
+  kAccessRead = 0x00000001u,
+
+  //! Memory is writable.
+  kAccessWrite = 0x00000002u,
+
+  //! Memory is executable.
+  kAccessExecute = 0x00000004u,
+
+  //! A combination of \ref MemoryFlags::kAccessRead and \ref MemoryFlags::kAccessWrite.
+  kAccessReadWrite = kAccessRead | kAccessWrite,
+
+  //! A combination of \ref MemoryFlags::kAccessRead and \ref MemoryFlags::kAccessWrite.
+  kAccessRW = kAccessRead | kAccessWrite,
+
+  //! A combination of \ref MemoryFlags::kAccessRead and \ref MemoryFlags::kAccessExecute.
+  kAccessRX = kAccessRead | kAccessExecute,
+
+  //! A combination of \ref MemoryFlags::kAccessRead, \ref MemoryFlags::kAccessWrite, and
+  //! \ref MemoryFlags::kAccessExecute.
+  kAccessRWX = kAccessRead | kAccessWrite | kAccessExecute,
+
+  //! Use a `MAP_JIT` flag available on Apple platforms (introduced by Mojave), which allows JIT code to be executed
+  //! in MAC bundles. This flag is not turned on by default, because when a process uses `fork()` the child process
+  //! has no access to the pages mapped with `MAP_JIT`, which could break code that doesn't expect this behavior.
+  //!
+  //! \note This flag can only be used with \ref VirtMem::alloc().
+  kMMapEnableMapJit = 0x00000010u,
+
+  //! Pass `PROT_MAX(PROT_READ)` to mmap() on platforms that support `PROT_MAX`.
+  //!
+  //! \note This flag can only be used with \ref VirtMem::alloc().
+  kMMapMaxAccessRead = 0x00000020u,
+  //! Pass `PROT_MAX(PROT_WRITE)` to mmap() on platforms that support `PROT_MAX`.
+  //!
+  //! \note This flag can only be used with \ref VirtMem::alloc().
+  kMMapMaxAccessWrite = 0x00000040u,
+  //! Pass `PROT_MAX(PROT_EXEC)` to mmap() on platforms that support `PROT_MAX`.
+  //!
+  //! \note This flag can only be used with \ref VirtMem::alloc().
+  kMMapMaxAccessExecute = 0x00000080u,
+
+  //! A combination of \ref MemoryFlags::kMMapMaxAccessRead and \ref MemoryFlags::kMMapMaxAccessWrite.
+  kMMapMaxAccessReadWrite = kMMapMaxAccessRead | kMMapMaxAccessWrite,
+
+  //! A combination of \ref MemoryFlags::kMMapMaxAccessRead and \ref MemoryFlags::kMMapMaxAccessWrite.
+  kMMapMaxAccessRW = kMMapMaxAccessRead | kMMapMaxAccessWrite,
+
+  //! A combination of \ref MemoryFlags::kMMapMaxAccessRead and \ref MemoryFlags::kMMapMaxAccessExecute.
+  kMMapMaxAccessRX = kMMapMaxAccessRead | kMMapMaxAccessExecute,
+
+  //! A combination of \ref MemoryFlags::kMMapMaxAccessRead, \ref MemoryFlags::kMMapMaxAccessWrite, and
+  //! \ref MemoryFlags::kMMapMaxAccessExecute.
+  kMMapMaxAccessRWX = kMMapMaxAccessRead | kMMapMaxAccessWrite | kMMapMaxAccessExecute,
+
+  //! Not an access flag, only used by `allocDualMapping()` to override the default allocation strategy to always use
+  //! a 'tmp' directory instead of "/dev/shm" (on POSIX platforms). Please note that this flag will be ignored if the
+  //! operating system allows to allocate an executable memory by a different API than `open()` or `shm_open()`. For
+  //! example on Linux `memfd_create()` is preferred and on BSDs `shm_open(SHM_ANON, ...)` is used if SHM_ANON is
+  //! defined.
+  //!
+  //! \note This flag can only be used with \ref VirtMem::allocDualMapping().
+  kMappingPreferTmp = 0x80000000u
+};
+ASMJIT_DEFINE_ENUM_FLAGS(MemoryFlags)
+
+//! Allocates virtual memory by either using `mmap()` (POSIX) or `VirtualAlloc()` (Windows).
+//!
+//! \note `size` should be aligned to page size, use \ref VirtMem::info() to obtain it. Invalid size will not be
+//! corrected by the implementation and the allocation would not succeed in such case.
+ASMJIT_API Error alloc(void** p, size_t size, MemoryFlags flags) noexcept;
+
+//! Releases virtual memory previously allocated by \ref VirtMem::alloc().
+//!
+//! \note The size must be the same as used by \ref VirtMem::alloc(). If the size is not the same value the call
+//! will fail on any POSIX system, but pass on Windows, because it's implemented differently.
+ASMJIT_API Error release(void* p, size_t size) noexcept;
+
+//! A cross-platform wrapper around `mprotect()` (POSIX) and `VirtualProtect()` (Windows).
+ASMJIT_API Error protect(void* p, size_t size, MemoryFlags flags) noexcept;
+
+//! Dual memory mapping used to map an anonymous memory into two memory regions where one region is read-only, but
+//! executable, and the second region is read+write, but not executable. See \ref VirtMem::allocDualMapping() for
+//! more details.
+struct DualMapping {
+  //! Pointer to data with 'Read+Execute' access (this memory is not writable).
+  void* rx;
+  //! Pointer to data with 'Read+Write' access (this memory is not executable).
+  void* rw;
+};
+
+//! Allocates virtual memory and creates two views of it where the first view has no write access. This is an addition
+//! to the API that should be used in cases in which the operating system either enforces W^X security policy or the
+//! application wants to use this policy by default to improve security and prevent an accidental (or purposed)
+//! self-modifying code.
+//!
+//! The memory returned in the `dm` are two independent mappings of the same shared memory region. You must use
+//! \ref VirtMem::releaseDualMapping() to release it when it's no longer needed. Never use `VirtMem::release()` to
+//! release the memory returned by `allocDualMapping()` as that would fail on Windows.
+//!
+//! \remarks Both pointers in `dm` would be set to `nullptr` if the function fails.
+ASMJIT_API Error allocDualMapping(DualMapping* dm, size_t size, MemoryFlags flags) noexcept;
+
+//! Releases virtual memory mapping previously allocated by \ref VirtMem::allocDualMapping().
+//!
+//! \remarks Both pointers in `dm` would be set to `nullptr` if the function succeeds.
+ASMJIT_API Error releaseDualMapping(DualMapping* dm, size_t size) noexcept;
+
+//! Hardened runtime flags.
+enum class HardenedRuntimeFlags : uint32_t {
+  //! No flags.
+  kNone = 0,
+
+  //! Hardened runtime is enabled - it's not possible to have "Write & Execute" memory protection. The runtime
+  //! enforces W^X (either write or execute).
+  //!
+  //! \note If the runtime is hardened it means that an operating system specific protection is used. For example on
+  //! MacOS platform it's possible to allocate memory with MAP_JIT flag and then use `pthread_jit_write_protect_np()`
+  //! to temporarily swap access permissions for the current thread. Dual mapping is also a possibility on X86/X64
+  //! architecture.
+  kEnabled = 0x00000001u,
+
+  //! Read+Write+Execute can only be allocated with MAP_JIT flag (Apple specific).
+  kMapJit = 0x00000002u
+};
+ASMJIT_DEFINE_ENUM_FLAGS(HardenedRuntimeFlags)
+
+//! Hardened runtime information.
+struct HardenedRuntimeInfo {
+  //! Hardened runtime flags.
+  HardenedRuntimeFlags flags;
+};
+
+//! Returns runtime features provided by the OS.
+ASMJIT_API HardenedRuntimeInfo hardenedRuntimeInfo() noexcept;
+
+//! Values that can be used with `protectJitMemory()` function.
+enum class ProtectJitAccess : uint32_t {
+  //! Protect JIT memory with Read+Write permissions.
+  kReadWrite = 0,
+  //! Protect JIT memory with Read+Execute permissions.
+  kReadExecute = 1
+};
+
+//! Protects access of memory mapped with MAP_JIT flag for the current thread.
+//!
+//! \note This feature is only available on Apple hardware (AArch64) at the moment and uses a non-portable
+//! `pthread_jit_write_protect_np()` call when available.
+//!
+//! This function must be called before and after a memory mapped with MAP_JIT flag is modified. Example:
+//!
+//! ```
+//! void* codePtr = ...;
+//! size_t codeSize = ...;
+//!
+//! VirtMem::protectJitMemory(VirtMem::ProtectJitAccess::kReadWrite);
+//! memcpy(codePtr, source, codeSize);
+//! VirtMem::protectJitMemory(VirtMem::ProtectJitAccess::kReadExecute);
+//! VirtMem::flushInstructionCache(codePtr, codeSize);
+//! ```
+//!
+//! See \ref ProtectJitReadWriteScope, which makes it simpler than the code above.
+ASMJIT_API void protectJitMemory(ProtectJitAccess access) noexcept;
+
+//! JIT protection scope that prepares the given memory block to be written to in the current thread.
+//!
+//! It calls `VirtMem::protectJitMemory(VirtMem::ProtectJitAccess::kReadWrite)` at construction time and
+//! `VirtMem::protectJitMemory(VirtMem::ProtectJitAccess::kReadExecute)` combined with `flushInstructionCache()`
+//! in destructor. The purpose of this class is to make writing to JIT memory easier.
+class ProtectJitReadWriteScope {
+public:
+  void* _rxPtr; //!< Start of the memory block, passed to `flushInstructionCache()` by the destructor.
+  size_t _size; //!< Size of the memory block, passed to `flushInstructionCache()` by the destructor.
+
+  //! Switches JIT memory of the current thread to Read+Write and remembers the block to flush later.
+  ASMJIT_FORCE_INLINE ProtectJitReadWriteScope(void* rxPtr, size_t size) noexcept
+    : _rxPtr(rxPtr),
+      _size(size) {
+    protectJitMemory(ProtectJitAccess::kReadWrite);
+  }
+
+  // Not copyable (the copy constructor is deleted).
+  ProtectJitReadWriteScope(const ProtectJitReadWriteScope& other) = delete;
+
+  //! Switches JIT memory back to Read+Execute and flushes instruction cache of the remembered block.
+  ASMJIT_FORCE_INLINE  ~ProtectJitReadWriteScope() noexcept {
+    protectJitMemory(ProtectJitAccess::kReadExecute);
+    flushInstructionCache(_rxPtr, _size);
+  }
+};
+
+} // VirtMem
+
+//! \}
+
+ASMJIT_END_NAMESPACE
+
+#endif
+#endif // ASMJIT_CORE_VIRTMEM_H_INCLUDED
diff --git a/lib/lepton/asmjit/core/zone.cpp b/lib/lepton/asmjit/core/zone.cpp
new file mode 100644
index 0000000000..d68e110b48
--- /dev/null
+++ b/lib/lepton/asmjit/core/zone.cpp
@@ -0,0 +1,353 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#include "../core/support.h"
+#include "../core/zone.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+// Zone - Globals
+// ==============
+
+// Zero size block used by `Zone` that doesn't have any memory allocated. Should be allocated in read-only memory
+// and should never be modified.
+const Zone::Block Zone::_zeroBlock = { nullptr, nullptr, 0 };
+
+// Zone - Init & Reset
+// ===================
+
+void Zone::_init(size_t blockSize, size_t blockAlignment, const Support::Temporary* temporary) noexcept {
+  ASMJIT_ASSERT(blockSize >= kMinBlockSize);
+  ASMJIT_ASSERT(blockSize <= kMaxBlockSize);
+  ASMJIT_ASSERT(blockAlignment <= 64);
+
+  // Just to make the compiler happy...
+  constexpr size_t kSizeMask = (Support::allOnes<size_t>() >> 4);
+  constexpr size_t kAlignmentShiftMask = 0x7u;
+
+  _assignZeroBlock();
+  _blockSize = blockSize & kSizeMask;
+  _isTemporary = (temporary != nullptr);
+  _blockAlignmentShift = Support::ctz(blockAlignment) & kAlignmentShiftMask;
+
+  // Without temporary storage there is no first block to set up.
+  if (!temporary)
+    return;
+
+  // The temporary storage becomes the first block; its size excludes `kBlockSize`.
+  Block* first = temporary->data<Block>();
+  first->prev = nullptr;
+  first->next = nullptr;
+  ASMJIT_ASSERT(temporary->size() >= kBlockSize);
+  first->size = temporary->size() - kBlockSize;
+  _assignBlock(first);
+}
+
+void Zone::reset(ResetPolicy resetPolicy) noexcept {
+  Block* cur = _block;
+
+  // The zone still points to the shared zero block, which can't be altered - there is nothing to reset.
+  if (cur == &_zeroBlock)
+    return;
+
+  if (resetPolicy == ResetPolicy::kHard) {
+    Block* initial = const_cast<Zone::Block*>(&_zeroBlock);
+    _ptr = initial->data();
+    _end = initial->data();
+    _block = initial;
+
+    // Since cur can be in the middle of the double-linked list, we have to traverse both directions (`prev` and
+    // `next`) separately to visit all.
+    Block* next = cur->next;
+    do {
+      Block* prev = cur->prev;
+
+      // If this is the first block and this ZoneTmp is temporary then the first block is statically allocated.
+      // We cannot free it and it makes sense to keep it even when this is hard reset.
+      if (prev == nullptr && _isTemporary) {
+        cur->prev = nullptr;
+        cur->next = nullptr;
+        _assignBlock(cur);
+        break;
+      }
+
+      ::free(cur);
+      cur = prev;
+    } while (cur);
+    // Free the blocks that followed the block that was current when `reset()` was called.
+    cur = next;
+    while (cur) {
+      next = cur->next;
+      ::free(cur);
+      cur = next;
+    }
+  }
+  else { // Any other policy keeps all blocks and rewinds to the first one.
+    while (cur->prev)
+      cur = cur->prev;
+    _assignBlock(cur);
+  }
+}
+
+// Zone - Alloc
+// ============
+
+void* Zone::_alloc(size_t size, size_t alignment) noexcept {
+  // Serves `size` bytes from the block following the current one or from a new block; returns null on failure.
+  Block* curBlock = _block;
+  Block* next = curBlock->next;
+  size_t rawBlockAlignment = blockAlignment();
+  size_t minimumAlignment = Support::max<size_t>(alignment, rawBlockAlignment);
+
+  // If the `Zone` has been cleared the current block doesn't have to be the last one. Check if there is a block
+  // that can be used instead of allocating a new one. If there is a `next` block it's completely unused, we don't
+  // have to check for remaining bytes in that case.
+  if (next) {
+    uint8_t* ptr = Support::alignUp(next->data(), minimumAlignment);
+    uint8_t* end = Support::alignDown(next->data() + next->size, rawBlockAlignment);
+    // Aligning up can move `ptr` past `end`; `end - ptr` would then wrap around when converted to `size_t`.
+    if (ptr <= end && size <= (size_t)(end - ptr)) {
+      _block = next;
+      _ptr = ptr + size;
+      _end = end;
+      return static_cast<void*>(ptr);
+    }
+  }
+
+  size_t blockAlignmentOverhead = alignment - Support::min<size_t>(alignment, Globals::kAllocAlignment);
+  size_t newSize = Support::max(blockSize(), size);
+
+  // Prevent arithmetic overflow.
+  if (ASMJIT_UNLIKELY(newSize > SIZE_MAX - kBlockSize - blockAlignmentOverhead))
+    return nullptr;
+
+  // Allocate new block - we add alignment overhead to `newSize`, which becomes the new block size, and we also add
+  // `kBlockSize` to the size passed to the allocator as it includes members of `Zone::Block` structure.
+  newSize += blockAlignmentOverhead;
+  Block* newBlock = static_cast<Block*>(::malloc(newSize + kBlockSize));
+
+  if (ASMJIT_UNLIKELY(!newBlock))
+    return nullptr;
+
+  // Link the new block after the current one, then align the pointer to `minimumAlignment` and the end of the
+  // block down to `rawBlockAlignment`.
+  {
+    newBlock->prev = nullptr;
+    newBlock->next = nullptr;
+    newBlock->size = newSize;
+
+    if (curBlock != &_zeroBlock) {
+      newBlock->prev = curBlock;
+      curBlock->next = newBlock;
+
+      // Does only happen if there is a next block, but the requested memory can't fit into it. In this case a new
+      // buffer is allocated and inserted between the current block and the next one.
+      if (next) {
+        newBlock->next = next;
+        next->prev = newBlock;
+      }
+    }
+
+    uint8_t* ptr = Support::alignUp(newBlock->data(), minimumAlignment);
+    uint8_t* end = Support::alignDown(newBlock->data() + newSize, rawBlockAlignment);
+
+    _ptr = ptr + size;
+    _end = end;
+    _block = newBlock;
+
+    ASMJIT_ASSERT(_ptr <= _end);
+    return static_cast<void*>(ptr);
+  }
+}
+
+void* Zone::allocZeroed(size_t size, size_t alignment) noexcept {
+  // Same as `alloc()`, except that a successfully allocated range is cleared before it's returned.
+  void* mem = alloc(size, alignment);
+
+  return ASMJIT_LIKELY(mem != nullptr) ? memset(mem, 0, size) : nullptr;
+}
+
+void* Zone::dup(const void* data, size_t size, bool nullTerminate) noexcept {
+  if (ASMJIT_UNLIKELY(data == nullptr || size == 0))
+    return nullptr;
+
+  // The optional terminator needs one extra byte, so `size + 1` must not wrap around.
+  ASMJIT_ASSERT(size != SIZE_MAX);
+  uint8_t* copy = allocT<uint8_t>(nullTerminate ? size + 1 : size);
+  if (ASMJIT_LIKELY(copy != nullptr)) {
+    memcpy(copy, data, size);
+    if (nullTerminate) copy[size] = '\0';
+  }
+  return static_cast<void*>(copy);
+}
+
+char* Zone::sformat(const char* fmt, ...) noexcept {
+  // Formats into a fixed temporary buffer and duplicates the result; longer output is truncated to fit.
+  if (ASMJIT_UNLIKELY(!fmt))
+    return nullptr;
+
+  char buf[512];
+  va_list ap;
+  va_start(ap, fmt);
+  int result = vsnprintf(buf, ASMJIT_ARRAY_SIZE(buf), fmt, ap);
+  va_end(ap);
+  // `vsnprintf()` reports the untruncated length (negative on error), so clamp it before indexing `buf`.
+  size_t size = result < 0 ? size_t(0) : Support::min<size_t>(size_t(result), ASMJIT_ARRAY_SIZE(buf) - 1);
+  buf[size++] = 0;
+  return static_cast<char*>(dup(buf, size));
+}
+
+// ZoneAllocator - Utilities
+// =========================
+
+#if defined(ASMJIT_BUILD_DEBUG)
+static bool ZoneAllocator_hasDynamicBlock(ZoneAllocator* self, ZoneAllocator::DynamicBlock* block) noexcept {
+  // Linear search through the allocator's list of dynamic blocks, following `next` links.
+  for (ZoneAllocator::DynamicBlock* it = self->_dynamicBlocks; it != nullptr; it = it->next) {
+    if (it == block)
+      return true;
+  }
+  // Reached the end of the list without a match.
+  return false;
+}
+#endif
+
+// ZoneAllocator - Init & Reset
+// ============================
+
+void ZoneAllocator::reset(Zone* zone) noexcept {
+  // Release every dynamic block that is still linked; `next` is read before the node is freed.
+  for (DynamicBlock* cur = _dynamicBlocks; cur != nullptr; ) {
+    DynamicBlock* following = cur->next;
+    ::free(cur);
+    cur = following;
+  }
+
+  // Wipe all members (slots and the dynamic block list), then attach the new `zone`,
+  // which may be null to leave the allocator uninitialized.
+  memset(this, 0, sizeof(*this));
+  _zone = zone;
+}
+
+// asmjit::ZoneAllocator - Alloc & Release
+// =======================================
+
+void* ZoneAllocator::_alloc(size_t size, size_t& allocatedSize) noexcept {
+  ASMJIT_ASSERT(isInitialized());
+  // Returns a block of at least `size` bytes and stores its real size to `allocatedSize` (0 on any failure).
+  // Use the memory pool only if the requested block has a reasonable size.
+  uint32_t slot;
+  if (_getSlotIndex(size, slot, allocatedSize)) {
+    // Slot reuse - pop the first released chunk of this size class, if there is one.
+    uint8_t* p = reinterpret_cast<uint8_t*>(_slots[slot]);
+    size = allocatedSize;
+
+    if (p) {
+      _slots[slot] = reinterpret_cast<Slot*>(p)->next;
+      return p;
+    }
+    // Nothing to reuse - carve a fresh chunk from the current block of the zone.
+    _zone->align(kBlockAlignment);
+    p = _zone->ptr();
+    size_t remain = (size_t)(_zone->end() - p);
+
+    if (ASMJIT_LIKELY(remain >= size)) {
+      _zone->setPtr(p + size);
+      return p;
+    }
+    else {
+      // Distribute the remaining memory to suitable slots, if possible.
+      if (remain >= kLoGranularity) {
+        do {
+          size_t distSize = Support::min<size_t>(remain, kLoMaxSize);
+          uint32_t distSlot = uint32_t((distSize - kLoGranularity) / kLoGranularity);
+          ASMJIT_ASSERT(distSlot < kLoCount);
+
+          reinterpret_cast<Slot*>(p)->next = _slots[distSlot];
+          _slots[distSlot] = reinterpret_cast<Slot*>(p);
+
+          p += distSize;
+          remain -= distSize;
+        } while (remain >= kLoGranularity);
+        _zone->setPtr(p);
+      }
+
+      p = static_cast<uint8_t*>(_zone->_alloc(size, kBlockAlignment));
+      if (ASMJIT_UNLIKELY(!p)) {
+        allocatedSize = 0;
+        return nullptr;
+      }
+
+      return p;
+    }
+  }
+  else {
+    // Allocate a dynamic block.
+    size_t kBlockOverhead = sizeof(DynamicBlock) + sizeof(DynamicBlock*) + kBlockAlignment;
+
+    // Handle a possible overflow of `size + kBlockOverhead` the same way as a failed `malloc()`.
+    void* p = nullptr;
+    if (ASMJIT_LIKELY(kBlockOverhead < SIZE_MAX - size))
+      p = ::malloc(size + kBlockOverhead);
+
+    if (ASMJIT_UNLIKELY(!p)) {
+      allocatedSize = 0;
+      return nullptr;
+    }
+
+    // Link as first in `_dynamicBlocks` double-linked list.
+    DynamicBlock* block = static_cast<DynamicBlock*>(p);
+    DynamicBlock* next = _dynamicBlocks;
+
+    if (next)
+      next->prev = block;
+
+    block->prev = nullptr;
+    block->next = next;
+    _dynamicBlocks = block;
+
+    // Align the pointer to the guaranteed alignment and store `DynamicBlock`
+    // at the beginning of the memory block, so `_releaseDynamic()` can find it.
+    p = Support::alignUp(static_cast<uint8_t*>(p) + sizeof(DynamicBlock) + sizeof(DynamicBlock*), kBlockAlignment);
+    reinterpret_cast<DynamicBlock**>(p)[-1] = block;
+
+    allocatedSize = size;
+    return p;
+  }
+}
+
+void* ZoneAllocator::_allocZeroed(size_t size, size_t& allocatedSize) noexcept {
+  ASMJIT_ASSERT(isInitialized());
+  // The whole block is cleared, i.e. `allocatedSize` bytes, which can be more than the requested `size`.
+  void* mem = _alloc(size, allocatedSize);
+
+  return ASMJIT_LIKELY(mem != nullptr) ? memset(mem, 0, allocatedSize) : nullptr;
+}
+
+void ZoneAllocator::_releaseDynamic(void* p, size_t size) noexcept {
+  DebugUtils::unused(size);
+  ASMJIT_ASSERT(isInitialized());
+
+  // `_alloc()` stored the owning `DynamicBlock*` in the pointer-sized cell right before `p`.
+  DynamicBlock** header = static_cast<DynamicBlock**>(p) - 1;
+  DynamicBlock* owner = *header;
+  ASMJIT_ASSERT(ZoneAllocator_hasDynamicBlock(this, owner));
+
+  // Detach `owner` from the double-linked list; the list head moves when `owner` was the first block.
+  DynamicBlock* before = owner->prev;
+  DynamicBlock* after = owner->next;
+
+  if (before == nullptr)
+    _dynamicBlocks = after;
+  else
+    before->next = after;
+
+  if (after != nullptr)
+    after->prev = before;
+  ::free(owner);
+}
+
+ASMJIT_END_NAMESPACE
diff --git a/lib/lepton/asmjit/core/zone.h b/lib/lepton/asmjit/core/zone.h
new file mode 100644
index 0000000000..eaea252903
--- /dev/null
+++ b/lib/lepton/asmjit/core/zone.h
@@ -0,0 +1,615 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_ZONE_H_INCLUDED
+#define ASMJIT_CORE_ZONE_H_INCLUDED
+
+#include "../core/support.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \addtogroup asmjit_zone
+//! \{
+
+//! Zone memory.
+//!
+//! Zone is an incremental memory allocator that allocates memory by simply incrementing a pointer. It allocates
+//! blocks of memory by using C's `malloc()`, but divides these blocks into smaller segments requested by calling
+//! `Zone::alloc()` and friends.
+//!
+//! Zone has no function to release the allocated memory. It has to be released all at once by calling `reset()`.
+//! If you need a more friendly allocator that also supports `release()`, consider using `Zone` with `ZoneAllocator`.
+class Zone {
+public:
+  ASMJIT_NONCOPYABLE(Zone)
+
+  //! \cond INTERNAL
+
+  //! A single block of memory managed by `Zone` - a header that is immediately followed by the block's data.
+  struct Block {
+    inline uint8_t* data() const noexcept {
+      return const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(this) + sizeof(*this));
+    }
+
+    //! Link to the previous block.
+    Block* prev;
+    //! Link to the next block.
+    Block* next;
+    //! Size of the block's data (the header itself is not counted).
+    size_t size;
+  };
+
+  enum Limits : size_t {
+    kBlockSize = sizeof(Block), // Size of the `Block` header that precedes the data.
+    kBlockOverhead = Globals::kAllocOverhead + kBlockSize,
+
+    kMinBlockSize = 64, // The number is ridiculously small, but still possible.
+    kMaxBlockSize = size_t(1) << (sizeof(size_t) * 8 - 4 - 1),
+    kMinAlignment = 1,
+    kMaxAlignment = 64
+  };
+
+  //! Pointer in the current block.
+  uint8_t* _ptr;
+  //! End of the current block.
+  uint8_t* _end;
+  //! Current block.
+  Block* _block;
+
+  union {
+    struct {
+      //! Default block size.
+      size_t _blockSize : Support::bitSizeOf<size_t>() - 4;
+      //! First block is temporary (ZoneTmp).
+      size_t _isTemporary : 1;
+      //! Block alignment (1 << alignment).
+      size_t _blockAlignmentShift : 3;
+    };
+    size_t _packedData; //!< The three bitfields above viewed as one value, so they can be copied or swapped at once.
+  };
+
+  static ASMJIT_API const Block _zeroBlock;
+
+  //! \endcond
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Creates a new Zone.
+  //!
+  //! The `blockSize` parameter describes the default size of the block. If the `size` parameter passed to `alloc()`
+  //! is greater than the default size `Zone` will allocate and use a larger block, but it will not change the
+  //! default `blockSize`.
+  //!
+  //! It's not required, but it's good practice to set `blockSize` to a reasonable value that depends on the usage
+  //! of `Zone`. Greater block sizes are generally safer and perform better than unreasonably low block sizes.
+  inline explicit Zone(size_t blockSize, size_t blockAlignment = 1) noexcept {
+    _init(blockSize, blockAlignment, nullptr);
+  }
+
+  //! Creates a new Zone with a first block pointing to a `temporary` memory.
+  inline Zone(size_t blockSize, size_t blockAlignment, const Support::Temporary& temporary) noexcept {
+    _init(blockSize, blockAlignment, &temporary);
+  }
+
+  //! \overload
+  inline Zone(size_t blockSize, size_t blockAlignment, const Support::Temporary* temporary) noexcept {
+    _init(blockSize, blockAlignment, temporary);
+  }
+
+  //! Moves an existing `Zone`.
+  //!
+  //! \note You cannot move an existing `ZoneTmp` as it uses embedded storage. Attempting to move `ZoneTmp` would
+  //! result in assertion failure in debug mode and undefined behavior in release mode.
+  inline Zone(Zone&& other) noexcept
+    : _ptr(other._ptr),
+      _end(other._end),
+      _block(other._block),
+      _packedData(other._packedData) {
+    ASMJIT_ASSERT(!other.isTemporary());
+    other._block = const_cast<Block*>(&_zeroBlock);
+    other._ptr = other._block->data();
+    other._end = other._block->data();
+  }
+
+  //! Destroys the `Zone` instance.
+  //!
+  //! This will destroy the `Zone` instance and release all blocks of memory allocated by it. It performs implicit
+  //! `reset(ResetPolicy::kHard)`.
+  inline ~Zone() noexcept { reset(ResetPolicy::kHard); }
+
+  ASMJIT_API void _init(size_t blockSize, size_t blockAlignment, const Support::Temporary* temporary) noexcept;
+
+  //! Resets the `Zone` invalidating all blocks allocated.
+  //!
+  //! See `Globals::ResetPolicy` for more details.
+  ASMJIT_API void reset(ResetPolicy resetPolicy = ResetPolicy::kSoft) noexcept;
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Tests whether this `Zone` is actually a `ZoneTmp` that uses temporary memory.
+  inline bool isTemporary() const noexcept { return _isTemporary != 0; }
+
+  //! Returns the default block size.
+  inline size_t blockSize() const noexcept { return _blockSize; }
+  //! Returns the default block alignment.
+  inline size_t blockAlignment() const noexcept { return size_t(1) << _blockAlignmentShift; }
+  //! Returns remaining size of the current block.
+  inline size_t remainingSize() const noexcept { return (size_t)(_end - _ptr); }
+
+  //! Returns the current zone cursor (dangerous).
+  //!
+  //! This is a function that can be used to get exclusive access to the current block's memory buffer.
+  template<typename T = uint8_t>
+  inline T* ptr() noexcept { return reinterpret_cast<T*>(_ptr); }
+
+  //! Returns the end of the current zone block, only useful if you use `ptr()`.
+  template<typename T = uint8_t>
+  inline T* end() noexcept { return reinterpret_cast<T*>(_end); }
+
+  //! Sets the current zone pointer to `ptr` (must be within the current block).
+  template<typename T>
+  inline void setPtr(T* ptr) noexcept {
+    uint8_t* p = reinterpret_cast<uint8_t*>(ptr);
+    ASMJIT_ASSERT(p >= _ptr && p <= _end);
+    _ptr = p;
+  }
+
+  //! Sets the end zone pointer to `end` (must be within the current block).
+  template<typename T>
+  inline void setEnd(T* end) noexcept {
+    uint8_t* p = reinterpret_cast<uint8_t*>(end);
+    ASMJIT_ASSERT(p >= _ptr && p <= _end);
+    _end = p;
+  }
+
+  //! \}
+
+  //! \name Utilities
+  //! \{
+
+  inline void swap(Zone& other) noexcept {
+    // Swapping a temporary zone could lead to a disaster as it uses embedded storage (see `ZoneTmp`).
+    ASMJIT_ASSERT(!this->isTemporary());
+    ASMJIT_ASSERT(!other.isTemporary());
+
+    std::swap(_ptr, other._ptr);
+    std::swap(_end, other._end);
+    std::swap(_block, other._block);
+    std::swap(_packedData, other._packedData);
+  }
+
+  //! Aligns the current pointer to `alignment`.
+  inline void align(size_t alignment) noexcept {
+    _ptr = Support::min(Support::alignUp(_ptr, alignment), _end);
+  }
+
+  //! Ensures the remaining size is at least equal or greater than `size`.
+  //!
+  //! \note This function doesn't respect any alignment. If you need to ensure there is enough room for an aligned
+  //! allocation you need to call `align()` before calling `ensure()`.
+  inline Error ensure(size_t size) noexcept {
+    if (size <= remainingSize())
+      return kErrorOk;
+    else // NOTE(review): the new block is requested by `_alloc(0, 1)`, not by `size` - confirm large sizes are covered.
+      return _alloc(0, 1) ? kErrorOk : DebugUtils::errored(kErrorOutOfMemory);
+  }
+
+  inline void _assignBlock(Block* block) noexcept {
+    size_t alignment = blockAlignment();
+    _ptr = Support::alignUp(block->data(), alignment);
+    _end = Support::alignDown(block->data() + block->size, alignment);
+    _block = block;
+  }
+
+  inline void _assignZeroBlock() noexcept {
+    Block* block = const_cast<Block*>(&_zeroBlock);
+    _ptr = block->data();
+    _end = block->data();
+    _block = block;
+  }
+
+  //! \}
+
+  //! \name Allocation
+  //! \{
+
+  //! Allocates the requested memory specified by `size`.
+  //!
+  //! Pointer returned is valid until the `Zone` instance is destroyed or reset by calling `reset()`. If you plan to
+  //! construct a C++ object in the returned memory use placement `new` and an explicit destructor call:
+  //!
+  //! ```
+  //! using namespace asmjit;
+  //!
+  //! class Object { ... };
+  //!
+  //! // Create Zone with default block size of approximately 65536 bytes.
+  //! Zone zone(65536 - Zone::kBlockOverhead);
+  //!
+  //! // Create your objects using zone object allocating, for example:
+  //! Object* obj = static_cast<Object*>( zone.alloc(sizeof(Object)) );
+  //!
+  //! if (!obj) {
+  //!   // Handle out of memory error.
+  //! }
+  //!
+  //! // Placement `new` can be used to instantiate it.
+  //! new(obj) Object();
+  //!
+  //! // ... lifetime of your objects ...
+  //!
+  //! // To destroy the instance (if required).
+  //! obj->~Object();
+  //!
+  //! // Reset or destroy `Zone`.
+  //! zone.reset();
+  //! ```
+  inline void* alloc(size_t size) noexcept {
+    if (ASMJIT_UNLIKELY(size > remainingSize()))
+      return _alloc(size, 1);
+
+    uint8_t* ptr = _ptr;
+    _ptr += size;
+    return static_cast<void*>(ptr);
+  }
+
+  //! Allocates the requested memory specified by `size` and `alignment`.
+  inline void* alloc(size_t size, size_t alignment) noexcept {
+    ASMJIT_ASSERT(Support::isPowerOf2(alignment));
+    uint8_t* ptr = Support::alignUp(_ptr, alignment);
+    // `ptr >= _end` is tested first so that `_end - ptr` is never computed for a pointer past the end.
+    if (ptr >= _end || size > (size_t)(_end - ptr))
+      return _alloc(size, alignment);
+
+    _ptr = ptr + size;
+    return static_cast<void*>(ptr);
+  }
+
+  //! Allocates the requested memory specified by `size` without doing any checks.
+  //!
+  //! Can only be called if `remainingSize()` returns size at least equal to `size`.
+  inline void* allocNoCheck(size_t size) noexcept {
+    ASMJIT_ASSERT(remainingSize() >= size);
+
+    uint8_t* ptr = _ptr;
+    _ptr += size;
+    return static_cast<void*>(ptr);
+  }
+
+  //! Allocates the requested memory specified by `size` and `alignment` without doing any checks.
+  //!
+  //! Performs the same operation as `Zone::allocNoCheck(size)` with `alignment` applied.
+  inline void* allocNoCheck(size_t size, size_t alignment) noexcept {
+    ASMJIT_ASSERT(Support::isPowerOf2(alignment));
+
+    uint8_t* ptr = Support::alignUp(_ptr, alignment);
+    ASMJIT_ASSERT(size <= (size_t)(_end - ptr));
+
+    _ptr = ptr + size;
+    return static_cast<void*>(ptr);
+  }
+
+  //! Allocates `size` bytes of zeroed memory. See `alloc()` for more details.
+  ASMJIT_API void* allocZeroed(size_t size, size_t alignment = 1) noexcept;
+
+  //! Like `alloc()`, but the return pointer is casted to `T*`.
+  template<typename T>
+  inline T* allocT(size_t size = sizeof(T), size_t alignment = alignof(T)) noexcept {
+    return static_cast<T*>(alloc(size, alignment));
+  }
+
+  //! Like `allocNoCheck()`, but the return pointer is casted to `T*`.
+  template<typename T>
+  inline T* allocNoCheckT(size_t size = sizeof(T), size_t alignment = alignof(T)) noexcept {
+    return static_cast<T*>(allocNoCheck(size, alignment));
+  }
+
+  //! Like `allocZeroed()`, but the return pointer is casted to `T*`.
+  template<typename T>
+  inline T* allocZeroedT(size_t size = sizeof(T), size_t alignment = alignof(T)) noexcept {
+    return static_cast<T*>(allocZeroed(size, alignment));
+  }
+
+  //! Like `new(std::nothrow) T(...)`, but allocated by `Zone`.
+  template<typename T>
+  inline T* newT() noexcept {
+    void* p = alloc(sizeof(T), alignof(T));
+    if (ASMJIT_UNLIKELY(!p))
+      return nullptr;
+    return new(p) T();
+  }
+
+  //! Like `new(std::nothrow) T(...)`, but allocated by `Zone`.
+  template<typename T, typename... Args>
+  inline T* newT(Args&&... args) noexcept {
+    void* p = alloc(sizeof(T), alignof(T));
+    if (ASMJIT_UNLIKELY(!p))
+      return nullptr;
+    return new(p) T(std::forward<Args>(args)...);
+  }
+
+  //! \cond INTERNAL
+  //!
+  //! Internal alloc function used by other inlines.
+  ASMJIT_API void* _alloc(size_t size, size_t alignment) noexcept;
+  //! \endcond
+
+  //! Duplicates `size` bytes of `data`, optionally null terminated. Returns null if `data` is null or `size` is 0.
+  ASMJIT_API void* dup(const void* data, size_t size, bool nullTerminate = false) noexcept;
+
+  //! Aligns the current pointer to `alignment` and then duplicates the data by calling `dup()`.
+  inline void* dupAligned(const void* data, size_t size, size_t alignment, bool nullTerminate = false) noexcept {
+    align(alignment);
+    return dup(data, size, nullTerminate);
+  }
+
+  //! Helper to duplicate a formatted string, the string is formatted into a temporary buffer of 512 bytes.
+  ASMJIT_API char* sformat(const char* str, ...) noexcept;
+
+  //! \}
+};
+
+//! \ref Zone with `N` bytes of a static storage, used for the initial block.
+//!
+//! Temporary zones are used in cases where it's known that some memory will be required, but in many cases it won't
+//! exceed N bytes, so the whole operation can be performed without a dynamic memory allocation.
+template<size_t N>
+class ZoneTmp : public Zone {
+public:
+  ASMJIT_NONCOPYABLE(ZoneTmp)
+
+  //! Temporary storage of `N` bytes, embedded after \ref Zone.
+  struct Storage {
+    char data[N];
+  } _storage;
+
+  //! Creates a temporary zone that starts in `_storage`. Dynamic block size is specified by `blockSize`.
+  inline explicit ZoneTmp(size_t blockSize, size_t blockAlignment = 1) noexcept
+    : Zone(blockSize, blockAlignment, Support::Temporary(_storage.data, N)) {}
+};
+
+//! Zone-based memory allocator that uses an existing `Zone` and provides a `release()` functionality on top of it.
+//! It uses `Zone` only for chunks that can be pooled, and uses libc `malloc()` for chunks that are large.
+//!
+//! The advantage of ZoneAllocator is that it can allocate small chunks of memory really fast, and these chunks,
+//! when released, will be reused by consecutive calls to `alloc()`. Also, since ZoneAllocator uses `Zone`, you can
+//! turn any `Zone` into a `ZoneAllocator`, and use it in your `Pass` when necessary.
+//!
+//! ZoneAllocator is used by AsmJit containers to make containers having only few elements fast (and lightweight)
+//! and to allow them to grow and use dynamic blocks when they require more storage.
+class ZoneAllocator {
+public:
+  ASMJIT_NONCOPYABLE(ZoneAllocator)
+
+  //! \cond INTERNAL
+
+  // In short, we pool chunks of these sizes:
+  //   [32, 64, 96, 128, 192, 256, 320, 384, 448, 512]
+
+  enum : uint32_t {
+    //! How many bytes per a low granularity pool (has to be at least 16).
+    kLoGranularity = 32,
+    //! Number of slots of a low granularity pool.
+    kLoCount = 4,
+    //! Maximum size of a block that can be allocated in a low granularity pool.
+    kLoMaxSize = kLoGranularity * kLoCount,
+
+    //! How many bytes per a high granularity pool.
+    kHiGranularity = 64,
+    //! Number of slots of a high granularity pool.
+    kHiCount = 6,
+    //! Maximum size of a block that can be allocated in a high granularity pool.
+    kHiMaxSize = kLoMaxSize + kHiGranularity * kHiCount,
+
+    //! Alignment of every pointer returned by `alloc()`.
+    kBlockAlignment = kLoGranularity
+  };
+
+  //! Single-linked list used to store unused chunks.
+  struct Slot {
+    //! Link to a next slot in a single-linked list.
+    Slot* next;
+  };
+
+  //! A block of memory that has been allocated dynamically and is not part of block-list used by the allocator.
+  //! This is used to keep track of all these blocks so they can be freed by `reset()` if not freed explicitly.
+  struct DynamicBlock {
+    DynamicBlock* prev;
+    DynamicBlock* next;
+  };
+
+  //! \endcond
+
+  //! \name Members
+  //! \{
+
+  //! Zone used to allocate memory that fits into slots.
+  Zone* _zone;
+  //! Indexed slots containing released memory.
+  Slot* _slots[kLoCount + kHiCount];
+  //! Dynamic blocks for larger allocations (no slots).
+  DynamicBlock* _dynamicBlocks;
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Creates a new `ZoneAllocator`.
+  //!
+  //! \note To use it, you must first `init()` it.
+  inline ZoneAllocator() noexcept {
+    memset(this, 0, sizeof(*this));
+  }
+
+  //! Creates a new `ZoneAllocator` initialized to use `zone`.
+  inline explicit ZoneAllocator(Zone* zone) noexcept {
+    memset(this, 0, sizeof(*this));
+    _zone = zone;
+  }
+
+  //! Destroys the `ZoneAllocator`.
+  inline ~ZoneAllocator() noexcept { reset(); }
+
+  //! Tests whether the `ZoneAllocator` is initialized (i.e. has `Zone`).
+  inline bool isInitialized() const noexcept { return _zone != nullptr; }
+
+  //! Convenience function to initialize the `ZoneAllocator` with `zone`.
+  //!
+  //! It's the same as calling `reset(zone)`.
+  inline void init(Zone* zone) noexcept { reset(zone); }
+
+  //! Resets this `ZoneAllocator` and also forget about the current `Zone` which is attached (if any). Reset
+  //! optionally attaches a new `zone` passed, or keeps the `ZoneAllocator` in an uninitialized state, if
+  //! `zone` is null.
+  ASMJIT_API void reset(Zone* zone = nullptr) noexcept;
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns the assigned `Zone` of this allocator or null if this `ZoneAllocator` is not initialized.
+  inline Zone* zone() const noexcept { return _zone; }
+
+  //! \}
+
+  //! \cond
+  //! \name Internals
+  //! \{
+
+  //! Returns the slot index to be used for `size`. Returns `true` if a valid slot has been written to `slot`, the
+  //! overload taking `allocatedSize` additionally fills it with the exact slot size (equal or slightly greater
+  //! than `size`). Returns `false` if `size` is greater than `kHiMaxSize`, in which case no output is written.
+  static inline bool _getSlotIndex(size_t size, uint32_t& slot) noexcept {
+    ASMJIT_ASSERT(size > 0);
+    if (size > kHiMaxSize)
+      return false;
+
+    if (size <= kLoMaxSize)
+      slot = uint32_t((size - 1) / kLoGranularity);
+    else
+      slot = uint32_t((size - kLoMaxSize - 1) / kHiGranularity) + kLoCount;
+
+    return true;
+  }
+
+  //! \overload
+  static inline bool _getSlotIndex(size_t size, uint32_t& slot, size_t& allocatedSize) noexcept {
+    ASMJIT_ASSERT(size > 0);
+    if (size > kHiMaxSize)
+      return false;
+
+    if (size <= kLoMaxSize) {
+      slot = uint32_t((size - 1) / kLoGranularity);
+      allocatedSize = Support::alignUp(size, kLoGranularity);
+    }
+    else {
+      slot = uint32_t((size - kLoMaxSize - 1) / kHiGranularity) + kLoCount;
+      allocatedSize = Support::alignUp(size, kHiGranularity);
+    }
+
+    return true;
+  }
+
+  //! \}
+  //! \endcond
+
+  //! \name Allocation
+  //! \{
+
+  //! \cond INTERNAL
+  ASMJIT_API void* _alloc(size_t size, size_t& allocatedSize) noexcept;
+  ASMJIT_API void* _allocZeroed(size_t size, size_t& allocatedSize) noexcept;
+  ASMJIT_API void _releaseDynamic(void* p, size_t size) noexcept;
+  //! \endcond
+
+  //! Allocates `size` bytes of memory, ideally from an available pool.
+  //!
+  //! \note `size` can't be zero, it will assert in debug mode in such case.
+  inline void* alloc(size_t size) noexcept {
+    ASMJIT_ASSERT(isInitialized());
+    size_t allocatedSize;
+    return _alloc(size, allocatedSize);
+  }
+
+  //! Like `alloc(size)`, but provides a second argument `allocatedSize` that provides a way to know how big
+  //! the block returned actually is. This is useful for containers to prevent growing too early.
+  inline void* alloc(size_t size, size_t& allocatedSize) noexcept {
+    ASMJIT_ASSERT(isInitialized());
+    return _alloc(size, allocatedSize);
+  }
+
+  //! Like `alloc()`, but the return pointer is casted to `T*`.
+  template<typename T>
+  inline T* allocT(size_t size = sizeof(T)) noexcept {
+    return static_cast<T*>(alloc(size));
+  }
+
+  //! Like `alloc(size)`, but returns zeroed memory.
+  inline void* allocZeroed(size_t size) noexcept {
+    ASMJIT_ASSERT(isInitialized());
+    size_t allocatedSize;
+    return _allocZeroed(size, allocatedSize);
+  }
+
+  //! Like `alloc(size, allocatedSize)`, but returns zeroed memory.
+  inline void* allocZeroed(size_t size, size_t& allocatedSize) noexcept {
+    ASMJIT_ASSERT(isInitialized());
+    return _allocZeroed(size, allocatedSize);
+  }
+
+  //! Like `allocZeroed()`, but the return pointer is casted to `T*`.
+  template<typename T>
+  inline T* allocZeroedT(size_t size = sizeof(T)) noexcept {
+    return static_cast<T*>(allocZeroed(size));
+  }
+
+  //! Like `new(std::nothrow) T(...)`, but allocated by `ZoneAllocator`.
+  template<typename T>
+  inline T* newT() noexcept {
+    void* p = allocT<T>();
+    if (ASMJIT_UNLIKELY(!p))
+      return nullptr;
+    return new(p) T();
+  }
+  //! Like `new(std::nothrow) T(...)`, but allocated by `ZoneAllocator`.
+  template<typename T, typename... Args>
+  inline T* newT(Args&&... args) noexcept {
+    void* p = allocT<T>();
+    if (ASMJIT_UNLIKELY(!p))
+      return nullptr;
+    return new(p) T(std::forward<Args>(args)...);
+  }
+
+  //! Releases the memory previously allocated by `alloc()`. The `size` argument has to be the same as used to call
+  //! `alloc()` or `allocatedSize` returned  by `alloc()`.
+  inline void release(void* p, size_t size) noexcept {
+    ASMJIT_ASSERT(isInitialized());
+    ASMJIT_ASSERT(p != nullptr);
+    ASMJIT_ASSERT(size != 0);
+    // Pooled sizes are pushed to the front of their slot's list, anything larger is a dynamic block.
+    uint32_t slot;
+    if (_getSlotIndex(size, slot)) {
+      static_cast<Slot*>(p)->next = static_cast<Slot*>(_slots[slot]);
+      _slots[slot] = static_cast<Slot*>(p);
+    }
+    else {
+      _releaseDynamic(p, size);
+    }
+  }
+
+  //! \}
+};
+
+//! \}
+
+ASMJIT_END_NAMESPACE
+
+#endif // ASMJIT_CORE_ZONE_H_INCLUDED
diff --git a/lib/lepton/asmjit/core/zonehash.cpp b/lib/lepton/asmjit/core/zonehash.cpp
new file mode 100644
index 0000000000..3778fbe226
--- /dev/null
+++ b/lib/lepton/asmjit/core/zonehash.cpp
@@ -0,0 +1,309 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#include "../core/support.h"
+#include "../core/zone.h"
+#include "../core/zonehash.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+// ZoneHashBase - Prime Numbers
+// ============================
+
+#define ASMJIT_POPULATE_PRIMES(ENTRY) \
+  ENTRY(2         , 0x80000000, 32), /* [N * 0x80000000 >> 32] (rcp=2147483648) */ \
+  ENTRY(11        , 0xBA2E8BA3, 35), /* [N * 0xBA2E8BA3 >> 35] (rcp=3123612579) */ \
+  ENTRY(29        , 0x8D3DCB09, 36), /* [N * 0x8D3DCB09 >> 36] (rcp=2369637129) */ \
+  ENTRY(41        , 0xC7CE0C7D, 37), /* [N * 0xC7CE0C7D >> 37] (rcp=3352169597) */ \
+  ENTRY(59        , 0x8AD8F2FC, 37), /* [N * 0x8AD8F2FC >> 37] (rcp=2329473788) */ \
+  ENTRY(83        , 0xC565C87C, 38), /* [N * 0xC565C87C >> 38] (rcp=3311782012) */ \
+  ENTRY(131       , 0xFA232CF3, 39), /* [N * 0xFA232CF3 >> 39] (rcp=4196609267) */ \
+  ENTRY(191       , 0xAB8F69E3, 39), /* [N * 0xAB8F69E3 >> 39] (rcp=2878302691) */ \
+  ENTRY(269       , 0xF3A0D52D, 40), /* [N * 0xF3A0D52D >> 40] (rcp=4087403821) */ \
+  ENTRY(383       , 0xAB1CBDD4, 40), /* [N * 0xAB1CBDD4 >> 40] (rcp=2870787540) */ \
+  ENTRY(541       , 0xF246FACC, 41), /* [N * 0xF246FACC >> 41] (rcp=4064737996) */ \
+  ENTRY(757       , 0xAD2589A4, 41), /* [N * 0xAD2589A4 >> 41] (rcp=2904918436) */ \
+  ENTRY(1061      , 0xF7129426, 42), /* [N * 0xF7129426 >> 42] (rcp=4145189926) */ \
+  ENTRY(1499      , 0xAEE116B7, 42), /* [N * 0xAEE116B7 >> 42] (rcp=2933986999) */ \
+  ENTRY(2099      , 0xF9C7A737, 43), /* [N * 0xF9C7A737 >> 43] (rcp=4190611255) */ \
+  ENTRY(2939      , 0xB263D25C, 43), /* [N * 0xB263D25C >> 43] (rcp=2992886364) */ \
+  ENTRY(4111      , 0xFF10E02E, 44), /* [N * 0xFF10E02E >> 44] (rcp=4279296046) */ \
+  ENTRY(5779      , 0xB5722823, 44), /* [N * 0xB5722823 >> 44] (rcp=3044157475) */ \
+  ENTRY(8087      , 0x81A97405, 44), /* [N * 0x81A97405 >> 44] (rcp=2175366149) */ \
+  ENTRY(11321     , 0xB93E91DB, 45), /* [N * 0xB93E91DB >> 45] (rcp=3107885531) */ \
+  ENTRY(15859     , 0x843CC26B, 45), /* [N * 0x843CC26B >> 45] (rcp=2218574443) */ \
+  ENTRY(22189     , 0xBD06B9EA, 46), /* [N * 0xBD06B9EA >> 46] (rcp=3171334634) */ \
+  ENTRY(31051     , 0x8713F186, 46), /* [N * 0x8713F186 >> 46] (rcp=2266231174) */ \
+  ENTRY(43451     , 0xC10F1CB9, 47), /* [N * 0xC10F1CB9 >> 47] (rcp=3238993081) */ \
+  ENTRY(60869     , 0x89D06A86, 47), /* [N * 0x89D06A86 >> 47] (rcp=2312137350) */ \
+  ENTRY(85159     , 0xC502AF3B, 48), /* [N * 0xC502AF3B >> 48] (rcp=3305287483) */ \
+  ENTRY(102107    , 0xA44F65AE, 48), /* [N * 0xA44F65AE >> 48] (rcp=2756666798) */ \
+  ENTRY(122449    , 0x89038F77, 48), /* [N * 0x89038F77 >> 48] (rcp=2298711927) */ \
+  ENTRY(146819    , 0xE48AF7E9, 49), /* [N * 0xE48AF7E9 >> 49] (rcp=3834312681) */ \
+  ENTRY(176041    , 0xBE9B145B, 49), /* [N * 0xBE9B145B >> 49] (rcp=3197834331) */ \
+  ENTRY(211073    , 0x9EF882BA, 49), /* [N * 0x9EF882BA >> 49] (rcp=2667086522) */ \
+  ENTRY(253081    , 0x849571AB, 49), /* [N * 0x849571AB >> 49] (rcp=2224386475) */ \
+  ENTRY(303469    , 0xDD239C97, 50), /* [N * 0xDD239C97 >> 50] (rcp=3710098583) */ \
+  ENTRY(363887    , 0xB86C196D, 50), /* [N * 0xB86C196D >> 50] (rcp=3094092141) */ \
+  ENTRY(436307    , 0x99CFA4E9, 50), /* [N * 0x99CFA4E9 >> 50] (rcp=2580522217) */ \
+  ENTRY(523177    , 0x804595C0, 50), /* [N * 0x804595C0 >> 50] (rcp=2152043968) */ \
+  ENTRY(627293    , 0xD5F69FCF, 51), /* [N * 0xD5F69FCF >> 51] (rcp=3589709775) */ \
+  ENTRY(752177    , 0xB27063BA, 51), /* [N * 0xB27063BA >> 51] (rcp=2993710010) */ \
+  ENTRY(901891    , 0x94D170AC, 51), /* [N * 0x94D170AC >> 51] (rcp=2496753836) */ \
+  ENTRY(1081369   , 0xF83C9767, 52), /* [N * 0xF83C9767 >> 52] (rcp=4164720487) */ \
+  ENTRY(1296563   , 0xCF09435D, 52), /* [N * 0xCF09435D >> 52] (rcp=3473490781) */ \
+  ENTRY(1554583   , 0xACAC7198, 52), /* [N * 0xACAC7198 >> 52] (rcp=2896982424) */ \
+  ENTRY(1863971   , 0x90033EE3, 52), /* [N * 0x90033EE3 >> 52] (rcp=2416131811) */ \
+  ENTRY(2234923   , 0xF0380EBD, 53), /* [N * 0xF0380EBD >> 53] (rcp=4030205629) */ \
+  ENTRY(2679673   , 0xC859731E, 53), /* [N * 0xC859731E >> 53] (rcp=3361305374) */ \
+  ENTRY(3212927   , 0xA718DE27, 53), /* [N * 0xA718DE27 >> 53] (rcp=2803424807) */ \
+  ENTRY(3852301   , 0x8B5D1B4B, 53), /* [N * 0x8B5D1B4B >> 53] (rcp=2338134859) */ \
+  ENTRY(4618921   , 0xE8774804, 54), /* [N * 0xE8774804 >> 54] (rcp=3900131332) */ \
+  ENTRY(5076199   , 0xD386574E, 54), /* [N * 0xD386574E >> 54] (rcp=3548796750) */ \
+  ENTRY(5578757   , 0xC0783FE1, 54), /* [N * 0xC0783FE1 >> 54] (rcp=3229106145) */ \
+  ENTRY(6131057   , 0xAF21B08F, 54), /* [N * 0xAF21B08F >> 54] (rcp=2938220687) */ \
+  ENTRY(6738031   , 0x9F5AFD6E, 54), /* [N * 0x9F5AFD6E >> 54] (rcp=2673540462) */ \
+  ENTRY(7405163   , 0x90FFC3B9, 54), /* [N * 0x90FFC3B9 >> 54] (rcp=2432680889) */ \
+  ENTRY(8138279   , 0x83EFECFC, 54), /* [N * 0x83EFECFC >> 54] (rcp=2213539068) */ \
+  ENTRY(8943971   , 0xF01AA2EF, 55), /* [N * 0xF01AA2EF >> 55] (rcp=4028277487) */ \
+  ENTRY(9829447   , 0xDA7979B2, 55), /* [N * 0xDA7979B2 >> 55] (rcp=3665394098) */ \
+  ENTRY(10802581  , 0xC6CB2771, 55), /* [N * 0xC6CB2771 >> 55] (rcp=3335202673) */ \
+  ENTRY(11872037  , 0xB4E2C7DD, 55), /* [N * 0xB4E2C7DD >> 55] (rcp=3034761181) */ \
+  ENTRY(13047407  , 0xA4974124, 55), /* [N * 0xA4974124 >> 55] (rcp=2761376036) */ \
+  ENTRY(14339107  , 0x95C39CF1, 55), /* [N * 0x95C39CF1 >> 55] (rcp=2512624881) */ \
+  ENTRY(15758737  , 0x8845C763, 55), /* [N * 0x8845C763 >> 55] (rcp=2286274403) */ \
+  ENTRY(17318867  , 0xF7FE593F, 56), /* [N * 0xF7FE593F >> 56] (rcp=4160641343) */ \
+  ENTRY(19033439  , 0xE1A75D93, 56), /* [N * 0xE1A75D93 >> 56] (rcp=3785842067) */ \
+  ENTRY(20917763  , 0xCD5389B3, 56), /* [N * 0xCD5389B3 >> 56] (rcp=3444804019) */ \
+  ENTRY(22988621  , 0xBAD4841A, 56), /* [N * 0xBAD4841A >> 56] (rcp=3134489626) */ \
+  ENTRY(25264543  , 0xA9FFF2FF, 56), /* [N * 0xA9FFF2FF >> 56] (rcp=2852123391) */ \
+  ENTRY(27765763  , 0x9AAF8BF3, 56), /* [N * 0x9AAF8BF3 >> 56] (rcp=2595195891) */ \
+  ENTRY(30514607  , 0x8CC04E18, 56), /* [N * 0x8CC04E18 >> 56] (rcp=2361413144) */ \
+  ENTRY(33535561  , 0x80127068, 56), /* [N * 0x80127068 >> 56] (rcp=2148692072) */ \
+  ENTRY(36855587  , 0xE911F0BB, 57), /* [N * 0xE911F0BB >> 57] (rcp=3910267067) */ \
+  ENTRY(38661533  , 0xDE2ED7BE, 57), /* [N * 0xDE2ED7BE >> 57] (rcp=3727611838) */ \
+  ENTRY(40555961  , 0xD3CDF2FD, 57), /* [N * 0xD3CDF2FD >> 57] (rcp=3553489661) */ \
+  ENTRY(42543269  , 0xC9E9196C, 57), /* [N * 0xC9E9196C >> 57] (rcp=3387496812) */ \
+  ENTRY(44627909  , 0xC07A9EB6, 57), /* [N * 0xC07A9EB6 >> 57] (rcp=3229261494) */ \
+  ENTRY(46814687  , 0xB77CEF65, 57), /* [N * 0xB77CEF65 >> 57] (rcp=3078418277) */ \
+  ENTRY(49108607  , 0xAEEAC65C, 57), /* [N * 0xAEEAC65C >> 57] (rcp=2934621788) */ \
+  ENTRY(51514987  , 0xA6BF0EF0, 57), /* [N * 0xA6BF0EF0 >> 57] (rcp=2797539056) */ \
+  ENTRY(54039263  , 0x9EF510B5, 57), /* [N * 0x9EF510B5 >> 57] (rcp=2666860725) */ \
+  ENTRY(56687207  , 0x97883B42, 57), /* [N * 0x97883B42 >> 57] (rcp=2542287682) */ \
+  ENTRY(59464897  , 0x907430ED, 57), /* [N * 0x907430ED >> 57] (rcp=2423533805) */ \
+  ENTRY(62378699  , 0x89B4CA91, 57), /* [N * 0x89B4CA91 >> 57] (rcp=2310326929) */ \
+  ENTRY(65435273  , 0x83461568, 57), /* [N * 0x83461568 >> 57] (rcp=2202408296) */ \
+  ENTRY(68641607  , 0xFA489AA8, 58), /* [N * 0xFA489AA8 >> 58] (rcp=4199062184) */ \
+  ENTRY(72005051  , 0xEE97B1C5, 58), /* [N * 0xEE97B1C5 >> 58] (rcp=4002918853) */ \
+  ENTRY(75533323  , 0xE3729293, 58), /* [N * 0xE3729293 >> 58] (rcp=3815936659) */ \
+  ENTRY(79234469  , 0xD8D2BBA3, 58), /* [N * 0xD8D2BBA3 >> 58] (rcp=3637689251) */ \
+  ENTRY(83116967  , 0xCEB1F196, 58), /* [N * 0xCEB1F196 >> 58] (rcp=3467768214) */ \
+  ENTRY(87189709  , 0xC50A4426, 58), /* [N * 0xC50A4426 >> 58] (rcp=3305784358) */ \
+  ENTRY(91462061  , 0xBBD6052B, 58), /* [N * 0xBBD6052B >> 58] (rcp=3151365419) */ \
+  ENTRY(95943737  , 0xB30FD999, 58), /* [N * 0xB30FD999 >> 58] (rcp=3004160409) */ \
+  ENTRY(100644991 , 0xAAB29CED, 58), /* [N * 0xAAB29CED >> 58] (rcp=2863832301) */ \
+  ENTRY(105576619 , 0xA2B96421, 58), /* [N * 0xA2B96421 >> 58] (rcp=2730058785) */ \
+  ENTRY(110749901 , 0x9B1F8434, 58), /* [N * 0x9B1F8434 >> 58] (rcp=2602533940) */ \
+  ENTRY(116176651 , 0x93E08B4A, 58), /* [N * 0x93E08B4A >> 58] (rcp=2480966474) */ \
+  ENTRY(121869317 , 0x8CF837E0, 58), /* [N * 0x8CF837E0 >> 58] (rcp=2365077472) */ \
+  ENTRY(127840913 , 0x86627F01, 58), /* [N * 0x86627F01 >> 58] (rcp=2254601985) */ \
+  ENTRY(134105159 , 0x801B8178, 58), /* [N * 0x801B8178 >> 58] (rcp=2149286264) */ \
+  ENTRY(140676353 , 0xF43F294F, 59), /* [N * 0xF43F294F >> 59] (rcp=4097780047) */ \
+  ENTRY(147569509 , 0xE8D67089, 59), /* [N * 0xE8D67089 >> 59] (rcp=3906367625) */ \
+  ENTRY(154800449 , 0xDDF6243C, 59), /* [N * 0xDDF6243C >> 59] (rcp=3723895868) */ \
+  ENTRY(162385709 , 0xD397E6AE, 59), /* [N * 0xD397E6AE >> 59] (rcp=3549947566) */ \
+  ENTRY(170342629 , 0xC9B5A65A, 59), /* [N * 0xC9B5A65A >> 59] (rcp=3384125018) */ \
+  ENTRY(178689419 , 0xC0499865, 59), /* [N * 0xC0499865 >> 59] (rcp=3226048613) */ \
+  ENTRY(187445201 , 0xB74E35FA, 59), /* [N * 0xB74E35FA >> 59] (rcp=3075356154) */ \
+  ENTRY(196630033 , 0xAEBE3AC1, 59), /* [N * 0xAEBE3AC1 >> 59] (rcp=2931702465) */ \
+  ENTRY(206264921 , 0xA694A37F, 59), /* [N * 0xA694A37F >> 59] (rcp=2794759039) */ \
+  ENTRY(216371963 , 0x9ECCA59F, 59), /* [N * 0x9ECCA59F >> 59] (rcp=2664211871) */ \
+  ENTRY(226974197 , 0x9761B6AE, 59), /* [N * 0x9761B6AE >> 59] (rcp=2539763374) */ \
+  ENTRY(238095983 , 0x904F79A1, 59), /* [N * 0x904F79A1 >> 59] (rcp=2421127585) */ \
+  ENTRY(249762697 , 0x8991CD1F, 59), /* [N * 0x8991CD1F >> 59] (rcp=2308033823) */ \
+  ENTRY(262001071 , 0x8324BCA5, 59), /* [N * 0x8324BCA5 >> 59] (rcp=2200222885) */ \
+  ENTRY(274839137 , 0xFA090732, 60), /* [N * 0xFA090732 >> 60] (rcp=4194895666) */ \
+  ENTRY(288306269 , 0xEE5B16ED, 60), /* [N * 0xEE5B16ED >> 60] (rcp=3998947053) */ \
+  ENTRY(302433337 , 0xE338CE49, 60), /* [N * 0xE338CE49 >> 60] (rcp=3812150857) */ \
+  ENTRY(317252587 , 0xD89BABC0, 60), /* [N * 0xD89BABC0 >> 60] (rcp=3634080704) */ \
+  ENTRY(374358107 , 0xB790EF43, 60), /* [N * 0xB790EF43 >> 60] (rcp=3079728963) */ \
+  ENTRY(441742621 , 0x9B908414, 60), /* [N * 0x9B908414 >> 60] (rcp=2609939476) */ \
+  ENTRY(521256293 , 0x83D596FA, 60), /* [N * 0x83D596FA >> 60] (rcp=2211813114) */ \
+  ENTRY(615082441 , 0xDF72B16E, 61), /* [N * 0xDF72B16E >> 61] (rcp=3748835694) */ \
+  ENTRY(725797313 , 0xBD5CDB3B, 61), /* [N * 0xBD5CDB3B >> 61] (rcp=3176979259) */ \
+  ENTRY(856440829 , 0xA07A14E9, 61), /* [N * 0xA07A14E9 >> 61] (rcp=2692355305) */ \
+  ENTRY(1010600209, 0x87FF5289, 61), /* [N * 0x87FF5289 >> 61] (rcp=2281656969) */ \
+  ENTRY(1192508257, 0xE6810540, 62), /* [N * 0xE6810540 >> 62] (rcp=3867215168) */ \
+  ENTRY(1407159797, 0xC357A480, 62), /* [N * 0xC357A480 >> 62] (rcp=3277300864) */ \
+  ENTRY(1660448617, 0xA58B5B4F, 62), /* [N * 0xA58B5B4F >> 62] (rcp=2777373519) */ \
+  ENTRY(1959329399, 0x8C4AB55F, 62), /* [N * 0x8C4AB55F >> 62] (rcp=2353706335) */ \
+  ENTRY(2312008693, 0xEDC86320, 63), /* [N * 0xEDC86320 >> 63] (rcp=3989332768) */ \
+  ENTRY(2728170257, 0xC982C4D2, 63), /* [N * 0xC982C4D2 >> 63] (rcp=3380790482) */ \
+  ENTRY(3219240923, 0xAAC599B6, 63)  /* [N * 0xAAC599B6 >> 63] (rcp=2865076662) */
+
+
+//! One row of the prime table: a bucket count together with the multiplier that replaces division by it.
+//!
+//! The shift that belongs to `rcp` is not stored here, it is kept in the parallel `ZoneHash_primeShift` array.
+struct HashPrime {
+  //! Prime number (becomes `_bucketsCount` when `_rehash()` selects this row).
+  uint32_t prime;
+  //! Reciprocal to turn division into multiplication (becomes `_rcpValue`).
+  uint32_t rcp;
+};
+
+// {prime, rcp} rows expanded from `ASMJIT_POPULATE_PRIMES`; the SHIFT column is dropped by this expansion.
+static const HashPrime ZoneHash_primeArray[] = {
+  #define ZONE_HASH_PRIME_ROW(PRIME_VALUE, RCP_VALUE, SHIFT_VALUE) { PRIME_VALUE, RCP_VALUE }
+  ASMJIT_POPULATE_PRIMES(ZONE_HASH_PRIME_ROW)
+  #undef ZONE_HASH_PRIME_ROW
+};
+
+// SHIFT column of `ASMJIT_POPULATE_PRIMES`, indexed the same way as `ZoneHash_primeArray`.
+static const uint8_t ZoneHash_primeShift[] = {
+  #define ZONE_HASH_SHIFT_ROW(PRIME_VALUE, RCP_VALUE, SHIFT_VALUE) uint8_t(SHIFT_VALUE)
+  ASMJIT_POPULATE_PRIMES(ZONE_HASH_SHIFT_ROW)
+  #undef ZONE_HASH_SHIFT_ROW
+};
+
+// ZoneHashBase - Rehash
+// =====================
+
+// Re-buckets the table using the prime stored at `primeIndex` in `ZoneHash_primeArray`.
+//
+// A zeroed bucket array is allocated from `allocator`, every node is relinked into it, and the previous array
+// is released unless it was the embedded one. If the allocation fails the table is left untouched.
+void ZoneHashBase::_rehash(ZoneAllocator* allocator, uint32_t primeIndex) noexcept {
+  ASMJIT_ASSERT(primeIndex < ASMJIT_ARRAY_SIZE(ZoneHash_primeArray));
+
+  const HashPrime& entry = ZoneHash_primeArray[primeIndex];
+  const uint32_t bucketCountAfter = entry.prime;
+  const size_t bytesAfter = size_t(bucketCountAfter) * sizeof(ZoneHashNode*);
+
+  ZoneHashNode** bucketsAfter = static_cast<ZoneHashNode**>(allocator->allocZeroed(bytesAfter));
+
+  // Not fatal: the current buckets keep accepting nodes, lookups just degrade as chains grow.
+  if (ASMJIT_UNLIKELY(bucketsAfter == nullptr))
+    return;
+
+  ZoneHashNode** bucketsBefore = _data;
+  const uint32_t bucketCountBefore = _bucketsCount;
+
+  // Switch to the new geometry first, `_calcMod()` below has to reduce by the new bucket count.
+  _data = bucketsAfter;
+  _bucketsCount = bucketCountAfter;
+  _bucketsGrow = uint32_t(bucketCountAfter * 0.9);
+  _rcpValue = entry.rcp;
+  _rcpShift = ZoneHash_primeShift[primeIndex];
+  _primeIndex = uint8_t(primeIndex);
+
+  // Push every node of every old chain to the front of its new chain.
+  for (uint32_t bucket = 0; bucket < bucketCountBefore; bucket++) {
+    for (ZoneHashNode* node = bucketsBefore[bucket]; node != nullptr; ) {
+      ZoneHashNode* following = node->_hashNext;
+      ZoneHashNode** head = &bucketsAfter[_calcMod(node->_hashCode)];
+
+      node->_hashNext = *head;
+      *head = node;
+      node = following;
+    }
+  }
+
+  // The embedded bucket is a member of this object, it was never obtained from the allocator.
+  if (bucketsBefore != _embedded)
+    allocator->release(bucketsBefore, bucketCountBefore * sizeof(ZoneHashNode*));
+}
+
+// ZoneHashBase - Operations
+// =========================
+
+// Links `node` at the front of its bucket chain and grows the table once `_size` exceeds `_bucketsGrow`.
+//
+// Always returns `node`; no check for an already present equal node is made here.
+ZoneHashNode* ZoneHashBase::_insert(ZoneAllocator* allocator, ZoneHashNode* node) noexcept {
+  ZoneHashNode** head = &_data[_calcMod(node->_hashCode)];
+  node->_hashNext = *head;
+  *head = node;
+
+  _size++;
+  if (_size <= _bucketsGrow)
+    return node;
+
+  // Step two rows ahead in the prime table, clamped to the last row. Nothing happens when the table already
+  // uses the last row.
+  uint32_t nextPrimeIndex = Support::min<uint32_t>(_primeIndex + 2, ASMJIT_ARRAY_SIZE(ZoneHash_primeArray) - 1);
+  if (nextPrimeIndex > _primeIndex)
+    _rehash(allocator, nextPrimeIndex);
+
+  return node;
+}
+
+// Unlinks `node` from its bucket chain, matching by pointer identity.
+//
+// Returns `node` when it was found (and `_size` was decremented), `nullptr` otherwise. `allocator` is unused
+// and the bucket array is never shrunk here.
+ZoneHashNode* ZoneHashBase::_remove(ZoneAllocator* allocator, ZoneHashNode* node) noexcept {
+  DebugUtils::unused(allocator);
+
+  // `link` always points at the pointer that refers to the node under inspection, so unlinking is a single store.
+  for (ZoneHashNode** link = &_data[_calcMod(node->_hashCode)]; *link != nullptr; link = &(*link)->_hashNext) {
+    if (*link != node)
+      continue;
+
+    *link = node->_hashNext;
+    _size--;
+    return node;
+  }
+
+  return nullptr;
+}
+
+// ZoneHashBase - Tests
+// ====================
+
+#if defined(ASMJIT_TEST)
+// Test node: the key is also passed to `ZoneHashNode` as the hash code.
+struct MyHashNode : public ZoneHashNode {
+  uint32_t _key;
+
+  inline MyHashNode(uint32_t key) noexcept
+    : ZoneHashNode(key), _key(key) {}
+};
+
+// Lookup key for `ZoneHash<MyHashNode>::get()`: provides the hash code and the node comparison.
+struct MyKeyMatcher {
+  uint32_t _key;
+
+  inline MyKeyMatcher(uint32_t key) noexcept : _key(key) {}
+
+  inline uint32_t hashCode() const noexcept { return _key; }
+  inline bool matches(const MyHashNode* node) const noexcept { return _key == node->_key; }
+};
+
+// Inserts nodes keyed 0..kCount-1, then removes them from the highest key downwards. Before each removal every
+// key that is still stored must be found, after it the removed key must be gone.
+UNIT(zone_hash) {
+  const uint32_t kCount = BrokenAPI::hasArg("--quick") ? 1000 : 10000;
+
+  Zone zone(4096);
+  ZoneAllocator allocator(&zone);
+  ZoneHash<MyHashNode> hashTable;
+
+  INFO("Inserting %u elements to HashTable", unsigned(kCount));
+  for (uint32_t key = 0; key < kCount; key++)
+    hashTable.insert(&allocator, zone.newT<MyHashNode>(key));
+
+  INFO("Removing %u elements from HashTable and validating each operation", unsigned(kCount));
+  uint32_t remaining = kCount;
+  do {
+    // Every key that has not been removed yet has to resolve to a node carrying that key.
+    for (uint32_t key = 0; key < remaining; key++) {
+      MyHashNode* found = hashTable.get(MyKeyMatcher(key));
+      EXPECT(found != nullptr);
+      EXPECT(found->_key == key);
+    }
+
+    // Remove the highest remaining key and verify it can no longer be found.
+    remaining--;
+    MyHashNode* victim = hashTable.get(MyKeyMatcher(remaining));
+    hashTable.remove(&allocator, victim);
+
+    MyHashNode* afterRemoval = hashTable.get(MyKeyMatcher(remaining));
+    EXPECT(afterRemoval == nullptr);
+  } while (remaining);
+
+  EXPECT(hashTable.empty());
+}
+#endif
+
+ASMJIT_END_NAMESPACE
diff --git a/lib/lepton/asmjit/core/zonehash.h b/lib/lepton/asmjit/core/zonehash.h
new file mode 100644
index 0000000000..f332290b54
--- /dev/null
+++ b/lib/lepton/asmjit/core/zonehash.h
@@ -0,0 +1,186 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_ZONEHASH_H_INCLUDED
+#define ASMJIT_CORE_ZONEHASH_H_INCLUDED
+
+#include "../core/zone.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \addtogroup asmjit_zone
+//! \{
+
+//! Node used by \ref ZoneHash template.
+//!
+//! `ZoneHash::get()` does not call any member function of the node. Lookups go through a separate key object that
+//! provides `hashCode()` and `matches(const NodeT* node)`; the node only has to carry the hash code it was
+//! inserted with, which is what selects its bucket.
+class ZoneHashNode {
+public:
+  ASMJIT_NONCOPYABLE(ZoneHashNode)
+
+  //! Creates an unlinked node with the given `hashCode` and `_customData` set to zero.
+  inline ZoneHashNode(uint32_t hashCode = 0) noexcept
+    : _hashNext(nullptr),
+      _hashCode(hashCode),
+      _customData(0) {}
+
+  //! Next node in the chain, null if it terminates the chain.
+  ZoneHashNode* _hashNext;
+  //! Precalculated hash-code of key.
+  uint32_t _hashCode;
+  //! Padding, can be reused by any Node that inherits `ZoneHashNode`.
+  uint32_t _customData;
+};
+
+//! Base class used by \ref ZoneHash template
+//!
+//! Holds the bucket array and the bookkeeping that does not depend on the node type. An empty table uses the
+//! single embedded bucket, so it needs no allocation until `_rehash()` succeeds for the first time. There is no
+//! destructor, a bucket array obtained from an allocator is only given back by `release()`.
+class ZoneHashBase {
+public:
+  ASMJIT_NONCOPYABLE(ZoneHashBase)
+
+  //! Buckets data.
+  ZoneHashNode** _data;
+  //! Count of records inserted into the hash table.
+  size_t _size;
+  //! Count of hash buckets.
+  uint32_t _bucketsCount;
+  //! When buckets array should grow (only checked after insertion).
+  uint32_t _bucketsGrow;
+  //! Reciprocal value of `_bucketsCount`.
+  uint32_t _rcpValue;
+  //! How many bits to shift right when hash is multiplied with `_rcpValue`.
+  uint8_t _rcpShift;
+  //! Prime value index in internal prime array.
+  uint8_t _primeIndex;
+
+  //! Embedded data, used by empty hash tables.
+  ZoneHashNode* _embedded[1];
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Creates an empty table that uses the embedded bucket.
+  inline ZoneHashBase() noexcept {
+    reset();
+  }
+
+  //! Takes over the state of `other`.
+  //!
+  //! `other` is not modified. NOTE(review): when `other` owned an allocated bucket array both objects refer to
+  //! it afterwards, so the caller presumably has to `reset()` the source - confirm against the call sites.
+  inline ZoneHashBase(ZoneHashBase&& other) noexcept {
+    _data = other._data;
+    _size = other._size;
+    _bucketsCount = other._bucketsCount;
+    _bucketsGrow = other._bucketsGrow;
+    _rcpValue = other._rcpValue;
+    _rcpShift = other._rcpShift;
+    _primeIndex = other._primeIndex;
+    _embedded[0] = other._embedded[0];
+
+    // The embedded bucket lives inside the object, so a pointer to `other`'s one must be redirected to ours.
+    if (_data == other._embedded) _data = _embedded;
+  }
+
+  //! Puts the table back into the empty single-bucket state without releasing anything.
+  inline void reset() noexcept {
+    _data = _embedded;
+    _size = 0;
+    _bucketsCount = 1;
+    _bucketsGrow = 1;
+    // With these two values and one bucket `_calcMod()` yields 0 for every hash.
+    _rcpValue = 1;
+    _rcpShift = 0;
+    _primeIndex = 0;
+    _embedded[0] = nullptr;
+  }
+
+  //! Gives the bucket array back to `allocator` (unless the embedded bucket is in use) and resets the table.
+  //!
+  //! Only the bucket array is released, the nodes themselves are not touched.
+  inline void release(ZoneAllocator* allocator) noexcept {
+    ZoneHashNode** oldData = _data;
+    if (oldData != _embedded)
+      allocator->release(oldData, _bucketsCount * sizeof(ZoneHashNode*));
+    reset();
+  }
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Tests whether no records are stored.
+  inline bool empty() const noexcept { return _size == 0; }
+  //! Returns the count of records stored.
+  inline size_t size() const noexcept { return _size; }
+
+  //! \}
+
+  //! \name Utilities
+  //! \{
+
+  //! Exchanges the complete state with `other`, fixing up pointers to the embedded buckets afterwards.
+  inline void _swap(ZoneHashBase& other) noexcept {
+    std::swap(_data, other._data);
+    std::swap(_size, other._size);
+    std::swap(_bucketsCount, other._bucketsCount);
+    std::swap(_bucketsGrow, other._bucketsGrow);
+    std::swap(_rcpValue, other._rcpValue);
+    std::swap(_rcpShift, other._rcpShift);
+    std::swap(_primeIndex, other._primeIndex);
+    std::swap(_embedded[0], other._embedded[0]);
+
+    // After the swap a `_data` that points at the other object's embedded bucket has to point at our own.
+    if (_data == other._embedded) _data = _embedded;
+    if (other._data == _embedded) other._data = other._embedded;
+  }
+
+  //! \cond INTERNAL
+
+  //! Maps `hash` to a bucket index, i.e. `hash` modulo `_bucketsCount`, without dividing.
+  //!
+  //! `x` is the quotient obtained by multiplying with `_rcpValue` and shifting right by `_rcpShift`; the
+  //! remainder is then `hash - x * _bucketsCount`.
+  inline uint32_t _calcMod(uint32_t hash) const noexcept {
+    uint32_t x = uint32_t((uint64_t(hash) * _rcpValue) >> _rcpShift);
+    return hash - x * _bucketsCount;
+  }
+
+  //! Re-buckets the table. `primeIndex` is an index into the internal prime array (the same kind of value as
+  //! `_primeIndex`), not a bucket count.
+  ASMJIT_API void _rehash(ZoneAllocator* allocator, uint32_t primeIndex) noexcept;
+  //! Links `node` into its bucket, growing the table when needed. Returns `node`.
+  ASMJIT_API ZoneHashNode* _insert(ZoneAllocator* allocator, ZoneHashNode* node) noexcept;
+  //! Unlinks `node` from its bucket. Returns `node`, or null when it was not found.
+  ASMJIT_API ZoneHashNode* _remove(ZoneAllocator* allocator, ZoneHashNode* node) noexcept;
+  //! \endcond
+
+  //! \}
+};
+
+//! Low-level hash table specialized for storing string keys and POD values.
+//!
+//! This hash table allows duplicates to be inserted (the API is so low level that it's up to you if you allow it or
+//! not, as you should first `get()` the node and then modify it or insert a new node by using `insert()`, depending
+//! on the intention).
+//!
+//! `NodeT` has to derive from `ZoneHashNode`, the table only links nodes and never allocates or frees them.
+template<typename NodeT>
+class ZoneHash : public ZoneHashBase {
+public:
+  ASMJIT_NONCOPYABLE(ZoneHash)
+
+  typedef NodeT Node;
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Creates an empty hash table.
+  inline ZoneHash() noexcept
+    : ZoneHashBase() {}
+
+  //! Move-constructs the table by forwarding to `ZoneHashBase(ZoneHashBase&&)`.
+  //!
+  //! `other` is an lvalue inside this constructor, so it has to be cast back to an rvalue explicitly. Delegating
+  //! with `ZoneHash(other)` would select the copy constructor, which `ASMJIT_NONCOPYABLE` is expected to disable.
+  inline ZoneHash(ZoneHash&& other) noexcept
+    : ZoneHashBase(std::move(other)) {}
+
+  //! \}
+
+  //! \name Utilities
+  //! \{
+
+  //! Swaps the content of this table with `other`.
+  inline void swap(ZoneHash& other) noexcept { ZoneHashBase::_swap(other); }
+
+  //! Returns the first node in the bucket selected by `key.hashCode()` for which `key.matches(node)` is true,
+  //! or null when the chain contains no such node.
+  template<typename KeyT>
+  inline NodeT* get(const KeyT& key) const noexcept {
+    uint32_t hashMod = _calcMod(key.hashCode());
+    NodeT* node = static_cast<NodeT*>(_data[hashMod]);
+
+    while (node && !key.matches(node))
+      node = static_cast<NodeT*>(node->_hashNext);
+    return node;
+  }
+
+  //! Inserts `node` (no duplicate check is made) and returns it.
+  inline NodeT* insert(ZoneAllocator* allocator, NodeT* node) noexcept { return static_cast<NodeT*>(_insert(allocator, node)); }
+  //! Unlinks `node` and returns it, or returns null when `node` was not found in the table.
+  inline NodeT* remove(ZoneAllocator* allocator, NodeT* node) noexcept { return static_cast<NodeT*>(_remove(allocator, node)); }
+
+  //! \}
+};
+
+//! \}
+
+ASMJIT_END_NAMESPACE
+
+#endif // ASMJIT_CORE_ZONEHASH_H_INCLUDED
diff --git a/lib/lepton/asmjit/core/zonelist.cpp b/lib/lepton/asmjit/core/zonelist.cpp
new file mode 100644
index 0000000000..d4b311d430
--- /dev/null
+++ b/lib/lepton/asmjit/core/zonelist.cpp
@@ -0,0 +1,163 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#include "../core/zone.h"
+#include "../core/zonelist.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+// ZoneList - Tests
+// ================
+
+#if defined(ASMJIT_TEST)
+// Minimal node type for the list test, it adds nothing to the prev/next links inherited from `ZoneListNode`.
+class MyListNode : public ZoneListNode<MyListNode> {};
+
+// Drives a `ZoneList` of four nodes (a, b, c, d) through append, prepend, insertAfter, insertBefore, unlink,
+// popFirst and pop. After every step the list ends and the prev/next links of the affected nodes are checked;
+// the bracketed comments show the list content expected at that point.
+UNIT(zone_list) {
+  Zone zone(4096);
+  ZoneList<MyListNode> list;
+
+  MyListNode* a = zone.newT<MyListNode>();
+  MyListNode* b = zone.newT<MyListNode>();
+  MyListNode* c = zone.newT<MyListNode>();
+  MyListNode* d = zone.newT<MyListNode>();
+
+  INFO("Append / Unlink");
+
+  // []
+  EXPECT(list.empty() == true);
+
+  // [A]
+  list.append(a);
+  EXPECT(list.empty() == false);
+  EXPECT(list.first() == a);
+  EXPECT(list.last() == a);
+  EXPECT(a->prev() == nullptr);
+  EXPECT(a->next() == nullptr);
+
+  // [A, B]
+  list.append(b);
+  EXPECT(list.first() == a);
+  EXPECT(list.last() == b);
+  EXPECT(a->prev() == nullptr);
+  EXPECT(a->next() == b);
+  EXPECT(b->prev() == a);
+  EXPECT(b->next() == nullptr);
+
+  // [A, B, C]
+  list.append(c);
+  EXPECT(list.first() == a);
+  EXPECT(list.last() == c);
+  EXPECT(a->prev() == nullptr);
+  EXPECT(a->next() == b);
+  EXPECT(b->prev() == a);
+  EXPECT(b->next() == c);
+  EXPECT(c->prev() == b);
+  EXPECT(c->next() == nullptr);
+
+  // [B, C]
+  list.unlink(a);
+  EXPECT(list.first() == b);
+  EXPECT(list.last() == c);
+  EXPECT(a->prev() == nullptr);
+  EXPECT(a->next() == nullptr);
+  EXPECT(b->prev() == nullptr);
+  EXPECT(b->next() == c);
+  EXPECT(c->prev() == b);
+  EXPECT(c->next() == nullptr);
+
+  // [B]
+  list.unlink(c);
+  EXPECT(list.first() == b);
+  EXPECT(list.last() == b);
+  EXPECT(b->prev() == nullptr);
+  EXPECT(b->next() == nullptr);
+  EXPECT(c->prev() == nullptr);
+  EXPECT(c->next() == nullptr);
+
+  // []
+  list.unlink(b);
+  EXPECT(list.empty() == true);
+  EXPECT(list.first() == nullptr);
+  EXPECT(list.last() == nullptr);
+  EXPECT(b->prev() == nullptr);
+  EXPECT(b->next() == nullptr);
+
+  INFO("Prepend / Unlink");
+
+  // [A]
+  list.prepend(a);
+  EXPECT(list.empty() == false);
+  EXPECT(list.first() == a);
+  EXPECT(list.last() == a);
+  EXPECT(a->prev() == nullptr);
+  EXPECT(a->next() == nullptr);
+
+  // [B, A]
+  list.prepend(b);
+  EXPECT(list.first() == b);
+  EXPECT(list.last() == a);
+  EXPECT(b->prev() == nullptr);
+  EXPECT(b->next() == a);
+  EXPECT(a->prev() == b);
+  EXPECT(a->next() == nullptr);
+
+  INFO("InsertAfter / InsertBefore");
+
+  // [B, A, C]
+  list.insertAfter(a, c);
+  EXPECT(list.first() == b);
+  EXPECT(list.last() == c);
+  EXPECT(b->prev() == nullptr);
+  EXPECT(b->next() == a);
+  EXPECT(a->prev() == b);
+  EXPECT(a->next() == c);
+  EXPECT(c->prev() == a);
+  EXPECT(c->next() == nullptr);
+
+  // [B, D, A, C]
+  list.insertBefore(a, d);
+  EXPECT(list.first() == b);
+  EXPECT(list.last() == c);
+  EXPECT(b->prev() == nullptr);
+  EXPECT(b->next() == d);
+  EXPECT(d->prev() == b);
+  EXPECT(d->next() == a);
+  EXPECT(a->prev() == d);
+  EXPECT(a->next() == c);
+  EXPECT(c->prev() == a);
+  EXPECT(c->next() == nullptr);
+
+  INFO("PopFirst / Pop");
+
+  // [D, A, C]
+  EXPECT(list.popFirst() == b);
+  EXPECT(b->prev() == nullptr);
+  EXPECT(b->next() == nullptr);
+
+  EXPECT(list.first() == d);
+  EXPECT(list.last() == c);
+  EXPECT(d->prev() == nullptr);
+  EXPECT(d->next() == a);
+  EXPECT(a->prev() == d);
+  EXPECT(a->next() == c);
+  EXPECT(c->prev() == a);
+  EXPECT(c->next() == nullptr);
+
+  // [D, A]
+  EXPECT(list.pop() == c);
+  EXPECT(c->prev() == nullptr);
+  EXPECT(c->next() == nullptr);
+
+  EXPECT(list.first() == d);
+  EXPECT(list.last() == a);
+  EXPECT(d->prev() == nullptr);
+  EXPECT(d->next() == a);
+  EXPECT(a->prev() == d);
+  EXPECT(a->next() == nullptr);
+}
+#endif
+
+ASMJIT_END_NAMESPACE
diff --git a/lib/lepton/asmjit/core/zonelist.h b/lib/lepton/asmjit/core/zonelist.h
new file mode 100644
index 0000000000..c5e0013658
--- /dev/null
+++ b/lib/lepton/asmjit/core/zonelist.h
@@ -0,0 +1,209 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_ZONELIST_H_INCLUDED
+#define ASMJIT_CORE_ZONELIST_H_INCLUDED
+
+#include "../core/support.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \addtogroup asmjit_zone
+//! \{
+
+//! Link storage for nodes kept in a \ref ZoneList.
+//!
+//! `NodeT` is the node type stored in the list; it derives from `ZoneListNode<NodeT>` so that the list can reach
+//! the links through the node pointer.
+template<typename NodeT>
+class ZoneListNode {
+public:
+  ASMJIT_NONCOPYABLE(ZoneListNode)
+
+  //! \name Constants
+  //! \{
+
+  //! Indexes into `_listNodes`.
+  enum : size_t {
+    kNodeIndexPrev = 0,
+    kNodeIndexNext = 1
+  };
+
+  //! \}
+
+  //! \name Members
+  //! \{
+
+  //! Previous and next node, null where there is none.
+  NodeT* _listNodes[2];
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Creates a node that has no neighbours.
+  inline ZoneListNode() noexcept
+    : _listNodes { nullptr, nullptr } {}
+
+  //! Copies both links from `other`; `other` and its neighbours are left as they are.
+  inline ZoneListNode(ZoneListNode&& other) noexcept
+    : _listNodes { other._listNodes[kNodeIndexPrev], other._listNodes[kNodeIndexNext] } {}
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns the previous node, or null.
+  inline NodeT* prev() const noexcept { return _listNodes[kNodeIndexPrev]; }
+  //! Returns the next node, or null.
+  inline NodeT* next() const noexcept { return _listNodes[kNodeIndexNext]; }
+
+  //! Tests whether a previous node is linked.
+  inline bool hasPrev() const noexcept { return prev() != nullptr; }
+  //! Tests whether a next node is linked.
+  inline bool hasNext() const noexcept { return next() != nullptr; }
+
+  //! \}
+};
+
+//! Doubly linked list of `NodeT` nodes.
+//!
+//! The list stores only pointers to its first and last node; the links live in the nodes themselves and no
+//! member of this class allocates or frees a node.
+template <typename NodeT>
+class ZoneList {
+public:
+  ASMJIT_NONCOPYABLE(ZoneList)
+
+  //! \name Constants
+  //! \{
+
+  //! Indexes into `_nodes`.
+  enum : size_t {
+    kNodeIndexFirst = 0,
+    kNodeIndexLast = 1
+  };
+
+  //! \}
+
+  //! \name Members
+  //! \{
+
+  //! First and last node, both null when the list is empty.
+  NodeT* _nodes[2];
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Creates an empty list.
+  inline ZoneList() noexcept
+    : _nodes { nullptr, nullptr } {}
+
+  //! Copies the first / last pointers of `other`; `other` itself is not modified.
+  inline ZoneList(ZoneList&& other) noexcept
+    : _nodes { other._nodes[kNodeIndexFirst], other._nodes[kNodeIndexLast] } {}
+
+  //! Forgets all nodes without touching them.
+  inline void reset() noexcept {
+    _nodes[kNodeIndexFirst] = nullptr;
+    _nodes[kNodeIndexLast] = nullptr;
+  }
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Tests whether the list has no first node.
+  inline bool empty() const noexcept { return _nodes[kNodeIndexFirst] == nullptr; }
+  //! Returns the first node, or null.
+  inline NodeT* first() const noexcept { return _nodes[kNodeIndexFirst]; }
+  //! Returns the last node, or null.
+  inline NodeT* last() const noexcept { return _nodes[kNodeIndexLast]; }
+
+  //! \}
+
+  //! \name Utilities
+  //! \{
+
+  //! Exchanges the first / last pointers with `other`.
+  inline void swap(ZoneList& other) noexcept {
+    std::swap(_nodes[kNodeIndexFirst], other._nodes[kNodeIndexFirst]);
+    std::swap(_nodes[kNodeIndexLast], other._nodes[kNodeIndexLast]);
+  }
+
+  // Adds `node` at the end selected by `dir` (`kNodeIndexLast` appends, `kNodeIndexFirst` prepends). Only the
+  // link of `node` that faces the list is written.
+  inline void _addNode(NodeT* node, size_t dir) noexcept {
+    const size_t opposite = !dir;
+    NodeT* edge = _nodes[dir];
+
+    node->_listNodes[opposite] = edge;
+    _nodes[dir] = node;
+
+    // An empty list has no edge node to link; `node` then becomes both ends of the list.
+    if (edge == nullptr)
+      _nodes[opposite] = node;
+    else
+      edge->_listNodes[dir] = node;
+  }
+
+  // Inserts `node` next to `ref` on the side selected by `dir` (next index inserts after, prev index before).
+  inline void _insertNode(NodeT* ref, NodeT* node, size_t dir) noexcept {
+    ASMJIT_ASSERT(ref != nullptr);
+
+    const size_t opposite = !dir;
+    NodeT* neighbour = ref->_listNodes[dir];
+
+    ref->_listNodes[dir] = node;
+
+    // Without a neighbour `ref` was the end of the list on that side, so `node` becomes the new end.
+    if (neighbour == nullptr)
+      _nodes[dir] = node;
+    else
+      neighbour->_listNodes[opposite] = node;
+
+    node->_listNodes[opposite] = ref;
+    node->_listNodes[dir] = neighbour;
+  }
+
+  //! Adds `node` after the current last node.
+  inline void append(NodeT* node) noexcept { _addNode(node, kNodeIndexLast); }
+  //! Adds `node` before the current first node.
+  inline void prepend(NodeT* node) noexcept { _addNode(node, kNodeIndexFirst); }
+
+  //! Inserts `node` directly after `ref`.
+  inline void insertAfter(NodeT* ref, NodeT* node) noexcept { _insertNode(ref, node, NodeT::kNodeIndexNext); }
+  //! Inserts `node` directly before `ref`.
+  inline void insertBefore(NodeT* ref, NodeT* node) noexcept { _insertNode(ref, node, NodeT::kNodeIndexPrev); }
+
+  //! Removes `node` from the list, clears both of its links, and returns it.
+  inline NodeT* unlink(NodeT* node) noexcept {
+    NodeT* before = node->prev();
+    NodeT* after = node->next();
+
+    // Bridge the neighbours over `node`, or move the list end when there is no neighbour on that side.
+    if (before)
+      before->_listNodes[1] = after;
+    else
+      _nodes[0] = after;
+
+    if (after)
+      after->_listNodes[0] = before;
+    else
+      _nodes[1] = before;
+
+    node->_listNodes[0] = nullptr;
+    node->_listNodes[1] = nullptr;
+
+    return node;
+  }
+
+  //! Removes and returns the first node; the list must not be empty.
+  inline NodeT* popFirst() noexcept {
+    NodeT* head = _nodes[0];
+    ASMJIT_ASSERT(head != nullptr);
+
+    NodeT* successor = head->next();
+    _nodes[0] = successor;
+
+    if (!successor) {
+      // `head` was the only node.
+      _nodes[1] = nullptr;
+      return head;
+    }
+
+    successor->_listNodes[0] = nullptr;
+    head->_listNodes[1] = nullptr;
+    return head;
+  }
+
+  //! Removes and returns the last node; the list must not be empty.
+  inline NodeT* pop() noexcept {
+    NodeT* tail = _nodes[1];
+    ASMJIT_ASSERT(tail != nullptr);
+
+    NodeT* predecessor = tail->prev();
+    _nodes[1] = predecessor;
+
+    if (!predecessor) {
+      // `tail` was the only node.
+      _nodes[0] = nullptr;
+      return tail;
+    }
+
+    predecessor->_listNodes[1] = nullptr;
+    tail->_listNodes[0] = nullptr;
+    return tail;
+  }
+
+  //! \}
+};
+
+//! \}
+
+ASMJIT_END_NAMESPACE
+
+#endif // ASMJIT_CORE_ZONELIST_H_INCLUDED
diff --git a/lib/lepton/asmjit/core/zonestack.cpp b/lib/lepton/asmjit/core/zonestack.cpp
new file mode 100644
index 0000000000..77e6f202c7
--- /dev/null
+++ b/lib/lepton/asmjit/core/zonestack.cpp
@@ -0,0 +1,176 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#include "../core/zone.h"
+#include "../core/zonestack.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+// ZoneStackBase - Init & Reset
+// ============================
+
+Error ZoneStackBase::_init(ZoneAllocator* allocator, size_t middleIndex) noexcept {
+  // Hand every block back to the allocator that is currently attached (if any).
+  if (ZoneAllocator* previous = _allocator) {
+    for (Block* cur = _block[kBlockIndexFirst]; cur != nullptr; ) {
+      Block* following = cur->next();
+      previous->release(cur, kBlockSize);
+      cur = following;
+    }
+
+    _allocator = nullptr;
+    _block[kBlockIndexFirst] = _block[kBlockIndexLast] = nullptr;
+  }
+
+  // A null `allocator` only resets the stack.
+  if (!allocator)
+    return kErrorOk;
+
+  // Otherwise start with a single empty block whose start/end both sit at `middleIndex`.
+  Block* first = static_cast<Block*>(allocator->alloc(kBlockSize));
+  if (ASMJIT_UNLIKELY(!first))
+    return DebugUtils::errored(kErrorOutOfMemory);
+
+  uint8_t* mid = reinterpret_cast<uint8_t*>(first) + middleIndex;
+  first->_link[kBlockIndexPrev] = nullptr;
+  first->_link[kBlockIndexNext] = nullptr;
+  first->_start = mid;
+  first->_end = mid;
+
+  _allocator = allocator;
+  _block[kBlockIndexFirst] = first;
+  _block[kBlockIndexLast] = first;
+  return kErrorOk;
+}
+
+// ZoneStackBase - Operations
+// ==========================
+
+Error ZoneStackBase::_prepareBlock(uint32_t side, size_t initialIndex) noexcept {
+  ASMJIT_ASSERT(isInitialized());
+
+  // The block currently at `side` becomes the inner neighbour of the new end block.
+  Block* prev = _block[side];
+  ASMJIT_ASSERT(!prev->empty());
+
+  Block* fresh = _allocator->allocT<Block>(kBlockSize);
+  if (ASMJIT_UNLIKELY(!fresh))
+    return DebugUtils::errored(kErrorOutOfMemory);
+
+  uint8_t* cursor = reinterpret_cast<uint8_t*>(fresh) + initialIndex;
+  fresh->_start = cursor;
+  fresh->_end = cursor;
+  fresh->_link[!side] = prev;
+  fresh->_link[ side] = nullptr;
+  prev->_link[side] = fresh;
+  _block[side] = fresh;
+  return kErrorOk;
+}
+
+void ZoneStackBase::_cleanupBlock(uint32_t side, size_t middleIndex) noexcept {
+  Block* block = _block[side];
+  ASMJIT_ASSERT(block->empty());
+
+  Block* prev = block->_link[!side];
+  if (!prev) {
+    // If the container becomes empty center both pointers in the remaining block.
+    if (_block[!side] == block)
+      block->_start = block->_end = reinterpret_cast<uint8_t*>(block) + middleIndex;
+    return;
+  }
+
+  // An inner neighbour exists: release the drained block and make the neighbour the new end.
+  ASMJIT_ASSERT(prev->_link[side] == block);
+  _allocator->release(block, kBlockSize);
+  prev->_link[side] = nullptr;
+  _block[side] = prev;
+}
+
+// ZoneStack - Tests
+// =================
+
+#if defined(ASMJIT_TEST)
+template<typename T>
+static void test_zone_stack(ZoneAllocator* allocator, const char* typeName) {
+  // Exercises both ends of a ZoneStack<T>; every append/prepend result is checked so a failed grow is reported at its origin.
+  ZoneStack<T> stack;
+  INFO("Testing ZoneStack<%s>", typeName);
+  INFO("  (%u items per one Block)", unsigned(ZoneStack<T>::kNumBlockItems));
+
+  EXPECT(stack.init(allocator) == kErrorOk);
+  EXPECT(stack.empty(), "Stack must be empty after `init()`");
+
+  EXPECT(stack.append(42) == kErrorOk);
+  EXPECT(!stack.empty()        , "Stack must not be empty after an item has been appended");
+  EXPECT(stack.pop() == 42     , "Stack.pop() must return the item that has been appended last");
+  EXPECT(stack.empty()         , "Stack must be empty after the last item has been removed");
+
+  EXPECT(stack.prepend(43) == kErrorOk);
+  EXPECT(!stack.empty()        , "Stack must not be empty after an item has been prepended");
+  EXPECT(stack.popFirst() == 43, "Stack.popFirst() must return the item that has been prepended last");
+  EXPECT(stack.empty()         , "Stack must be empty after the last item has been removed");
+
+  int i;
+  int iMin =-100000;
+  int iMax = 100000;
+
+  INFO("Validating prepend() & popFirst()");
+  for (i = iMax; i >= 0; i--) EXPECT(stack.prepend(T(i)) == kErrorOk);
+  for (i = 0; i <= iMax; i++) {
+    T item = stack.popFirst();
+    EXPECT(i == item, "Item '%d' didn't match the item '%lld' popped", i, (long long)item);
+    if (!stack.empty()) {
+      // Pop the following item too and push it back, which also exercises block cleanup/re-creation at the front.
+      item = stack.popFirst();
+      EXPECT(i + 1 == item, "Item '%d' didn't match the item '%lld' popped", i + 1, (long long)item);
+      EXPECT(stack.prepend(item) == kErrorOk);
+    }
+  }
+  EXPECT(stack.empty());
+
+  INFO("Validating append() & pop()");
+  for (i = 0; i <= iMax; i++) EXPECT(stack.append(T(i)) == kErrorOk);
+  for (i = iMax; i >= 0; i--) {
+    T item = stack.pop();
+    EXPECT(i == item, "Item '%d' didn't match the item '%lld' popped", i, (long long)item);
+    if (!stack.empty()) {
+      // Same round trip as above, at the back of the stack.
+      item = stack.pop();
+      EXPECT(i - 1 == item, "Item '%d' didn't match the item '%lld' popped", i - 1, (long long)item);
+      EXPECT(stack.append(item) == kErrorOk);
+    }
+  }
+  EXPECT(stack.empty());
+
+  INFO("Validating append()/prepend() & popFirst()");
+  for (i = 1; i <= iMax; i++) EXPECT(stack.append(T(i)) == kErrorOk);
+  for (i = 0; i >= iMin; i--) EXPECT(stack.prepend(T(i)) == kErrorOk);
+
+  for (i = iMin; i <= iMax; i++) {
+    T item = stack.popFirst();
+    EXPECT(i == item, "Item '%d' didn't match the item '%lld' popped", i, (long long)item);
+  }
+  EXPECT(stack.empty());
+
+  INFO("Validating append()/prepend() & pop()");
+  for (i = 0; i >= iMin; i--) EXPECT(stack.prepend(T(i)) == kErrorOk);
+  for (i = 1; i <= iMax; i++) EXPECT(stack.append(T(i)) == kErrorOk);
+
+  for (i = iMax; i >= iMin; i--) {
+    T item = stack.pop();
+    EXPECT(i == item, "Item '%d' didn't match the item '%lld' popped", i, (long long)item);
+  }
+  EXPECT(stack.empty());
+}
+
+UNIT(zone_stack) {
+  Zone zone(8096 - Zone::kBlockOverhead);  // NOTE(review): 8096 may have been meant as 8192 - confirm.
+  ZoneAllocator allocator(&zone);
+  // Run the same scenario with `int` and `int64_t` items, both sharing the one allocator.
+  test_zone_stack<int>(&allocator, "int");
+  test_zone_stack<int64_t>(&allocator, "int64_t");
+}
+#endif
+
+ASMJIT_END_NAMESPACE
diff --git a/lib/lepton/asmjit/core/zonestack.h b/lib/lepton/asmjit/core/zonestack.h
new file mode 100644
index 0000000000..aea7b6868f
--- /dev/null
+++ b/lib/lepton/asmjit/core/zonestack.h
@@ -0,0 +1,239 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_ZONESTACK_H_INCLUDED
+#define ASMJIT_CORE_ZONESTACK_H_INCLUDED
+
+#include "../core/zone.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \addtogroup asmjit_zone
+//! \{
+
+//! Base class used by \ref ZoneStack.
+class ZoneStackBase {
+public:
+  ASMJIT_NONCOPYABLE(ZoneStackBase)
+
+  //! \name Constants
+  //! \{
+
+  enum : size_t {
+    kBlockIndexPrev = 0,  //!< Index of the previous-block link in `Block::_link`.
+    kBlockIndexNext = 1,  //!< Index of the next-block link in `Block::_link`.
+
+    kBlockIndexFirst = 0,  //!< Index of the first block in `_block`.
+    kBlockIndexLast = 1,  //!< Index of the last block in `_block`.
+
+    kBlockSize = ZoneAllocator::kHiMaxSize  //!< Size in bytes requested from the allocator for each block.
+  };
+
+  //! \}
+
+  //! \name Types
+  //! \{
+
+  struct Block {
+    //! Previous (`kBlockIndexPrev`) and next (`kBlockIndexNext`) blocks.
+    Block* _link[2];
+    //! Pointer to the first stored item (equals `_end` when the block is empty).
+    void* _start;
+    //! Pointer one past the last stored item.
+    void* _end;
+
+    inline bool empty() const noexcept { return _start == _end; }
+    inline Block* prev() const noexcept { return _link[kBlockIndexPrev]; }
+    inline Block* next() const noexcept { return _link[kBlockIndexNext]; }
+
+    inline void setPrev(Block* block) noexcept { _link[kBlockIndexPrev] = block; }
+    inline void setNext(Block* block) noexcept { _link[kBlockIndexNext] = block; }
+
+    template<typename T>
+    inline T* start() const noexcept { return static_cast<T*>(_start); }
+    template<typename T>
+    inline void setStart(T* start) noexcept { _start = static_cast<void*>(start); }
+
+    template<typename T>
+    inline T* end() const noexcept { return (T*)_end; }
+    template<typename T>
+    inline void setEnd(T* end) noexcept { _end = (void*)end; }
+
+    template<typename T>
+    inline T* data() const noexcept { return (T*)((uint8_t*)(this) + sizeof(Block)); }  // Item storage begins right after the Block header.
+
+    template<typename T>
+    inline bool canPrepend() const noexcept { return _start > data<void>(); }  // True while `_start` is above the beginning of the storage.
+
+    template<typename T>
+    inline bool canAppend() const noexcept {
+      size_t kNumBlockItems = (kBlockSize - sizeof(Block)) / sizeof(T);
+      size_t kStartBlockIndex = sizeof(Block);
+      size_t kEndBlockIndex = kStartBlockIndex + kNumBlockItems * sizeof(T);
+      // True while one more `T` still fits below `kEndBlockIndex`.
+      return (uintptr_t)_end <= ((uintptr_t)this + kEndBlockIndex - sizeof(T));
+    }
+  };
+
+  //! \}
+
+  //! \name Members
+  //! \{
+
+  //! Allocator used to allocate blocks (null while the stack is not initialized).
+  ZoneAllocator* _allocator;
+  //! First (`kBlockIndexFirst`) and last (`kBlockIndexLast`) blocks.
+  Block* _block[2];
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  inline ZoneStackBase() noexcept {
+    _allocator = nullptr;
+    _block[0] = nullptr;
+    _block[1] = nullptr;
+  }
+  inline ~ZoneStackBase() noexcept { reset(); }
+
+  inline bool isInitialized() const noexcept { return _allocator != nullptr; }
+  ASMJIT_API Error _init(ZoneAllocator* allocator, size_t middleIndex) noexcept;
+  inline Error reset() noexcept { return _init(nullptr, 0); }  // Passing a null allocator makes `_init()` only release and detach.
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns `ZoneAllocator` attached to this container.
+  inline ZoneAllocator* allocator() const noexcept { return _allocator; }
+  //! Tests whether the stack holds no items (asserts that the stack is initialized).
+  inline bool empty() const noexcept {
+    ASMJIT_ASSERT(isInitialized());
+    return _block[0]->start<void>() == _block[1]->end<void>();
+  }
+
+  //! \}
+
+  //! \cond INTERNAL
+  //! \name Internal
+  //! \{
+
+  ASMJIT_API Error _prepareBlock(uint32_t side, size_t initialIndex) noexcept;  // Allocates and links a new end block at `side`.
+  ASMJIT_API void _cleanupBlock(uint32_t side, size_t middleIndex) noexcept;  // Releases or re-centers the empty end block at `side`.
+
+  //! \}
+  //! \endcond
+};
+
+//! Zone allocated stack container.
+template<typename T>
+class ZoneStack : public ZoneStackBase {
+public:
+  ASMJIT_NONCOPYABLE(ZoneStack)
+
+  //! \name Constants
+  //! \{
+
+  enum : uint32_t {
+    kNumBlockItems   = uint32_t((kBlockSize - sizeof(Block)) / sizeof(T)),  //!< Number of `T` items that fit into one block.
+    kStartBlockIndex = uint32_t(sizeof(Block)),  //!< Byte offset of the first item slot.
+    kMidBlockIndex   = uint32_t(kStartBlockIndex + (kNumBlockItems / 2) * sizeof(T)),  //!< Byte offset of the middle item slot.
+    kEndBlockIndex   = uint32_t(kStartBlockIndex + (kNumBlockItems    ) * sizeof(T))  //!< Byte offset one past the last item slot.
+  };
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  inline ZoneStack() noexcept {}
+  inline ~ZoneStack() noexcept {}
+
+  inline Error init(ZoneAllocator* allocator) noexcept { return _init(allocator, kMidBlockIndex); }  // The initial block starts empty at its middle slot.
+
+  //! \}
+
+  //! \name Utilities
+  //! \{
+
+  inline Error prepend(T item) noexcept {
+    ASMJIT_ASSERT(isInitialized());
+    Block* block = _block[kBlockIndexFirst];
+
+    if (!block->canPrepend<T>()) {
+      ASMJIT_PROPAGATE(_prepareBlock(kBlockIndexFirst, kEndBlockIndex));  // A new first block starts empty at its end and fills downwards.
+      block = _block[kBlockIndexFirst];
+    }
+
+    T* ptr = block->start<T>() - 1;
+    ASMJIT_ASSERT(ptr >= block->data<T>() && ptr <= block->data<T>() + (kNumBlockItems - 1));
+    *ptr = item;
+    block->setStart<T>(ptr);
+    return kErrorOk;
+  }
+
+  inline Error append(T item) noexcept {
+    ASMJIT_ASSERT(isInitialized());
+    Block* block = _block[kBlockIndexLast];
+
+    if (!block->canAppend<T>()) {
+      ASMJIT_PROPAGATE(_prepareBlock(kBlockIndexLast, kStartBlockIndex));  // A new last block starts empty at its beginning and fills upwards.
+      block = _block[kBlockIndexLast];
+    }
+
+    T* ptr = block->end<T>();
+    ASMJIT_ASSERT(ptr >= block->data<T>() && ptr <= block->data<T>() + (kNumBlockItems - 1));
+
+    *ptr++ = item;
+    block->setEnd(ptr);
+    return kErrorOk;
+  }
+
+  inline T popFirst() noexcept {
+    ASMJIT_ASSERT(isInitialized());
+    ASMJIT_ASSERT(!empty());
+
+    Block* block = _block[kBlockIndexFirst];
+    ASMJIT_ASSERT(!block->empty());
+
+    T* ptr = block->start<T>();
+    T item = *ptr++;
+
+    block->setStart(ptr);
+    if (block->empty())
+      _cleanupBlock(kBlockIndexFirst, kMidBlockIndex);  // Drop the drained first block, or re-center it if it is the only one.
+
+    return item;
+  }
+
+  inline T pop() noexcept {
+    ASMJIT_ASSERT(isInitialized());
+    ASMJIT_ASSERT(!empty());
+
+    Block* block = _block[kBlockIndexLast];
+    ASMJIT_ASSERT(!block->empty());
+
+    T* ptr = block->end<T>();
+    T item = *--ptr;
+    ASMJIT_ASSERT(ptr >= block->data<T>());
+    ASMJIT_ASSERT(ptr >= block->start<T>());
+
+    block->setEnd(ptr);
+    if (block->empty())
+      _cleanupBlock(kBlockIndexLast, kMidBlockIndex);  // Drop the drained last block, or re-center it if it is the only one.
+
+    return item;
+  }
+
+  //! \}
+};
+
+//! \}
+
+ASMJIT_END_NAMESPACE
+
+#endif // ASMJIT_CORE_ZONESTACK_H_INCLUDED
diff --git a/lib/lepton/asmjit/core/zonestring.h b/lib/lepton/asmjit/core/zonestring.h
new file mode 100644
index 0000000000..01f5bd89f6
--- /dev/null
+++ b/lib/lepton/asmjit/core/zonestring.h
@@ -0,0 +1,120 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_ZONESTRING_H_INCLUDED
+#define ASMJIT_CORE_ZONESTRING_H_INCLUDED
+
+#include "../core/globals.h"
+#include "../core/zone.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \addtogroup asmjit_zone
+//! \{
+
+//! A helper class used by \ref ZoneString implementation.
+struct ZoneStringBase {
+  union {
+    struct {
+      uint32_t _size;  // String size, stored for both the embedded and the external form.
+      char _embedded[sizeof(void*) * 2 - 4];  // Inline character storage following `_size`.
+    };
+    struct {
+      void* _dummy;  // Overlays `_size` (and the first embedded characters where a pointer is wider than 4 bytes).
+      char* _external;  // Pointer to zone-duplicated data, used when the string is not embedded.
+    };
+  };
+
+  inline void reset() noexcept {
+    _dummy = nullptr;
+    _external = nullptr;
+  }
+
+  Error setData(Zone* zone, uint32_t maxEmbeddedSize, const char* str, size_t size) noexcept {
+    if (size == SIZE_MAX)  // SIZE_MAX means that `str` is null terminated and its length has to be measured.
+      size = strlen(str);
+
+    if (size <= maxEmbeddedSize) {
+      memcpy(_embedded, str, size);
+      _embedded[size] = '\0';
+    }
+    else {
+      char* external = static_cast<char*>(zone->dup(str, size, true));  // presumably `true` requests a null terminator - confirm against Zone::dup().
+      if (ASMJIT_UNLIKELY(!external))
+        return DebugUtils::errored(kErrorOutOfMemory);
+      _external = external;
+    }
+
+    _size = uint32_t(size);  // NOTE(review): sizes above UINT32_MAX would be truncated here - confirm callers never pass them.
+    return kErrorOk;
+  }
+};
+
+//! A string template that can be zone allocated.
+//!
+//! Helps with creating strings that can be either statically allocated if they are small, or externally allocated
+//! in case their size exceeds the limit. The `N` represents the size of the whole `ZoneString` structure, based on
+//! that size the maximum size of the internal buffer is determined.
+template<size_t N>
+class ZoneString {
+public:
+  //! \name Constants
+  //! \{
+
+  enum : uint32_t {
+    kWholeSize = (N > sizeof(ZoneStringBase)) ? uint32_t(N) : uint32_t(sizeof(ZoneStringBase)),  //!< `N`, but never less than `sizeof(ZoneStringBase)`.
+    kMaxEmbeddedSize = kWholeSize - 5  //!< Whole size minus the 4-byte `_size` field and one byte for the null terminator.
+  };
+
+  //! \}
+
+  //! \name Members
+  //! \{
+
+  union {
+    ZoneStringBase _base;  // Size + embedded/external view of the data.
+    char _wholeData[kWholeSize];  // Pads the object to `kWholeSize` bytes.
+  };
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  inline ZoneString() noexcept { reset(); }
+  inline void reset() noexcept { _base.reset(); }
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Tests whether the string is empty.
+  inline bool empty() const noexcept { return _base._size == 0; }
+
+  //! Returns the string data (the embedded buffer when the size fits into it, the external pointer otherwise).
+  inline const char* data() const noexcept { return _base._size <= kMaxEmbeddedSize ? _base._embedded : _base._external; }
+  //! Returns the string size.
+  inline uint32_t size() const noexcept { return _base._size; }
+
+  //! Tests whether the string is embedded (i.e. not dynamically allocated).
+  inline bool isEmbedded() const noexcept { return _base._size <= kMaxEmbeddedSize; }
+
+  //! Copies a new `data` of the given `size` to the string.
+  //!
+  //! If the `size` exceeds the internal buffer the given `zone` will be used to duplicate the data, otherwise
+  //! the internal buffer will be used as a storage.
+  inline Error setData(Zone* zone, const char* data, size_t size) noexcept {
+    return _base.setData(zone, kMaxEmbeddedSize, data, size);
+  }
+
+  //! \}
+};
+
+//! \}
+
+ASMJIT_END_NAMESPACE
+
+#endif // ASMJIT_CORE_ZONESTRING_H_INCLUDED
diff --git a/lib/lepton/asmjit/core/zonetree.cpp b/lib/lepton/asmjit/core/zonetree.cpp
new file mode 100644
index 0000000000..8c42af8c02
--- /dev/null
+++ b/lib/lepton/asmjit/core/zonetree.cpp
@@ -0,0 +1,99 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#include "../core/support.h"
+#include "../core/zone.h"
+#include "../core/zonetree.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+// ZoneTreeBase - Tests
+// ====================
+
+#if defined(ASMJIT_TEST)
+template<typename NodeT>
+struct ZoneRBUnit {
+  typedef ZoneTree<NodeT> Tree;
+
+  static void verifyTree(Tree& tree) noexcept {
+    EXPECT(checkHeight(static_cast<NodeT*>(tree._root)) > 0);  // checkHeight() returns 0 for an invalid subtree.
+  }
+
+  // Checks whether the Red-Black subtree rooted at `node` is valid; returns its black height (a null node counts as 1) or 0.
+  static int checkHeight(NodeT* node) noexcept {
+    if (!node) return 1;
+
+    NodeT* ln = node->left();
+    NodeT* rn = node->right();
+
+    // Ordering violation - the left child must compare less than `node` and the right child greater.
+    EXPECT(ln == nullptr || *ln < *node);
+    EXPECT(rn == nullptr || *rn > *node);
+
+    // Red violation - a red node must not have a red child.
+    EXPECT(!node->isRed() ||
+          (!ZoneTreeNode::_isValidRed(ln) && !ZoneTreeNode::_isValidRed(rn)));
+
+    // Black violation - two valid subtrees must have the same black height.
+    int lh = checkHeight(ln);
+    int rh = checkHeight(rn);
+    EXPECT(!lh || !rh || lh == rh);
+
+    // Only count black links; propagate 0 when either subtree is invalid.
+    return (lh && rh) ? lh + !node->isRed() : 0;
+  }
+};
+
+class MyRBNode : public ZoneTreeNodeT<MyRBNode> {
+public:
+  ASMJIT_NONCOPYABLE(MyRBNode)
+
+  inline explicit MyRBNode(uint32_t key) noexcept
+    : _key(key) {}
+  // Node-to-node ordering by `_key`.
+  inline bool operator<(const MyRBNode& other) const noexcept { return _key < other._key; }
+  inline bool operator>(const MyRBNode& other) const noexcept { return _key > other._key; }
+  // Node-to-key ordering, so a node can be compared with a plain `uint32_t` key.
+  inline bool operator<(uint32_t queryKey) const noexcept { return _key < queryKey; }
+  inline bool operator>(uint32_t queryKey) const noexcept { return _key > queryKey; }
+
+  uint32_t _key;  // Key the test node is ordered by.
+};
+
+UNIT(zone_rbtree) {
+  uint32_t kCount = BrokenAPI::hasArg("--quick") ? 1000 : 10000;  // "--quick" runs a smaller test.
+
+  Zone zone(4096);
+  ZoneTree<MyRBNode> rbTree;
+
+  uint32_t key;
+  INFO("Inserting %u elements to RBTree and validating each operation", unsigned(kCount));
+  for (key = 0; key < kCount; key++) {
+    rbTree.insert(zone.newT<MyRBNode>(key));
+    ZoneRBUnit<MyRBNode>::verifyTree(rbTree);
+  }
+
+  uint32_t count = kCount;
+  INFO("Removing %u elements from RBTree and validating each operation", unsigned(kCount));
+  do {
+    MyRBNode* node;
+    // Every key that has not been removed yet must still be reachable.
+    for (key = 0; key < count; key++) {
+      node = rbTree.get(key);
+      EXPECT(node != nullptr);
+      EXPECT(node->_key == key);
+    }
+    // Remove the node having the highest remaining key and re-validate the tree.
+    node = rbTree.get(--count);
+    rbTree.remove(node);
+    ZoneRBUnit<MyRBNode>::verifyTree(rbTree);
+  } while (count);
+
+  EXPECT(rbTree.empty());
+}
+#endif
+
+ASMJIT_END_NAMESPACE
diff --git a/lib/lepton/asmjit/core/zonetree.h b/lib/lepton/asmjit/core/zonetree.h
new file mode 100644
index 0000000000..c5dbc78f49
--- /dev/null
+++ b/lib/lepton/asmjit/core/zonetree.h
@@ -0,0 +1,380 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_ZONETREE_H_INCLUDED
+#define ASMJIT_CORE_ZONETREE_H_INCLUDED
+
+#include "../core/support.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \addtogroup asmjit_zone
+//! \{
+
+//! RB-Tree node.
+//!
+//! The color is stored in a least significant bit of the `left` node.
+//!
+//! WARNING: Always use accessors to access left and right children.
+class ZoneTreeNode {
+public:
+  ASMJIT_NONCOPYABLE(ZoneTreeNode)
+
+  //! \name Constants
+  //! \{
+
+  enum : uintptr_t {
+    kRedMask = 0x1,  //!< Bit of `_rbNodeData[0]` that holds the RED flag.
+    kPtrMask = ~kRedMask  //!< Mask that strips the RED flag and leaves the pointer bits.
+  };
+
+  //! \}
+
+  //! \name Members
+  //! \{
+
+  uintptr_t _rbNodeData[2];  //!< [0] = left child pointer combined with the RED flag, [1] = right child pointer.
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  inline ZoneTreeNode() noexcept
+    : _rbNodeData { 0, 0 } {}
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  inline bool isRed() const noexcept { return static_cast<bool>(_rbNodeData[0] & kRedMask); }
+  // A slot holds a child when its value is greater than the bare RED flag.
+  inline bool hasChild(size_t i) const noexcept { return _rbNodeData[i] > kRedMask; }
+  inline bool hasLeft() const noexcept { return _rbNodeData[0] > kRedMask; }
+  inline bool hasRight() const noexcept { return _rbNodeData[1] != 0; }
+
+  template<typename T = ZoneTreeNode>
+  inline T* child(size_t i) const noexcept { return static_cast<T*>(_getChild(i)); }
+  template<typename T = ZoneTreeNode>
+  inline T* left() const noexcept { return static_cast<T*>(_getLeft()); }
+  template<typename T = ZoneTreeNode>
+  inline T* right() const noexcept { return static_cast<T*>(_getRight()); }
+
+  //! \}
+
+  //! \cond INTERNAL
+  //! \name Internal
+  //! \{
+
+  inline ZoneTreeNode* _getChild(size_t i) const noexcept { return (ZoneTreeNode*)(_rbNodeData[i] & kPtrMask); }
+  inline ZoneTreeNode* _getLeft() const noexcept { return (ZoneTreeNode*)(_rbNodeData[0] & kPtrMask); }
+  inline ZoneTreeNode* _getRight() const noexcept { return (ZoneTreeNode*)(_rbNodeData[1]); }
+  // Setters that touch slot 0 keep the RED flag already stored in it.
+  inline void _setChild(size_t i, ZoneTreeNode* node) noexcept { _rbNodeData[i] = (_rbNodeData[i] & kRedMask) | (uintptr_t)node; }
+  inline void _setLeft(ZoneTreeNode* node) noexcept { _rbNodeData[0] = (_rbNodeData[0] & kRedMask) | (uintptr_t)node; }
+  inline void _setRight(ZoneTreeNode* node) noexcept { _rbNodeData[1] = (uintptr_t)node; }
+
+  inline void _makeRed() noexcept { _rbNodeData[0] |= kRedMask; }
+  inline void _makeBlack() noexcept { _rbNodeData[0] &= kPtrMask; }
+
+  //! Tests whether the node is RED (RED node must be non-null and must have RED flag set).
+  static inline bool _isValidRed(ZoneTreeNode* node) noexcept { return node && node->isRed(); }
+
+  //! \}
+  //! \endcond
+};
+
+//! RB-Tree node casted to `NodeT`.
+template<typename NodeT>
+class ZoneTreeNodeT : public ZoneTreeNode {
+public:
+  ASMJIT_NONCOPYABLE(ZoneTreeNodeT)
+
+  //! \name Construction & Destruction
+  //! \{
+
+  inline ZoneTreeNodeT() noexcept
+    : ZoneTreeNode() {}
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+  // Same accessors as in `ZoneTreeNode`, but the result is already cast to `NodeT*`.
+  inline NodeT* child(size_t i) const noexcept { return static_cast<NodeT*>(_getChild(i)); }
+  inline NodeT* left() const noexcept { return static_cast<NodeT*>(_getLeft()); }
+  inline NodeT* right() const noexcept { return static_cast<NodeT*>(_getRight()); }
+
+  //! \}
+};
+
+//! RB-Tree.
+template<typename NodeT>
+class ZoneTree {
+public:
+  ASMJIT_NONCOPYABLE(ZoneTree)
+
+  typedef NodeT Node;
+  NodeT* _root;  //!< Root node, null when the tree is empty.
+
+  //! \name Construction & Destruction
+  //! \{
+
+  inline ZoneTree() noexcept
+    : _root(nullptr) {}
+
+  inline ZoneTree(ZoneTree&& other) noexcept
+    : _root(other._root) {}  // NOTE(review): `other._root` is left unchanged, so both trees refer to the same nodes - confirm intent.
+
+  inline void reset() noexcept { _root = nullptr; }
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  inline bool empty() const noexcept { return _root == nullptr; }
+  inline NodeT* root() const noexcept { return static_cast<NodeT*>(_root); }
+
+  //! \}
+
+  //! \name Utilities
+  //! \{
+
+  inline void swap(ZoneTree& other) noexcept {
+    std::swap(_root, other._root);
+  }
+
+  template<typename CompareT = Support::Compare<Support::SortOrder::kAscending>>
+  void insert(NodeT* ASMJIT_NONNULL(node), const CompareT& cmp = CompareT()) noexcept {
+    // Node to insert must not contain garbage (no children and no RED flag).
+    ASMJIT_ASSERT(!node->hasLeft());
+    ASMJIT_ASSERT(!node->hasRight());
+    ASMJIT_ASSERT(!node->isRed());
+
+    if (!_root) {
+      _root = node;
+      return;
+    }
+
+    ZoneTreeNode head;           // False root node,
+    head._setRight(_root);       // having root on the right.
+
+    ZoneTreeNode* g = nullptr;   // Grandparent.
+    ZoneTreeNode* p = nullptr;   // Parent.
+    ZoneTreeNode* t = &head;     // Iterator.
+    ZoneTreeNode* q = _root;     // Query.
+
+    size_t dir = 0;              // Direction for accessing child nodes.
+    size_t last = 0;             // Not needed to initialize, but makes some tools happy.
+
+    node->_makeRed();            // New nodes are always red and violations fixed appropriately.
+
+    // Search down the tree.
+    for (;;) {
+      if (!q) {
+        // Insert new node at the bottom.
+        q = node;
+        p->_setChild(dir, node);
+      }
+      else if (_isValidRed(q->_getLeft()) && _isValidRed(q->_getRight())) {
+        // Color flip.
+        q->_makeRed();
+        q->_getLeft()->_makeBlack();
+        q->_getRight()->_makeBlack();
+      }
+
+      // Fix red violation (both `q` and its parent `p` are red).
+      if (_isValidRed(q) && _isValidRed(p)) {
+        ASMJIT_ASSUME(g != nullptr);
+        ASMJIT_ASSUME(p != nullptr);
+        t->_setChild(t->_getRight() == g,
+                     q == p->_getChild(last) ? _singleRotate(g, !last) : _doubleRotate(g, !last));
+      }
+
+      // Stop if found.
+      if (q == node)
+        break;
+
+      last = dir;
+      dir = cmp(*static_cast<NodeT*>(q), *static_cast<NodeT*>(node)) < 0;
+
+      // Update helpers.
+      if (g) t = g;
+
+      g = p;
+      p = q;
+      q = q->_getChild(dir);
+    }
+
+    // Update root and make it black.
+    _root = static_cast<NodeT*>(head._getRight());
+    _root->_makeBlack();
+  }
+
+  //! Removes `node` from the tree; the node is expected to be in the tree (see the `f != nullptr` assertion).
+  template<typename CompareT = Support::Compare<Support::SortOrder::kAscending>>
+  void remove(ZoneTreeNode* ASMJIT_NONNULL(node), const CompareT& cmp = CompareT()) noexcept {
+    ZoneTreeNode head;           // False root node,
+    head._setRight(_root);       // having root on the right.
+
+    ZoneTreeNode* g = nullptr;   // Grandparent.
+    ZoneTreeNode* p = nullptr;   // Parent.
+    ZoneTreeNode* q = &head;     // Query.
+
+    ZoneTreeNode* f  = nullptr;  // Found item.
+    ZoneTreeNode* gf = nullptr;  // Found grandparent.
+    size_t dir = 1;              // Direction (0 or 1).
+
+    // Search and push a red down.
+    while (q->hasChild(dir)) {
+      size_t last = dir;
+
+      // Update helpers.
+      g = p;
+      p = q;
+      q = q->_getChild(dir);
+      dir = cmp(*static_cast<NodeT*>(q), *static_cast<NodeT*>(node)) < 0;
+
+      // Save found node.
+      if (q == node) {
+        f = q;
+        gf = g;
+      }
+
+      // Push the red node down.
+      if (!_isValidRed(q) && !_isValidRed(q->_getChild(dir))) {
+        if (_isValidRed(q->_getChild(!dir))) {
+          ZoneTreeNode* child = _singleRotate(q, dir);
+          p->_setChild(last, child);
+          p = child;
+        }
+        else if (!_isValidRed(q->_getChild(!dir)) && p->_getChild(!last)) {
+          ZoneTreeNode* s = p->_getChild(!last);
+          if (!_isValidRed(s->_getChild(!last)) && !_isValidRed(s->_getChild(last))) {
+            // Color flip.
+            p->_makeBlack();
+            s->_makeRed();
+            q->_makeRed();
+          }
+          else {
+            ASMJIT_ASSUME(g != nullptr);
+            ASMJIT_ASSUME(s != nullptr);
+
+            size_t dir2 = g->_getRight() == p;
+            ZoneTreeNode* child = g->_getChild(dir2);
+
+            if (_isValidRed(s->_getChild(last))) {
+              child = _doubleRotate(p, last);
+              g->_setChild(dir2, child);
+            }
+            else if (_isValidRed(s->_getChild(!last))) {
+              child = _singleRotate(p, last);
+              g->_setChild(dir2, child);
+            }
+
+            // Ensure correct coloring.
+            q->_makeRed();
+            child->_makeRed();
+            child->_getLeft()->_makeBlack();
+            child->_getRight()->_makeBlack();
+          }
+        }
+      }
+    }
+
+    // Replace and remove.
+    ASMJIT_ASSERT(f != nullptr);
+    ASMJIT_ASSERT(f != &head);
+    ASMJIT_ASSERT(q != &head);
+
+    p->_setChild(p->_getRight() == q,
+                 q->_getChild(q->_getLeft() == nullptr));
+
+    // NOTE: The original algorithm used a trick to just copy 'key/value' to `f` and mark `q` for deletion. But this
+    // is unacceptable here as we really want to destroy the passed `node`. So, we have to make sure that we have
+    // really removed `f` and not `q`.
+    if (f != q) {
+      ASMJIT_ASSERT(f != &head);
+      ASMJIT_ASSERT(f != gf);
+
+      ZoneTreeNode* n = gf ? gf : &head;
+      dir = (n == &head) ? 1  : cmp(*static_cast<NodeT*>(n), *static_cast<NodeT*>(node)) < 0;
+
+      for (;;) {
+        if (n->_getChild(dir) == f) {
+          n->_setChild(dir, q);
+          // RAW copy, including the color.
+          q->_rbNodeData[0] = f->_rbNodeData[0];
+          q->_rbNodeData[1] = f->_rbNodeData[1];
+          break;
+        }
+
+        n = n->_getChild(dir);
+
+        // Cannot be true as we know that it must reach `f` in few iterations.
+        ASMJIT_ASSERT(n != nullptr);
+        dir = cmp(*static_cast<NodeT*>(n), *static_cast<NodeT*>(node)) < 0;
+      }
+    }
+
+    // Update root and make it black.
+    _root = static_cast<NodeT*>(head._getRight());
+    if (_root) _root->_makeBlack();
+  }
+  //! Returns the node for which `cmp(node, key)` yields 0, or null when the search falls off the tree.
+  template<typename KeyT, typename CompareT = Support::Compare<Support::SortOrder::kAscending>>
+  inline NodeT* get(const KeyT& key, const CompareT& cmp = CompareT()) const noexcept {
+    ZoneTreeNode* node = _root;
+    while (node) {
+      auto result = cmp(*static_cast<const NodeT*>(node), key);
+      if (result == 0) break;
+
+      // Go left or right depending on the `result` (right when it is negative).
+      node = node->_getChild(result < 0);
+    }
+    return static_cast<NodeT*>(node);
+  }
+
+  //! \}
+
+  //! \cond INTERNAL
+  //! \name Internal
+  //! \{
+
+  static inline bool _isValidRed(ZoneTreeNode* node) noexcept { return ZoneTreeNode::_isValidRed(node); }
+
+  //! Single rotation - the `!dir` child of `root` takes its place (colored black), `root` becomes its `dir` child (colored red).
+  static inline ZoneTreeNode* _singleRotate(ZoneTreeNode* ASMJIT_NONNULL(root), size_t dir) noexcept {
+    ZoneTreeNode* save = root->_getChild(!dir);
+    ASMJIT_ASSUME(save != nullptr);
+
+    ZoneTreeNode* saveChild = save->_getChild(dir);
+    root->_setChild(!dir, saveChild);
+    save->_setChild( dir, root);
+    root->_makeRed();
+    save->_makeBlack();
+    return save;
+  }
+
+  //! Double rotation - rotates the `!dir` child of `root` towards `!dir` first, then rotates `root` towards `dir`.
+  static inline ZoneTreeNode* _doubleRotate(ZoneTreeNode* ASMJIT_NONNULL(root), size_t dir) noexcept {
+    ZoneTreeNode* child = root->_getChild(!dir);
+    ASMJIT_ASSUME(child != nullptr);
+
+    root->_setChild(!dir, _singleRotate(child, !dir));
+    return _singleRotate(root, dir);
+  }
+
+  //! \}
+  //! \endcond
+};
+
+//! \}
+
+ASMJIT_END_NAMESPACE
+
+#endif // ASMJIT_CORE_ZONETREE_H_INCLUDED
diff --git a/lib/lepton/asmjit/core/zonevector.cpp b/lib/lepton/asmjit/core/zonevector.cpp
new file mode 100644
index 0000000000..dfec5d5f79
--- /dev/null
+++ b/lib/lepton/asmjit/core/zonevector.cpp
@@ -0,0 +1,356 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#include "../core/support.h"
+#include "../core/zone.h"
+#include "../core/zonevector.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+// ZoneVectorBase - Helpers
+// ========================
+
+// Ensures capacity for at least `_size + n` items; fails if that item count does not fit into 32 bits.
+Error ZoneVectorBase::_grow(ZoneAllocator* allocator, uint32_t sizeOfT, uint32_t n) noexcept {
+  uint32_t threshold = Globals::kGrowThreshold / sizeOfT; // Linear-growth step (in items).
+  uint32_t capacity = _capacity;
+  uint32_t after = _size;
+
+  if (ASMJIT_UNLIKELY(std::numeric_limits<uint32_t>::max() - n < after))
+    return DebugUtils::errored(kErrorOutOfMemory);
+
+  after += n;
+  if (capacity >= after)
+    return kErrorOk;
+
+  // ZoneVector is used as an array to hold short-lived data structures used during code generation.
+  // Use a small capacity at the beginning (very good for ZoneAllocator) and then grow quicker.
+  if (capacity < 4)
+    capacity = 4;
+  else if (capacity < 8)
+    capacity = 8;
+  else if (capacity < 16)
+    capacity = 16;
+  else if (capacity < 64)
+    capacity = 64;
+  else if (capacity < 256)
+    capacity = 256;
+
+  // Double below `threshold`, add `threshold` above it. If the step is zero or the sum would wrap
+  // around 32 bits, settle for exactly `after` so the loop is guaranteed to terminate.
+  while (capacity < after) {
+    uint32_t step = capacity < threshold ? capacity : threshold;
+    bool wraps = step == 0 || std::numeric_limits<uint32_t>::max() - capacity < step;
+    capacity = wraps ? after : capacity + step;
+  }
+
+  return _reserve(allocator, sizeOfT, capacity);
+}
+
+Error ZoneVectorBase::_reserve(ZoneAllocator* allocator, uint32_t sizeOfT, uint32_t n) noexcept {
+  uint32_t oldCapacity = _capacity;
+  if (oldCapacity >= n) return kErrorOk;
+
+  // `n * sizeOfT` must fit into 32 bits; comparing `n` against the quotient catches every overflow.
+  if (ASMJIT_UNLIKELY(n > std::numeric_limits<uint32_t>::max() / sizeOfT))
+    return DebugUtils::errored(kErrorOutOfMemory);
+
+  size_t allocatedBytes;
+  uint8_t* newData = static_cast<uint8_t*>(allocator->alloc(size_t(n) * sizeOfT, allocatedBytes));
+  if (ASMJIT_UNLIKELY(!newData))
+    return DebugUtils::errored(kErrorOutOfMemory);
+
+  // Copy the live items over, then hand the old storage back to the allocator.
+  void* oldData = _data;
+  if (_size)
+    memcpy(newData, oldData, size_t(_size) * sizeOfT);
+  if (oldData)
+    allocator->release(oldData, size_t(oldCapacity) * sizeOfT);
+
+  // Capacity follows the byte count reported back by the allocator, not the requested one.
+  _capacity = uint32_t(allocatedBytes / sizeOfT);
+  ASMJIT_ASSERT(_capacity >= n);
+
+  _data = newData;
+  return kErrorOk;
+}
+
+Error ZoneVectorBase::_resize(ZoneAllocator* allocator, uint32_t sizeOfT, uint32_t n) noexcept {
+  const uint32_t oldSize = _size;
+  if (n > _capacity) { // Not enough room for `n` items - grow first.
+    ASMJIT_PROPAGATE(_grow(allocator, sizeOfT, n - oldSize));
+    ASMJIT_ASSERT(_capacity >= n);
+  }
+  // Zero the bytes of items added past the old size; shrinking only lowers `_size`.
+  if (n > oldSize) {
+    uint8_t* tail = static_cast<uint8_t*>(_data) + size_t(oldSize) * sizeOfT;
+    memset(tail, 0, size_t(n - oldSize) * sizeOfT);
+  }
+  _size = n;
+  return kErrorOk;
+}
+
+// ZoneBitVector - Operations
+// ==========================
+
+Error ZoneBitVector::copyFrom(ZoneAllocator* allocator, const ZoneBitVector& other) noexcept {
+  BitWord* data = _data;
+  uint32_t newSize = other.size();
+  // Copying an empty vector only resets the size; the existing storage is kept.
+  if (!newSize) {
+    _size = 0;
+    return kErrorOk;
+  }
+
+  if (newSize > _capacity) {
+    // Realloc needed... Calculate the minimum capacity in bits, rounded up to whole bit-words.
+    uint32_t minimumCapacityInBits = Support::alignUp<uint32_t>(newSize, kBitWordSizeInBits);
+    if (ASMJIT_UNLIKELY(minimumCapacityInBits < newSize))
+      return DebugUtils::errored(kErrorOutOfMemory);
+
+    // Normalize to bytes.
+    uint32_t minimumCapacity = minimumCapacityInBits / 8;
+    size_t allocatedCapacity;
+
+    BitWord* newData = static_cast<BitWord*>(allocator->alloc(minimumCapacity, allocatedCapacity));
+    if (ASMJIT_UNLIKELY(!newData))
+      return DebugUtils::errored(kErrorOutOfMemory);
+
+    // `allocatedCapacity` now contains number in bytes, we need bits.
+    size_t allocatedCapacityInBits = allocatedCapacity * 8;
+
+    // Arithmetic overflow should normally not happen. If it happens we just
+    // change the `allocatedCapacityInBits` to the `minimumCapacityInBits` as
+    // this value is still safe to be used to call `_allocator->release(...)`.
+    if (ASMJIT_UNLIKELY(allocatedCapacityInBits < allocatedCapacity))
+      allocatedCapacityInBits = minimumCapacityInBits;
+    // The old content is not carried over (it is overwritten below), so the old storage is just released.
+    if (data)
+      allocator->release(data, _capacity / 8);
+    data = newData;
+
+    _data = data;
+    _capacity = uint32_t(allocatedCapacityInBits);
+  }
+  // Copy whole bit-words from `other`, including its partially used last bit-word.
+  _size = newSize;
+  _copyBits(data, other.data(), _wordsPerBits(newSize));
+
+  return kErrorOk;
+}
+
+Error ZoneBitVector::_resize(ZoneAllocator* allocator, uint32_t newSize, uint32_t idealCapacity, bool newBitsValue) noexcept {
+  ASMJIT_ASSERT(idealCapacity >= newSize);
+
+  if (newSize <= _size) {
+    // Shrinking (or no change): no reallocation, only the size and the last bit-word are updated.
+    uint32_t idx = newSize / kBitWordSizeInBits;
+    uint32_t bit = newSize % kBitWordSizeInBits;
+
+    // Just set all bits outside of the new size in the last word to zero.
+    // There is a case that there are no bits to set if `bit` is zero. This
+    // happens when `newSize` is a multiple of `kBitWordSizeInBits` like 64, 128,
+    // and so on. In that case don't change anything as that would mean setting
+    // bits outside of the `_size`.
+    if (bit)
+      _data[idx] &= (BitWord(1) << bit) - 1u;
+
+    _size = newSize;
+    return kErrorOk;
+  }
+
+  uint32_t oldSize = _size;
+  BitWord* data = _data;
+
+  if (newSize > _capacity) {
+    // Realloc needed, calculate the minimum capacity in bits (rounded up to whole bit-words).
+    uint32_t minimumCapacityInBits = Support::alignUp<uint32_t>(idealCapacity, kBitWordSizeInBits);
+
+    if (ASMJIT_UNLIKELY(minimumCapacityInBits < newSize))
+      return DebugUtils::errored(kErrorOutOfMemory);
+
+    // Normalize to bytes.
+    uint32_t minimumCapacity = minimumCapacityInBits / 8;
+    size_t allocatedCapacity;
+
+    BitWord* newData = static_cast<BitWord*>(allocator->alloc(minimumCapacity, allocatedCapacity));
+    if (ASMJIT_UNLIKELY(!newData))
+      return DebugUtils::errored(kErrorOutOfMemory);
+
+    // `allocatedCapacity` now contains number in bytes, we need bits.
+    size_t allocatedCapacityInBits = allocatedCapacity * 8;
+
+    // Arithmetic overflow should normally not happen. If it happens we just
+    // change the `allocatedCapacityInBits` to the `minimumCapacityInBits` as
+    // this value is still safe to be used to call `_allocator->release(...)`.
+    if (ASMJIT_UNLIKELY(allocatedCapacityInBits < allocatedCapacity))
+      allocatedCapacityInBits = minimumCapacityInBits;
+
+    _copyBits(newData, data, _wordsPerBits(oldSize));
+
+    if (data)
+      allocator->release(data, _capacity / 8);
+    data = newData;
+
+    _data = data;
+    _capacity = uint32_t(allocatedCapacityInBits);
+  }
+
+  // Start (of the old size) and end (of the new size) bits
+  uint32_t idx = oldSize / kBitWordSizeInBits;
+  uint32_t startBit = oldSize % kBitWordSizeInBits;
+  uint32_t endBit = newSize % kBitWordSizeInBits;
+
+  // Set new bits to either 0 or 1. The `pattern` is used to set multiple
+  // bits per bit-word and contains either all zeros or all ones.
+  BitWord pattern = Support::bitMaskFromBool<BitWord>(newBitsValue);
+
+  // First initialize the last bit-word of the old size.
+  if (startBit) {
+    uint32_t nBits = 0;
+    if (idx == (newSize / kBitWordSizeInBits)) {
+      // The number of bit-words is the same after the resize. In that case
+      // we need to set only bits necessary in the current last bit-word.
+      ASMJIT_ASSERT(startBit < endBit);
+      nBits = endBit - startBit;
+    }
+    else {
+      // There are more bit-words after the resize, so the rest of this bit-word is all new bits.
+      nBits = kBitWordSizeInBits - startBit;
+    }
+    // Assign `pattern` to bits [startBit, startBit + nBits) only; the existing bits below `startBit` are kept.
+    BitWord mask = (~BitWord(0) >> (kBitWordSizeInBits - nBits)) << startBit;
+    data[idx] = (data[idx] & ~mask) | (pattern & mask);
+    idx++;
+  }
+
+  // Initialize all bit-words after the last bit-word of the old size.
+  uint32_t endIdx = _wordsPerBits(newSize);
+  while (idx < endIdx) data[idx++] = pattern;
+
+  // Clear unused bits of the last bit-word (masking, not assigning, keeps the bits that are in use).
+  if (endBit)
+    data[endIdx - 1] &= (BitWord(1) << endBit) - 1;
+
+  _size = newSize;
+  return kErrorOk;
+}
+
+Error ZoneBitVector::_append(ZoneAllocator* allocator, bool value) noexcept {
+  // Picks a larger capacity (in bits) for one more bit and delegates the actual work to `_resize()`.
+  const uint32_t growStep = Globals::kGrowThreshold * 8;
+  const uint32_t oldCapacity = _capacity;
+  const uint32_t sizeAfter = _size + 1;
+
+  // At least 128 bits, doubling up to `growStep`, linear growth by `growStep` beyond it.
+  uint32_t wanted = oldCapacity < 128 ? uint32_t(128)
+                  : oldCapacity <= growStep ? oldCapacity * 2
+                  : oldCapacity + growStep;
+
+  // A capacity smaller than before means the 32-bit arithmetic wrapped around.
+  if (ASMJIT_UNLIKELY(wanted < oldCapacity)) {
+    if (ASMJIT_UNLIKELY(_size == std::numeric_limits<uint32_t>::max()))
+      return DebugUtils::errored(kErrorOutOfMemory);
+    wanted = sizeAfter;
+  }
+
+  return _resize(allocator, sizeAfter, wanted, value);
+}
+
+// ZoneVector / ZoneBitVector - Tests
+// ==================================
+
+#if defined(ASMJIT_TEST)
+template<typename T>
+static void test_zone_vector(ZoneAllocator* allocator, const char* typeName) {
+  int i;
+  int kMax = 100000;
+
+  ZoneVector<T> vec;
+  // A single appended item must be visible through size(), capacity() and indexOf().
+  INFO("ZoneVector<%s> basic tests", typeName);
+  EXPECT(vec.append(allocator, 0) == kErrorOk);
+  EXPECT(vec.empty() == false);
+  EXPECT(vec.size() == 1);
+  EXPECT(vec.capacity() >= 1);
+  EXPECT(vec.indexOf(0) == 0);
+  EXPECT(vec.indexOf(-11) == Globals::kNotFound);
+  // clear() empties the vector, so the previously stored 0 must no longer be found.
+  vec.clear();
+  EXPECT(vec.empty());
+  EXPECT(vec.size() == 0);
+  EXPECT(vec.indexOf(0) == Globals::kNotFound);
+  // Append `kMax` items one by one; every append must succeed.
+  for (i = 0; i < kMax; i++) {
+    EXPECT(vec.append(allocator, T(i)) == kErrorOk);
+  }
+  EXPECT(vec.empty() == false);
+  EXPECT(vec.size() == uint32_t(kMax));
+  EXPECT(vec.indexOf(T(kMax - 1)) == uint32_t(kMax - 1));
+  // The reverse iterator must start at the last appended item.
+  EXPECT(vec.rbegin()[0] == kMax - 1);
+
+  vec.release(allocator);
+}
+
+static void test_zone_bitvector(ZoneAllocator* allocator) {
+  // Exercises resize(), fillBits() and clearBits() using storage from the caller's `allocator`.
+
+  uint32_t i, count;
+  uint32_t kMaxCount = 100;
+
+  ZoneBitVector vec;
+  EXPECT(vec.empty());
+  EXPECT(vec.size() == 0);
+
+  INFO("ZoneBitVector::resize()");
+  for (count = 1; count < kMaxCount; count++) {
+    vec.clear();
+    EXPECT(vec.resize(allocator, count, false) == kErrorOk);
+    EXPECT(vec.size() == count);
+
+    for (i = 0; i < count; i++)
+      EXPECT(vec.bitAt(i) == false);
+
+    vec.clear();
+    EXPECT(vec.resize(allocator, count, true) == kErrorOk);
+    EXPECT(vec.size() == count);
+
+    for (i = 0; i < count; i++)
+      EXPECT(vec.bitAt(i) == true);
+  }
+
+  INFO("ZoneBitVector::fillBits() / clearBits()");
+  for (count = 1; count < kMaxCount; count += 2) {
+    vec.clear();
+    EXPECT(vec.resize(allocator, count) == kErrorOk);
+    EXPECT(vec.size() == count);
+    // Alternately clear/fill the shrinking window [i, count - i); as `count` is odd, bit `i` ends up as `i & 1`.
+    for (i = 0; i < (count + 1) / 2; i++) {
+      bool value = bool(i & 1);
+      if (value)
+        vec.fillBits(i, count - i * 2);
+      else
+        vec.clearBits(i, count - i * 2);
+    }
+
+    for (i = 0; i < count; i++) {
+      EXPECT(vec.bitAt(i) == bool(i & 1));
+    }
+  }
+}
+
+UNIT(zone_vector) {
+  Zone zone(8096 - Zone::kBlockOverhead);
+  ZoneAllocator allocator(&zone);
+  // Run the vector tests for two item types, then the bit-vector tests, all on the same allocator.
+  test_zone_vector<int>(&allocator, "int");
+  test_zone_vector<int64_t>(&allocator, "int64_t");
+  test_zone_bitvector(&allocator);
+}
+#endif
+
+ASMJIT_END_NAMESPACE
diff --git a/lib/lepton/asmjit/core/zonevector.h b/lib/lepton/asmjit/core/zonevector.h
new file mode 100644
index 0000000000..447c08cb92
--- /dev/null
+++ b/lib/lepton/asmjit/core/zonevector.h
@@ -0,0 +1,690 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_CORE_ZONEVECTOR_H_INCLUDED
+#define ASMJIT_CORE_ZONEVECTOR_H_INCLUDED
+
+#include "../core/support.h"
+#include "../core/zone.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \addtogroup asmjit_zone
+//! \{
+
+//! Base class used by \ref ZoneVector template.
+class ZoneVectorBase {
+public:
+  ASMJIT_NONCOPYABLE(ZoneVectorBase)
+
+  // STL compatibility;
+  typedef uint32_t size_type;
+  typedef ptrdiff_t difference_type;
+
+  //! Vector data (untyped).
+  void* _data = nullptr;
+  //! Size of the vector.
+  size_type _size = 0;
+  //! Capacity of the vector.
+  size_type _capacity = 0;
+
+protected:
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Creates a new instance of `ZoneVectorBase`.
+  inline ZoneVectorBase() noexcept {}
+
+  inline ZoneVectorBase(ZoneVectorBase&& other) noexcept
+    : _data(other._data),
+      _size(other._size),
+      _capacity(other._capacity) {}
+
+  //! \}
+
+  //! \cond INTERNAL
+  //! \name Internal
+  //! \{
+
+  inline void _release(ZoneAllocator* allocator, uint32_t sizeOfT) noexcept {
+    if (_data == nullptr) return;
+    // Hand the storage back; widen first so the byte size cannot wrap around 32 bits.
+    allocator->release(_data, size_t(_capacity) * sizeOfT);
+    reset();
+  }
+
+  ASMJIT_API Error _grow(ZoneAllocator* allocator, uint32_t sizeOfT, uint32_t n) noexcept;
+  ASMJIT_API Error _resize(ZoneAllocator* allocator, uint32_t sizeOfT, uint32_t n) noexcept;
+  ASMJIT_API Error _reserve(ZoneAllocator* allocator, uint32_t sizeOfT, uint32_t n) noexcept;
+
+  inline void _swap(ZoneVectorBase& other) noexcept {
+    std::swap(_data, other._data);
+    std::swap(_size, other._size);
+    std::swap(_capacity, other._capacity);
+  }
+
+  //! \}
+  //! \endcond
+
+public:
+  //! \name Accessors
+  //! \{
+
+  //! Tests whether the vector is empty.
+  inline bool empty() const noexcept { return _size == 0; }
+  //! Returns the vector size.
+  inline size_type size() const noexcept { return _size; }
+  //! Returns the vector capacity.
+  inline size_type capacity() const noexcept { return _capacity; }
+
+  //! \}
+
+  //! \name Utilities
+  //! \{
+
+  //! Makes the vector empty (won't change the capacity or data pointer).
+  inline void clear() noexcept { _size = 0; }
+  //! Resets the vector data and set its `size` to zero.
+  inline void reset() noexcept {
+    _data = nullptr;
+    _size = 0;
+    _capacity = 0;
+  }
+
+  //! Truncates the vector to at most `n` items.
+  inline void truncate(size_type n) noexcept {
+    _size = Support::min(_size, n);
+  }
+
+  //! Sets size of the vector to `n`. Used internally by some algorithms.
+  inline void _setSize(size_type n) noexcept {
+    ASMJIT_ASSERT(n <= _capacity);
+    _size = n;
+  }
+
+  //! \}
+};
+
+//! Template used to store and manage array of Zone allocated data.
+//!
+//! This template has these advantages over std::vector<>:
+//! - Always non-copyable (designed to be non-copyable, we want it).
+//! - Optimized for working only with POD types.
+//! - Uses ZoneAllocator, thus small vectors are almost for free.
+//! - Explicit allocation, ZoneAllocator is not part of the data.
+template <typename T>
+class ZoneVector : public ZoneVectorBase {
+public:
+  ASMJIT_NONCOPYABLE(ZoneVector)
+
+  // STL compatibility;
+  typedef T value_type;
+  typedef T* pointer;
+  typedef const T* const_pointer;
+  typedef T& reference;
+  typedef const T& const_reference;
+
+  typedef T* iterator;
+  typedef const T* const_iterator;
+  typedef std::reverse_iterator<iterator> reverse_iterator;
+  typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
+
+  //! \name Construction & Destruction
+  //! \{
+
+  inline ZoneVector() noexcept : ZoneVectorBase() {}
+  inline ZoneVector(ZoneVector&& other) noexcept : ZoneVectorBase(std::move(other)) {} // Copies data/size/capacity via the base move constructor.
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns vector data.
+  inline T* data() noexcept { return static_cast<T*>(_data); }
+  //! Returns vector data (const)
+  inline const T* data() const noexcept { return static_cast<const T*>(_data); }
+
+  //! Returns item at the given index `i` (const).
+  inline const T& at(size_t i) const noexcept {
+    ASMJIT_ASSERT(i < _size);
+    return data()[i];
+  }
+
+  inline void _setEndPtr(T* p) noexcept {
+    ASMJIT_ASSERT(p >= data() && p <= data() + _capacity);
+    _setSize(uint32_t((uintptr_t)(p - data())));
+  }
+
+  //! \}
+
+  //! \name STL Compatibility (Iterators)
+  //! \{
+
+  inline iterator begin() noexcept { return iterator(data()); }
+  inline const_iterator begin() const noexcept { return const_iterator(data()); }
+
+  inline iterator end() noexcept { return iterator(data() + _size); }
+  inline const_iterator end() const noexcept { return const_iterator(data() + _size); }
+
+  inline reverse_iterator rbegin() noexcept { return reverse_iterator(end()); }
+  inline const_reverse_iterator rbegin() const noexcept { return const_reverse_iterator(end()); }
+
+  inline reverse_iterator rend() noexcept { return reverse_iterator(begin()); }
+  inline const_reverse_iterator rend() const noexcept { return const_reverse_iterator(begin()); }
+
+  inline const_iterator cbegin() const noexcept { return const_iterator(data()); }
+  inline const_iterator cend() const noexcept { return const_iterator(data() + _size); }
+
+  inline const_reverse_iterator crbegin() const noexcept { return const_reverse_iterator(cend()); }
+  inline const_reverse_iterator crend() const noexcept { return const_reverse_iterator(cbegin()); }
+
+  //! \}
+
+  //! \name Utilities
+  //! \{
+
+  //! Swaps this vector with `other`.
+  ASMJIT_FORCE_INLINE void swap(ZoneVector<T>& other) noexcept { _swap(other); }
+
+  //! Prepends `item` to the vector.
+  ASMJIT_FORCE_INLINE Error prepend(ZoneAllocator* allocator, const T& item) noexcept {
+    if (ASMJIT_UNLIKELY(_size == _capacity))
+      ASMJIT_PROPAGATE(grow(allocator, 1));
+
+    ::memmove(static_cast<T*>(_data) + 1, _data, size_t(_size) * sizeof(T));
+    memcpy(_data, &item, sizeof(T));
+
+    _size++;
+    return kErrorOk;
+  }
+
+  //! Inserts an `item` at the specified `index`.
+  ASMJIT_FORCE_INLINE Error insert(ZoneAllocator* allocator, size_t index, const T& item) noexcept {
+    ASMJIT_ASSERT(index <= _size);
+
+    if (ASMJIT_UNLIKELY(_size == _capacity))
+      ASMJIT_PROPAGATE(grow(allocator, 1));
+
+    T* dst = static_cast<T*>(_data) + index;
+    ::memmove(dst + 1, dst, size_t(_size - index) * sizeof(T));
+    memcpy(dst, &item, sizeof(T));
+    _size++;
+
+    return kErrorOk;
+  }
+
+  //! Appends `item` to the vector.
+  ASMJIT_FORCE_INLINE Error append(ZoneAllocator* allocator, const T& item) noexcept {
+    if (ASMJIT_UNLIKELY(_size == _capacity))
+      ASMJIT_PROPAGATE(grow(allocator, 1));
+
+    memcpy(static_cast<T*>(_data) + _size, &item, sizeof(T));
+    _size++;
+
+    return kErrorOk;
+  }
+
+  //! Appends `other` vector at the end of this vector.
+  ASMJIT_FORCE_INLINE Error concat(ZoneAllocator* allocator, const ZoneVector<T>& other) noexcept {
+    uint32_t size = other._size;
+    if (_capacity - _size < size)
+      ASMJIT_PROPAGATE(grow(allocator, size));
+
+    if (size) {
+      memcpy(static_cast<T*>(_data) + _size, other._data, size_t(size) * sizeof(T));
+      _size += size;
+    }
+
+    return kErrorOk;
+  }
+
+  //! Prepends `item` to the vector (unsafe case).
+  //!
+  //! Can only be used together with `willGrow()`. If `willGrow(N)` returns `kErrorOk` then N elements
+  //! can be added to the vector without checking if there is a place for them. Used mostly internally.
+  ASMJIT_FORCE_INLINE void prependUnsafe(const T& item) noexcept {
+    ASMJIT_ASSERT(_size < _capacity);
+    T* data = static_cast<T*>(_data);
+
+    if (_size)
+      ::memmove(data + 1, data, size_t(_size) * sizeof(T));
+
+    memcpy(data, &item, sizeof(T));
+    _size++;
+  }
+
+  //! Appends `item` to the vector (unsafe case).
+  //!
+  //! Can only be used together with `willGrow()`. If `willGrow(N)` returns `kErrorOk` then N elements
+  //! can be added to the vector without checking if there is a place for them. Used mostly internally.
+  ASMJIT_FORCE_INLINE void appendUnsafe(const T& item) noexcept {
+    ASMJIT_ASSERT(_size < _capacity);
+
+    memcpy(static_cast<T*>(_data) + _size, &item, sizeof(T));
+    _size++;
+  }
+
+  //! Inserts an `item` at the specified `index` (unsafe case).
+  ASMJIT_FORCE_INLINE void insertUnsafe(size_t index, const T& item) noexcept {
+    ASMJIT_ASSERT(_size < _capacity);
+    ASMJIT_ASSERT(index <= _size);
+
+    T* dst = static_cast<T*>(_data) + index;
+    ::memmove(dst + 1, dst, size_t(_size - index) * sizeof(T));
+    memcpy(dst, &item, sizeof(T));
+    _size++;
+  }
+  //! Concatenates all items of `other` at the end of the vector.
+  ASMJIT_FORCE_INLINE void concatUnsafe(const ZoneVector<T>& other) noexcept {
+    uint32_t size = other._size;
+    ASMJIT_ASSERT(_capacity - _size >= size);
+
+    if (size) {
+      memcpy(static_cast<T*>(_data) + _size, other._data, size_t(size) * sizeof(T));
+      _size += size;
+    }
+  }
+
+  //! Performs a linear search and returns the index of the first item equal to `val`, or
+  //! `Globals::kNotFound` when no item matches.
+  ASMJIT_FORCE_INLINE uint32_t indexOf(const T& val) const noexcept {
+    const T* items = data();
+    uint32_t pos = 0;
+
+    while (pos < _size && !(items[pos] == val))
+      pos++;
+    return pos < _size ? pos : uint32_t(Globals::kNotFound);
+  }
+
+  //! Tests whether the vector contains `val`.
+  inline bool contains(const T& val) const noexcept {
+    return indexOf(val) != Globals::kNotFound;
+  }
+
+  //! Removes item at index `i`.
+  inline void removeAt(size_t i) noexcept {
+    ASMJIT_ASSERT(i < _size);
+
+    T* data = static_cast<T*>(_data) + i;
+    size_t size = --_size - i;
+
+    if (size)
+      ::memmove(data, data + 1, size_t(size) * sizeof(T));
+  }
+
+  //! Pops the last element from the vector and returns it.
+  inline T pop() noexcept {
+    ASMJIT_ASSERT(_size > 0);
+
+    uint32_t index = --_size;
+    return data()[index];
+  }
+
+  template<typename CompareT = Support::Compare<Support::SortOrder::kAscending>>
+  inline void sort(const CompareT& cmp = CompareT()) noexcept {
+    Support::qSort<T, CompareT>(data(), size(), cmp);
+  }
+
+  //! Returns item at index `i`.
+  inline T& operator[](size_t i) noexcept {
+    ASMJIT_ASSERT(i < _size);
+    return data()[i];
+  }
+
+  //! Returns item at index `i`.
+  inline const T& operator[](size_t i) const noexcept {
+    ASMJIT_ASSERT(i < _size);
+    return data()[i];
+  }
+
+  //! Returns a reference to the first element of the vector.
+  //!
+  //! \note The vector must have at least one element. Attempting to use `first()` on empty vector will trigger
+  //! an assertion failure in debug builds.
+  inline T& first() noexcept { return operator[](0); }
+  //! \overload
+  inline const T& first() const noexcept { return operator[](0); }
+
+  //! Returns a reference to the last element of the vector.
+  //!
+  //! \note The vector must have at least one element. Attempting to use `last()` on empty vector will trigger
+  //! an assertion failure in debug builds.
+  inline T& last() noexcept { return operator[](_size - 1); }
+  //! \overload
+  inline const T& last() const noexcept { return operator[](_size - 1); }
+
+  //! \}
+
+  //! \name Memory Management
+  //! \{
+
+  //! Releases the memory held by `ZoneVector<T>` back to the `allocator`.
+  inline void release(ZoneAllocator* allocator) noexcept {
+    _release(allocator, sizeof(T));
+  }
+
+  //! Called to grow the buffer to fit at least `n` elements more.
+  inline Error grow(ZoneAllocator* allocator, uint32_t n) noexcept {
+    return ZoneVectorBase::_grow(allocator, sizeof(T), n);
+  }
+
+  //! Resizes the vector to hold `n` elements.
+  //!
+  //! If `n` is greater than the current size then the additional elements' content will be initialized to zero.
+  //! If `n` is less than the current size then the vector will be truncated to exactly `n` elements.
+  inline Error resize(ZoneAllocator* allocator, uint32_t n) noexcept {
+    return ZoneVectorBase::_resize(allocator, sizeof(T), n);
+  }
+
+  //! Reallocates the internal array to fit at least `n` items.
+  inline Error reserve(ZoneAllocator* allocator, uint32_t n) noexcept {
+    return n > _capacity ? ZoneVectorBase::_reserve(allocator, sizeof(T), n) : Error(kErrorOk);
+  }
+
+  inline Error willGrow(ZoneAllocator* allocator, uint32_t n = 1) noexcept {
+    return _capacity - _size < n ? grow(allocator, n) : Error(kErrorOk);
+  }
+
+  //! \}
+};
+
+//! Zone-allocated bit vector.
+class ZoneBitVector {
+public:
+  typedef Support::BitWord BitWord;
+
+  ASMJIT_NONCOPYABLE(ZoneBitVector)
+
+  //! \name Constants
+  //! \{
+
+  enum : uint32_t {
+    kBitWordSizeInBits = Support::kBitWordSizeInBits
+  };
+
+  //! \}
+
+  //! \name Members
+  //! \{
+
+  //! Bits.
+  BitWord* _data = nullptr;
+  //! Size of the bit-vector (in bits).
+  uint32_t _size = 0;
+  //! Capacity of the bit-vector (in bits).
+  uint32_t _capacity = 0;
+
+  //! \}
+
+  //! \cond INTERNAL
+  //! \name Internal
+  //! \{
+
+  static inline uint32_t _wordsPerBits(uint32_t nBits) noexcept {
+    return ((nBits + kBitWordSizeInBits - 1) / kBitWordSizeInBits);
+  }
+
+  static inline void _zeroBits(BitWord* dst, uint32_t nBitWords) noexcept {
+    for (uint32_t i = 0; i < nBitWords; i++)
+      dst[i] = 0;
+  }
+
+  static inline void _fillBits(BitWord* dst, uint32_t nBitWords) noexcept {
+    for (uint32_t i = 0; i < nBitWords; i++)
+      dst[i] = ~BitWord(0);
+  }
+
+  static inline void _copyBits(BitWord* dst, const BitWord* src, uint32_t nBitWords) noexcept {
+    for (uint32_t i = 0; i < nBitWords; i++)
+      dst[i] = src[i];
+  }
+
+  //! \}
+  //! \endcond
+
+  //! \name Construction & Destruction
+  //! \{
+
+  inline ZoneBitVector() noexcept {}
+
+  inline ZoneBitVector(ZoneBitVector&& other) noexcept
+    : _data(other._data),
+      _size(other._size),
+      _capacity(other._capacity) {}
+
+  //! \}
+
+  //! \name Overloaded Operators
+  //! \{
+
+  inline bool operator==(const ZoneBitVector& other) const noexcept { return  eq(other); }
+  inline bool operator!=(const ZoneBitVector& other) const noexcept { return !eq(other); }
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Tests whether the bit-vector is empty (has no bits).
+  inline bool empty() const noexcept { return _size == 0; }
+  //! Returns the size of this bit-vector (in bits).
+  inline uint32_t size() const noexcept { return _size; }
+  //! Returns the capacity of this bit-vector (in bits).
+  inline uint32_t capacity() const noexcept { return _capacity; }
+
+  //! Returns the size of the `BitWord[]` array in `BitWord` units.
+  inline uint32_t sizeInBitWords() const noexcept { return _wordsPerBits(_size); }
+  //! Returns the capacity of the `BitWord[]` array in `BitWord` units.
+  inline uint32_t capacityInBitWords() const noexcept { return _wordsPerBits(_capacity); }
+
+  //! Returns bit-vector data as `BitWord[]`.
+  inline BitWord* data() noexcept { return _data; }
+  //! \overload
+  inline const BitWord* data() const noexcept { return _data; }
+
+  //! \}
+
+  //! \name Utilities
+  //! \{
+
+  inline void swap(ZoneBitVector& other) noexcept {
+    std::swap(_data, other._data);
+    std::swap(_size, other._size);
+    std::swap(_capacity, other._capacity);
+  }
+
+  inline void clear() noexcept {
+    _size = 0;
+  }
+
+  inline void reset() noexcept {
+    _data = nullptr;
+    _size = 0;
+    _capacity = 0;
+  }
+
+  inline void truncate(uint32_t newSize) noexcept {
+    _size = Support::min(_size, newSize);
+    _clearUnusedBits();
+  }
+
+  inline bool bitAt(uint32_t index) const noexcept {
+    ASMJIT_ASSERT(index < _size);
+    return Support::bitVectorGetBit(_data, index);
+  }
+
+  inline void setBit(uint32_t index, bool value) noexcept {
+    ASMJIT_ASSERT(index < _size);
+    Support::bitVectorSetBit(_data, index, value);
+  }
+
+  inline void flipBit(uint32_t index) noexcept {
+    ASMJIT_ASSERT(index < _size);
+    Support::bitVectorFlipBit(_data, index);
+  }
+
+  ASMJIT_FORCE_INLINE Error append(ZoneAllocator* allocator, bool value) noexcept {
+    uint32_t index = _size;
+    if (ASMJIT_UNLIKELY(index >= _capacity))
+      return _append(allocator, value);
+
+    uint32_t idx = index / kBitWordSizeInBits;
+    uint32_t bit = index % kBitWordSizeInBits;
+
+    if (bit == 0)
+      _data[idx] = BitWord(value) << bit;
+    else
+      _data[idx] |= BitWord(value) << bit;
+
+    _size++;
+    return kErrorOk;
+  }
+
+  ASMJIT_API Error copyFrom(ZoneAllocator* allocator, const ZoneBitVector& other) noexcept;
+
+  ASMJIT_FORCE_INLINE void clearAll() noexcept {
+    _zeroBits(_data, _wordsPerBits(_size));
+  }
+
+  ASMJIT_FORCE_INLINE void fillAll() noexcept {
+    _fillBits(_data, _wordsPerBits(_size));
+    _clearUnusedBits();
+  }
+
+  ASMJIT_FORCE_INLINE void clearBits(uint32_t start, uint32_t count) noexcept {
+    ASMJIT_ASSERT(start <= _size);
+    ASMJIT_ASSERT(_size - start >= count);
+
+    Support::bitVectorClear(_data, start, count);
+  }
+
+  ASMJIT_FORCE_INLINE void fillBits(uint32_t start, uint32_t count) noexcept {
+    ASMJIT_ASSERT(start <= _size);
+    ASMJIT_ASSERT(_size - start >= count);
+
+    Support::bitVectorFill(_data, start, count);
+  }
+
+  //! Performs a logical bitwise AND between bits specified in this array and bits in `other`. If `other` has less
+  //! bits than `this` then all remaining bits are set to zero.
+  //!
+  //! \note The size of the BitVector is unaffected by this operation.
+  ASMJIT_FORCE_INLINE void and_(const ZoneBitVector& other) noexcept {
+    BitWord* dst = _data;
+    const BitWord* src = other._data;
+
+    uint32_t thisBitWordCount = sizeInBitWords();
+    uint32_t otherBitWordCount = other.sizeInBitWords();
+    uint32_t commonBitWordCount = Support::min(thisBitWordCount, otherBitWordCount);
+
+    uint32_t i = 0;
+    while (i < commonBitWordCount) {
+      dst[i] = dst[i] & src[i];
+      i++;
+    }
+
+    while (i < thisBitWordCount) {
+      dst[i] = 0;
+      i++;
+    }
+  }
+
+  //! Performs a logical bitwise AND between bits specified in this array and negated bits in `other`. If `other`
+  //! has less bits than `this` then all remaining bits are kept intact.
+  //!
+  //! \note The size of the BitVector is unaffected by this operation.
+  ASMJIT_FORCE_INLINE void andNot(const ZoneBitVector& other) noexcept {
+    BitWord* dst = _data;
+    const BitWord* src = other._data;
+
+    uint32_t commonBitWordCount = _wordsPerBits(Support::min(_size, other._size));
+    for (uint32_t i = 0; i < commonBitWordCount; i++)
+      dst[i] = dst[i] & ~src[i];
+  }
+
+  //! Performs a logical bitwise OR between bits specified in this array and bits in `other`. If `other` has less
+  //! bits than `this` then all remaining bits are kept intact.
+  //!
+  //! \note The size of the BitVector is unaffected by this operation.
+  ASMJIT_FORCE_INLINE void or_(const ZoneBitVector& other) noexcept {
+    BitWord* dst = _data;
+    const BitWord* src = other._data;
+
+    uint32_t commonBitWordCount = _wordsPerBits(Support::min(_size, other._size));
+    for (uint32_t i = 0; i < commonBitWordCount; i++)
+      dst[i] = dst[i] | src[i];
+    _clearUnusedBits();
+  }
+
+  ASMJIT_FORCE_INLINE void _clearUnusedBits() noexcept {
+    uint32_t idx = _size / kBitWordSizeInBits;
+    uint32_t bit = _size % kBitWordSizeInBits;
+
+    if (!bit)
+      return;
+    _data[idx] &= (BitWord(1) << bit) - 1u;
+  }
+
+  //! Tests whether both bit-vectors have the same size and identical bit-words.
+  ASMJIT_FORCE_INLINE bool eq(const ZoneBitVector& other) const noexcept {
+    // Vectors of different sizes never compare equal.
+    if (_size != other._size)
+      return false;
+
+    // Same size: every bit-word must match, including the partially used last one.
+    uint32_t word = 0;
+    const uint32_t wordCount = sizeInBitWords();
+    while (word < wordCount && _data[word] == other._data[word])
+      word++;
+    return word == wordCount;
+  }
+
+  //! \}
+
+  //! \name Memory Management
+  //! \{
+
+  inline void release(ZoneAllocator* allocator) noexcept {
+    if (!_data) return;
+    allocator->release(_data, _capacity / 8);
+    reset();
+  }
+
+  inline Error resize(ZoneAllocator* allocator, uint32_t newSize, bool newBitsValue = false) noexcept {
+    return _resize(allocator, newSize, newSize, newBitsValue);
+  }
+
+  ASMJIT_API Error _resize(ZoneAllocator* allocator, uint32_t newSize, uint32_t idealCapacity, bool newBitsValue) noexcept;
+  ASMJIT_API Error _append(ZoneAllocator* allocator, bool value) noexcept;
+
+  //! \}
+
+  //! \name Iterators
+  //! \{
+
+  class ForEachBitSet : public Support::BitVectorIterator<BitWord> {
+  public:
+    inline explicit ForEachBitSet(const ZoneBitVector& bitVector) noexcept
+      : Support::BitVectorIterator<BitWord>(bitVector.data(), bitVector.sizeInBitWords()) {}
+  };
+
+  template<class Operator>
+  class ForEachBitOp : public Support::BitVectorOpIterator<BitWord, Operator> {
+  public:
+    inline ForEachBitOp(const ZoneBitVector& a, const ZoneBitVector& b) noexcept
+      : Support::BitVectorOpIterator<BitWord, Operator>(a.data(), b.data(), a.sizeInBitWords()) {
+      ASMJIT_ASSERT(a.size() == b.size());
+    }
+  };
+
+  //! \}
+};
+
+//! \}
+
+ASMJIT_END_NAMESPACE
+
+#endif // ASMJIT_CORE_ZONEVECTOR_H_INCLUDED
diff --git a/lib/lepton/asmjit/x86.h b/lib/lepton/asmjit/x86.h
new file mode 100644
index 0000000000..84bc84bb2d
--- /dev/null
+++ b/lib/lepton/asmjit/x86.h
@@ -0,0 +1,93 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_X86_H_INCLUDED
+#define ASMJIT_X86_H_INCLUDED
+
+//! \addtogroup asmjit_x86
+//!
+//! ### Namespace
+//!
+//!   - \ref x86 - x86 namespace provides support for X86/X64 code generation.
+//!
+//! ### Emitters
+//!
+//!   - \ref x86::Assembler - X86/X64 assembler (must read, provides examples).
+//!   - \ref x86::Builder - X86/X64 builder.
+//!   - \ref x86::Compiler - X86/X64 compiler.
+//!   - \ref x86::Emitter - X86/X64 emitter (abstract).
+//!
+//! ### Supported Instructions
+//!
+//!   - Emitters:
+//!     - \ref x86::EmitterExplicitT - Provides all instructions that use explicit operands, provides also utility
+//!       functions. The member functions provided are part of all X86 emitters.
+//!     - \ref x86::EmitterImplicitT - Provides all instructions that use implicit operands, these cannot be used
+//!       with \ref x86::Compiler.
+//!
+//!   - Instruction representation:
+//!     - \ref x86::Inst::Id - Provides instruction identifiers for both X86/X86_64 architectures.
+//!     - \ref InstOptions - Provides generic and X86/X86_64 specific options.
+//!
+//! ### Register Operands
+//!
+//!   - \ref x86::Reg - Base class for any X86 register.
+//!     - \ref x86::Gp - General purpose register:
+//!       - \ref x86::GpbLo - 8-bit low register.
+//!       - \ref x86::GpbHi - 8-bit high register.
+//!       - \ref x86::Gpw - 16-bit register.
+//!       - \ref x86::Gpd - 32-bit register.
+//!       - \ref x86::Gpq - 64-bit register (X64 only).
+//!     - \ref x86::Vec - Vector (SIMD) register:
+//!       - \ref x86::Xmm - 128-bit SIMD register (SSE+).
+//!       - \ref x86::Ymm - 256-bit SIMD register (AVX+).
+//!       - \ref x86::Zmm - 512-bit SIMD register (AVX512+).
+//!     - \ref x86::Mm - 64-bit MMX register.
+//!     - \ref x86::St - 80-bit FPU register.
+//!     - \ref x86::KReg - opmask registers (AVX512+).
+//!     - \ref x86::SReg - segment register.
+//!     - \ref x86::CReg - control register.
+//!     - \ref x86::DReg - debug register.
+//!     - \ref x86::Bnd - bound register (discontinued).
+//!     - \ref x86::Rip - relative instruction pointer.
+//!
+//! ### Memory Operands
+//!
+//!   - \ref x86::Mem - X86/X64 memory operand that provides support for all X86 and X64 addressing features
+//!     including absolute addresses, index scales, and segment override prefixes.
+//!
+//! ### Status and Control Words
+//!
+//!   - \ref x86::FpuStatusWord - FPU status word bits / decomposition.
+//!   - \ref x86::FpuControlWord - FPU control word bits / decomposition.
+//!
+//! ### Predicates (immediate values)
+//!
+//!   - \ref x86::CmpImm - `CMP[PD|PS|SD|SS]` predicate (SSE+).
+//!   - \ref x86::PCmpStrImm - `[V]PCMP[I|E]STR[I|M]` predicate (SSE4.2+, AVX+).
+//!   - \ref x86::RoundImm - `[V]ROUND[PD|PS|SD|SS]` predicate (SSE4.1+, AVX+).
+//!   - \ref x86::VCmpImm - `VCMP[PD|PS|SD|SS]` predicate (AVX+).
+//!   - \ref x86::VFixupImm - `VFIXUPIMM[PD|PS|SD|SS]` predicate (AVX512+).
+//!   - \ref x86::VFPClassImm - `VFPCLASS[PD|PS|SD|SS]` predicate (AVX512+).
+//!   - \ref x86::VGetMantImm - `VGETMANT[PD|PS|SD|SS]` predicate (AVX512+).
+//!   - \ref x86::VPCmpImm - `VPCMP[U][B|W|D|Q]` predicate (AVX512+).
+//!   - \ref x86::VPComImm - `VPCOM[U][B|W|D|Q]` predicate (XOP).
+//!   - \ref x86::VRangeImm - `VRANGE[PD|PS|SD|SS]` predicate (AVX512+).
+//!   - \ref x86::VReduceImm - `VREDUCE[PD|PS|SD|SS]` predicate (AVX512+).
+//!   - \ref x86::TLogImm - `VPTERNLOG[D|Q]` predicate and operations (AVX512+).
+
+#include "core.h"
+
+#include "asmjit-scope-begin.h"
+#include "x86/x86assembler.h"
+#include "x86/x86builder.h"
+#include "x86/x86compiler.h"
+#include "x86/x86emitter.h"
+#include "x86/x86globals.h"
+#include "x86/x86instdb.h"
+#include "x86/x86operand.h"
+#include "asmjit-scope-end.h"
+
+#endif // ASMJIT_X86_H_INCLUDED
diff --git a/lib/lepton/asmjit/x86/x86archtraits_p.h b/lib/lepton/asmjit/x86/x86archtraits_p.h
new file mode 100644
index 0000000000..90ae5d54f2
--- /dev/null
+++ b/lib/lepton/asmjit/x86/x86archtraits_p.h
@@ -0,0 +1,148 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_X86_X86ARCHTRAITS_P_H_INCLUDED
+#define ASMJIT_X86_X86ARCHTRAITS_P_H_INCLUDED
+
+#include "../core/archtraits.h"
+#include "../core/misc_p.h"
+#include "../x86/x86operand.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(x86)
+
+//! \cond INTERNAL
+//! \addtogroup asmjit_x86
+//! \{
+
+//! X86 architecture traits (internal).
+static const constexpr ArchTraits x86ArchTraits = {
+  // SP/FP/LR/PC.
+  Gp::kIdSp, Gp::kIdBp, 0xFF, 0xFF,
+
+  // Reserved.
+  { 0, 0, 0 },
+
+  // HW stack alignment.
+  1,
+
+  // Min/Max stack offset
+  0x7FFFFFFFu, 0x7FFFFFFFu,
+
+  // ISA features [Gp, Vec, Other0, Other1].
+  {{
+    InstHints::kRegSwap | InstHints::kPushPop,
+    InstHints::kNoHints,
+    InstHints::kNoHints,
+    InstHints::kNoHints
+  }},
+
+  // Register signatures.
+  #define V(index) OperandSignature{x86::RegTraits<RegType(index)>::kSignature}
+  {{ ASMJIT_LOOKUP_TABLE_32(V, 0) }},
+  #undef V
+
+  // RegTypeToTypeId.
+  #define V(index) TypeId(x86::RegTraits<RegType(index)>::kTypeId)
+  {{ ASMJIT_LOOKUP_TABLE_32(V, 0) }},
+  #undef V
+
+  // TypeIdToRegType.
+  #define V(index) (index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kInt8)    ? RegType::kX86_GpbLo : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUInt8)   ? RegType::kX86_GpbLo : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kInt16)   ? RegType::kX86_Gpw   : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUInt16)  ? RegType::kX86_Gpw   : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kInt32)   ? RegType::kX86_Gpd   : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUInt32)  ? RegType::kX86_Gpd   : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kIntPtr)  ? RegType::kX86_Gpd   : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUIntPtr) ? RegType::kX86_Gpd   : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kFloat32) ? RegType::kX86_Xmm   : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kFloat64) ? RegType::kX86_Xmm   : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kMask8)   ? RegType::kX86_KReg  : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kMask16)  ? RegType::kX86_KReg  : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kMask32)  ? RegType::kX86_KReg  : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kMask64)  ? RegType::kX86_KReg  : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kMmx32)   ? RegType::kX86_Mm    : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kMmx64)   ? RegType::kX86_Mm    : RegType::kNone)
+  {{ ASMJIT_LOOKUP_TABLE_32(V, 0) }},
+  #undef V
+
+  // Word names of 8-bit, 16-bit, 32-bit, and 64-bit quantities.
+  {
+    ArchTypeNameId::kDB,
+    ArchTypeNameId::kDW,
+    ArchTypeNameId::kDD,
+    ArchTypeNameId::kDQ
+  }
+};
+
+//! X64 architecture traits (internal).
+static const constexpr ArchTraits x64ArchTraits = {
+  // SP/FP/LR/PC.
+  Gp::kIdSp, Gp::kIdBp, 0xFF, 0xFF,
+
+  // Reserved.
+  { 0, 0, 0 },
+
+  // HW stack alignment.
+  1,
+
+  // Min/Max stack offset
+  0x7FFFFFFFu, 0x7FFFFFFFu,
+
+  // ISA features [Gp, Vec, Other0, Other1].
+  {{
+    InstHints::kRegSwap | InstHints::kPushPop,
+    InstHints::kNoHints,
+    InstHints::kNoHints,
+    InstHints::kNoHints
+  }},
+
+  // Register signatures.
+  #define V(index) OperandSignature{x86::RegTraits<RegType(index)>::kSignature}
+  {{ ASMJIT_LOOKUP_TABLE_32(V, 0) }},
+  #undef V
+
+  // RegTypeToTypeId.
+  #define V(index) TypeId(x86::RegTraits<RegType(index)>::kTypeId)
+  {{ ASMJIT_LOOKUP_TABLE_32(V, 0) }},
+  #undef V
+
+  // TypeIdToRegType.
+  #define V(index) (index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kInt8)    ? RegType::kX86_GpbLo : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUInt8)   ? RegType::kX86_GpbLo : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kInt16)   ? RegType::kX86_Gpw   : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUInt16)  ? RegType::kX86_Gpw   : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kInt32)   ? RegType::kX86_Gpd   : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUInt32)  ? RegType::kX86_Gpd   : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kInt64)   ? RegType::kX86_Gpq   : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUInt64)  ? RegType::kX86_Gpq   : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kIntPtr)  ? RegType::kX86_Gpd   : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUIntPtr) ? RegType::kX86_Gpd   : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kFloat32) ? RegType::kX86_Xmm   : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kFloat64) ? RegType::kX86_Xmm   : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kMask8)   ? RegType::kX86_KReg  : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kMask16)  ? RegType::kX86_KReg  : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kMask32)  ? RegType::kX86_KReg  : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kMask64)  ? RegType::kX86_KReg  : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kMmx32)   ? RegType::kX86_Mm    : \
+                    index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kMmx64)   ? RegType::kX86_Mm    : RegType::kNone)
+  {{ ASMJIT_LOOKUP_TABLE_32(V, 0) }},
+  #undef V
+
+  // Word names of 8-bit, 16-bit, 32-bit, and 64-bit quantities.
+  {
+    ArchTypeNameId::kDB,
+    ArchTypeNameId::kDW,
+    ArchTypeNameId::kDD,
+    ArchTypeNameId::kDQ
+  }
+};
+
+//! \}
+//! \endcond
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // ASMJIT_X86_X86ARCHTRAITS_P_H_INCLUDED
diff --git a/lib/lepton/asmjit/x86/x86assembler.cpp b/lib/lepton/asmjit/x86/x86assembler.cpp
new file mode 100644
index 0000000000..f11fea0023
--- /dev/null
+++ b/lib/lepton/asmjit/x86/x86assembler.cpp
@@ -0,0 +1,5110 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#if !defined(ASMJIT_NO_X86)
+
+#include "../core/assembler.h"
+#include "../core/codewriter_p.h"
+#include "../core/cpuinfo.h"
+#include "../core/emitterutils_p.h"
+#include "../core/formatter.h"
+#include "../core/logger.h"
+#include "../core/misc_p.h"
+#include "../core/support.h"
+#include "../x86/x86assembler.h"
+#include "../x86/x86emithelper_p.h"
+#include "../x86/x86instapi_p.h"
+#include "../x86/x86instdb_p.h"
+#include "../x86/x86formatter_p.h"
+#include "../x86/x86opcode_p.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(x86)
+
+typedef Support::FastUInt8 FastUInt8;
+
+// x86::Assembler - Constants
+// ==========================
+
+//! X86 bytes used to encode important prefixes.
+enum X86Byte : uint32_t {
+  //! 1-byte REX prefix mask.
+  kX86ByteRex = 0x40,
+
+  //! 1-byte REX.W component.
+  kX86ByteRexW = 0x08,
+
+  kX86ByteInvalidRex = 0x80, //!< REX-not-allowed marker; a REX value greater than this is rejected by `x86IsRexInvalid()`.
+
+  //! 2-byte VEX prefix:
+  //!   - `[0]` - `0xC5`.
+  //!   - `[1]` - `RvvvvLpp`.
+  kX86ByteVex2 = 0xC5,
+
+  //! 3-byte VEX prefix:
+  //!   - `[0]` - `0xC4`.
+  //!   - `[1]` - `RXBmmmmm`.
+  //!   - `[2]` - `WvvvvLpp`.
+  kX86ByteVex3 = 0xC4,
+
+  //! 3-byte XOP prefix:
+  //!   - `[0]` - `0x8F`.
+  //!   - `[1]` - `RXBmmmmm`.
+  //!   - `[2]` - `WvvvvLpp`.
+  kX86ByteXop3 = 0x8F,
+
+  //! 4-byte EVEX prefix:
+  //!   - `[0]` - `0x62`.
+  //!   - `[1]` - Payload0 or `P[ 7: 0]` - `[R  X  B  R' 0  m  m  m]`.
+  //!   - `[2]` - Payload1 or `P[15: 8]` - `[W  v  v  v  v  1  p  p]`.
+  //!   - `[3]` - Payload2 or `P[23:16]` - `[z  L' L  b  V' a  a  a]`.
+  //!
+  //! Payload:
+  //!   - `P[ 2: 0]` - OPCODE: EVEX.mmmmm, only lowest 3 bits [2:0] used.
+  //!   - `P[    3]` - ______: Must be 0.
+  //!   - `P[    4]` - REG-ID: EVEX.R' - 5th bit of 'RRRRR'.
+  //!   - `P[    5]` - REG-ID: EVEX.B  - 4th bit of 'BBBBB'.
+  //!   - `P[    6]` - REG-ID: EVEX.X  - 5th bit of 'BBBBB' or 4th bit of 'XXXX' (with SIB).
+  //!   - `P[    7]` - REG-ID: EVEX.R  - 4th bit of 'RRRRR'.
+  //!   - `P[ 9: 8]` - OPCODE: EVEX.pp.
+  //!   - `P[   10]` - ______: Must be 1.
+  //!   - `P[14:11]` - REG-ID: 4 bits of 'VVVV'.
+  //!   - `P[   15]` - OPCODE: EVEX.W.
+  //!   - `P[18:16]` - REG-ID: K register k0...k7 (Merging/Zeroing Vector Ops).
+  //!   - `P[   19]` - REG-ID: 5th bit of 'VVVVV'.
+  //!   - `P[   20]` - OPCODE: Broadcast/Rounding Control/SAE bit.
+  //!   - `P[22:21]` - OPCODE: Vector Length (L' and  L) / Rounding Control.
+  //!   - `P[   23]` - OPCODE: Zeroing/Merging.
+  kX86ByteEvex = 0x62
+};
+
+// AsmJit specific (used to encode VVVVV field in XOP/VEX/EVEX).
+enum VexVVVVV : uint32_t {
+  kVexVVVVVShift = 7,
+  kVexVVVVVMask = 0x1F << kVexVVVVVShift
+};
+
+//! Instruction 2-byte/3-byte opcode prefix definition.
+struct X86OpcodeMM {
+  uint8_t size;
+  uint8_t data[3];
+};
+
+//! Mandatory prefixes used to encode legacy [66, F3, F2] or [9B] byte.
+static const uint8_t x86OpcodePP[8] = { 0x00, 0x66, 0xF3, 0xF2, 0x00, 0x00, 0x00, 0x9B };
+
+//! Instruction 2-byte/3-byte opcode prefix data.
+static const X86OpcodeMM x86OpcodeMM[] = {
+  { 0, { 0x00, 0x00, 0 } }, // #00 (0b0000).
+  { 1, { 0x0F, 0x00, 0 } }, // #01 (0b0001).
+  { 2, { 0x0F, 0x38, 0 } }, // #02 (0b0010).
+  { 2, { 0x0F, 0x3A, 0 } }, // #03 (0b0011).
+  { 2, { 0x0F, 0x01, 0 } }, // #04 (0b0100).
+  { 0, { 0x00, 0x00, 0 } }, // #05 (0b0101).
+  { 0, { 0x00, 0x00, 0 } }, // #06 (0b0110).
+  { 0, { 0x00, 0x00, 0 } }, // #07 (0b0111).
+  { 0, { 0x00, 0x00, 0 } }, // #08 (0b1000).
+  { 0, { 0x00, 0x00, 0 } }, // #09 (0b1001).
+  { 0, { 0x00, 0x00, 0 } }, // #0A (0b1010).
+  { 0, { 0x00, 0x00, 0 } }, // #0B (0b1011).
+  { 0, { 0x00, 0x00, 0 } }, // #0C (0b1100).
+  { 0, { 0x00, 0x00, 0 } }, // #0D (0b1101).
+  { 0, { 0x00, 0x00, 0 } }, // #0E (0b1110).
+  { 0, { 0x00, 0x00, 0 } }  // #0F (0b1111).
+};
+
+static const uint8_t x86SegmentPrefix[8] = {
+  0x00, // None.
+  0x26, // ES.
+  0x2E, // CS.
+  0x36, // SS.
+  0x3E, // DS.
+  0x64, // FS.
+  0x65  // GS.
+};
+
+static const uint32_t x86OpcodePushSReg[8] = {
+  Opcode::k000000 | 0x00, // None.
+  Opcode::k000000 | 0x06, // Push ES.
+  Opcode::k000000 | 0x0E, // Push CS.
+  Opcode::k000000 | 0x16, // Push SS.
+  Opcode::k000000 | 0x1E, // Push DS.
+  Opcode::k000F00 | 0xA0, // Push FS.
+  Opcode::k000F00 | 0xA8  // Push GS.
+};
+
+static const uint32_t x86OpcodePopSReg[8]  = {
+  Opcode::k000000 | 0x00, // None.
+  Opcode::k000000 | 0x07, // Pop ES.
+  Opcode::k000000 | 0x00, // Pop CS.
+  Opcode::k000000 | 0x17, // Pop SS.
+  Opcode::k000000 | 0x1F, // Pop DS.
+  Opcode::k000F00 | 0xA1, // Pop FS.
+  Opcode::k000F00 | 0xA9  // Pop GS.
+};
+
+// x86::Assembler - X86MemInfo | X86VEXPrefix | X86LLByRegType | X86CDisp8Table
+// ============================================================================
+
+//! Memory operand's info bits.
+//!
+//! A lookup table that contains various information based on the BASE and INDEX information of a memory operand. This
+//! is much better and safer than playing with IFs in the code and can check for errors much faster and better.
+enum X86MemInfo_Enum {
+  kX86MemInfo_0         = 0x00,
+
+  kX86MemInfo_BaseGp    = 0x01, //!< Has BASE reg, REX.B can be 1, compatible with REX.B byte.
+  kX86MemInfo_Index     = 0x02, //!< Has INDEX reg, REX.X can be 1, compatible with REX.X byte.
+
+  kX86MemInfo_BaseLabel = 0x10, //!< Base is Label.
+  kX86MemInfo_BaseRip   = 0x20, //!< Base is RIP.
+
+  kX86MemInfo_67H_X86   = 0x40, //!< Address-size override in 32-bit mode.
+  kX86MemInfo_67H_X64   = 0x80, //!< Address-size override in 64-bit mode.
+  kX86MemInfo_67H_Mask  = 0xC0  //!< Contains all address-size override bits.
+};
+
+template<uint32_t X>
+struct X86MemInfo_T {
+  enum : uint32_t {
+    B = (X     ) & 0x1F,
+    I = (X >> 5) & 0x1F,
+
+    kBase  = (B >= uint32_t(RegType::kX86_Gpw)  && B <= uint32_t(RegType::kX86_Gpq)) ? kX86MemInfo_BaseGp    :
+             (B == uint32_t(RegType::kX86_Rip)                                     ) ? kX86MemInfo_BaseRip   :
+             (B == uint32_t(RegType::kLabelTag)                                    ) ? kX86MemInfo_BaseLabel : 0,
+
+    kIndex = (I >= uint32_t(RegType::kX86_Gpw)  && I <= uint32_t(RegType::kX86_Gpq)) ? kX86MemInfo_Index     :
+             (I >= uint32_t(RegType::kX86_Xmm)  && I <= uint32_t(RegType::kX86_Zmm)) ? kX86MemInfo_Index     : 0,
+
+    k67H   = (B == uint32_t(RegType::kX86_Gpw)  && I == uint32_t(RegType::kNone)   ) ? kX86MemInfo_67H_X86   :
+             (B == uint32_t(RegType::kX86_Gpd)  && I == uint32_t(RegType::kNone)   ) ? kX86MemInfo_67H_X64   :
+             (B == uint32_t(RegType::kNone)     && I == uint32_t(RegType::kX86_Gpw)) ? kX86MemInfo_67H_X86   :
+             (B == uint32_t(RegType::kNone)     && I == uint32_t(RegType::kX86_Gpd)) ? kX86MemInfo_67H_X64   :
+             (B == uint32_t(RegType::kX86_Gpw)  && I == uint32_t(RegType::kX86_Gpw)) ? kX86MemInfo_67H_X86   :
+             (B == uint32_t(RegType::kX86_Gpd)  && I == uint32_t(RegType::kX86_Gpd)) ? kX86MemInfo_67H_X64   :
+             (B == uint32_t(RegType::kX86_Gpw)  && I == uint32_t(RegType::kX86_Xmm)) ? kX86MemInfo_67H_X86   :
+             (B == uint32_t(RegType::kX86_Gpd)  && I == uint32_t(RegType::kX86_Xmm)) ? kX86MemInfo_67H_X64   :
+             (B == uint32_t(RegType::kX86_Gpw)  && I == uint32_t(RegType::kX86_Ymm)) ? kX86MemInfo_67H_X86   :
+             (B == uint32_t(RegType::kX86_Gpd)  && I == uint32_t(RegType::kX86_Ymm)) ? kX86MemInfo_67H_X64   :
+             (B == uint32_t(RegType::kX86_Gpw)  && I == uint32_t(RegType::kX86_Zmm)) ? kX86MemInfo_67H_X86   :
+             (B == uint32_t(RegType::kX86_Gpd)  && I == uint32_t(RegType::kX86_Zmm)) ? kX86MemInfo_67H_X64   :
+             (B == uint32_t(RegType::kLabelTag) && I == uint32_t(RegType::kX86_Gpw)) ? kX86MemInfo_67H_X86   :
+             (B == uint32_t(RegType::kLabelTag) && I == uint32_t(RegType::kX86_Gpd)) ? kX86MemInfo_67H_X64   : 0,
+
+    kValue = kBase | kIndex | k67H | 0x04 | 0x08
+  };
+};
+
+// The result stored in the LUT is a combination of
+//   - 67H - Address override prefix - depends on BASE+INDEX register types and the target architecture.
+//   - REX - A possible combination of REX.[B|X|R|W] bits in REX prefix where REX.B and REX.X are possibly
+//           masked out, but REX.R and REX.W are kept as is.
+#define VALUE(x) X86MemInfo_T<x>::kValue
+static const uint8_t x86MemInfo[] = { ASMJIT_LOOKUP_TABLE_1024(VALUE, 0) };
+#undef VALUE
+
+// VEX3 or XOP xor bits applied to the opcode before emitted. The index to this table is 'mmmmm' value, which
+// contains all we need. This is only used by a 3 BYTE VEX and XOP prefixes, 2 BYTE VEX prefix is handled differently.
+// The idea is to minimize the difference between VEX3 vs XOP when encoding VEX or XOP instruction. This should
+// minimize the code required to emit such instructions and should also make it faster as we don't need any branch to
+// decide between VEX3 vs XOP.
+//            ____    ___
+// [_OPCODE_|WvvvvLpp|RXBmmmmm|VEX3_XOP]
+#define VALUE(x) ((x & 0x08) ? kX86ByteXop3 : kX86ByteVex3) | (0xF << 19) | (0x7 << 13)
+static const uint32_t x86VEXPrefix[] = { ASMJIT_LOOKUP_TABLE_16(VALUE, 0) };
+#undef VALUE
+
+// Table that contains LL opcode field addressed by a register size / 16. It's used to propagate L.256 or L.512 when
+// YMM or ZMM registers are used, respectively.
+#define VALUE(x) (x & (64 >> 4)) ? Opcode::kLL_2 : \
+                 (x & (32 >> 4)) ? Opcode::kLL_1 : Opcode::kLL_0
+static const uint32_t x86LLBySizeDiv16[] = { ASMJIT_LOOKUP_TABLE_16(VALUE, 0) };
+#undef VALUE
+
+// Table that contains LL opcode field addressed by a register type. It's used to propagate L.256 or L.512 when
+// YMM or ZMM registers are used, respectively.
+#define VALUE(x) x == uint32_t(RegType::kX86_Zmm) ? Opcode::kLL_2 : \
+                 x == uint32_t(RegType::kX86_Ymm) ? Opcode::kLL_1 : Opcode::kLL_0
+static const uint32_t x86LLByRegType[] = { ASMJIT_LOOKUP_TABLE_16(VALUE, 0) };
+#undef VALUE
+
+// Table that contains a scale (shift left) based on 'TTWLL' field and the instruction's tuple-type (TT) field. The
+// scale is then applied to the BASE-N stored in each opcode to calculate the final compressed displacement used by
+// all EVEX encoded instructions.
+template<uint32_t X>
+struct X86CDisp8SHL_T {
+  enum {
+    TT = (X >> 3) << Opcode::kCDTT_Shift,
+    LL = (X >> 0) & 0x3,
+    W  = (X >> 2) & 0x1,
+
+    kValue = (TT == Opcode::kCDTT_None ? ((LL==0) ? 0 : (LL==1) ? 0   : 0  ) :
+              TT == Opcode::kCDTT_ByLL ? ((LL==0) ? 0 : (LL==1) ? 1   : 2  ) :
+              TT == Opcode::kCDTT_T1W  ? ((LL==0) ? W : (LL==1) ? 1+W : 2+W) :
+              TT == Opcode::kCDTT_DUP  ? ((LL==0) ? 0 : (LL==1) ? 2   : 3  ) : 0) << Opcode::kCDSHL_Shift
+  };
+};
+
+#define VALUE(x) X86CDisp8SHL_T<x>::kValue
+static const uint32_t x86CDisp8SHL[] = { ASMJIT_LOOKUP_TABLE_32(VALUE, 0) };
+#undef VALUE
+
+// Table that contains MOD byte of a 16-bit [BASE + disp] address.
+//   0xFF == Invalid.
+static const uint8_t x86Mod16BaseTable[8] = {
+  0xFF, // AX -> N/A.
+  0xFF, // CX -> N/A.
+  0xFF, // DX -> N/A.
+  0x07, // BX -> 111.
+  0xFF, // SP -> N/A.
+  0x06, // BP -> 110.
+  0x04, // SI -> 100.
+  0x05  // DI -> 101.
+};
+
+// Table that contains MOD byte of a 16-bit [BASE + INDEX + disp] combination.
+//   0xFF == Invalid.
+template<uint32_t X>
+struct X86Mod16BaseIndexTable_T {
+  enum {
+    B = X >> 3,
+    I = X & 0x7,
+
+    kValue = ((B == Gp::kIdBx && I == Gp::kIdSi) || (B == Gp::kIdSi && I == Gp::kIdBx)) ? 0x00 :
+             ((B == Gp::kIdBx && I == Gp::kIdDi) || (B == Gp::kIdDi && I == Gp::kIdBx)) ? 0x01 :
+             ((B == Gp::kIdBp && I == Gp::kIdSi) || (B == Gp::kIdSi && I == Gp::kIdBp)) ? 0x02 :
+             ((B == Gp::kIdBp && I == Gp::kIdDi) || (B == Gp::kIdDi && I == Gp::kIdBp)) ? 0x03 : 0xFF
+  };
+};
+
+#define VALUE(x) X86Mod16BaseIndexTable_T<x>::kValue
+static const uint8_t x86Mod16BaseIndexTable[] = { ASMJIT_LOOKUP_TABLE_64(VALUE, 0) };
+#undef VALUE
+
+// x86::Assembler - Helpers
+// ========================
+
+static ASMJIT_FORCE_INLINE bool x86IsJmpOrCall(InstId instId) noexcept {
+  return instId == Inst::kIdJmp || instId == Inst::kIdCall;
+}
+
+static ASMJIT_FORCE_INLINE bool x86IsImplicitMem(const Operand_& op, uint32_t base) noexcept {
+  return op.isMem() && op.as<Mem>().baseId() == base && !op.as<Mem>().hasOffset();
+}
+
+//! Combine `regId` and `vvvvvId` into a single value (used by AVX and AVX-512).
+static ASMJIT_FORCE_INLINE uint32_t x86PackRegAndVvvvv(uint32_t regId, uint32_t vvvvvId) noexcept {
+  return regId + (vvvvvId << kVexVVVVVShift);
+}
+
+static ASMJIT_FORCE_INLINE uint32_t x86OpcodeLByVMem(const Operand_& op) noexcept {
+  return x86LLByRegType[size_t(op.as<Mem>().indexType())];
+}
+
+static ASMJIT_FORCE_INLINE uint32_t x86OpcodeLBySize(uint32_t size) noexcept {
+  return x86LLBySizeDiv16[size / 16];
+}
+
+//! Encode MOD byte.
+static ASMJIT_FORCE_INLINE uint32_t x86EncodeMod(uint32_t m, uint32_t o, uint32_t rm) noexcept {
+  ASMJIT_ASSERT(m <= 3);
+  ASMJIT_ASSERT(o <= 7);
+  ASMJIT_ASSERT(rm <= 7);
+  return (m << 6) + (o << 3) + rm;
+}
+
+//! Encode SIB byte.
+static ASMJIT_FORCE_INLINE uint32_t x86EncodeSib(uint32_t s, uint32_t i, uint32_t b) noexcept {
+  ASMJIT_ASSERT(s <= 3);
+  ASMJIT_ASSERT(i <= 7);
+  ASMJIT_ASSERT(b <= 7);
+  return (s << 6) + (i << 3) + b;
+}
+
+static ASMJIT_FORCE_INLINE bool x86IsRexInvalid(uint32_t rex) noexcept {
+  // Validates the following possibilities:
+  //   REX == 0x00      -> OKAY (X86_32 / X86_64).
+  //   REX == 0x40-0x4F -> OKAY (X86_64).
+  //   REX == 0x80      -> OKAY (X86_32 mode, rex prefix not used).
+  //   REX == 0x81-0xCF -> BAD  (X86_32 mode, rex prefix used).
+  return rex > kX86ByteInvalidRex;
+}
+
+static ASMJIT_FORCE_INLINE uint32_t x86GetForceEvex3MaskInLastBit(InstOptions options) noexcept {
+  constexpr uint32_t kVex3Bit = Support::ConstCTZ<uint32_t(InstOptions::kX86_Vex3)>::value;
+  return uint32_t(options & InstOptions::kX86_Vex3) << (31 - kVex3Bit);
+}
+
+template<typename T>
+static ASMJIT_FORCE_INLINE constexpr T x86SignExtendI32(T imm) noexcept { return T(int64_t(int32_t(imm & T(0xFFFFFFFF)))); }
+
+static ASMJIT_FORCE_INLINE uint32_t x86AltOpcodeOf(const InstDB::InstInfo* info) noexcept {
+  return InstDB::_altOpcodeTable[info->_altOpcodeIndex];
+}
+
+// x86::Assembler - X86BufferWriter
+// ================================
+
+class X86BufferWriter : public CodeWriter {
+public:
+  ASMJIT_FORCE_INLINE explicit X86BufferWriter(Assembler* a) noexcept
+    : CodeWriter(a) {}
+
+  ASMJIT_FORCE_INLINE void emitPP(uint32_t opcode) noexcept {
+    uint32_t ppIndex = (opcode              >> Opcode::kPP_Shift) &
+                       (Opcode::kPP_FPUMask >> Opcode::kPP_Shift) ;
+    emit8If(x86OpcodePP[ppIndex], ppIndex != 0);
+  }
+
+  ASMJIT_FORCE_INLINE void emitMMAndOpcode(uint32_t opcode) noexcept {
+    uint32_t mmIndex = (opcode & Opcode::kMM_Mask) >> Opcode::kMM_Shift;
+    const X86OpcodeMM& mmCode = x86OpcodeMM[mmIndex];
+
+    emit8If(mmCode.data[0], mmCode.size > 0);
+    emit8If(mmCode.data[1], mmCode.size > 1);
+    emit8(opcode);
+  }
+
+  ASMJIT_FORCE_INLINE void emitSegmentOverride(uint32_t segmentId) noexcept {
+    ASMJIT_ASSERT(segmentId < ASMJIT_ARRAY_SIZE(x86SegmentPrefix));
+
+    FastUInt8 prefix = x86SegmentPrefix[segmentId];
+    emit8If(prefix, prefix != 0);
+  }
+
+  template<typename CondT>
+  ASMJIT_FORCE_INLINE void emitAddressOverride(CondT condition) noexcept {
+    emit8If(0x67, condition);
+  }
+
+  ASMJIT_FORCE_INLINE void emitImmByteOrDWord(uint64_t immValue, FastUInt8 immSize) noexcept {
+    if (!immSize)
+      return;
+
+    ASMJIT_ASSERT(immSize == 1 || immSize == 4);
+
+#if ASMJIT_ARCH_BITS >= 64
+    uint64_t imm = uint64_t(immValue);
+#else
+    uint32_t imm = uint32_t(immValue & 0xFFFFFFFFu);
+#endif
+
+    // Many instructions just use a single byte immediate, so make it fast.
+    emit8(imm & 0xFFu);
+    if (immSize == 1) return;
+
+    imm >>= 8;
+    emit8(imm & 0xFFu);
+    imm >>= 8;
+    emit8(imm & 0xFFu);
+    imm >>= 8;
+    emit8(imm & 0xFFu);
+  }
+
+  ASMJIT_FORCE_INLINE void emitImmediate(uint64_t immValue, FastUInt8 immSize) noexcept {
+#if ASMJIT_ARCH_BITS >= 64
+    uint64_t imm = immValue;
+    if (immSize >= 4) {
+      emit32uLE(imm & 0xFFFFFFFFu);
+      imm >>= 32;
+      immSize = FastUInt8(immSize - 4u);
+    }
+#else
+    uint32_t imm = uint32_t(immValue & 0xFFFFFFFFu);
+    if (immSize >= 4) {
+      emit32uLE(imm);
+      imm = uint32_t(immValue >> 32);
+      immSize = FastUInt8(immSize - 4u);
+    }
+#endif
+
+    if (!immSize)
+      return;
+    emit8(imm & 0xFFu);
+    imm >>= 8;
+
+    if (--immSize == 0)
+      return;
+    emit8(imm & 0xFFu);
+    imm >>= 8;
+
+    if (--immSize == 0)
+      return;
+    emit8(imm & 0xFFu);
+    imm >>= 8;
+
+    if (--immSize == 0)
+      return;
+    emit8(imm & 0xFFu);
+  }
+};
+
+// If the operand is BPL|SPL|SIL|DIL|R8B-15B
+//   - Force REX prefix
+// If the operand is AH|BH|CH|DH
+//   - patch its index from 0..3 to 4..7 as encoded by X86.
+//   - Disallow REX prefix.
+#define FIXUP_GPB(REG_OP, REG_ID)                                \
+  do {                                                           \
+    if (!static_cast<const Gp&>(REG_OP).isGpbHi()) {             \
+      options |= (REG_ID) >= 4 ? InstOptions::kX86_Rex           \
+                               : InstOptions::kNone;             \
+    }                                                            \
+    else {                                                       \
+      options |= InstOptions::kX86_InvalidRex;                   \
+      REG_ID += 4;                                               \
+    }                                                            \
+  } while (0)
+
+#define ENC_OPS1(OP0) \
+  (uint32_t(OperandType::k##OP0))
+
+#define ENC_OPS2(OP0, OP1) \
+  (uint32_t(OperandType::k##OP0) + \
+  (uint32_t(OperandType::k##OP1) << 3))
+
+#define ENC_OPS3(OP0, OP1, OP2) \
+  (uint32_t(OperandType::k##OP0) + \
+  (uint32_t(OperandType::k##OP1) << 3) + \
+  (uint32_t(OperandType::k##OP2) << 6))
+
+#define ENC_OPS4(OP0, OP1, OP2, OP3) \
+  (uint32_t(OperandType::k##OP0) + \
+  (uint32_t(OperandType::k##OP1) << 3) + \
+  (uint32_t(OperandType::k##OP2) << 6) + \
+  (uint32_t(OperandType::k##OP3) << 9))
+
+// x86::Assembler - Movabs Heuristics
+// ==================================
+
+static ASMJIT_FORCE_INLINE uint32_t x86GetMovAbsInstSize64Bit(uint32_t regSize, InstOptions options, const Mem& rmRel) noexcept {
+  uint32_t segmentPrefixSize = rmRel.segmentId() != 0;
+  uint32_t _66hPrefixSize = regSize == 2;
+  uint32_t rexPrefixSize = regSize == 8 || Support::test(options, InstOptions::kX86_Rex);
+  uint32_t opCodeByteSize = 1;
+  uint32_t immediateSize = 8;
+
+  return segmentPrefixSize + _66hPrefixSize + rexPrefixSize + opCodeByteSize + immediateSize;
+}
+
+//! Returns true when a `mov` that references the absolute address in `rmRel` should be encoded in its absolute
+//! (movabs) form, and false when the MOD R/M form should be used (always the case if `ModMR` is set in `options`).
+static ASMJIT_FORCE_INLINE bool x86ShouldUseMovabs(Assembler* self, X86BufferWriter& writer, uint32_t regSize, InstOptions options, const Mem& rmRel) noexcept {
+  if (self->is32Bit()) {
+    // There is no relative addressing, just decide whether to use MOV encoded with MOD R/M or absolute.
+    return !Support::test(options, InstOptions::kX86_ModMR);
+  }
+
+  // If the addressing type is REL or MOD R/M was specified then absolute mov won't be used.
+  if (rmRel.addrType() == Mem::AddrType::kRel || Support::test(options, InstOptions::kX86_ModMR))
+    return false;
+
+  int64_t addrValue = rmRel.offset();
+  uint64_t baseAddress = self->code()->baseAddress();
+
+  // If the address type is default, it means to basically check whether relative addressing is possible. However,
+  // this is only possible when the base address is known - relative encoding uses RIP+N, so it has to be calculated.
+  if (rmRel.addrType() == Mem::AddrType::kDefault && baseAddress != Globals::kNoBaseAddress && !rmRel.hasSegment()) {
+    uint32_t instructionSize = x86GetMovAbsInstSize64Bit(regSize, options, rmRel);
+    uint64_t virtualOffset = uint64_t(writer.offsetFrom(self->_bufferData));
+    uint64_t rip64 = baseAddress + self->_section->offset() + virtualOffset + instructionSize;
+    uint64_t rel64 = uint64_t(addrValue) - rip64;
+
+    if (Support::isInt32(int64_t(rel64)))
+      return false;
+  }
+  else if (Support::isInt32(addrValue)) {
+    return false;
+  }
+
+  return uint64_t(addrValue) > 0xFFFFFFFFu;
+}
+
+// x86::Assembler - Construction & Destruction
+// ===========================================
+
+Assembler::Assembler(CodeHolder* code) noexcept : BaseAssembler() {
+  // Set one bit per architecture (X86 and X64) in the architecture mask.
+  const uint64_t x86Bit = uint64_t(1) << uint32_t(Arch::kX86);
+  const uint64_t x64Bit = uint64_t(1) << uint32_t(Arch::kX64);
+  _archMask = x86Bit | x64Bit;
+  assignEmitterFuncs(this);
+  if (code != nullptr) code->attach(this);  // A null `code` skips the attach step.
+}
+Assembler::~Assembler() noexcept {} // Empty body: the destructor performs no work of its own.
+
+// x86::Assembler - Emit (Low-Level)
+// =================================
+
+ASMJIT_FAVOR_SPEED Error Assembler::_emit(InstId instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_* opExt) {
+  constexpr uint32_t kVSHR_W     = Opcode::kW_Shift  - 23;
+  constexpr uint32_t kVSHR_PP    = Opcode::kPP_Shift - 16;
+  constexpr uint32_t kVSHR_PP_EW = Opcode::kPP_Shift - 16;
+
+  constexpr InstOptions kRequiresSpecialHandling =
+    InstOptions::kReserved     |   // Logging/Validation/Error.
+    InstOptions::kX86_Rep      |   // REP/REPE prefix.
+    InstOptions::kX86_Repne    |   // REPNE prefix.
+    InstOptions::kX86_Lock     |   // LOCK prefix.
+    InstOptions::kX86_XAcquire |   // XACQUIRE prefix.
+    InstOptions::kX86_XRelease ;   // XRELEASE prefix.
+
+  Error err;
+
+  Opcode opcode;                   // Instruction opcode.
+  InstOptions options;             // Instruction options.
+  uint32_t isign3;                 // A combined signature of first 3 operands.
+
+  const Operand_* rmRel;           // Memory operand or operand that holds Label|Imm.
+  uint32_t rmInfo;                 // Memory operand's info based on x86MemInfo.
+  uint32_t rbReg;                  // Memory base or modRM register.
+  uint32_t rxReg;                  // Memory index register.
+  uint32_t opReg;                  // ModR/M opcode or register id.
+
+  LabelEntry* label;               // Label entry.
+  RelocEntry* re = nullptr;        // Relocation entry.
+  int32_t relOffset;               // Relative offset
+  FastUInt8 relSize = 0;           // Relative size.
+  uint8_t* memOpAOMark = nullptr;  // Marker that points before 'address-override prefix' is emitted.
+
+  int64_t immValue = 0;            // Immediate value (must be 64-bit).
+  FastUInt8 immSize = 0;           // Immediate size.
+
+  X86BufferWriter writer(this);
+
+  if (instId >= Inst::_kIdCount)
+    instId = 0;
+
+  const InstDB::InstInfo* instInfo = &InstDB::_instInfoTable[instId];
+  const InstDB::CommonInfo* commonInfo = &instInfo->commonInfo();
+
+  // Signature of the first 3 operands.
+  isign3 = (uint32_t(o0.opType())     ) +
+           (uint32_t(o1.opType()) << 3) +
+           (uint32_t(o2.opType()) << 6);
+
+  // Combine all instruction options and also check whether the instruction is valid. All options
+  // that require special handling (including invalid instruction) are handled by the next branch.
+  options = InstOptions((instId == 0) | ((size_t)(_bufferEnd - writer.cursor()) < 16)) | instOptions() | forcedInstOptions();
+
+  // Handle failure and rare cases first.
+  if (ASMJIT_UNLIKELY(Support::test(options, kRequiresSpecialHandling))) {
+    if (ASMJIT_UNLIKELY(!_code))
+      return reportError(DebugUtils::errored(kErrorNotInitialized));
+
+    // Unknown instruction.
+    if (ASMJIT_UNLIKELY(instId == 0))
+      goto InvalidInstruction;
+
+    // Grow request, happens rarely.
+    err = writer.ensureSpace(this, 16);
+    if (ASMJIT_UNLIKELY(err))
+      goto Failed;
+
+#ifndef ASMJIT_NO_VALIDATION
+    // Strict validation.
+    if (hasDiagnosticOption(DiagnosticOptions::kValidateAssembler)) {
+      Operand_ opArray[Globals::kMaxOpCount];
+      EmitterUtils::opArrayFromEmitArgs(opArray, o0, o1, o2, opExt);
+
+      err = _funcs.validate(arch(), BaseInst(instId, options, _extraReg), opArray, Globals::kMaxOpCount, ValidationFlags::kNone);
+      if (ASMJIT_UNLIKELY(err))
+        goto Failed;
+    }
+#endif
+
+    InstDB::InstFlags iFlags = instInfo->flags();
+
+    // LOCK, XACQUIRE, and XRELEASE prefixes.
+    if (Support::test(options, InstOptions::kX86_Lock)) {
+      bool xAcqRel = Support::test(options, InstOptions::kX86_XAcquire | InstOptions::kX86_XRelease);
+
+      if (ASMJIT_UNLIKELY(!Support::test(iFlags, InstDB::InstFlags::kLock) && !xAcqRel))
+        goto InvalidLockPrefix;
+
+      if (xAcqRel) {
+        if (ASMJIT_UNLIKELY(Support::test(options, InstOptions::kX86_XAcquire) && !Support::test(iFlags, InstDB::InstFlags::kXAcquire)))
+          goto InvalidXAcquirePrefix;
+
+        if (ASMJIT_UNLIKELY(Support::test(options, InstOptions::kX86_XRelease) && !Support::test(iFlags, InstDB::InstFlags::kXRelease)))
+          goto InvalidXReleasePrefix;
+
+        writer.emit8(Support::test(options, InstOptions::kX86_XAcquire) ? 0xF2 : 0xF3);
+      }
+
+      writer.emit8(0xF0);
+    }
+
+    // REP and REPNE prefixes.
+    if (Support::test(options, InstOptions::kX86_Rep | InstOptions::kX86_Repne)) {
+      if (ASMJIT_UNLIKELY(!Support::test(iFlags, InstDB::InstFlags::kRep)))
+        goto InvalidRepPrefix;
+
+      if (ASMJIT_UNLIKELY(_extraReg.isReg() && (_extraReg.group() != RegGroup::kGp || _extraReg.id() != Gp::kIdCx)))
+        goto InvalidRepPrefix;
+
+      writer.emit8(Support::test(options, InstOptions::kX86_Repne) ? 0xF2 : 0xF3);
+    }
+  }
+
+  // This sequence seems to be the fastest.
+  opcode = InstDB::_mainOpcodeTable[instInfo->_mainOpcodeIndex];
+  opReg = opcode.extractModO();
+  rbReg = 0;
+  opcode |= instInfo->_mainOpcodeValue;
+
+  // Encoding Scope
+  // --------------
+
+  // How does it work? Each case here represents a unique encoding of a group of instructions, which is handled
+  // separately. The handlers check the instruction signature, possibly register types, etc., and process this
+  // information by writing some bits to opcode, opReg/rbReg, immValue/immSize, etc. At the end of the sequence
+  // each handler uses goto to jump into a lower level handler that actually encodes the instruction.
+
+  switch (instInfo->_encoding) {
+    case InstDB::kEncodingNone:
+      goto EmitDone;
+
+    // Base Instructions
+    // -----------------
+
+    case InstDB::kEncodingX86Op:
+      goto EmitX86Op;
+
+    case InstDB::kEncodingX86Op_Mod11RM:
+      rbReg = opcode.extractModRM();
+      goto EmitX86R;
+
+    case InstDB::kEncodingX86Op_Mod11RM_I8:
+      // The first operand must be an immediate; the other operands are not checked as they could be implicit.
+      if (!o0.isImm())
+        goto InvalidInstruction;
+
+      rbReg = opcode.extractModRM();
+      immValue = o0.as<Imm>().valueAs<uint8_t>();
+      immSize = 1;
+      goto EmitX86R;
+
+    case InstDB::kEncodingX86Op_xAddr:
+      if (ASMJIT_UNLIKELY(!o0.isReg()))
+        goto InvalidInstruction;
+
+      rmInfo = x86MemInfo[size_t(o0.as<Reg>().type())];
+      writer.emitAddressOverride((rmInfo & _addressOverrideMask()) != 0);
+      goto EmitX86Op;
+
+    case InstDB::kEncodingX86Op_xAX:
+      if (isign3 == 0)
+        goto EmitX86Op;
+
+      if (isign3 == ENC_OPS1(Reg) && o0.id() == Gp::kIdAx)
+        goto EmitX86Op;
+      break;
+
+    case InstDB::kEncodingX86Op_xDX_xAX:
+      if (isign3 == 0)
+        goto EmitX86Op;
+
+      if (isign3 == ENC_OPS2(Reg, Reg) && o0.id() == Gp::kIdDx && o1.id() == Gp::kIdAx)
+        goto EmitX86Op;
+      break;
+
+    case InstDB::kEncodingX86Op_MemZAX:
+      if (isign3 == 0)
+        goto EmitX86Op;
+
+      rmRel = &o0;
+      if (isign3 == ENC_OPS1(Mem) && x86IsImplicitMem(o0, Gp::kIdAx))
+        goto EmitX86OpImplicitMem;
+
+      break;
+
+    case InstDB::kEncodingX86I_xAX:
+      // Implicit form.
+      if (isign3 == ENC_OPS1(Imm)) {
+        immValue = o0.as<Imm>().valueAs<uint8_t>();
+        immSize = 1;
+        goto EmitX86Op;
+      }
+
+      // Explicit form.
+      if (isign3 == ENC_OPS2(Reg, Imm) && o0.id() == Gp::kIdAx) {
+        immValue = o1.as<Imm>().valueAs<uint8_t>();
+        immSize = 1;
+        goto EmitX86Op;
+      }
+      break;
+
+    case InstDB::kEncodingX86M_NoMemSize:
+      if (o0.isReg())
+        opcode.addPrefixBySize(o0.size());
+      goto CaseX86M_NoSize;
+
+    case InstDB::kEncodingX86M:
+      opcode.addPrefixBySize(o0.size());
+      ASMJIT_FALLTHROUGH;
+
+    case InstDB::kEncodingX86M_NoSize:
+CaseX86M_NoSize:
+      rbReg = o0.id();
+      if (isign3 == ENC_OPS1(Reg))
+        goto EmitX86R;
+
+      rmRel = &o0;
+      if (isign3 == ENC_OPS1(Mem))
+        goto EmitX86M;
+      break;
+
+    case InstDB::kEncodingX86M_GPB_MulDiv:
+CaseX86M_GPB_MulDiv:
+      // Explicit form?
+      if (isign3 > 0x7) {
+        // [AX] <- [AX] div|mul r8.
+        if (isign3 == ENC_OPS2(Reg, Reg)) {
+          if (ASMJIT_UNLIKELY(!Reg::isGpw(o0, Gp::kIdAx) || !Reg::isGpb(o1)))
+            goto InvalidInstruction;
+
+          rbReg = o1.id();
+          FIXUP_GPB(o1, rbReg);
+          goto EmitX86R;
+        }
+
+        // [AX] <- [AX] div|mul m8.
+        if (isign3 == ENC_OPS2(Reg, Mem)) {
+          if (ASMJIT_UNLIKELY(!Reg::isGpw(o0, Gp::kIdAx)))
+            goto InvalidInstruction;
+
+          rmRel = &o1;
+          goto EmitX86M;
+        }
+
+        // [?DX:?AX] <- [?DX:?AX] div|mul r16|r32|r64
+        if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
+          if (ASMJIT_UNLIKELY(o0.size() != o1.size()))
+            goto InvalidInstruction;
+
+          opcode.addArithBySize(o0.size());
+          rbReg = o2.id();
+          goto EmitX86R;
+        }
+
+        // [?DX:?AX] <- [?DX:?AX] div|mul m16|m32|m64
+        if (isign3 == ENC_OPS3(Reg, Reg, Mem)) {
+          if (ASMJIT_UNLIKELY(o0.size() != o1.size()))
+            goto InvalidInstruction;
+
+          opcode.addArithBySize(o0.size());
+          rmRel = &o2;
+          goto EmitX86M;
+        }
+
+        goto InvalidInstruction;
+      }
+
+      ASMJIT_FALLTHROUGH;
+
+    case InstDB::kEncodingX86M_GPB:
+      if (isign3 == ENC_OPS1(Reg)) {
+        opcode.addArithBySize(o0.size());
+        rbReg = o0.id();
+
+        if (o0.size() != 1)
+          goto EmitX86R;
+
+        FIXUP_GPB(o0, rbReg);
+        goto EmitX86R;
+      }
+
+      if (isign3 == ENC_OPS1(Mem)) {
+        if (ASMJIT_UNLIKELY(o0.size() == 0))
+          goto AmbiguousOperandSize;
+
+        opcode.addArithBySize(o0.size());
+        rmRel = &o0;
+        goto EmitX86M;
+      }
+      break;
+
+    case InstDB::kEncodingX86M_Only_EDX_EAX:
+      if (isign3 == ENC_OPS3(Mem, Reg, Reg) && Reg::isGpd(o1, Gp::kIdDx) && Reg::isGpd(o2, Gp::kIdAx)) {
+        rmRel = &o0;
+        goto EmitX86M;
+      }
+      ASMJIT_FALLTHROUGH;
+
+    case InstDB::kEncodingX86M_Only:
+      if (isign3 == ENC_OPS1(Mem)) {
+        rmRel = &o0;
+        goto EmitX86M;
+      }
+      break;
+
+    case InstDB::kEncodingX86M_Nop:
+      if (isign3 == ENC_OPS1(None))
+        goto EmitX86Op;
+
+      // Single operand NOP instruction "0F 1F /0".
+      opcode = Opcode::k000F00 | 0x1F;
+      opReg = 0;
+
+      if (isign3 == ENC_OPS1(Reg)) {
+        opcode.addPrefixBySize(o0.size());
+        rbReg = o0.id();
+        goto EmitX86R;
+      }
+
+      if (isign3 == ENC_OPS1(Mem)) {
+        opcode.addPrefixBySize(o0.size());
+        rmRel = &o0;
+        goto EmitX86M;
+      }
+
+      // Two operand NOP instruction "0F 1F /r".
+      opReg = o1.id();
+      opcode.addPrefixBySize(o1.size());
+
+      if (isign3 == ENC_OPS2(Reg, Reg)) {
+        rbReg = o0.id();
+        goto EmitX86R;
+      }
+
+      if (isign3 == ENC_OPS2(Mem, Reg)) {
+        rmRel = &o0;
+        goto EmitX86M;
+      }
+      break;
+
+    case InstDB::kEncodingX86R_FromM:
+      if (isign3 == ENC_OPS1(Mem)) {
+        rmRel = &o0;
+        rbReg = o0.id();
+        goto EmitX86RFromM;
+      }
+      break;
+
+    case InstDB::kEncodingX86R32_EDX_EAX:
+      // Explicit form: R32, EDX, EAX.
+      if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
+        if (!Reg::isGpd(o1, Gp::kIdDx) || !Reg::isGpd(o2, Gp::kIdAx))
+          goto InvalidInstruction;
+        rbReg = o0.id();
+        goto EmitX86R;
+      }
+
+      // Implicit form: R32.
+      if (isign3 == ENC_OPS1(Reg)) {
+        if (!Reg::isGpd(o0))
+          goto InvalidInstruction;
+        rbReg = o0.id();
+        goto EmitX86R;
+      }
+      break;
+
+    case InstDB::kEncodingX86R_Native:
+      if (isign3 == ENC_OPS1(Reg)) {
+        rbReg = o0.id();
+        goto EmitX86R;
+      }
+      break;
+
+    case InstDB::kEncodingX86Rm:
+      opcode.addPrefixBySize(o0.size());
+      ASMJIT_FALLTHROUGH;
+
+    case InstDB::kEncodingX86Rm_NoSize:
+      if (isign3 == ENC_OPS2(Reg, Reg)) {
+        opReg = o0.id();
+        rbReg = o1.id();
+        goto EmitX86R;
+      }
+
+      if (isign3 == ENC_OPS2(Reg, Mem)) {
+        opReg = o0.id();
+        rmRel = &o1;
+        goto EmitX86M;
+      }
+      break;
+
+    case InstDB::kEncodingX86Rm_Raw66H:
+      // We normally emit either [66|F2|F3], this instruction requires 66+[F2|F3].
+      if (isign3 == ENC_OPS2(Reg, Reg)) {
+        opReg = o0.id();
+        rbReg = o1.id();
+
+        if (o0.size() == 2)
+          writer.emit8(0x66);
+        else
+          opcode.addWBySize(o0.size());
+        goto EmitX86R;
+      }
+
+      if (isign3 == ENC_OPS2(Reg, Mem)) {
+        opReg = o0.id();
+        rmRel = &o1;
+
+        if (o0.size() == 2)
+          writer.emit8(0x66);
+        else
+          opcode.addWBySize(o0.size());
+        goto EmitX86M;
+      }
+      break;
+
+    case InstDB::kEncodingX86Mr:
+      opcode.addPrefixBySize(o0.size());
+      ASMJIT_FALLTHROUGH;
+
+    case InstDB::kEncodingX86Mr_NoSize:
+      if (isign3 == ENC_OPS2(Reg, Reg)) {
+        rbReg = o0.id();
+        opReg = o1.id();
+        goto EmitX86R;
+      }
+
+      if (isign3 == ENC_OPS2(Mem, Reg)) {
+        rmRel = &o0;
+        opReg = o1.id();
+        goto EmitX86M;
+      }
+      break;
+
+    case InstDB::kEncodingX86Arith:
+      if (isign3 == ENC_OPS2(Reg, Reg)) {
+        opcode.addArithBySize(o0.size());
+
+        if (o0.size() != o1.size())
+          goto OperandSizeMismatch;
+
+        rbReg = o0.id();
+        opReg = o1.id();
+
+        if (o0.size() == 1) {
+          FIXUP_GPB(o0, rbReg);
+          FIXUP_GPB(o1, opReg);
+        }
+
+        // MOD/MR: The default encoding used if not instructed otherwise.
+        if (!Support::test(options, InstOptions::kX86_ModRM))
+          goto EmitX86R;
+
+        // MOD/RM: Alternative encoding selected via instruction options.
+        opcode += 2;
+        std::swap(opReg, rbReg);
+        goto EmitX86R;
+      }
+
+      if (isign3 == ENC_OPS2(Reg, Mem)) {
+        opcode += 2;
+        opcode.addArithBySize(o0.size());
+
+        opReg = o0.id();
+        rmRel = &o1;
+
+        if (o0.size() != 1)
+          goto EmitX86M;
+
+        FIXUP_GPB(o0, opReg);
+        goto EmitX86M;
+      }
+
+      if (isign3 == ENC_OPS2(Mem, Reg)) {
+        opcode.addArithBySize(o1.size());
+        opReg = o1.id();
+        rmRel = &o0;
+
+        if (o1.size() != 1)
+          goto EmitX86M;
+
+        FIXUP_GPB(o1, opReg);
+        goto EmitX86M;
+      }
+
+      // The remaining instructions use 0x80 opcode.
+      opcode = 0x80;
+
+      if (isign3 == ENC_OPS2(Reg, Imm)) {
+        uint32_t size = o0.size();
+
+        rbReg = o0.id();
+        immValue = o1.as<Imm>().value();
+
+        if (size == 1) {
+          FIXUP_GPB(o0, rbReg);
+          immSize = 1;
+        }
+        else {
+          if (size == 2) {
+            opcode |= Opcode::kPP_66;
+          }
+          else if (size == 4) {
+            // Sign extend so isInt8 returns the right result.
+            immValue = x86SignExtendI32<int64_t>(immValue);
+          }
+          else if (size == 8) {
+            bool canTransformTo32Bit = instId == Inst::kIdAnd && Support::isUInt32(immValue);
+
+            if (!Support::isInt32(immValue)) {
+              // We would do this by default when `kOptionOptimizedForSize` is
+              // enabled, however, in this case we just force this as otherwise
+              // we would have to fail.
+              if (canTransformTo32Bit)
+                size = 4;
+              else
+                goto InvalidImmediate;
+            }
+            else if (canTransformTo32Bit && hasEncodingOption(EncodingOptions::kOptimizeForSize)) {
+              size = 4;
+            }
+
+            opcode.addWBySize(size);
+          }
+
+          immSize = FastUInt8(Support::min<uint32_t>(size, 4));
+          if (Support::isInt8(immValue) && !Support::test(options, InstOptions::kLongForm))
+            immSize = 1;
+        }
+
+        // Short form - AL, AX, EAX, RAX.
+        if (rbReg == 0 && (size == 1 || immSize != 1) && !Support::test(options, InstOptions::kLongForm)) {
+          opcode &= Opcode::kPP_66 | Opcode::kW;
+          opcode |= ((opReg << 3) | (0x04 + (size != 1)));
+          immSize = FastUInt8(Support::min<uint32_t>(size, 4));
+          goto EmitX86Op;
+        }
+
+        opcode += size != 1 ? (immSize != 1 ? 1 : 3) : 0;
+        goto EmitX86R;
+      }
+
+      if (isign3 == ENC_OPS2(Mem, Imm)) {
+        uint32_t memSize = o0.size();
+
+        if (ASMJIT_UNLIKELY(memSize == 0))
+          goto AmbiguousOperandSize;
+
+        immValue = o1.as<Imm>().value();
+        immSize = FastUInt8(Support::min<uint32_t>(memSize, 4));
+
+        // Sign extend so isInt8 returns the right result.
+        if (memSize == 4)
+          immValue = x86SignExtendI32<int64_t>(immValue);
+
+        if (Support::isInt8(immValue) && !Support::test(options, InstOptions::kLongForm))
+          immSize = 1;
+
+        opcode += memSize != 1 ? (immSize != 1 ? 1 : 3) : 0;
+        opcode.addPrefixBySize(memSize);
+
+        rmRel = &o0;
+        goto EmitX86M;
+      }
+      break;
+
+    case InstDB::kEncodingX86Bswap:
+      if (isign3 == ENC_OPS1(Reg)) {
+        if (ASMJIT_UNLIKELY(o0.size() == 1))
+          goto InvalidInstruction;
+
+        opReg = o0.id();
+        opcode.addPrefixBySize(o0.size());
+        goto EmitX86OpReg;
+      }
+      break;
+
+    case InstDB::kEncodingX86Bt:
+      if (isign3 == ENC_OPS2(Reg, Reg)) {
+        opcode.addPrefixBySize(o1.size());
+        opReg = o1.id();
+        rbReg = o0.id();
+        goto EmitX86R;
+      }
+
+      if (isign3 == ENC_OPS2(Mem, Reg)) {
+        opcode.addPrefixBySize(o1.size());
+        opReg = o1.id();
+        rmRel = &o0;
+        goto EmitX86M;
+      }
+
+      // The remaining instructions use the secondary opcode/r.
+      immValue = o1.as<Imm>().value();
+      immSize = 1;
+
+      opcode = x86AltOpcodeOf(instInfo);
+      opcode.addPrefixBySize(o0.size());
+      opReg = opcode.extractModO();
+
+      if (isign3 == ENC_OPS2(Reg, Imm)) {
+        rbReg = o0.id();
+        goto EmitX86R;
+      }
+
+      if (isign3 == ENC_OPS2(Mem, Imm)) {
+        if (ASMJIT_UNLIKELY(o0.size() == 0))
+          goto AmbiguousOperandSize;
+
+        rmRel = &o0;
+        goto EmitX86M;
+      }
+      break;
+
+    case InstDB::kEncodingX86Call:
+      if (isign3 == ENC_OPS1(Reg)) {
+        rbReg = o0.id();
+        goto EmitX86R;
+      }
+
+      rmRel = &o0;
+      if (isign3 == ENC_OPS1(Mem))
+        goto EmitX86M;
+
+      // Call with 32-bit displacement use 0xE8 opcode. Call with 8-bit displacement is not encodable so the
+      // alternative opcode field in X86DB must be zero.
+      opcode = 0xE8;
+      opReg = 0;
+      goto EmitJmpCall;
+
+    case InstDB::kEncodingX86Cmpxchg: {
+      // Convert explicit to implicit.
+      if (isign3 & (0x7 << 6)) {
+        if (!Reg::isGp(o2) || o2.id() != Gp::kIdAx)
+          goto InvalidInstruction;
+        isign3 &= 0x3F;
+      }
+
+      if (isign3 == ENC_OPS2(Reg, Reg)) {
+        if (o0.size() != o1.size())
+          goto OperandSizeMismatch;
+
+        opcode.addArithBySize(o0.size());
+        rbReg = o0.id();
+        opReg = o1.id();
+
+        if (o0.size() != 1)
+          goto EmitX86R;
+
+        FIXUP_GPB(o0, rbReg);
+        FIXUP_GPB(o1, opReg);
+        goto EmitX86R;
+      }
+
+      if (isign3 == ENC_OPS2(Mem, Reg)) {
+        opcode.addArithBySize(o1.size());
+        opReg = o1.id();
+        rmRel = &o0;
+
+        if (o1.size() != 1)
+          goto EmitX86M;
+
+        FIXUP_GPB(o1, opReg);
+        goto EmitX86M;
+      }
+      break;
+    }
+
+    case InstDB::kEncodingX86Cmpxchg8b_16b: {
+      const Operand_& o3 = opExt[EmitterUtils::kOp3];
+      const Operand_& o4 = opExt[EmitterUtils::kOp4];
+
+      if (isign3 == ENC_OPS3(Mem, Reg, Reg)) {
+        if (o3.isReg() && o4.isReg()) {
+          rmRel = &o0;
+          goto EmitX86M;
+        }
+      }
+
+      if (isign3 == ENC_OPS1(Mem)) {
+        rmRel = &o0;
+        goto EmitX86M;
+      }
+      break;
+    }
+
+    case InstDB::kEncodingX86Crc:
+      opReg = o0.id();
+      opcode.addWBySize(o0.size());
+
+      if (isign3 == ENC_OPS2(Reg, Reg)) {
+        rbReg = o1.id();
+
+        if (o1.size() == 1) {
+          FIXUP_GPB(o1, rbReg);
+          goto EmitX86R;
+        }
+        else {
+          // This seems to be the only exception of encoding '66F2' prefix.
+          if (o1.size() == 2) writer.emit8(0x66);
+
+          opcode.add(1);
+          goto EmitX86R;
+        }
+      }
+
+      if (isign3 == ENC_OPS2(Reg, Mem)) {
+        rmRel = &o1;
+        if (o1.size() == 0)
+          goto AmbiguousOperandSize;
+
+        // This seems to be the only exception of encoding '66F2' prefix.
+        if (o1.size() == 2) writer.emit8(0x66);
+
+        opcode += o1.size() != 1;
+        goto EmitX86M;
+      }
+      break;
+
+    case InstDB::kEncodingX86Enter:
+      if (isign3 == ENC_OPS2(Imm, Imm)) {
+        uint32_t iw = o0.as<Imm>().valueAs<uint16_t>();
+        uint32_t ib = o1.as<Imm>().valueAs<uint8_t>();
+
+        immValue = iw | (ib << 16);
+        immSize = 3;
+        goto EmitX86Op;
+      }
+      break;
+
+    case InstDB::kEncodingX86Imul:
+      // First process all forms that are distinct from `kEncodingX86M_GPB_MulDiv`.
+      if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
+        opcode = 0x6B;
+        opcode.addPrefixBySize(o0.size());
+
+        immValue = o2.as<Imm>().value();
+        immSize = 1;
+
+        if (!Support::isInt8(immValue) || Support::test(options, InstOptions::kLongForm)) {
+          opcode -= 2;
+          immSize = o0.size() == 2 ? 2 : 4;
+        }
+
+        opReg = o0.id();
+        rbReg = o1.id();
+
+        goto EmitX86R;
+      }
+
+      if (isign3 == ENC_OPS3(Reg, Mem, Imm)) {
+        opcode = 0x6B;
+        opcode.addPrefixBySize(o0.size());
+
+        immValue = o2.as<Imm>().value();
+        immSize = 1;
+
+        // Sign extend so isInt8 returns the right result.
+        if (o0.size() == 4)
+          immValue = x86SignExtendI32<int64_t>(immValue);
+
+        if (!Support::isInt8(immValue) || Support::test(options, InstOptions::kLongForm)) {
+          opcode -= 2;
+          immSize = o0.size() == 2 ? 2 : 4;
+        }
+
+        opReg = o0.id();
+        rmRel = &o1;
+
+        goto EmitX86M;
+      }
+
+      if (isign3 == ENC_OPS2(Reg, Reg)) {
+        // Must be explicit 'ax, r8' form.
+        if (o1.size() == 1)
+          goto CaseX86M_GPB_MulDiv;
+
+        if (o0.size() != o1.size())
+          goto OperandSizeMismatch;
+
+        opReg = o0.id();
+        rbReg = o1.id();
+
+        opcode = Opcode::k000F00 | 0xAF;
+        opcode.addPrefixBySize(o0.size());
+        goto EmitX86R;
+      }
+
+      if (isign3 == ENC_OPS2(Reg, Mem)) {
+        // Must be explicit 'ax, m8' form.
+        if (o1.size() == 1)
+          goto CaseX86M_GPB_MulDiv;
+
+        opReg = o0.id();
+        rmRel = &o1;
+
+        opcode = Opcode::k000F00 | 0xAF;
+        opcode.addPrefixBySize(o0.size());
+        goto EmitX86M;
+      }
+
+      // Shorthand to imul 'reg, reg, imm'.
+      if (isign3 == ENC_OPS2(Reg, Imm)) {
+        opcode = 0x6B;
+        opcode.addPrefixBySize(o0.size());
+
+        immValue = o1.as<Imm>().value();
+        immSize = 1;
+
+        // Sign extend so isInt8 returns the right result.
+        if (o0.size() == 4)
+          immValue = x86SignExtendI32<int64_t>(immValue);
+
+        if (!Support::isInt8(immValue) || Support::test(options, InstOptions::kLongForm)) {
+          opcode -= 2;
+          immSize = o0.size() == 2 ? 2 : 4;
+        }
+
+        opReg = rbReg = o0.id();
+        goto EmitX86R;
+      }
+
+      // Try implicit form.
+      goto CaseX86M_GPB_MulDiv;
+
+    case InstDB::kEncodingX86In:
+      if (isign3 == ENC_OPS2(Reg, Imm)) {
+        if (ASMJIT_UNLIKELY(o0.id() != Gp::kIdAx))
+          goto InvalidInstruction;
+
+        immValue = o1.as<Imm>().valueAs<uint8_t>();
+        immSize = 1;
+
+        opcode = x86AltOpcodeOf(instInfo) + (o0.size() != 1);
+        opcode.add66hBySize(o0.size());
+        goto EmitX86Op;
+      }
+
+      if (isign3 == ENC_OPS2(Reg, Reg)) {
+        if (ASMJIT_UNLIKELY(o0.id() != Gp::kIdAx || o1.id() != Gp::kIdDx))
+          goto InvalidInstruction;
+
+        opcode += o0.size() != 1;
+        opcode.add66hBySize(o0.size());
+        goto EmitX86Op;
+      }
+      break;
+
+    case InstDB::kEncodingX86Ins:
+      if (isign3 == ENC_OPS2(Mem, Reg)) {
+        if (ASMJIT_UNLIKELY(!x86IsImplicitMem(o0, Gp::kIdDi) || o1.id() != Gp::kIdDx))
+          goto InvalidInstruction;
+
+        uint32_t size = o0.size();
+        if (ASMJIT_UNLIKELY(size == 0))
+          goto AmbiguousOperandSize;
+
+        rmRel = &o0;
+        opcode += (size != 1);
+
+        opcode.add66hBySize(size);
+        goto EmitX86OpImplicitMem;
+      }
+      break;
+
+    case InstDB::kEncodingX86IncDec:
+      if (isign3 == ENC_OPS1(Reg)) {
+        rbReg = o0.id();
+
+        if (o0.size() == 1) {
+          FIXUP_GPB(o0, rbReg);
+          goto EmitX86R;
+        }
+
+        if (is32Bit()) {
+          // INC r16|r32 is only encodable in 32-bit mode (collides with REX).
+          opcode = x86AltOpcodeOf(instInfo) + (rbReg & 0x07);
+          opcode.add66hBySize(o0.size());
+          goto EmitX86Op;
+        }
+        else {
+          opcode.addArithBySize(o0.size());
+          goto EmitX86R;
+        }
+      }
+
+      if (isign3 == ENC_OPS1(Mem)) {
+        if (!o0.size())
+          goto AmbiguousOperandSize;
+        opcode.addArithBySize(o0.size());
+        rmRel = &o0;
+        goto EmitX86M;
+      }
+      break;
+
+    case InstDB::kEncodingX86Int:
+      if (isign3 == ENC_OPS1(Imm)) {
+        immValue = o0.as<Imm>().value();
+        immSize = 1;
+        goto EmitX86Op;
+      }
+      break;
+
+    case InstDB::kEncodingX86Jcc:
+      if (Support::test(options, InstOptions::kTaken | InstOptions::kNotTaken) && hasEncodingOption(EncodingOptions::kPredictedJumps)) {
+        uint8_t prefix = Support::test(options, InstOptions::kTaken) ? uint8_t(0x3E) : uint8_t(0x2E);
+        writer.emit8(prefix);
+      }
+
+      rmRel = &o0;
+      opReg = 0;
+      goto EmitJmpCall;
+
+    case InstDB::kEncodingX86JecxzLoop:
+      rmRel = &o0;
+      // Explicit jecxz|loop [r|e]cx, dst
+      if (o0.isReg()) {
+        if (ASMJIT_UNLIKELY(!Reg::isGp(o0, Gp::kIdCx)))
+          goto InvalidInstruction;
+
+        writer.emitAddressOverride((is32Bit() && o0.size() == 2) || (is64Bit() && o0.size() == 4));
+        rmRel = &o1;
+      }
+
+      opReg = 0;
+      goto EmitJmpCall;
+
+    case InstDB::kEncodingX86Jmp:
+      if (isign3 == ENC_OPS1(Reg)) {
+        rbReg = o0.id();
+        goto EmitX86R;
+      }
+
+      rmRel = &o0;
+      if (isign3 == ENC_OPS1(Mem))
+        goto EmitX86M;
+
+      // Jump encoded with 32-bit displacement use 0xE9 opcode. Jump encoded with 8-bit displacement's opcode is
+      // stored as an alternative opcode.
+      opcode = 0xE9;
+      opReg = 0;
+      goto EmitJmpCall;
+
+    case InstDB::kEncodingX86JmpRel:
+      rmRel = &o0;
+      goto EmitJmpCall;
+
+    case InstDB::kEncodingX86LcallLjmp:
+      if (isign3 == ENC_OPS1(Mem)) {
+        rmRel = &o0;
+        uint32_t mSize = rmRel->size();
+        if (mSize == 0) {
+          mSize = registerSize();
+        }
+        else {
+          mSize -= 2;
+          if (mSize != 2 && mSize != 4 && mSize != registerSize())
+            goto InvalidAddress;
+        }
+        opcode.addPrefixBySize(mSize);
+        goto EmitX86M;
+      }
+
+      if (isign3 == ENC_OPS2(Imm, Imm)) {
+        if (!is32Bit())
+          goto InvalidInstruction;
+
+        const Imm& imm0 = o0.as<Imm>();
+        const Imm& imm1 = o1.as<Imm>();
+
+        if (imm0.value() > 0xFFFFu || imm1.value() > 0xFFFFFFFFu)
+          goto InvalidImmediate;
+
+        opcode = x86AltOpcodeOf(instInfo);
+        immValue = imm1.value() | (imm0.value() << 32);
+        immSize = 6;
+        goto EmitX86Op;
+      }
+      break;
+
+    case InstDB::kEncodingX86Lea:
+      if (isign3 == ENC_OPS2(Reg, Mem)) {
+        opcode.addPrefixBySize(o0.size());
+        opReg = o0.id();
+        rmRel = &o1;
+        goto EmitX86M;
+      }
+      break;
+
+    case InstDB::kEncodingX86Mov:
+      // Reg <- Reg
+      if (isign3 == ENC_OPS2(Reg, Reg)) {
+        // Asmjit uses segment registers indexed from 1 to 6, leaving zero as "no segment register used". We have to
+        // fix this (decrement the index of the register) when emitting MOV instructions which move to/from a segment
+        // register. The segment register is always `opReg`, because the MOV instruction uses either RM or MR encoding.
+
+        // GP <- ??
+        if (Reg::isGp(o0)) {
+          rbReg = o0.id();
+          opReg = o1.id();
+
+          // GP <- GP
+          if (Reg::isGp(o1)) {
+            uint32_t opSize = o0.size();
+            if (opSize != o1.size())
+              goto InvalidInstruction;
+
+            if (opSize == 1) {
+              FIXUP_GPB(o0, rbReg);
+              FIXUP_GPB(o1, opReg);
+              opcode = 0x88;
+
+              if (!Support::test(options, InstOptions::kX86_ModRM))
+                goto EmitX86R;
+
+              opcode += 2;
+              std::swap(opReg, rbReg);
+              goto EmitX86R;
+            }
+            else {
+              opcode = 0x89;
+              opcode.addPrefixBySize(opSize);
+
+              if (!Support::test(options, InstOptions::kX86_ModRM))
+                goto EmitX86R;
+
+              opcode += 2;
+              std::swap(opReg, rbReg);
+              goto EmitX86R;
+            }
+          }
+
+          // GP <- SReg
+          if (Reg::isSReg(o1)) {
+            opcode = 0x8C;
+            opcode.addPrefixBySize(o0.size());
+            opReg--;
+            goto EmitX86R;
+          }
+
+          // GP <- CReg
+          if (Reg::isCReg(o1)) {
+            opcode = Opcode::k000F00 | 0x20;
+
+            // Use `LOCK MOV` in 32-bit mode if CR8+ register is accessed (AMD extension).
+            if ((opReg & 0x8) && is32Bit()) {
+              writer.emit8(0xF0);
+              opReg &= 0x7;
+            }
+            goto EmitX86R;
+          }
+
+          // GP <- DReg
+          if (Reg::isDReg(o1)) {
+            opcode = Opcode::k000F00 | 0x21;
+            goto EmitX86R;
+          }
+        }
+        else {
+          opReg = o0.id();
+          rbReg = o1.id();
+
+          // ?? <- GP
+          if (!Reg::isGp(o1))
+            goto InvalidInstruction;
+
+          // SReg <- GP
+          if (Reg::isSReg(o0)) {
+            opcode = 0x8E;
+            opcode.addPrefixBySize(o1.size());
+            opReg--;
+            goto EmitX86R;
+          }
+
+          // CReg <- GP
+          if (Reg::isCReg(o0)) {
+            opcode = Opcode::k000F00 | 0x22;
+
+            // Use `LOCK MOV` in 32-bit mode if CR8+ register is accessed (AMD extension).
+            if ((opReg & 0x8) && is32Bit()) {
+              writer.emit8(0xF0);
+              opReg &= 0x7;
+            }
+            goto EmitX86R;
+          }
+
+          // DReg <- GP
+          if (Reg::isDReg(o0)) {
+            opcode = Opcode::k000F00 | 0x23;
+            goto EmitX86R;
+          }
+        }
+
+        goto InvalidInstruction;
+      }
+
+      if (isign3 == ENC_OPS2(Reg, Mem)) {
+        opReg = o0.id();
+        rmRel = &o1;
+
+        // SReg <- Mem
+        if (Reg::isSReg(o0)) {
+          opcode = 0x8E;
+          opcode.addPrefixBySize(o1.size());
+          opReg--;
+          goto EmitX86M;
+        }
+        // Reg <- Mem
+        else {
+          opcode = 0;
+          opcode.addArithBySize(o0.size());
+
+          // Handle a special form of `mov al|ax|eax|rax, [ptr64]` that doesn't use MOD.
+          if (opReg == Gp::kIdAx && !rmRel->as<Mem>().hasBaseOrIndex()) {
+            if (x86ShouldUseMovabs(this, writer, o0.size(), options, rmRel->as<Mem>())) {
+              opcode += 0xA0;
+              immValue = rmRel->as<Mem>().offset();
+              goto EmitX86OpMovAbs;
+            }
+          }
+
+          if (o0.size() == 1)
+            FIXUP_GPB(o0, opReg);
+
+          opcode += 0x8A;
+          goto EmitX86M;
+        }
+      }
+
+      if (isign3 == ENC_OPS2(Mem, Reg)) {
+        opReg = o1.id();
+        rmRel = &o0;
+
+        // Mem <- SReg
+        if (Reg::isSReg(o1)) {
+          opcode = 0x8C;
+          opcode.addPrefixBySize(o0.size());
+          opReg--;
+          goto EmitX86M;
+        }
+        // Mem <- Reg
+        else {
+          opcode = 0;
+          opcode.addArithBySize(o1.size());
+
+          // Handle a special form of `mov [ptr64], al|ax|eax|rax` that doesn't use MOD.
+          if (opReg == Gp::kIdAx && !rmRel->as<Mem>().hasBaseOrIndex()) {
+            if (x86ShouldUseMovabs(this, writer, o1.size(), options, rmRel->as<Mem>())) {
+              opcode += 0xA2;
+              immValue = rmRel->as<Mem>().offset();
+              goto EmitX86OpMovAbs;
+            }
+          }
+
+          if (o1.size() == 1)
+            FIXUP_GPB(o1, opReg);
+
+          opcode += 0x88;
+          goto EmitX86M;
+        }
+      }
+
+      if (isign3 == ENC_OPS2(Reg, Imm)) {
+        opReg = o0.id();
+        immSize = FastUInt8(o0.size());
+
+        if (immSize == 1) {
+          FIXUP_GPB(o0, opReg);
+
+          opcode = 0xB0;
+          immValue = o1.as<Imm>().valueAs<uint8_t>();
+          goto EmitX86OpReg;
+        }
+        else {
+          // 64-bit immediate in 64-bit mode is allowed.
+          immValue = o1.as<Imm>().value();
+
+          // Optimize the instruction size by using a 32-bit immediate if possible.
+          if (immSize == 8 && !Support::test(options, InstOptions::kLongForm)) {
+            if (Support::isUInt32(immValue) && hasEncodingOption(EncodingOptions::kOptimizeForSize)) {
+              // Zero-extend by using a 32-bit GPD destination instead of a 64-bit GPQ.
+              immSize = 4;
+            }
+            else if (Support::isInt32(immValue)) {
+              // Sign-extend, uses 'C7 /0' opcode.
+              rbReg = opReg;
+
+              opcode = Opcode::kW | 0xC7;
+              opReg = 0;
+
+              immSize = 4;
+              goto EmitX86R;
+            }
+          }
+
+          opcode = 0xB8;
+          opcode.addPrefixBySize(immSize);
+          goto EmitX86OpReg;
+        }
+      }
+
+      if (isign3 == ENC_OPS2(Mem, Imm)) {
+        uint32_t memSize = o0.size();
+        if (ASMJIT_UNLIKELY(memSize == 0))
+          goto AmbiguousOperandSize;
+
+        opcode = 0xC6 + (memSize != 1);
+        opcode.addPrefixBySize(memSize);
+        opReg = 0;
+        rmRel = &o0;
+
+        immValue = o1.as<Imm>().value();
+        immSize = FastUInt8(Support::min<uint32_t>(memSize, 4));
+        goto EmitX86M;
+      }
+      break;
+
+    case InstDB::kEncodingX86Movabs:
+      // Reg <- Mem
+      if (isign3 == ENC_OPS2(Reg, Mem)) {
+        opReg = o0.id();
+        rmRel = &o1;
+
+        opcode = 0xA0;
+        opcode.addArithBySize(o0.size());
+
+        if (ASMJIT_UNLIKELY(!o0.as<Reg>().isGp()) || opReg != Gp::kIdAx)
+          goto InvalidInstruction;
+
+        if (ASMJIT_UNLIKELY(rmRel->as<Mem>().hasBaseOrIndex()))
+          goto InvalidAddress;
+
+        if (ASMJIT_UNLIKELY(rmRel->as<Mem>().addrType() == Mem::AddrType::kRel))
+          goto InvalidAddress;
+
+        immValue = rmRel->as<Mem>().offset();
+        goto EmitX86OpMovAbs;
+      }
+
+      // Mem <- Reg
+      if (isign3 == ENC_OPS2(Mem, Reg)) {
+        opReg = o1.id();
+        rmRel = &o0;
+
+        opcode = 0xA2;
+        opcode.addArithBySize(o1.size());
+
+        if (ASMJIT_UNLIKELY(!o1.as<Reg>().isGp()) || opReg != Gp::kIdAx)
+          goto InvalidInstruction;
+
+        if (ASMJIT_UNLIKELY(rmRel->as<Mem>().hasBaseOrIndex()))
+          goto InvalidAddress;
+
+        immValue = rmRel->as<Mem>().offset();
+        goto EmitX86OpMovAbs;
+      }
+
+      // Reg <- Imm.
+      if (isign3 == ENC_OPS2(Reg, Imm)) {
+        if (ASMJIT_UNLIKELY(!o0.as<Reg>().isGpq()))
+          goto InvalidInstruction;
+
+        opReg = o0.id();
+        opcode = 0xB8;
+
+        immSize = 8;
+        immValue = o1.as<Imm>().value();
+
+        opcode.addPrefixBySize(8);
+        goto EmitX86OpReg;
+      }
+      break;
+
+    case InstDB::kEncodingX86MovsxMovzx:
+      opcode.add(o1.size() != 1);
+      opcode.addPrefixBySize(o0.size());
+
+      if (isign3 == ENC_OPS2(Reg, Reg)) {
+        opReg = o0.id();
+        rbReg = o1.id();
+
+        if (o1.size() != 1)
+          goto EmitX86R;
+
+        FIXUP_GPB(o1, rbReg);
+        goto EmitX86R;
+      }
+
+      if (isign3 == ENC_OPS2(Reg, Mem)) {
+        opReg = o0.id();
+        rmRel = &o1;
+        goto EmitX86M;
+      }
+      break;
+
+    case InstDB::kEncodingX86MovntiMovdiri:
+      if (isign3 == ENC_OPS2(Mem, Reg)) {
+        opcode.addWIf(Reg::isGpq(o1));
+
+        opReg = o1.id();
+        rmRel = &o0;
+        goto EmitX86M;
+      }
+      break;
+
+    case InstDB::kEncodingX86EnqcmdMovdir64b:
+      if (isign3 == ENC_OPS2(Mem, Mem)) {
+        const Mem& m0 = o0.as<Mem>();
+        // This is the only required validation, the rest is handled afterwards.
+        if (ASMJIT_UNLIKELY(m0.baseType() != o1.as<Mem>().baseType() ||
+                            m0.hasIndex() ||
+                            m0.hasOffset() ||
+                            (m0.hasSegment() && m0.segmentId() != SReg::kIdEs)))
+          goto InvalidInstruction;
+
+        // The first memory operand is passed via register, the second memory operand is RM.
+        opReg = o0.as<Mem>().baseId();
+        rmRel = &o1;
+        goto EmitX86M;
+      }
+      break;
+
+    case InstDB::kEncodingX86Out:
+      if (isign3 == ENC_OPS2(Imm, Reg)) {
+        if (ASMJIT_UNLIKELY(o1.id() != Gp::kIdAx))
+          goto InvalidInstruction;
+
+        opcode = x86AltOpcodeOf(instInfo) + (o1.size() != 1);
+        opcode.add66hBySize(o1.size());
+
+        immValue = o0.as<Imm>().valueAs<uint8_t>();
+        immSize = 1;
+        goto EmitX86Op;
+      }
+
+      if (isign3 == ENC_OPS2(Reg, Reg)) {
+        if (ASMJIT_UNLIKELY(o0.id() != Gp::kIdDx || o1.id() != Gp::kIdAx))
+          goto InvalidInstruction;
+
+        opcode.add(o1.size() != 1);
+        opcode.add66hBySize(o1.size());
+        goto EmitX86Op;
+      }
+      break;
+
+    case InstDB::kEncodingX86Outs:
+      if (isign3 == ENC_OPS2(Reg, Mem)) {
+        if (ASMJIT_UNLIKELY(o0.id() != Gp::kIdDx || !x86IsImplicitMem(o1, Gp::kIdSi)))
+          goto InvalidInstruction;
+
+        uint32_t size = o1.size();
+        if (ASMJIT_UNLIKELY(size == 0))
+          goto AmbiguousOperandSize;
+
+        rmRel = &o1;
+        opcode.add(size != 1);
+        opcode.add66hBySize(size);
+        goto EmitX86OpImplicitMem;
+      }
+      break;
+
+    case InstDB::kEncodingX86Push:
+      if (isign3 == ENC_OPS1(Reg)) {
+        if (Reg::isSReg(o0)) {
+          uint32_t segment = o0.id();
+          if (ASMJIT_UNLIKELY(segment >= SReg::kIdCount))
+            goto InvalidSegment;
+
+          opcode = x86OpcodePushSReg[segment];
+          goto EmitX86Op;
+        }
+        else {
+          goto CaseX86PushPop_Gp;
+        }
+      }
+
+      if (isign3 == ENC_OPS1(Imm)) {
+        immValue = o0.as<Imm>().value();
+        immSize = 4;
+
+        if (Support::isInt8(immValue) && !Support::test(options, InstOptions::kLongForm))
+          immSize = 1;
+
+        opcode = immSize == 1 ? 0x6A : 0x68;
+        goto EmitX86Op;
+      }
+      ASMJIT_FALLTHROUGH;
+
+    case InstDB::kEncodingX86Pop:
+      if (isign3 == ENC_OPS1(Reg)) {
+        if (Reg::isSReg(o0)) {
+          uint32_t segment = o0.id();
+          if (ASMJIT_UNLIKELY(segment == SReg::kIdCs || segment >= SReg::kIdCount))
+            goto InvalidSegment;
+
+          opcode = x86OpcodePopSReg[segment];
+          goto EmitX86Op;
+        }
+        else {
+CaseX86PushPop_Gp:
+          // We allow 2 byte, 4 byte, and 8 byte register sizes, although PUSH and POP only allow 2 bytes or
+          // native size. On 64-bit we simply PUSH/POP 64-bit register even if 32-bit register was given.
+          if (ASMJIT_UNLIKELY(o0.size() < 2))
+            goto InvalidInstruction;
+
+          opcode = x86AltOpcodeOf(instInfo);
+          opcode.add66hBySize(o0.size());
+          opReg = o0.id();
+          goto EmitX86OpReg;
+        }
+      }
+
+      if (isign3 == ENC_OPS1(Mem)) {
+        if (ASMJIT_UNLIKELY(o0.size() == 0))
+          goto AmbiguousOperandSize;
+
+        if (ASMJIT_UNLIKELY(o0.size() != 2 && o0.size() != registerSize()))
+          goto InvalidInstruction;
+
+        opcode.add66hBySize(o0.size());
+        rmRel = &o0;
+        goto EmitX86M;
+      }
+      break;
+
+    case InstDB::kEncodingX86Ret:
+      if (isign3 == 0) {
+        // 'ret' without immediate, change C2 to C3.
+        opcode.add(1);
+        goto EmitX86Op;
+      }
+
+      if (isign3 == ENC_OPS1(Imm)) {
+        immValue = o0.as<Imm>().value();
+        if (immValue == 0 && !Support::test(options, InstOptions::kLongForm)) {
+          // 'ret' without immediate, change C2 to C3.
+          opcode.add(1);
+          goto EmitX86Op;
+        }
+        else {
+          immSize = 2;
+          goto EmitX86Op;
+        }
+      }
+      break;
+
+    case InstDB::kEncodingX86Rot:
+      if (o0.isReg()) {
+        opcode.addArithBySize(o0.size());
+        rbReg = o0.id();
+
+        if (o0.size() == 1)
+          FIXUP_GPB(o0, rbReg);
+
+        if (isign3 == ENC_OPS2(Reg, Reg)) {
+          if (ASMJIT_UNLIKELY(o1.id() != Gp::kIdCx))
+            goto InvalidInstruction;
+
+          opcode += 2;
+          goto EmitX86R;
+        }
+
+        if (isign3 == ENC_OPS2(Reg, Imm)) {
+          immValue = o1.as<Imm>().value() & 0xFF;
+          immSize = 0;
+
+          if (immValue == 1 && !Support::test(options, InstOptions::kLongForm))
+            goto EmitX86R;
+
+          opcode -= 0x10;
+          immSize = 1;
+          goto EmitX86R;
+        }
+      }
+      else {
+        if (ASMJIT_UNLIKELY(o0.size() == 0))
+          goto AmbiguousOperandSize;
+        opcode.addArithBySize(o0.size());
+
+        if (isign3 == ENC_OPS2(Mem, Reg)) {
+          if (ASMJIT_UNLIKELY(o1.id() != Gp::kIdCx))
+            goto InvalidInstruction;
+
+          opcode += 2;
+          rmRel = &o0;
+          goto EmitX86M;
+        }
+
+        if (isign3 == ENC_OPS2(Mem, Imm)) {
+          rmRel = &o0;
+          immValue = o1.as<Imm>().value() & 0xFF;
+          immSize = 0;
+
+          if (immValue == 1 && !Support::test(options, InstOptions::kLongForm))
+            goto EmitX86M;
+
+          opcode -= 0x10;
+          immSize = 1;
+          goto EmitX86M;
+        }
+      }
+      break;
+
+    case InstDB::kEncodingX86Set:
+      if (isign3 == ENC_OPS1(Reg)) {
+        rbReg = o0.id();
+        FIXUP_GPB(o0, rbReg);
+        goto EmitX86R;
+      }
+
+      if (isign3 == ENC_OPS1(Mem)) {
+        rmRel = &o0;
+        goto EmitX86M;
+      }
+      break;
+
+    case InstDB::kEncodingX86ShldShrd:
+      if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
+        opcode.addPrefixBySize(o0.size());
+        opReg = o1.id();
+        rbReg = o0.id();
+
+        immValue = o2.as<Imm>().value();
+        immSize = 1;
+        goto EmitX86R;
+      }
+
+      if (isign3 == ENC_OPS3(Mem, Reg, Imm)) {
+        opcode.addPrefixBySize(o1.size());
+        opReg = o1.id();
+        rmRel = &o0;
+
+        immValue = o2.as<Imm>().value();
+        immSize = 1;
+        goto EmitX86M;
+      }
+
+      // The following instructions use opcode + 1.
+      opcode.add(1);
+
+      if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
+        if (ASMJIT_UNLIKELY(o2.id() != Gp::kIdCx))
+          goto InvalidInstruction;
+
+        opcode.addPrefixBySize(o0.size());
+        opReg = o1.id();
+        rbReg = o0.id();
+        goto EmitX86R;
+      }
+
+      if (isign3 == ENC_OPS3(Mem, Reg, Reg)) {
+        if (ASMJIT_UNLIKELY(o2.id() != Gp::kIdCx))
+          goto InvalidInstruction;
+
+        opcode.addPrefixBySize(o1.size());
+        opReg = o1.id();
+        rmRel = &o0;
+        goto EmitX86M;
+      }
+      break;
+
+    case InstDB::kEncodingX86StrRm:
+      if (isign3 == ENC_OPS2(Reg, Mem)) {
+        rmRel = &o1;
+        if (ASMJIT_UNLIKELY(rmRel->as<Mem>().offsetLo32() || !Reg::isGp(o0.as<Reg>(), Gp::kIdAx)))
+          goto InvalidInstruction;
+
+        uint32_t size = o0.size();
+        if (o1.hasSize() && ASMJIT_UNLIKELY(o1.size() != size))
+          goto OperandSizeMismatch;
+
+        opcode.addArithBySize(size);
+        goto EmitX86OpImplicitMem;
+      }
+      break;
+
+    case InstDB::kEncodingX86StrMr:
+      if (isign3 == ENC_OPS2(Mem, Reg)) {
+        rmRel = &o0;
+        if (ASMJIT_UNLIKELY(rmRel->as<Mem>().offsetLo32() || !Reg::isGp(o1.as<Reg>(), Gp::kIdAx)))
+          goto InvalidInstruction;
+
+        uint32_t size = o1.size();
+        if (o0.hasSize() && ASMJIT_UNLIKELY(o0.size() != size))
+          goto OperandSizeMismatch;
+
+        opcode.addArithBySize(size);
+        goto EmitX86OpImplicitMem;
+      }
+      break;
+
+    case InstDB::kEncodingX86StrMm:
+      if (isign3 == ENC_OPS2(Mem, Mem)) {
+        if (ASMJIT_UNLIKELY(o0.as<Mem>().baseAndIndexTypes() !=
+                            o1.as<Mem>().baseAndIndexTypes()))
+          goto InvalidInstruction;
+
+        rmRel = &o1;
+        if (ASMJIT_UNLIKELY(o0.as<Mem>().hasOffset()))
+          goto InvalidInstruction;
+
+        uint32_t size = o1.size();
+        if (ASMJIT_UNLIKELY(size == 0))
+          goto AmbiguousOperandSize;
+
+        if (ASMJIT_UNLIKELY(o0.size() != size))
+          goto OperandSizeMismatch;
+
+        opcode.addArithBySize(size);
+        goto EmitX86OpImplicitMem;
+      }
+      break;
+
+    case InstDB::kEncodingX86Test:
+      if (isign3 == ENC_OPS2(Reg, Reg)) {
+        if (o0.size() != o1.size())
+          goto OperandSizeMismatch;
+
+        opcode.addArithBySize(o0.size());
+        rbReg = o0.id();
+        opReg = o1.id();
+
+        if (o0.size() != 1)
+          goto EmitX86R;
+
+        FIXUP_GPB(o0, rbReg);
+        FIXUP_GPB(o1, opReg);
+        goto EmitX86R;
+      }
+
+      if (isign3 == ENC_OPS2(Mem, Reg)) {
+        opcode.addArithBySize(o1.size());
+        opReg = o1.id();
+        rmRel = &o0;
+
+        if (o1.size() != 1)
+          goto EmitX86M;
+
+        FIXUP_GPB(o1, opReg);
+        goto EmitX86M;
+      }
+
+      // The following instructions use the secondary opcode.
+      opcode = x86AltOpcodeOf(instInfo);
+      opReg = opcode.extractModO();
+
+      if (isign3 == ENC_OPS2(Reg, Imm)) {
+        opcode.addArithBySize(o0.size());
+        rbReg = o0.id();
+
+        if (o0.size() == 1) {
+          FIXUP_GPB(o0, rbReg);
+          immValue = o1.as<Imm>().valueAs<uint8_t>();
+          immSize = 1;
+        }
+        else {
+          immValue = o1.as<Imm>().value();
+          immSize = FastUInt8(Support::min<uint32_t>(o0.size(), 4));
+        }
+
+        // Short form - AL, AX, EAX, RAX.
+        if (rbReg == 0 && !Support::test(options, InstOptions::kLongForm)) {
+          opcode &= Opcode::kPP_66 | Opcode::kW;
+          opcode |= 0xA8 + (o0.size() != 1);
+          goto EmitX86Op;
+        }
+
+        goto EmitX86R;
+      }
+
+      if (isign3 == ENC_OPS2(Mem, Imm)) {
+        if (ASMJIT_UNLIKELY(o0.size() == 0))
+          goto AmbiguousOperandSize;
+
+        opcode.addArithBySize(o0.size());
+        rmRel = &o0;
+
+        immValue = o1.as<Imm>().value();
+        immSize = FastUInt8(Support::min<uint32_t>(o0.size(), 4));
+        goto EmitX86M;
+      }
+      break;
+
+    case InstDB::kEncodingX86Xchg:
+      if (isign3 == ENC_OPS2(Reg, Mem)) {
+        opcode.addArithBySize(o0.size());
+        opReg = o0.id();
+        rmRel = &o1;
+
+        if (o0.size() != 1)
+          goto EmitX86M;
+
+        FIXUP_GPB(o0, opReg);
+        goto EmitX86M;
+      }
+      ASMJIT_FALLTHROUGH;
+
+    case InstDB::kEncodingX86Xadd:
+      if (isign3 == ENC_OPS2(Reg, Reg)) {
+        rbReg = o0.id();
+        opReg = o1.id();
+
+        uint32_t opSize = o0.size();
+        if (opSize != o1.size())
+          goto OperandSizeMismatch;
+
+        if (opSize == 1) {
+          FIXUP_GPB(o0, rbReg);
+          FIXUP_GPB(o1, opReg);
+          goto EmitX86R;
+        }
+
+        // Special cases for 'xchg ?ax, reg'.
+        if (instId == Inst::kIdXchg && (opReg == 0 || rbReg == 0)) {
+          if (is64Bit() && opReg == rbReg && opSize >= 4) {
+            if (opSize == 8) {
+              // Encode 'xchg rax, rax' as '90' (REX and other prefixes are optional).
+              opcode &= Opcode::kW;
+              opcode |= 0x90;
+              goto EmitX86OpReg;
+            }
+            else {
+              // Encode 'xchg eax, eax' by using a generic path.
+            }
+          }
+          else if (!Support::test(options, InstOptions::kLongForm)) {
+            // The special encoding encodes only one register, which is non-zero.
+            opReg += rbReg;
+
+            opcode.addArithBySize(opSize);
+            opcode &= Opcode::kW | Opcode::kPP_66;
+            opcode |= 0x90;
+            goto EmitX86OpReg;
+          }
+        }
+
+        opcode.addArithBySize(opSize);
+        goto EmitX86R;
+      }
+
+      if (isign3 == ENC_OPS2(Mem, Reg)) {
+        opcode.addArithBySize(o1.size());
+        opReg = o1.id();
+        rmRel = &o0;
+
+        if (o1.size() == 1) {
+          FIXUP_GPB(o1, opReg);
+        }
+
+        goto EmitX86M;
+      }
+      break;
+
+    case InstDB::kEncodingX86Fence:
+      rbReg = 0;
+      goto EmitX86R;
+
+    case InstDB::kEncodingX86Bndmov:
+      if (isign3 == ENC_OPS2(Reg, Reg)) {
+        opReg = o0.id();
+        rbReg = o1.id();
+
+        // ModRM encoding:
+        if (!Support::test(options, InstOptions::kX86_ModMR))
+          goto EmitX86R;
+
+        // ModMR encoding:
+        opcode = x86AltOpcodeOf(instInfo);
+        std::swap(opReg, rbReg);
+        goto EmitX86R;
+      }
+
+      if (isign3 == ENC_OPS2(Reg, Mem)) {
+        opReg = o0.id();
+        rmRel = &o1;
+        goto EmitX86M;
+      }
+
+      if (isign3 == ENC_OPS2(Mem, Reg)) {
+        opcode = x86AltOpcodeOf(instInfo);
+
+        rmRel = &o0;
+        opReg = o1.id();
+        goto EmitX86M;
+      }
+      break;
+
+    // FPU Instructions
+    // ----------------
+
+    case InstDB::kEncodingFpuOp:
+      goto EmitFpuOp;
+
+    case InstDB::kEncodingFpuArith:
+      if (isign3 == ENC_OPS2(Reg, Reg)) {
+        opReg = o0.id();
+        rbReg = o1.id();
+
+        // We switch to the alternative opcode if the first operand is zero.
+        if (opReg == 0) {
+CaseFpuArith_Reg:
+          opcode = ((0xD8   << Opcode::kFPU_2B_Shift)       ) +
+                   ((opcode >> Opcode::kFPU_2B_Shift) & 0xFF) + rbReg;
+          goto EmitFpuOp;
+        }
+        else if (rbReg == 0) {
+          rbReg = opReg;
+          opcode = ((0xDC   << Opcode::kFPU_2B_Shift)       ) +
+                   ((opcode                         ) & 0xFF) + rbReg;
+          goto EmitFpuOp;
+        }
+        else {
+          goto InvalidInstruction;
+        }
+      }
+
+      if (isign3 == ENC_OPS1(Mem)) {
+CaseFpuArith_Mem:
+        // 0xD8/0xDC, depends on the size of the memory operand; opReg is valid.
+        opcode = (o0.size() == 4) ? 0xD8 : 0xDC;
+        // Clear compressed displacement before going to EmitX86M.
+        opcode &= ~uint32_t(Opcode::kCDSHL_Mask);
+
+        rmRel = &o0;
+        goto EmitX86M;
+      }
+      break;
+
+    case InstDB::kEncodingFpuCom:
+      if (isign3 == 0) {
+        rbReg = 1;
+        goto CaseFpuArith_Reg;
+      }
+
+      if (isign3 == ENC_OPS1(Reg)) {
+        rbReg = o0.id();
+        goto CaseFpuArith_Reg;
+      }
+
+      if (isign3 == ENC_OPS1(Mem)) {
+        goto CaseFpuArith_Mem;
+      }
+      break;
+
+    case InstDB::kEncodingFpuFldFst:
+      if (isign3 == ENC_OPS1(Mem)) {
+        rmRel = &o0;
+
+        if (o0.size() == 4 && commonInfo->hasFlag(InstDB::InstFlags::kFpuM32)) {
+          goto EmitX86M;
+        }
+
+        if (o0.size() == 8 && commonInfo->hasFlag(InstDB::InstFlags::kFpuM64)) {
+          opcode += 4;
+          goto EmitX86M;
+        }
+
+        if (o0.size() == 10 && commonInfo->hasFlag(InstDB::InstFlags::kFpuM80)) {
+          opcode = x86AltOpcodeOf(instInfo);
+          opReg  = opcode.extractModO();
+          goto EmitX86M;
+        }
+      }
+
+      if (isign3 == ENC_OPS1(Reg)) {
+        if (instId == Inst::kIdFld ) { opcode = (0xD9 << Opcode::kFPU_2B_Shift) + 0xC0 + o0.id(); goto EmitFpuOp; }
+        if (instId == Inst::kIdFst ) { opcode = (0xDD << Opcode::kFPU_2B_Shift) + 0xD0 + o0.id(); goto EmitFpuOp; }
+        if (instId == Inst::kIdFstp) { opcode = (0xDD << Opcode::kFPU_2B_Shift) + 0xD8 + o0.id(); goto EmitFpuOp; }
+      }
+      break;
+
+    case InstDB::kEncodingFpuM:
+      if (isign3 == ENC_OPS1(Mem)) {
+        // Clear compressed displacement before going to EmitX86M.
+        opcode &= ~uint32_t(Opcode::kCDSHL_Mask);
+
+        rmRel = &o0;
+        if (o0.size() == 2 && commonInfo->hasFlag(InstDB::InstFlags::kFpuM16)) {
+          opcode += 4;
+          goto EmitX86M;
+        }
+
+        if (o0.size() == 4 && commonInfo->hasFlag(InstDB::InstFlags::kFpuM32)) {
+          goto EmitX86M;
+        }
+
+        if (o0.size() == 8 && commonInfo->hasFlag(InstDB::InstFlags::kFpuM64)) {
+          opcode = x86AltOpcodeOf(instInfo) & ~uint32_t(Opcode::kCDSHL_Mask);
+          opReg  = opcode.extractModO();
+          goto EmitX86M;
+        }
+      }
+      break;
+
+    case InstDB::kEncodingFpuRDef:
+      if (isign3 == 0) {
+        opcode += 1;
+        goto EmitFpuOp;
+      }
+      ASMJIT_FALLTHROUGH;
+
+    case InstDB::kEncodingFpuR:
+      if (isign3 == ENC_OPS1(Reg)) {
+        opcode += o0.id();
+        goto EmitFpuOp;
+      }
+      break;
+
+    case InstDB::kEncodingFpuStsw:
+      if (isign3 == ENC_OPS1(Reg)) {
+        if (ASMJIT_UNLIKELY(o0.id() != Gp::kIdAx))
+          goto InvalidInstruction;
+
+        opcode = x86AltOpcodeOf(instInfo);
+        goto EmitFpuOp;
+      }
+
+      if (isign3 == ENC_OPS1(Mem)) {
+        // Clear compressed displacement before going to EmitX86M.
+        opcode &= ~uint32_t(Opcode::kCDSHL_Mask);
+
+        rmRel = &o0;
+        goto EmitX86M;
+      }
+      break;
+
+    // Ext Instructions (Legacy Extensions)
+    // ------------------------------------
+
+    case InstDB::kEncodingExtPextrw:
+      if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
+        opcode.add66hIf(Reg::isXmm(o1));
+
+        immValue = o2.as<Imm>().value();
+        immSize = 1;
+
+        opReg = o0.id();
+        rbReg = o1.id();
+        goto EmitX86R;
+      }
+
+      if (isign3 == ENC_OPS3(Mem, Reg, Imm)) {
+        // Secondary opcode of 'pextrw' instruction (SSE4.1).
+        opcode = x86AltOpcodeOf(instInfo);
+        opcode.add66hIf(Reg::isXmm(o1));
+
+        immValue = o2.as<Imm>().value();
+        immSize = 1;
+
+        opReg = o1.id();
+        rmRel = &o0;
+        goto EmitX86M;
+      }
+      break;
+
+    case InstDB::kEncodingExtExtract:
+      if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
+        opcode.add66hIf(Reg::isXmm(o1));
+
+        immValue = o2.as<Imm>().value();
+        immSize = 1;
+
+        opReg = o1.id();
+        rbReg = o0.id();
+        goto EmitX86R;
+      }
+
+      if (isign3 == ENC_OPS3(Mem, Reg, Imm)) {
+        opcode.add66hIf(Reg::isXmm(o1));
+
+        immValue = o2.as<Imm>().value();
+        immSize = 1;
+
+        opReg = o1.id();
+        rmRel = &o0;
+        goto EmitX86M;
+      }
+      break;
+
+    case InstDB::kEncodingExtMov:
+      // GP|MM|XMM <- GP|MM|XMM
+      if (isign3 == ENC_OPS2(Reg, Reg)) {
+        opReg = o0.id();
+        rbReg = o1.id();
+
+        if (!Support::test(options, InstOptions::kX86_ModMR) || !instInfo->_altOpcodeIndex)
+          goto EmitX86R;
+
+        opcode = x86AltOpcodeOf(instInfo);
+        std::swap(opReg, rbReg);
+        goto EmitX86R;
+      }
+
+      // GP|MM|XMM <- Mem
+      if (isign3 == ENC_OPS2(Reg, Mem)) {
+        opReg = o0.id();
+        rmRel = &o1;
+        goto EmitX86M;
+      }
+
+      // The following instruction uses opcode[1].
+      opcode = x86AltOpcodeOf(instInfo);
+
+      // Mem <- GP|MM|XMM
+      if (isign3 == ENC_OPS2(Mem, Reg)) {
+        opReg = o1.id();
+        rmRel = &o0;
+        goto EmitX86M;
+      }
+      break;
+
+    case InstDB::kEncodingExtMovbe:
+      if (isign3 == ENC_OPS2(Reg, Mem)) {
+        if (o0.size() == 1)
+          goto InvalidInstruction;
+
+        opcode.addPrefixBySize(o0.size());
+        opReg = o0.id();
+        rmRel = &o1;
+        goto EmitX86M;
+      }
+
+      // The following instruction uses the secondary opcode.
+      opcode = x86AltOpcodeOf(instInfo);
+
+      if (isign3 == ENC_OPS2(Mem, Reg)) {
+        if (o1.size() == 1)
+          goto InvalidInstruction;
+
+        opcode.addPrefixBySize(o1.size());
+        opReg = o1.id();
+        rmRel = &o0;
+        goto EmitX86M;
+      }
+      break;
+
+    case InstDB::kEncodingExtMovd:
+CaseExtMovd:
+      opReg = o0.id();
+      opcode.add66hIf(Reg::isXmm(o0));
+
+      // MM/XMM <- Gp
+      if (isign3 == ENC_OPS2(Reg, Reg) && Reg::isGp(o1)) {
+        rbReg = o1.id();
+        goto EmitX86R;
+      }
+
+      // MM/XMM <- Mem
+      if (isign3 == ENC_OPS2(Reg, Mem)) {
+        rmRel = &o1;
+        goto EmitX86M;
+      }
+
+      // The following instructions use the secondary opcode.
+      opcode &= Opcode::kW;
+      opcode |= x86AltOpcodeOf(instInfo);
+      opReg = o1.id();
+      opcode.add66hIf(Reg::isXmm(o1));
+
+      // GP <- MM/XMM
+      if (isign3 == ENC_OPS2(Reg, Reg) && Reg::isGp(o0)) {
+        rbReg = o0.id();
+        goto EmitX86R;
+      }
+
+      // Mem <- MM/XMM
+      if (isign3 == ENC_OPS2(Mem, Reg)) {
+        rmRel = &o0;
+        goto EmitX86M;
+      }
+      break;
+
+    case InstDB::kEncodingExtMovq:
+      if (isign3 == ENC_OPS2(Reg, Reg)) {
+        opReg = o0.id();
+        rbReg = o1.id();
+
+        // MM <- MM
+        if (Reg::isMm(o0) && Reg::isMm(o1)) {
+          opcode = Opcode::k000F00 | 0x6F;
+
+          if (!Support::test(options, InstOptions::kX86_ModMR))
+            goto EmitX86R;
+
+          opcode += 0x10;
+          std::swap(opReg, rbReg);
+          goto EmitX86R;
+        }
+
+        // XMM <- XMM
+        if (Reg::isXmm(o0) && Reg::isXmm(o1)) {
+          opcode = Opcode::kF30F00 | 0x7E;
+
+          if (!Support::test(options, InstOptions::kX86_ModMR))
+            goto EmitX86R;
+
+          opcode = Opcode::k660F00 | 0xD6;
+          std::swap(opReg, rbReg);
+          goto EmitX86R;
+        }
+      }
+
+      if (isign3 == ENC_OPS2(Reg, Mem)) {
+        opReg = o0.id();
+        rmRel = &o1;
+
+        // MM <- Mem
+        if (Reg::isMm(o0)) {
+          opcode = Opcode::k000F00 | 0x6F;
+          goto EmitX86M;
+        }
+
+        // XMM <- Mem
+        if (Reg::isXmm(o0)) {
+          opcode = Opcode::kF30F00 | 0x7E;
+          goto EmitX86M;
+        }
+      }
+
+      if (isign3 == ENC_OPS2(Mem, Reg)) {
+        opReg = o1.id();
+        rmRel = &o0;
+
+        // Mem <- MM
+        if (Reg::isMm(o1)) {
+          opcode = Opcode::k000F00 | 0x7F;
+          goto EmitX86M;
+        }
+
+        // Mem <- XMM
+        if (Reg::isXmm(o1)) {
+          opcode = Opcode::k660F00 | 0xD6;
+          goto EmitX86M;
+        }
+      }
+
+      // MOVQ in other case is simply a MOVD instruction promoted to 64-bit.
+      opcode |= Opcode::kW;
+      goto CaseExtMovd;
+
+    case InstDB::kEncodingExtRm_XMM0:
+      if (ASMJIT_UNLIKELY(!o2.isNone() && !Reg::isXmm(o2, 0)))
+        goto InvalidInstruction;
+
+      isign3 &= 0x3F;
+      goto CaseExtRm;
+
+    case InstDB::kEncodingExtRm_ZDI:
+      if (ASMJIT_UNLIKELY(!o2.isNone() && !x86IsImplicitMem(o2, Gp::kIdDi)))
+        goto InvalidInstruction;
+
+      isign3 &= 0x3F;
+      goto CaseExtRm;
+
+    case InstDB::kEncodingExtRm_Wx:
+      opcode.addWIf(o1.size() == 8);
+      ASMJIT_FALLTHROUGH;
+
+    case InstDB::kEncodingExtRm_Wx_GpqOnly:
+      opcode.addWIf(Reg::isGpq(o0));
+      ASMJIT_FALLTHROUGH;
+
+    case InstDB::kEncodingExtRm:
+CaseExtRm:
+      if (isign3 == ENC_OPS2(Reg, Reg)) {
+        opReg = o0.id();
+        rbReg = o1.id();
+        goto EmitX86R;
+      }
+
+      if (isign3 == ENC_OPS2(Reg, Mem)) {
+        opReg = o0.id();
+        rmRel = &o1;
+        goto EmitX86M;
+      }
+      break;
+
+    case InstDB::kEncodingExtRm_P:
+      if (isign3 == ENC_OPS2(Reg, Reg)) {
+        opcode.add66hIf(Reg::isXmm(o0) | Reg::isXmm(o1));
+
+        opReg = o0.id();
+        rbReg = o1.id();
+        goto EmitX86R;
+      }
+
+      if (isign3 == ENC_OPS2(Reg, Mem)) {
+        opcode.add66hIf(Reg::isXmm(o0));
+
+        opReg = o0.id();
+        rmRel = &o1;
+        goto EmitX86M;
+      }
+      break;
+
+    case InstDB::kEncodingExtRmRi:
+      if (isign3 == ENC_OPS2(Reg, Reg)) {
+        opReg = o0.id();
+        rbReg = o1.id();
+        goto EmitX86R;
+      }
+
+      if (isign3 == ENC_OPS2(Reg, Mem)) {
+        opReg = o0.id();
+        rmRel = &o1;
+        goto EmitX86M;
+      }
+
+      // The following instruction uses the secondary opcode.
+      opcode = x86AltOpcodeOf(instInfo);
+      opReg  = opcode.extractModO();
+
+      if (isign3 == ENC_OPS2(Reg, Imm)) {
+        immValue = o1.as<Imm>().value();
+        immSize = 1;
+
+        rbReg = o0.id();
+        goto EmitX86R;
+      }
+      break;
+
+    case InstDB::kEncodingExtRmRi_P:
+      if (isign3 == ENC_OPS2(Reg, Reg)) {
+        opcode.add66hIf(Reg::isXmm(o0) | Reg::isXmm(o1));
+
+        opReg = o0.id();
+        rbReg = o1.id();
+        goto EmitX86R;
+      }
+
+      if (isign3 == ENC_OPS2(Reg, Mem)) {
+        opcode.add66hIf(Reg::isXmm(o0));
+
+        opReg = o0.id();
+        rmRel = &o1;
+        goto EmitX86M;
+      }
+
+      // The following instruction uses the secondary opcode.
+      opcode = x86AltOpcodeOf(instInfo);
+      opReg  = opcode.extractModO();
+
+      if (isign3 == ENC_OPS2(Reg, Imm)) {
+        opcode.add66hIf(Reg::isXmm(o0));
+
+        immValue = o1.as<Imm>().value();
+        immSize = 1;
+
+        rbReg = o0.id();
+        goto EmitX86R;
+      }
+      break;
+
+    case InstDB::kEncodingExtRmi:
+      immValue = o2.as<Imm>().value();
+      immSize = 1;
+
+      if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
+        opReg = o0.id();
+        rbReg = o1.id();
+        goto EmitX86R;
+      }
+
+      if (isign3 == ENC_OPS3(Reg, Mem, Imm)) {
+        opReg = o0.id();
+        rmRel = &o1;
+        goto EmitX86M;
+      }
+      break;
+
+    case InstDB::kEncodingExtRmi_P:
+      immValue = o2.as<Imm>().value();
+      immSize = 1;
+
+      if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
+        opcode.add66hIf(Reg::isXmm(o0) | Reg::isXmm(o1));
+
+        opReg = o0.id();
+        rbReg = o1.id();
+        goto EmitX86R;
+      }
+
+      if (isign3 == ENC_OPS3(Reg, Mem, Imm)) {
+        opcode.add66hIf(Reg::isXmm(o0));
+
+        opReg = o0.id();
+        rmRel = &o1;
+        goto EmitX86M;
+      }
+      break;
+
+    // Extrq & Insertq (SSE4A)
+    // -----------------------
+
+    case InstDB::kEncodingExtExtrq:
+      opReg = o0.id();
+      rbReg = o1.id();
+
+      if (isign3 == ENC_OPS2(Reg, Reg))
+        goto EmitX86R;
+
+      if (isign3 == ENC_OPS3(Reg, Imm, Imm)) {
+        // This variant of the instruction uses the secondary opcode.
+        opcode = x86AltOpcodeOf(instInfo);
+        rbReg = opReg;
+        opReg = opcode.extractModO();
+
+        immValue = (uint32_t(o1.as<Imm>().valueAs<uint8_t>())     ) +
+                   (uint32_t(o2.as<Imm>().valueAs<uint8_t>()) << 8) ;
+        immSize = 2;
+        goto EmitX86R;
+      }
+      break;
+
+    case InstDB::kEncodingExtInsertq: {
+      const Operand_& o3 = opExt[EmitterUtils::kOp3];
+      const uint32_t isign4 = isign3 + (uint32_t(o3.opType()) << 9);
+
+      opReg = o0.id();
+      rbReg = o1.id();
+
+      if (isign4 == ENC_OPS2(Reg, Reg))
+        goto EmitX86R;
+
+      if (isign4 == ENC_OPS4(Reg, Reg, Imm, Imm)) {
+        // This variant of the instruction uses the secondary opcode.
+        opcode = x86AltOpcodeOf(instInfo);
+
+        immValue = (uint32_t(o2.as<Imm>().valueAs<uint8_t>())     ) +
+                   (uint32_t(o3.as<Imm>().valueAs<uint8_t>()) << 8) ;
+        immSize = 2;
+        goto EmitX86R;
+      }
+      break;
+    }
+
+    // 3DNOW Instructions
+    // ------------------
+
+    case InstDB::kEncodingExt3dNow:
+      // Every 3dNow instruction starts with 0x0F0F and the actual opcode is
+      // stored as 8-bit immediate.
+      immValue = opcode.v & 0xFFu;
+      immSize = 1;
+
+      opcode = Opcode::k000F00 | 0x0F;
+      opReg = o0.id();
+
+      if (isign3 == ENC_OPS2(Reg, Reg)) {
+        rbReg = o1.id();
+        goto EmitX86R;
+      }
+
+      if (isign3 == ENC_OPS2(Reg, Mem)) {
+        rmRel = &o1;
+        goto EmitX86M;
+      }
+      break;
+
+    // VEX/EVEX Instructions
+    // ---------------------
+
+    case InstDB::kEncodingVexOp:
+      goto EmitVexOp;
+
+    case InstDB::kEncodingVexOpMod:
+      rbReg = 0;
+      goto EmitVexEvexR;
+
+    case InstDB::kEncodingVexKmov:
+      if (isign3 == ENC_OPS2(Reg, Reg)) {
+        opReg = o0.id();
+        rbReg = o1.id();
+
+        // Form 'k, reg'.
+        if (Reg::isGp(o1)) {
+          opcode = x86AltOpcodeOf(instInfo);
+          goto EmitVexEvexR;
+        }
+
+        // Form 'reg, k'.
+        if (Reg::isGp(o0)) {
+          opcode = x86AltOpcodeOf(instInfo) + 1;
+          goto EmitVexEvexR;
+        }
+
+        // Form 'k, k'.
+        if (!Support::test(options, InstOptions::kX86_ModMR))
+          goto EmitVexEvexR;
+
+        opcode.add(1);
+        std::swap(opReg, rbReg);
+        goto EmitVexEvexR;
+      }
+
+      if (isign3 == ENC_OPS2(Reg, Mem)) {
+        opReg = o0.id();
+        rmRel = &o1;
+
+        goto EmitVexEvexM;
+      }
+
+      if (isign3 == ENC_OPS2(Mem, Reg)) {
+        opcode.add(1);
+        opReg = o1.id();
+        rmRel = &o0;
+        goto EmitVexEvexM;
+      }
+      break;
+
+    case InstDB::kEncodingVexR_Wx:
+      if (isign3 == ENC_OPS1(Reg)) {
+        rbReg = o0.id();
+        opcode.addWIf(o0.as<Reg>().isGpq());
+        goto EmitVexEvexR;
+      }
+      break;
+
+    case InstDB::kEncodingVexM:
+      if (isign3 == ENC_OPS1(Mem)) {
+        rmRel = &o0;
+        goto EmitVexEvexM;
+      }
+      break;
+
+    case InstDB::kEncodingVexM_VM:
+      if (isign3 == ENC_OPS1(Mem)) {
+        rmRel = &o0;
+        goto EmitVexEvexM;
+      }
+      break;
+
+    case InstDB::kEncodingVexMr_Lx:
+      opcode |= x86OpcodeLBySize(o0.size() | o1.size());
+
+      if (isign3 == ENC_OPS2(Reg, Reg)) {
+        opReg = o1.id();
+        rbReg = o0.id();
+        goto EmitVexEvexR;
+      }
+
+      if (isign3 == ENC_OPS2(Mem, Reg)) {
+        opReg = o1.id();
+        rmRel = &o0;
+        goto EmitVexEvexM;
+      }
+      break;
+
+    case InstDB::kEncodingVexMr_VM:
+      if (isign3 == ENC_OPS2(Mem, Reg)) {
+        opcode |= Support::max(x86OpcodeLByVMem(o0), x86OpcodeLBySize(o1.size()));
+
+        opReg = o1.id();
+        rmRel = &o0;
+        goto EmitVexEvexM;
+      }
+      break;
+
+    case InstDB::kEncodingVexMri_Vpextrw:
+      // Use 'vpextrw reg, xmm1, i8' when possible.
+      if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
+        opcode = Opcode::k660F00 | 0xC5;
+
+        opReg = o0.id();
+        rbReg = o1.id();
+
+        immValue = o2.as<Imm>().value();
+        immSize = 1;
+        goto EmitVexEvexR;
+      }
+
+      goto CaseVexMri;
+
+    case InstDB::kEncodingVexMri_Lx:
+      opcode |= x86OpcodeLBySize(o0.size() | o1.size());
+      ASMJIT_FALLTHROUGH;
+
+    case InstDB::kEncodingVexMri:
+CaseVexMri:
+      immValue = o2.as<Imm>().value();
+      immSize = 1;
+
+      if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
+        opReg = o1.id();
+        rbReg = o0.id();
+        goto EmitVexEvexR;
+      }
+
+      if (isign3 == ENC_OPS3(Mem, Reg, Imm)) {
+        opReg = o1.id();
+        rmRel = &o0;
+        goto EmitVexEvexM;
+      }
+      break;
+
+    case InstDB::kEncodingVexRm_ZDI:
+      if (ASMJIT_UNLIKELY(!o2.isNone() && !x86IsImplicitMem(o2, Gp::kIdDi)))
+        goto InvalidInstruction;
+
+      isign3 &= 0x3F;
+      goto CaseVexRm;
+
+    case InstDB::kEncodingVexRm_Wx:
+      opcode.addWIf(Reg::isGpq(o0) | Reg::isGpq(o1));
+      goto CaseVexRm;
+
+    case InstDB::kEncodingVexRm_Lx_Narrow:
+      if (o1.size())
+        opcode |= x86OpcodeLBySize(o1.size());
+      else if (o0.size() == 32)
+        opcode |= Opcode::kLL_2;
+      goto CaseVexRm;
+
+    case InstDB::kEncodingVexRm_Lx_Bcst:
+      if (isign3 == ENC_OPS2(Reg, Reg) && Reg::isGp(o1.as<Reg>())) {
+        opcode = x86AltOpcodeOf(instInfo) | x86OpcodeLBySize(o0.size() | o1.size());
+        opReg = o0.id();
+        rbReg = o1.id();
+        goto EmitVexEvexR;
+      }
+      ASMJIT_FALLTHROUGH;
+
+    case InstDB::kEncodingVexRm_Lx:
+      opcode |= x86OpcodeLBySize(o0.size() | o1.size());
+      ASMJIT_FALLTHROUGH;
+
+    case InstDB::kEncodingVexRm:
+CaseVexRm:
+      if (isign3 == ENC_OPS2(Reg, Reg)) {
+        opReg = o0.id();
+        rbReg = o1.id();
+        goto EmitVexEvexR;
+      }
+
+      if (isign3 == ENC_OPS2(Reg, Mem)) {
+        opReg = o0.id();
+        rmRel = &o1;
+        goto EmitVexEvexM;
+      }
+      break;
+
+    case InstDB::kEncodingVexRm_VM:
+      if (isign3 == ENC_OPS2(Reg, Mem)) {
+        opcode |= Support::max(x86OpcodeLByVMem(o1), x86OpcodeLBySize(o0.size()));
+        opReg = o0.id();
+        rmRel = &o1;
+        goto EmitVexEvexM;
+      }
+      break;
+
+    case InstDB::kEncodingVexRm_T1_4X: {
+      const Operand_& o3 = opExt[EmitterUtils::kOp3];
+      const Operand_& o4 = opExt[EmitterUtils::kOp4];
+      const Operand_& o5 = opExt[EmitterUtils::kOp5];
+
+      if (Reg::isVec(o0) && Reg::isVec(o1) && Reg::isVec(o2) && Reg::isVec(o3) && Reg::isVec(o4) && o5.isMem()) {
+        // Registers [o1, o2, o3, o4] must start aligned and must be consecutive.
+        uint32_t i1 = o1.id();
+        uint32_t i2 = o2.id();
+        uint32_t i3 = o3.id();
+        uint32_t i4 = o4.id();
+
+        if (ASMJIT_UNLIKELY((i1 & 0x3) != 0 || i2 != i1 + 1 || i3 != i1 + 2 || i4 != i1 + 3))
+          goto NotConsecutiveRegs;
+
+        opReg = x86PackRegAndVvvvv(o0.id(), i1);
+        rmRel = &o5;
+        goto EmitVexEvexM;
+      }
+      break;
+    }
+
+    case InstDB::kEncodingVexRmi_Wx:
+      opcode.addWIf(Reg::isGpq(o0) | Reg::isGpq(o1));
+      goto CaseVexRmi;
+
+    case InstDB::kEncodingVexRmi_Lx:
+      opcode |= x86OpcodeLBySize(o0.size() | o1.size());
+      ASMJIT_FALLTHROUGH;
+
+    case InstDB::kEncodingVexRmi:
+CaseVexRmi:
+      immValue = o2.as<Imm>().value();
+      immSize = 1;
+
+      if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
+        opReg = o0.id();
+        rbReg = o1.id();
+        goto EmitVexEvexR;
+      }
+
+      if (isign3 == ENC_OPS3(Reg, Mem, Imm)) {
+        opReg = o0.id();
+        rmRel = &o1;
+        goto EmitVexEvexM;
+      }
+      break;
+
+    case InstDB::kEncodingVexRvm:
+CaseVexRvm:
+      if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
+CaseVexRvm_R:
+        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
+        rbReg = o2.id();
+        goto EmitVexEvexR;
+      }
+
+      if (isign3 == ENC_OPS3(Reg, Reg, Mem)) {
+        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
+        rmRel = &o2;
+        goto EmitVexEvexM;
+      }
+      break;
+
+    case InstDB::kEncodingVexRvm_ZDX_Wx: {
+      const Operand_& o3 = opExt[EmitterUtils::kOp3];
+      if (ASMJIT_UNLIKELY(!o3.isNone() && !Reg::isGp(o3, Gp::kIdDx)))
+        goto InvalidInstruction;
+      ASMJIT_FALLTHROUGH;
+    }
+
+    case InstDB::kEncodingVexRvm_Wx: {
+      opcode.addWIf(Reg::isGpq(o0) | (o2.size() == 8));
+      goto CaseVexRvm;
+    }
+
+    case InstDB::kEncodingVexRvm_Lx_KEvex: {
+      opcode.forceEvexIf(Reg::isKReg(o0));
+      ASMJIT_FALLTHROUGH;
+    }
+
+    case InstDB::kEncodingVexRvm_Lx: {
+      opcode |= x86OpcodeLBySize(o0.size() | o1.size());
+      goto CaseVexRvm;
+    }
+
+    case InstDB::kEncodingVexRvm_Lx_2xK: {
+      if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
+        // Two registers are encoded as a single register.
+        //   - First K register must be even.
+        //   - Second K register must be first+1.
+        if ((o0.id() & 1) != 0 || o0.id() + 1 != o1.id())
+          goto InvalidPhysId;
+
+        const Operand_& o3 = opExt[EmitterUtils::kOp3];
+
+        opcode |= x86OpcodeLBySize(o2.size());
+        opReg = x86PackRegAndVvvvv(o0.id(), o2.id());
+
+        if (o3.isReg()) {
+          rbReg = o3.id();
+          goto EmitVexEvexR;
+        }
+
+        if (o3.isMem()) {
+          rmRel = &o3;
+          goto EmitVexEvexM;
+        }
+      }
+      break;
+    }
+
+    case InstDB::kEncodingVexRvmr_Lx: {
+      opcode |= x86OpcodeLBySize(o0.size() | o1.size());
+      ASMJIT_FALLTHROUGH;
+    }
+
+    case InstDB::kEncodingVexRvmr: {
+      const Operand_& o3 = opExt[EmitterUtils::kOp3];
+      const uint32_t isign4 = isign3 + (uint32_t(o3.opType()) << 9);
+
+      immValue = o3.id() << 4;
+      immSize = 1;
+
+      if (isign4 == ENC_OPS4(Reg, Reg, Reg, Reg)) {
+        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
+        rbReg = o2.id();
+        goto EmitVexEvexR;
+      }
+
+      if (isign4 == ENC_OPS4(Reg, Reg, Mem, Reg)) {
+        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
+        rmRel = &o2;
+        goto EmitVexEvexM;
+      }
+      break;
+    }
+
+    case InstDB::kEncodingVexRvmi_KEvex:
+      opcode.forceEvexIf(Reg::isKReg(o0));
+      goto VexRvmi;
+
+    case InstDB::kEncodingVexRvmi_Lx_KEvex:
+      opcode.forceEvexIf(Reg::isKReg(o0));
+      ASMJIT_FALLTHROUGH;
+
+    case InstDB::kEncodingVexRvmi_Lx:
+      opcode |= x86OpcodeLBySize(o0.size() | o1.size());
+      ASMJIT_FALLTHROUGH;
+
+    case InstDB::kEncodingVexRvmi:
+VexRvmi:
+    {
+      const Operand_& o3 = opExt[EmitterUtils::kOp3];
+      const uint32_t isign4 = isign3 + (uint32_t(o3.opType()) << 9);
+
+      immValue = o3.as<Imm>().value();
+      immSize = 1;
+
+      if (isign4 == ENC_OPS4(Reg, Reg, Reg, Imm)) {
+        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
+        rbReg = o2.id();
+        goto EmitVexEvexR;
+      }
+
+      if (isign4 == ENC_OPS4(Reg, Reg, Mem, Imm)) {
+        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
+        rmRel = &o2;
+        goto EmitVexEvexM;
+      }
+      break;
+    }
+
+    case InstDB::kEncodingVexRmv_Wx:
+      opcode.addWIf(Reg::isGpq(o0) | Reg::isGpq(o2));
+      ASMJIT_FALLTHROUGH;
+
+    case InstDB::kEncodingVexRmv:
+      if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
+        opReg = x86PackRegAndVvvvv(o0.id(), o2.id());
+        rbReg = o1.id();
+        goto EmitVexEvexR;
+      }
+
+      if (isign3 == ENC_OPS3(Reg, Mem, Reg)) {
+        opReg = x86PackRegAndVvvvv(o0.id(), o2.id());
+        rmRel = &o1;
+        goto EmitVexEvexM;
+      }
+      break;
+
+    case InstDB::kEncodingVexRmvRm_VM:
+      if (isign3 == ENC_OPS2(Reg, Mem)) {
+        opcode  = x86AltOpcodeOf(instInfo);
+        opcode |= Support::max(x86OpcodeLByVMem(o1), x86OpcodeLBySize(o0.size()));
+
+        opReg = o0.id();
+        rmRel = &o1;
+        goto EmitVexEvexM;
+      }
+      ASMJIT_FALLTHROUGH;
+
+    case InstDB::kEncodingVexRmv_VM:
+      if (isign3 == ENC_OPS3(Reg, Mem, Reg)) {
+        opcode |= Support::max(x86OpcodeLByVMem(o1), x86OpcodeLBySize(o0.size() | o2.size()));
+
+        opReg = x86PackRegAndVvvvv(o0.id(), o2.id());
+        rmRel = &o1;
+        goto EmitVexEvexM;
+      }
+      break;
+
+
+    case InstDB::kEncodingVexRmvi: {
+      const Operand_& o3 = opExt[EmitterUtils::kOp3];
+      const uint32_t isign4 = isign3 + (uint32_t(o3.opType()) << 9);
+
+      immValue = o3.as<Imm>().value();
+      immSize = 1;
+
+      if (isign4 == ENC_OPS4(Reg, Reg, Reg, Imm)) {
+        opReg = x86PackRegAndVvvvv(o0.id(), o2.id());
+        rbReg = o1.id();
+        goto EmitVexEvexR;
+      }
+
+      if (isign4 == ENC_OPS4(Reg, Mem, Reg, Imm)) {
+        opReg = x86PackRegAndVvvvv(o0.id(), o2.id());
+        rmRel = &o1;
+        goto EmitVexEvexM;
+      }
+      break;
+    }
+
+    case InstDB::kEncodingVexMovdMovq:
+      if (isign3 == ENC_OPS2(Reg, Reg)) {
+        if (Reg::isGp(o0)) {
+          opcode = x86AltOpcodeOf(instInfo);
+          opcode.addWBySize(o0.size());
+          opReg = o1.id();
+          rbReg = o0.id();
+          goto EmitVexEvexR;
+        }
+
+        if (Reg::isGp(o1)) {
+          opcode.addWBySize(o1.size());
+          opReg = o0.id();
+          rbReg = o1.id();
+          goto EmitVexEvexR;
+        }
+
+        // If this is a 'W' version (movq) then allow also vmovq 'xmm|xmm' form.
+        if (opcode & Opcode::kEvex_W_1) {
+          opcode &= ~(Opcode::kPP_VEXMask | Opcode::kMM_Mask | 0xFF);
+          opcode |=  (Opcode::kF30F00 | 0x7E);
+
+          opReg = o0.id();
+          rbReg = o1.id();
+          goto EmitVexEvexR;
+        }
+      }
+
+      if (isign3 == ENC_OPS2(Reg, Mem)) {
+        if (opcode & Opcode::kEvex_W_1) {
+          opcode &= ~(Opcode::kPP_VEXMask | Opcode::kMM_Mask | 0xFF);
+          opcode |=  (Opcode::kF30F00 | 0x7E);
+        }
+
+        opReg = o0.id();
+        rmRel = &o1;
+        goto EmitVexEvexM;
+      }
+
+      // The following instruction uses the secondary opcode.
+      opcode = x86AltOpcodeOf(instInfo);
+
+      if (isign3 == ENC_OPS2(Mem, Reg)) {
+        if (opcode & Opcode::kEvex_W_1) {
+          opcode &= ~(Opcode::kPP_VEXMask | Opcode::kMM_Mask | 0xFF);
+          opcode |=  (Opcode::k660F00 | 0xD6);
+        }
+
+        opReg = o1.id();
+        rmRel = &o0;
+        goto EmitVexEvexM;
+      }
+      break;
+
+    case InstDB::kEncodingVexRmMr_Lx:
+      opcode |= x86OpcodeLBySize(o0.size() | o1.size());
+      ASMJIT_FALLTHROUGH;
+
+    case InstDB::kEncodingVexRmMr:
+      if (isign3 == ENC_OPS2(Reg, Reg)) {
+        opReg = o0.id();
+        rbReg = o1.id();
+        goto EmitVexEvexR;
+      }
+
+      if (isign3 == ENC_OPS2(Reg, Mem)) {
+        opReg = o0.id();
+        rmRel = &o1;
+        goto EmitVexEvexM;
+      }
+
+      // The following instruction uses the secondary opcode.
+      opcode &= Opcode::kLL_Mask;
+      opcode |= x86AltOpcodeOf(instInfo);
+
+      if (isign3 == ENC_OPS2(Mem, Reg)) {
+        opReg = o1.id();
+        rmRel = &o0;
+        goto EmitVexEvexM;
+      }
+      break;
+
+    case InstDB::kEncodingVexRvmRmv:
+      if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
+        opReg = x86PackRegAndVvvvv(o0.id(), o2.id());
+        rbReg = o1.id();
+
+        if (!Support::test(options, InstOptions::kX86_ModMR))
+          goto EmitVexEvexR;
+
+        opcode.addW();
+        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
+        rbReg = o2.id();
+        goto EmitVexEvexR;
+      }
+
+      if (isign3 == ENC_OPS3(Reg, Mem, Reg)) {
+        opReg = x86PackRegAndVvvvv(o0.id(), o2.id());
+        rmRel = &o1;
+        goto EmitVexEvexM;
+      }
+
+      if (isign3 == ENC_OPS3(Reg, Reg, Mem)) {
+        opcode.addW();
+        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
+        rmRel = &o2;
+        goto EmitVexEvexM;
+      }
+      break;
+
+    case InstDB::kEncodingVexRvmRmi_Lx:
+      opcode |= x86OpcodeLBySize(o0.size() | o1.size());
+      ASMJIT_FALLTHROUGH;
+
+    case InstDB::kEncodingVexRvmRmi:
+      if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
+        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
+        rbReg = o2.id();
+        goto EmitVexEvexR;
+      }
+
+      if (isign3 == ENC_OPS3(Reg, Reg, Mem)) {
+        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
+        rmRel = &o2;
+        goto EmitVexEvexM;
+      }
+
+      // The following instructions use the secondary opcode.
+      opcode &= Opcode::kLL_Mask;
+      opcode |= x86AltOpcodeOf(instInfo);
+
+      immValue = o2.as<Imm>().value();
+      immSize = 1;
+
+      if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
+        opReg = o0.id();
+        rbReg = o1.id();
+        goto EmitVexEvexR;
+      }
+
+      if (isign3 == ENC_OPS3(Reg, Mem, Imm)) {
+        opReg = o0.id();
+        rmRel = &o1;
+        goto EmitVexEvexM;
+      }
+      break;
+
+    case InstDB::kEncodingVexRvmRmvRmi:
+      if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
+        opReg = x86PackRegAndVvvvv(o0.id(), o2.id());
+        rbReg = o1.id();
+
+        if (!Support::test(options, InstOptions::kX86_ModMR))
+          goto EmitVexEvexR;
+
+        opcode.addW();
+        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
+        rbReg = o2.id();
+        goto EmitVexEvexR;
+      }
+
+      if (isign3 == ENC_OPS3(Reg, Mem, Reg)) {
+        opReg = x86PackRegAndVvvvv(o0.id(), o2.id());
+        rmRel = &o1;
+        goto EmitVexEvexM;
+      }
+
+      if (isign3 == ENC_OPS3(Reg, Reg, Mem)) {
+        opcode.addW();
+        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
+        rmRel = &o2;
+        goto EmitVexEvexM;
+      }
+
+      // The following instructions use the secondary opcode.
+      opcode = x86AltOpcodeOf(instInfo);
+
+      immValue = o2.as<Imm>().value();
+      immSize = 1;
+
+      if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
+        opReg = o0.id();
+        rbReg = o1.id();
+        goto EmitVexEvexR;
+      }
+
+      if (isign3 == ENC_OPS3(Reg, Mem, Imm)) {
+        opReg = o0.id();
+        rmRel = &o1;
+        goto EmitVexEvexM;
+      }
+      break;
+
+    case InstDB::kEncodingVexRvmMr:
+      if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
+        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
+        rbReg = o2.id();
+        goto EmitVexEvexR;
+      }
+
+      if (isign3 == ENC_OPS3(Reg, Reg, Mem)) {
+        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
+        rmRel = &o2;
+        goto EmitVexEvexM;
+      }
+
+      // The following instructions use the secondary opcode.
+      opcode = x86AltOpcodeOf(instInfo);
+
+      if (isign3 == ENC_OPS2(Reg, Reg)) {
+        opReg = o1.id();
+        rbReg = o0.id();
+        goto EmitVexEvexR;
+      }
+
+      if (isign3 == ENC_OPS2(Mem, Reg)) {
+        opReg = o1.id();
+        rmRel = &o0;
+        goto EmitVexEvexM;
+      }
+      break;
+
+    case InstDB::kEncodingVexRvmMvr_Lx:
+      opcode |= x86OpcodeLBySize(o0.size() | o1.size());
+      ASMJIT_FALLTHROUGH;
+
+    case InstDB::kEncodingVexRvmMvr:
+      if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
+        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
+        rbReg = o2.id();
+        goto EmitVexEvexR;
+      }
+
+      if (isign3 == ENC_OPS3(Reg, Reg, Mem)) {
+        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
+        rmRel = &o2;
+        goto EmitVexEvexM;
+      }
+
+      // The following instruction uses the secondary opcode.
+      opcode &= Opcode::kLL_Mask;
+      opcode |= x86AltOpcodeOf(instInfo);
+
+      if (isign3 == ENC_OPS3(Mem, Reg, Reg)) {
+        opReg = x86PackRegAndVvvvv(o2.id(), o1.id());
+        rmRel = &o0;
+        goto EmitVexEvexM;
+      }
+      break;
+
+    case InstDB::kEncodingVexRvmVmi_Lx_MEvex:
+      opcode.forceEvexIf(o1.isMem());
+      ASMJIT_FALLTHROUGH;
+
+    case InstDB::kEncodingVexRvmVmi_Lx:
+      opcode |= x86OpcodeLBySize(o0.size() | o1.size());
+      ASMJIT_FALLTHROUGH;
+
+    case InstDB::kEncodingVexRvmVmi:
+      if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
+        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
+        rbReg = o2.id();
+        goto EmitVexEvexR;
+      }
+
+      if (isign3 == ENC_OPS3(Reg, Reg, Mem)) {
+        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
+        rmRel = &o2;
+        goto EmitVexEvexM;
+      }
+
+      // The following instruction uses the secondary opcode.
+      opcode &= Opcode::kLL_Mask | Opcode::kMM_ForceEvex;
+      opcode |= x86AltOpcodeOf(instInfo);
+      opReg = opcode.extractModO();
+
+      immValue = o2.as<Imm>().value();
+      immSize = 1;
+
+      if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
+        opReg = x86PackRegAndVvvvv(opReg, o0.id());
+        rbReg = o1.id();
+        goto EmitVexEvexR;
+      }
+
+      if (isign3 == ENC_OPS3(Reg, Mem, Imm)) {
+        opReg = x86PackRegAndVvvvv(opReg, o0.id());
+        rmRel = &o1;
+        goto EmitVexEvexM;
+      }
+      break;
+
+    case InstDB::kEncodingVexVm_Wx:
+      opcode.addWIf(Reg::isGpq(o0) | Reg::isGpq(o1));
+      ASMJIT_FALLTHROUGH;
+
+    case InstDB::kEncodingVexVm:
+      if (isign3 == ENC_OPS2(Reg, Reg)) {
+        opReg = x86PackRegAndVvvvv(opReg, o0.id());
+        rbReg = o1.id();
+        goto EmitVexEvexR;
+      }
+
+      if (isign3 == ENC_OPS2(Reg, Mem)) {
+        opReg = x86PackRegAndVvvvv(opReg, o0.id());
+        rmRel = &o1;
+        goto EmitVexEvexM;
+      }
+      break;
+
+    case InstDB::kEncodingVexVmi_Lx_MEvex:
+      if (isign3 == ENC_OPS3(Reg, Mem, Imm))
+        opcode.forceEvex();
+      ASMJIT_FALLTHROUGH;
+
+    case InstDB::kEncodingVexVmi_Lx:
+      opcode |= x86OpcodeLBySize(o0.size() | o1.size());
+      ASMJIT_FALLTHROUGH;
+
+    case InstDB::kEncodingVexVmi:
+      immValue = o2.as<Imm>().value();
+      immSize = 1;
+
+CaseVexVmi_AfterImm:
+      if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
+        opReg = x86PackRegAndVvvvv(opReg, o0.id());
+        rbReg = o1.id();
+        goto EmitVexEvexR;
+      }
+
+      if (isign3 == ENC_OPS3(Reg, Mem, Imm)) {
+        opReg = x86PackRegAndVvvvv(opReg, o0.id());
+        rmRel = &o1;
+        goto EmitVexEvexM;
+      }
+      break;
+
+    case InstDB::kEncodingVexVmi4_Wx:
+      opcode.addWIf(Reg::isGpq(o0) || o1.size() == 8);
+      immValue = o2.as<Imm>().value();
+      immSize = 4;
+      goto CaseVexVmi_AfterImm;
+
+    case InstDB::kEncodingVexRvrmRvmr_Lx:
+      opcode |= x86OpcodeLBySize(o0.size() | o1.size());
+      ASMJIT_FALLTHROUGH;
+
+    case InstDB::kEncodingVexRvrmRvmr: {
+      const Operand_& o3 = opExt[EmitterUtils::kOp3];
+      const uint32_t isign4 = isign3 + (uint32_t(o3.opType()) << 9);
+
+      if (isign4 == ENC_OPS4(Reg, Reg, Reg, Reg)) {
+        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
+        rbReg = o2.id();
+
+        immValue = o3.id() << 4;
+        immSize = 1;
+        goto EmitVexEvexR;
+      }
+
+      if (isign4 == ENC_OPS4(Reg, Reg, Reg, Mem)) {
+        opcode.addW();
+        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
+        rmRel = &o3;
+
+        immValue = o2.id() << 4;
+        immSize = 1;
+        goto EmitVexEvexM;
+      }
+
+      if (isign4 == ENC_OPS4(Reg, Reg, Mem, Reg)) {
+        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
+        rmRel = &o2;
+
+        immValue = o3.id() << 4;
+        immSize = 1;
+        goto EmitVexEvexM;
+      }
+      break;
+    }
+
+    case InstDB::kEncodingVexRvrmiRvmri_Lx: {
+      const Operand_& o3 = opExt[EmitterUtils::kOp3];
+      const Operand_& o4 = opExt[EmitterUtils::kOp4];
+
+      if (ASMJIT_UNLIKELY(!o4.isImm()))
+        goto InvalidInstruction;
+
+      const uint32_t isign4 = isign3 + (uint32_t(o3.opType()) << 9);
+      opcode |= x86OpcodeLBySize(o0.size() | o1.size() | o2.size() | o3.size());
+
+      immValue = o4.as<Imm>().valueAs<uint8_t>() & 0x0F;
+      immSize = 1;
+
+      if (isign4 == ENC_OPS4(Reg, Reg, Reg, Reg)) {
+        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
+        rbReg = o2.id();
+
+        immValue |= o3.id() << 4;
+        goto EmitVexEvexR;
+      }
+
+      if (isign4 == ENC_OPS4(Reg, Reg, Reg, Mem)) {
+        opcode.addW();
+        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
+        rmRel = &o3;
+
+        immValue |= o2.id() << 4;
+        goto EmitVexEvexM;
+      }
+
+      if (isign4 == ENC_OPS4(Reg, Reg, Mem, Reg)) {
+        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
+        rmRel = &o2;
+
+        immValue |= o3.id() << 4;
+        goto EmitVexEvexM;
+      }
+      break;
+    }
+
+    case InstDB::kEncodingVexMovssMovsd:
+      if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
+        goto CaseVexRvm_R;
+      }
+
+      if (isign3 == ENC_OPS2(Reg, Mem)) {
+        opReg = o0.id();
+        rmRel = &o1;
+        goto EmitVexEvexM;
+      }
+
+      if (isign3 == ENC_OPS2(Mem, Reg)) {
+        opcode = x86AltOpcodeOf(instInfo);
+        opReg = o1.id();
+        rmRel = &o0;
+        goto EmitVexEvexM;
+      }
+      break;
+
+    // FMA4 Instructions
+    // -----------------
+
+    case InstDB::kEncodingFma4_Lx:
+      // It's fine to just check the first operand, second is just for sanity.
+      opcode |= x86OpcodeLBySize(o0.size() | o1.size());
+      ASMJIT_FALLTHROUGH;
+
+    case InstDB::kEncodingFma4: {
+      const Operand_& o3 = opExt[EmitterUtils::kOp3];
+      const uint32_t isign4 = isign3 + (uint32_t(o3.opType()) << 9);
+
+      if (isign4 == ENC_OPS4(Reg, Reg, Reg, Reg)) {
+        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
+
+        if (!Support::test(options, InstOptions::kX86_ModMR)) {
+          // MOD/RM - Encoding preferred by LLVM.
+          opcode.addW();
+          rbReg = o3.id();
+
+          immValue = o2.id() << 4;
+          immSize = 1;
+          goto EmitVexEvexR;
+        }
+        else {
+          // MOD/MR - Alternative encoding.
+          rbReg = o2.id();
+
+          immValue = o3.id() << 4;
+          immSize = 1;
+          goto EmitVexEvexR;
+        }
+      }
+
+      if (isign4 == ENC_OPS4(Reg, Reg, Reg, Mem)) {
+        opcode.addW();
+        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
+        rmRel = &o3;
+
+        immValue = o2.id() << 4;
+        immSize = 1;
+        goto EmitVexEvexM;
+      }
+
+      if (isign4 == ENC_OPS4(Reg, Reg, Mem, Reg)) {
+        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
+        rmRel = &o2;
+
+        immValue = o3.id() << 4;
+        immSize = 1;
+        goto EmitVexEvexM;
+      }
+      break;
+    }
+
+    // AMX Instructions
+    // ----------------
+
+    case InstDB::kEncodingAmxCfg:
+      if (isign3 == ENC_OPS1(Mem)) {
+        rmRel = &o0;
+        goto EmitVexEvexM;
+      }
+      break;
+
+    case InstDB::kEncodingAmxR:
+      if (isign3 == ENC_OPS1(Reg)) {
+        opReg = o0.id();
+        rbReg = 0;
+        goto EmitVexEvexR;
+      }
+      break;
+
+    case InstDB::kEncodingAmxRm:
+      if (isign3 == ENC_OPS2(Reg, Mem)) {
+        opReg = o0.id();
+        rmRel = &o1;
+        goto EmitVexEvexM;
+      }
+      break;
+
+    case InstDB::kEncodingAmxMr:
+      if (isign3 == ENC_OPS2(Mem, Reg)) {
+        opReg = o1.id();
+        rmRel = &o0;
+        goto EmitVexEvexM;
+      }
+      break;
+
+    case InstDB::kEncodingAmxRmv:
+      if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
+        opReg = x86PackRegAndVvvvv(o0.id(), o2.id());
+        rbReg = o1.id();
+        goto EmitVexEvexR;
+      }
+      break;
+  }
+
+  goto InvalidInstruction;
+
+  // Emit - X86 Opcode
+  // -----------------
+
+EmitX86OpMovAbs:
+  immSize = FastUInt8(registerSize());
+  writer.emitSegmentOverride(rmRel->as<Mem>().segmentId());
+
+EmitX86Op:
+  // Emit mandatory instruction prefix.
+  writer.emitPP(opcode.v);
+
+  // Emit REX prefix (64-bit only).
+  {
+    uint32_t rex = opcode.extractRex(options);
+    if (ASMJIT_UNLIKELY(x86IsRexInvalid(rex)))
+      goto InvalidRexPrefix;
+    rex &= ~kX86ByteInvalidRex & 0xFF;
+    writer.emit8If(rex | kX86ByteRex, rex != 0);
+  }
+
+  // Emit instruction opcodes.
+  writer.emitMMAndOpcode(opcode.v);
+  writer.emitImmediate(uint64_t(immValue), immSize);
+  goto EmitDone;
+
+  // Emit - X86 - Opcode + Reg
+  // -------------------------
+
+EmitX86OpReg:
+  // Emit mandatory instruction prefix.
+  writer.emitPP(opcode.v);
+
+  // Emit REX prefix (64-bit only).
+  {
+    uint32_t rex = opcode.extractRex(options) | (opReg >> 3); // Rex.B (0x01).
+    if (ASMJIT_UNLIKELY(x86IsRexInvalid(rex)))
+      goto InvalidRexPrefix;
+    rex &= ~kX86ByteInvalidRex & 0xFF;
+    writer.emit8If(rex | kX86ByteRex, rex != 0);
+
+    opReg &= 0x7;
+  }
+
+  // Emit instruction opcodes.
+  opcode += opReg;
+  writer.emitMMAndOpcode(opcode.v);
+  writer.emitImmediate(uint64_t(immValue), immSize);
+  goto EmitDone;
+
+  // Emit - X86 - Opcode with Implicit <mem> Operand
+  // -----------------------------------------------
+
+EmitX86OpImplicitMem:
+  rmInfo = x86MemInfo[rmRel->as<Mem>().baseAndIndexTypes()];
+  if (ASMJIT_UNLIKELY(rmRel->as<Mem>().hasOffset() || (rmInfo & kX86MemInfo_Index)))
+    goto InvalidInstruction;
+
+  // Emit mandatory instruction prefix.
+  writer.emitPP(opcode.v);
+
+  // Emit REX prefix (64-bit only).
+  {
+    uint32_t rex = opcode.extractRex(options);
+    if (ASMJIT_UNLIKELY(x86IsRexInvalid(rex)))
+      goto InvalidRexPrefix;
+    rex &= ~kX86ByteInvalidRex & 0xFF;
+    writer.emit8If(rex | kX86ByteRex, rex != 0);
+  }
+
+  // Emit override prefixes.
+  writer.emitSegmentOverride(rmRel->as<Mem>().segmentId());
+  writer.emitAddressOverride((rmInfo & _addressOverrideMask()) != 0);
+
+  // Emit instruction opcodes.
+  writer.emitMMAndOpcode(opcode.v);
+
+  // Emit immediate value.
+  writer.emitImmediate(uint64_t(immValue), immSize);
+  goto EmitDone;
+
+  // Emit - X86 - Opcode /r - Register
+  // ---------------------------------
+
+EmitX86R:
+  // Mandatory instruction prefix.
+  writer.emitPP(opcode.v);
+
+  // Emit REX prefix (64-bit only).
+  {
+    uint32_t rex = opcode.extractRex(options) |
+                   ((opReg & 0x08) >> 1) | // REX.R (0x04).
+                   ((rbReg & 0x08) >> 3) ; // REX.B (0x01).
+
+    if (ASMJIT_UNLIKELY(x86IsRexInvalid(rex)))
+      goto InvalidRexPrefix;
+    rex &= ~kX86ByteInvalidRex & 0xFF;
+    writer.emit8If(rex | kX86ByteRex, rex != 0);
+
+    opReg &= 0x07;
+    rbReg &= 0x07;
+  }
+
+  // Emit instruction opcodes.
+  writer.emitMMAndOpcode(opcode.v);
+
+  // Emit ModR/M.
+  writer.emit8(x86EncodeMod(3, opReg, rbReg));
+
+  // Emit immediate value.
+  writer.emitImmediate(uint64_t(immValue), immSize);
+  goto EmitDone;
+
+  // Emit - X86 - Opcode /r - Memory Base
+  // ------------------------------------
+
+EmitX86RFromM:
+  rmInfo = x86MemInfo[rmRel->as<Mem>().baseAndIndexTypes()];
+  if (ASMJIT_UNLIKELY(rmRel->as<Mem>().hasOffset() || (rmInfo & kX86MemInfo_Index)))
+    goto InvalidInstruction;
+
+  // Emit mandatory instruction prefix.
+  writer.emitPP(opcode.v);
+
+  // Emit REX prefix (64-bit only).
+  {
+    uint32_t rex = opcode.extractRex(options) |
+                   ((opReg & 0x08) >> 1) | // REX.R (0x04).
+                   ((rbReg       ) >> 3) ; // REX.B (0x01).
+
+    if (ASMJIT_UNLIKELY(x86IsRexInvalid(rex)))
+      goto InvalidRexPrefix;
+    rex &= ~kX86ByteInvalidRex & 0xFF;
+    writer.emit8If(rex | kX86ByteRex, rex != 0);
+
+    opReg &= 0x07;
+    rbReg &= 0x07;
+  }
+
+  // Emit override prefixes.
+  writer.emitSegmentOverride(rmRel->as<Mem>().segmentId());
+  writer.emitAddressOverride((rmInfo & _addressOverrideMask()) != 0);
+
+  // Emit instruction opcodes.
+  writer.emitMMAndOpcode(opcode.v);
+
+  // Emit ModR/M.
+  writer.emit8(x86EncodeMod(3, opReg, rbReg));
+
+  // Emit immediate value.
+  writer.emitImmediate(uint64_t(immValue), immSize);
+  goto EmitDone;
+
+  // Emit - X86 - Opcode /r - Memory Operand
+  // ---------------------------------------
+
+EmitX86M:
+  // `rmRel` operand must be memory.
+  ASMJIT_ASSERT(rmRel != nullptr);
+  ASMJIT_ASSERT(rmRel->opType() == OperandType::kMem);
+  ASMJIT_ASSERT((opcode & Opcode::kCDSHL_Mask) == 0);
+
+  // Emit override prefixes.
+  rmInfo = x86MemInfo[rmRel->as<Mem>().baseAndIndexTypes()];
+  writer.emitSegmentOverride(rmRel->as<Mem>().segmentId());
+
+  memOpAOMark = writer.cursor();
+  writer.emitAddressOverride((rmInfo & _addressOverrideMask()) != 0);
+
+  // Emit mandatory instruction prefix.
+  writer.emitPP(opcode.v);
+
+  // Emit REX prefix (64-bit only).
+  rbReg = rmRel->as<Mem>().baseId();
+  rxReg = rmRel->as<Mem>().indexId();
+  {
+    uint32_t rex;
+
+    rex  = (rbReg >> 3) & 0x01; // REX.B (0x01).
+    rex |= (rxReg >> 2) & 0x02; // REX.X (0x02).
+    rex |= (opReg >> 1) & 0x04; // REX.R (0x04).
+
+    rex &= rmInfo;
+    rex |= opcode.extractRex(options);
+
+    if (ASMJIT_UNLIKELY(x86IsRexInvalid(rex)))
+      goto InvalidRexPrefix;
+    rex &= ~kX86ByteInvalidRex & 0xFF;
+    writer.emit8If(rex | kX86ByteRex, rex != 0);
+
+    opReg &= 0x07;
+  }
+
+  // Emit instruction opcodes.
+  writer.emitMMAndOpcode(opcode.v);
+
+  // ... Fall through ...
+
+  // Emit - MOD/SIB
+  // --------------
+
+EmitModSib:
+  if (!(rmInfo & (kX86MemInfo_Index | kX86MemInfo_67H_X86))) {
+    // ==========|> [BASE + DISP8|DISP32].
+    if (rmInfo & kX86MemInfo_BaseGp) {
+      rbReg &= 0x7;
+      relOffset = rmRel->as<Mem>().offsetLo32();
+
+      uint32_t mod = x86EncodeMod(0, opReg, rbReg);
+      bool forceSIB = commonInfo->isTsibOp();
+
+      if (rbReg == Gp::kIdSp || forceSIB) {
+        // TSIB or [XSP|R12].
+        mod = (mod & 0xF8u) | 0x04u;
+        if (rbReg != Gp::kIdBp && relOffset == 0) {
+          writer.emit8(mod);
+          writer.emit8(x86EncodeSib(0, 4, rbReg));
+        }
+        // TSIB or [XSP|R12 + DISP8|DISP32].
+        else {
+          uint32_t cdShift = (opcode & Opcode::kCDSHL_Mask) >> Opcode::kCDSHL_Shift;
+          int32_t cdOffset = relOffset >> cdShift;
+
+          if (Support::isInt8(cdOffset) && relOffset == int32_t(uint32_t(cdOffset) << cdShift)) {
+            writer.emit8(mod + 0x40); // <- MOD(1, opReg, rbReg).
+            writer.emit8(x86EncodeSib(0, 4, rbReg));
+            writer.emit8(cdOffset & 0xFF);
+          }
+          else {
+            writer.emit8(mod + 0x80); // <- MOD(2, opReg, rbReg).
+            writer.emit8(x86EncodeSib(0, 4, rbReg));
+            writer.emit32uLE(uint32_t(relOffset));
+          }
+        }
+      }
+      else if (rbReg != Gp::kIdBp && relOffset == 0) {
+        // [BASE].
+        writer.emit8(mod);
+      }
+      else {
+        // [BASE + DISP8|DISP32].
+        uint32_t cdShift = (opcode & Opcode::kCDSHL_Mask) >> Opcode::kCDSHL_Shift;
+        int32_t cdOffset = relOffset >> cdShift;
+
+        if (Support::isInt8(cdOffset) && relOffset == int32_t(uint32_t(cdOffset) << cdShift)) {
+          writer.emit8(mod + 0x40);
+          writer.emit8(cdOffset & 0xFF);
+        }
+        else {
+          writer.emit8(mod + 0x80);
+          writer.emit32uLE(uint32_t(relOffset));
+        }
+      }
+    }
+    // ==========|> [ABSOLUTE | DISP32].
+    else if (!(rmInfo & (kX86MemInfo_BaseLabel | kX86MemInfo_BaseRip))) {
+      Mem::AddrType addrType = rmRel->as<Mem>().addrType();
+      relOffset = rmRel->as<Mem>().offsetLo32();
+
+      if (is32Bit()) {
+        // Explicit relative addressing doesn't work in 32-bit mode.
+        if (ASMJIT_UNLIKELY(addrType == Mem::AddrType::kRel))
+          goto InvalidAddress;
+
+        writer.emit8(x86EncodeMod(0, opReg, 5));
+        writer.emit32uLE(uint32_t(relOffset));
+      }
+      else {
+        bool isOffsetI32 = rmRel->as<Mem>().offsetHi32() == (relOffset >> 31);
+        bool isOffsetU32 = rmRel->as<Mem>().offsetHi32() == 0;
+        uint64_t baseAddress = code()->baseAddress();
+
+        // If relative addressing was not explicitly set then we can try to guess. By guessing we check some
+        // properties of the memory operand and try to base the decision on the segment prefix and the address type.
+        if (addrType == Mem::AddrType::kDefault) {
+          if (baseAddress == Globals::kNoBaseAddress) {
+            // Prefer absolute addressing mode if the offset is 32-bit.
+            addrType = isOffsetI32 || isOffsetU32 ? Mem::AddrType::kAbs
+                                                  : Mem::AddrType::kRel;
+          }
+          else {
+            // Prefer absolute addressing mode if FS|GS segment override is present.
+            bool hasFsGs = rmRel->as<Mem>().segmentId() >= SReg::kIdFs;
+            // Prefer absolute addressing mode if this is LEA with 32-bit immediate.
+            bool isLea32 = (instId == Inst::kIdLea) && (isOffsetI32 || isOffsetU32);
+
+            addrType = hasFsGs || isLea32 ? Mem::AddrType::kAbs
+                                          : Mem::AddrType::kRel;
+          }
+        }
+
+        if (addrType == Mem::AddrType::kRel) {
+          uint32_t kModRel32Size = 5;
+          uint64_t virtualOffset = uint64_t(writer.offsetFrom(_bufferData)) + immSize + kModRel32Size;
+
+          if (baseAddress == Globals::kNoBaseAddress || _section->id() != 0) {
+            // Create a new RelocEntry as we cannot calculate the offset right now.
+            err = _code->newRelocEntry(&re, RelocType::kAbsToRel);
+            if (ASMJIT_UNLIKELY(err))
+              goto Failed;
+
+            writer.emit8(x86EncodeMod(0, opReg, 5));
+
+            re->_sourceSectionId = _section->id();
+            re->_sourceOffset = offset();
+            re->_format.resetToSimpleValue(OffsetType::kSignedOffset, 4);
+            re->_format.setLeadingAndTrailingSize(writer.offsetFrom(_bufferPtr), immSize);
+            re->_payload = uint64_t(rmRel->as<Mem>().offset());
+
+            writer.emit32uLE(0);
+            writer.emitImmediate(uint64_t(immValue), immSize);
+            goto EmitDone;
+          }
+          else {
+            uint64_t rip64 = baseAddress + _section->offset() + virtualOffset;
+            uint64_t rel64 = uint64_t(rmRel->as<Mem>().offset()) - rip64;
+
+            if (Support::isInt32(int64_t(rel64))) {
+              writer.emit8(x86EncodeMod(0, opReg, 5));
+              writer.emit32uLE(uint32_t(rel64 & 0xFFFFFFFFu));
+              writer.emitImmediate(uint64_t(immValue), immSize);
+              goto EmitDone;
+            }
+            else {
+              // We must check the original address type as we have modified
+              // `addrType`. We failed if the original address type is 'rel'.
+              if (ASMJIT_UNLIKELY(rmRel->as<Mem>().isRel()))
+                goto InvalidAddress;
+            }
+          }
+        }
+
+        // Handle unsigned 32-bit address that doesn't work with sign extension. Consider the following instructions:
+        //
+        //   1. lea rax, [-1]         - Sign extended to 0xFFFFFFFFFFFFFFFF
+        //   2. lea rax, [0xFFFFFFFF] - Zero extended to 0x00000000FFFFFFFF
+        //   3. add rax, [-1]         - Sign extended to 0xFFFFFFFFFFFFFFFF
+        //   4. add rax, [0xFFFFFFFF] - Zero extended to 0x00000000FFFFFFFF
+        //
+        // Sign extension is naturally performed by the CPU so we don't have to bother, however, zero extension
+        // requires address-size override prefix, which we probably don't have at this moment. So to make the address
+        // valid we need to insert it at `memOpAOMark` if it's not already there.
+        //
+        // If this is 'lea' instruction then it's possible to remove REX.W part from REX prefix (if it's there), which
+        // would be one-byte shorter than inserting address-size override.
+        //
+        // NOTE: If we don't do this then these instructions are unencodable.
+        if (!isOffsetI32) {
+          // 64-bit absolute address is unencodable.
+          if (ASMJIT_UNLIKELY(!isOffsetU32))
+            goto InvalidAddress64Bit;
+
+          // We only patch the existing code if we don't have address-size override.
+          if (*memOpAOMark != 0x67) {
+            if (instId == Inst::kIdLea) {
+              // LEA: Remove REX.W, if present. This is easy as we know that 'lea' doesn't use any PP prefix so if REX
+              // prefix was emitted it would be at `memOpAOMark`.
+              uint32_t rex = *memOpAOMark;
+              if (rex & kX86ByteRex) {
+                rex &= (~kX86ByteRexW) & 0xFF;
+                *memOpAOMark = uint8_t(rex);
+
+                // We can remove the REX prefix completely if it was not forced.
+                if (rex == kX86ByteRex && !Support::test(options, InstOptions::kX86_Rex))
+                  writer.remove8(memOpAOMark);
+              }
+            }
+            else {
+              // Any other instruction: Insert address-size override prefix.
+              writer.insert8(memOpAOMark, 0x67);
+            }
+          }
+        }
+
+        // Emit 32-bit absolute address.
+        writer.emit8(x86EncodeMod(0, opReg, 4));
+        writer.emit8(x86EncodeSib(0, 4, 5));
+        writer.emit32uLE(uint32_t(relOffset));
+      }
+    }
+    // ==========|> [LABEL|RIP + DISP32]
+    else {
+      writer.emit8(x86EncodeMod(0, opReg, 5));
+
+      if (is32Bit()) {
+EmitModSib_LabelRip_X86:
+        if (ASMJIT_UNLIKELY(_code->_relocations.willGrow(_code->allocator()) != kErrorOk))
+          goto OutOfMemory;
+
+        relOffset = rmRel->as<Mem>().offsetLo32();
+        if (rmInfo & kX86MemInfo_BaseLabel) {
+          // [LABEL->ABS].
+          label = _code->labelEntry(rmRel->as<Mem>().baseId());
+          if (ASMJIT_UNLIKELY(!label))
+            goto InvalidLabel;
+
+          err = _code->newRelocEntry(&re, RelocType::kRelToAbs);
+          if (ASMJIT_UNLIKELY(err))
+            goto Failed;
+
+          re->_sourceSectionId = _section->id();
+          re->_sourceOffset = offset();
+          re->_format.resetToSimpleValue(OffsetType::kUnsignedOffset, 4);
+          re->_format.setLeadingAndTrailingSize(writer.offsetFrom(_bufferPtr), immSize);
+          re->_payload = uint64_t(int64_t(relOffset));
+
+          if (label->isBound()) {
+            // Label bound to the current section.
+            re->_payload += label->offset();
+            re->_targetSectionId = label->section()->id();
+            writer.emit32uLE(0);
+          }
+          else {
+            // Non-bound label or label bound to a different section.
+            relOffset = -4 - immSize;
+            relSize = 4;
+            goto EmitRel;
+          }
+        }
+        else {
+          // [RIP->ABS].
+          err = _code->newRelocEntry(&re, RelocType::kRelToAbs);
+          if (ASMJIT_UNLIKELY(err))
+            goto Failed;
+
+          re->_sourceSectionId = _section->id();
+          re->_targetSectionId = _section->id();
+          re->_format.resetToSimpleValue(OffsetType::kUnsignedOffset, 4);
+          re->_format.setLeadingAndTrailingSize(writer.offsetFrom(_bufferPtr), immSize);
+          re->_sourceOffset = offset();
+          re->_payload = re->_sourceOffset + re->_format.regionSize() + uint64_t(int64_t(relOffset));
+
+          writer.emit32uLE(0);
+        }
+      }
+      else {
+        relOffset = rmRel->as<Mem>().offsetLo32();
+        if (rmInfo & kX86MemInfo_BaseLabel) {
+          // [RIP].
+          label = _code->labelEntry(rmRel->as<Mem>().baseId());
+          if (ASMJIT_UNLIKELY(!label))
+            goto InvalidLabel;
+
+          relOffset -= (4 + immSize);
+          if (label->isBoundTo(_section)) {
+            // Label bound to the current section.
+            relOffset += int32_t(label->offset() - writer.offsetFrom(_bufferData));
+            writer.emit32uLE(uint32_t(relOffset));
+          }
+          else {
+            // Non-bound label or label bound to a different section.
+            relSize = 4;
+            goto EmitRel;
+          }
+        }
+        else {
+          // [RIP].
+          writer.emit32uLE(uint32_t(relOffset));
+        }
+      }
+    }
+  }
+  else if (!(rmInfo & kX86MemInfo_67H_X86)) {
+    // ESP|RSP can't be used as INDEX in pure SIB mode, however, VSIB mode allows XMM4|YMM4|ZMM4 (that's why the
+    // check is before the label).
+    if (ASMJIT_UNLIKELY(rxReg == Gp::kIdSp))
+      goto InvalidAddressIndex;
+
+EmitModVSib:
+    rxReg &= 0x7;
+
+    // ==========|> [BASE + INDEX + DISP8|DISP32].
+    if (rmInfo & kX86MemInfo_BaseGp) {
+      rbReg &= 0x7;
+      relOffset = rmRel->as<Mem>().offsetLo32();
+
+      uint32_t mod = x86EncodeMod(0, opReg, 4);
+      uint32_t sib = x86EncodeSib(rmRel->as<Mem>().shift(), rxReg, rbReg);
+
+      if (relOffset == 0 && rbReg != Gp::kIdBp) {
+        // [BASE + INDEX << SHIFT].
+        writer.emit8(mod);
+        writer.emit8(sib);
+      }
+      else {
+        uint32_t cdShift = (opcode & Opcode::kCDSHL_Mask) >> Opcode::kCDSHL_Shift;
+        int32_t cdOffset = relOffset >> cdShift;
+
+        if (Support::isInt8(cdOffset) && relOffset == int32_t(uint32_t(cdOffset) << cdShift)) {
+          // [BASE + INDEX << SHIFT + DISP8].
+          writer.emit8(mod + 0x40); // <- MOD(1, opReg, 4).
+          writer.emit8(sib);
+          writer.emit8(uint32_t(cdOffset));
+        }
+        else {
+          // [BASE + INDEX << SHIFT + DISP32].
+          writer.emit8(mod + 0x80); // <- MOD(2, opReg, 4).
+          writer.emit8(sib);
+          writer.emit32uLE(uint32_t(relOffset));
+        }
+      }
+    }
+    // ==========|> [INDEX + DISP32].
+    else if (!(rmInfo & (kX86MemInfo_BaseLabel | kX86MemInfo_BaseRip))) {
+      // [INDEX << SHIFT + DISP32].
+      writer.emit8(x86EncodeMod(0, opReg, 4));
+      writer.emit8(x86EncodeSib(rmRel->as<Mem>().shift(), rxReg, 5));
+
+      relOffset = rmRel->as<Mem>().offsetLo32();
+      writer.emit32uLE(uint32_t(relOffset));
+    }
+    // ==========|> [LABEL|RIP + INDEX + DISP32].
+    else {
+      if (is32Bit()) {
+        writer.emit8(x86EncodeMod(0, opReg, 4));
+        writer.emit8(x86EncodeSib(rmRel->as<Mem>().shift(), rxReg, 5));
+        goto EmitModSib_LabelRip_X86;
+      }
+      else {
+        // NOTE: This also handles VSIB+RIP, which is not allowed in 64-bit mode.
+        goto InvalidAddress;
+      }
+    }
+  }
+  else {
+    // 16-bit address mode (32-bit mode with 67 override prefix).
+    relOffset = (int32_t(rmRel->as<Mem>().offsetLo32()) << 16) >> 16;
+
+    // NOTE: 16-bit addresses don't use SIB byte and their encoding differs. We use a table-based approach to
+    // calculate the proper MOD byte as it's easier. Also, not all BASE [+ INDEX] combinations are supported
+    // in 16-bit mode, so this may fail.
+    const uint32_t kBaseGpIdx = (kX86MemInfo_BaseGp | kX86MemInfo_Index);
+
+    if (rmInfo & kBaseGpIdx) {
+      // ==========|> [BASE + INDEX + DISP16].
+      uint32_t mod;
+
+      rbReg &= 0x7;
+      rxReg &= 0x7;
+
+      if ((rmInfo & kBaseGpIdx) == kBaseGpIdx) {
+        uint32_t shf = rmRel->as<Mem>().shift();
+        if (ASMJIT_UNLIKELY(shf != 0))
+          goto InvalidAddress;
+        mod = x86Mod16BaseIndexTable[(rbReg << 3) + rxReg];
+      }
+      else {
+        if (rmInfo & kX86MemInfo_Index)
+          rbReg = rxReg;
+        mod = x86Mod16BaseTable[rbReg];
+      }
+
+      if (ASMJIT_UNLIKELY(mod == 0xFF))
+        goto InvalidAddress;
+
+      mod += opReg << 3;
+      if (relOffset == 0 && mod != 0x06) {
+        writer.emit8(mod);
+      }
+      else if (Support::isInt8(relOffset)) {
+        writer.emit8(mod + 0x40);
+        writer.emit8(uint32_t(relOffset));
+      }
+      else {
+        writer.emit8(mod + 0x80);
+        writer.emit16uLE(uint32_t(relOffset));
+      }
+    }
+    else {
+      // Not supported in 16-bit addresses.
+      if (rmInfo & (kX86MemInfo_BaseRip | kX86MemInfo_BaseLabel))
+        goto InvalidAddress;
+
+      // ==========|> [DISP16].
+      writer.emit8(opReg | 0x06);
+      writer.emit16uLE(uint32_t(relOffset));
+    }
+  }
+
+  writer.emitImmediate(uint64_t(immValue), immSize);
+  goto EmitDone;
+
+  // Emit - FPU
+  // ----------
+
+EmitFpuOp:
+  // Mandatory instruction prefix.
+  writer.emitPP(opcode.v);
+
+  // FPU instructions consist of two opcodes.
+  writer.emit8(opcode.v >> Opcode::kFPU_2B_Shift);
+  writer.emit8(opcode.v);
+  goto EmitDone;
+
+  // Emit - VEX Opcode
+  // -----------------
+
+EmitVexOp:
+  {
+    // These don't use immediate.
+    ASMJIT_ASSERT(immSize == 0);
+
+    // Only 'vzeroall' and 'vzeroupper' instructions use this encoding, they don't define 'W' to be '1' so we can
+    // just check the 'mmmmm' field. Both instructions can be encoded using a VEX2 prefix, so VEX3 is basically only used
+    // when specified as instruction option.
+    ASMJIT_ASSERT((opcode & Opcode::kW) == 0);
+
+    uint32_t x = (uint32_t(opcode  & Opcode::kMM_Mask      ) >> (Opcode::kMM_Shift     )) |
+                 (uint32_t(opcode  & Opcode::kLL_Mask      ) >> (Opcode::kLL_Shift - 10)) |
+                 (uint32_t(opcode  & Opcode::kPP_VEXMask   ) >> (Opcode::kPP_Shift -  8)) ;
+
+    if (Support::test(options, InstOptions::kX86_Vex3)) {
+      x  = (x & 0xFFFF) << 8;                               // [00000000|00000Lpp|000mmmmm|00000000].
+      x ^= (kX86ByteVex3) |                                 // [........|00000Lpp|000mmmmm|__VEX3__].
+           (0x07u  << 13) |                                 // [........|00000Lpp|111mmmmm|__VEX3__].
+           (0x0Fu  << 19) |                                 // [........|01111Lpp|111mmmmm|__VEX3__].
+           (opcode << 24) ;                                 // [_OPCODE_|01111Lpp|111mmmmm|__VEX3__].
+
+      writer.emit32uLE(x);
+      goto EmitDone;
+    }
+    else {
+      x = ((x >> 8) ^ x) ^ 0xF9;
+      writer.emit8(kX86ByteVex2);
+      writer.emit8(x);
+      writer.emit8(opcode.v);
+      goto EmitDone;
+    }
+  }
+
+  // Emit - VEX|EVEX - /r - Register
+  // -------------------------------
+
+EmitVexEvexR:
+  {
+    // Construct `x` - a complete EVEX|VEX prefix.
+    uint32_t x = ((opReg << 4) & 0xF980u) |                 // [........|........|Vvvvv..R|R.......].
+                 ((rbReg << 2) & 0x0060u) |                 // [........|........|........|.BB.....].
+                 (opcode.extractLLMMMMM(options)) |         // [........|.LL.....|Vvvvv..R|RBBmmmmm].
+                 (_extraReg.id() << 16);                    // [........|.LL..aaa|Vvvvv..R|RBBmmmmm].
+    opReg &= 0x7;
+
+    // Handle AVX512 options by a single branch.
+    const InstOptions kAvx512Options = InstOptions::kX86_ZMask | InstOptions::kX86_ER | InstOptions::kX86_SAE;
+    if (Support::test(options, kAvx512Options)) {
+      static constexpr uint32_t kBcstMask = 0x1 << 20;
+      static constexpr uint32_t kLLMask10 = 0x2 << 21;
+      static constexpr uint32_t kLLMask11 = 0x3 << 21;
+
+      // Designed to be easily encodable so the position must be exact. The {rz-sae} is encoded as {11},
+      // so it should match the mask.
+      static_assert(uint32_t(InstOptions::kX86_RZ_SAE) == kLLMask11,
+                    "This code requires InstOptions::X86_RZ_SAE to match kLLMask11 to work properly");
+
+      x |= uint32_t(options & InstOptions::kX86_ZMask);     // [........|zLLb.aaa|Vvvvv..R|RBBmmmmm].
+
+      // Support embedded-rounding {er} and suppress-all-exceptions {sae}.
+      if (Support::test(options, InstOptions::kX86_ER | InstOptions::kX86_SAE)) {
+        // Embedded rounding is only encodable if the instruction is either scalar or it's a 512-bit
+        // operation as the {er} rounding predicate collides with LL part of the instruction.
+        if ((x & kLLMask11) != kLLMask10) {
+          // Ok, so LL is not 10, thus the instruction must be scalar. Scalar instructions don't
+          // support broadcast, so if this instruction supports it, neither {er} nor {sae} would be encodable.
+          if (ASMJIT_UNLIKELY(commonInfo->hasAvx512B()))
+            goto InvalidEROrSAE;
+        }
+
+        if (Support::test(options, InstOptions::kX86_ER)) {
+          if (ASMJIT_UNLIKELY(!commonInfo->hasAvx512ER()))
+            goto InvalidEROrSAE;
+
+          x &=~kLLMask11;                                   // [........|.00..aaa|Vvvvv..R|RBBmmmmm].
+          x |= kBcstMask | (uint32_t(options) & kLLMask11); // [........|.LLb.aaa|Vvvvv..R|RBBmmmmm].
+        }
+        else {
+          if (ASMJIT_UNLIKELY(!commonInfo->hasAvx512SAE()))
+            goto InvalidEROrSAE;
+
+          x &=~kLLMask11;                                   // [........|.00..aaa|Vvvvv..R|RBBmmmmm].
+          x |= kBcstMask;                                   // [........|.00b.aaa|Vvvvv..R|RBBmmmmm].
+        }
+      }
+    }
+
+    // These bits would force EVEX prefix.
+    constexpr uint32_t kEvexForce = 0x00000010u;            // [........|........|........|...x....].
+    constexpr uint32_t kEvexBits = 0x00D78150u;             // [........|xx.x.xxx|x......x|.x.x....].
+
+    // Force EVEX prefix even in case the instruction has VEX encoding, because EVEX encoding is preferred. At the
+    // moment this is only required by AVX_VNNI instructions, which were added after AVX512_VNNI instructions. If
+    // such instruction doesn't specify prefix, EVEX (AVX512_VNNI) is selected by default.
+    if (commonInfo->preferEvex()) {
+      if ((x & kEvexBits) == 0 && !Support::test(options, InstOptions::kX86_Vex | InstOptions::kX86_Vex3)) {
+        x |= kEvexForce;
+      }
+    }
+
+    // Check if EVEX is required by checking bits in `x` :     [........|xx.x.xxx|x......x|.x.x....].
+    if (x & kEvexBits) {
+      uint32_t y = ((x << 4) & 0x00080000u) |               // [........|...bV...|........|........].
+                   ((x >> 4) & 0x00000010u) ;               // [........|...bV...|........|...R....].
+      x  = (x & 0x00FF78EFu) | y;                           // [........|zLLbVaaa|0vvvv000|RBBRmmmm].
+      x  = x << 8;                                          // [zLLbVaaa|0vvvv000|RBBRmmmm|00000000].
+      x |= (opcode >> kVSHR_W    ) & 0x00800000u;           // [zLLbVaaa|Wvvvv000|RBBRmmmm|00000000].
+      x |= (opcode >> kVSHR_PP_EW) & 0x00830000u;           // [zLLbVaaa|Wvvvv0pp|RBBRmmmm|00000000] (added PP and EVEX.W).
+                                                            //      _     ____    ____
+      x ^= 0x087CF000u | kX86ByteEvex;                      // [zLLbVaaa|Wvvvv1pp|RBBRmmmm|01100010].
+
+      writer.emit32uLE(x);
+      writer.emit8(opcode.v);
+
+      rbReg &= 0x7;
+      writer.emit8(x86EncodeMod(3, opReg, rbReg));
+      writer.emitImmByteOrDWord(uint64_t(immValue), immSize);
+      goto EmitDone;
+    }
+
+    // Not EVEX, prepare `x` for VEX2 or VEX3:             x = [........|00L00000|0vvvv000|R0Bmmmmm].
+    x |= ((opcode >> (kVSHR_W  + 8)) & 0x8000u) |           // [00000000|00L00000|Wvvvv000|R0Bmmmmm].
+         ((opcode >> (kVSHR_PP + 8)) & 0x0300u) |           // [00000000|00L00000|0vvvv0pp|R0Bmmmmm].
+         ((x      >> 11            ) & 0x0400u) ;           // [00000000|00L00000|WvvvvLpp|R0Bmmmmm].
+    x |= x86GetForceEvex3MaskInLastBit(options);            // [x0000000|00L00000|WvvvvLpp|R0Bmmmmm].
+
+    // Check if VEX3 is required / forced:                     [x.......|........|x.......|..xxxxx.].
+    if (x & 0x8000803Eu) {
+      uint32_t xorMsk = x86VEXPrefix[x & 0xF] | (opcode << 24);
+
+      // Clear all high bits.
+      x  = (x & 0xFFFF) << 8;                               // [00000000|WvvvvLpp|R0Bmmmmm|00000000].
+                                                            //            ____    _ _
+      x ^= xorMsk;                                          // [_OPCODE_|WvvvvLpp|R1Bmmmmm|VEX3|XOP].
+      writer.emit32uLE(x);
+
+      rbReg &= 0x7;
+      writer.emit8(x86EncodeMod(3, opReg, rbReg));
+      writer.emitImmByteOrDWord(uint64_t(immValue), immSize);
+      goto EmitDone;
+    }
+    else {
+      // 'mmmmm' must be '00001'.
+      ASMJIT_ASSERT((x & 0x1F) == 0x01);
+
+      x = ((x >> 8) ^ x) ^ 0xF9;
+      writer.emit8(kX86ByteVex2);
+      writer.emit8(x);
+      writer.emit8(opcode.v);
+
+      rbReg &= 0x7;
+      writer.emit8(x86EncodeMod(3, opReg, rbReg));
+      writer.emitImmByteOrDWord(uint64_t(immValue), immSize);
+      goto EmitDone;
+    }
+  }
+
+  // Emit - VEX|EVEX - /r - Memory
+  // -----------------------------
+
+EmitVexEvexM:
+  ASMJIT_ASSERT(rmRel != nullptr);
+  ASMJIT_ASSERT(rmRel->opType() == OperandType::kMem);
+
+  rmInfo = x86MemInfo[rmRel->as<Mem>().baseAndIndexTypes()];
+  writer.emitSegmentOverride(rmRel->as<Mem>().segmentId());
+
+  memOpAOMark = writer.cursor();
+  writer.emitAddressOverride((rmInfo & _addressOverrideMask()) != 0);
+
+  rbReg = rmRel->as<Mem>().hasBaseReg()  ? rmRel->as<Mem>().baseId()  : uint32_t(0);
+  rxReg = rmRel->as<Mem>().hasIndexReg() ? rmRel->as<Mem>().indexId() : uint32_t(0);
+
+  {
+    uint32_t broadcastBit = uint32_t(rmRel->as<Mem>().hasBroadcast());
+
+    // Construct `x` - a complete EVEX|VEX prefix.
+    uint32_t x = ((opReg <<  4) & 0x0000F980u)  |           // [........|........|Vvvvv..R|R.......].
+                 ((rxReg <<  3) & 0x00000040u)  |           // [........|........|........|.X......].
+                 ((rxReg << 15) & 0x00080000u)  |           // [........|....X...|........|........].
+                 ((rbReg <<  2) & 0x00000020u)  |           // [........|........|........|..B.....].
+                 opcode.extractLLMMMMM(options) |           // [........|.LL.X...|Vvvvv..R|RXBmmmmm].
+                 (_extraReg.id()    << 16)      |           // [........|.LL.Xaaa|Vvvvv..R|RXBmmmmm].
+                 (broadcastBit      << 20)      ;           // [........|.LLbXaaa|Vvvvv..R|RXBmmmmm].
+    opReg &= 0x07u;
+
+    // Mark invalid VEX (force EVEX) case:                  // [@.......|.LLbXaaa|Vvvvv..R|RXBmmmmm].
+    x |= uint32_t(~commonInfo->flags() & InstDB::InstFlags::kVex) << (31 - Support::ConstCTZ<uint32_t(InstDB::InstFlags::kVex)>::value);
+
+    // Handle AVX512 options by a single branch.
+    const InstOptions kAvx512Options = InstOptions::kX86_ZMask   |
+                                       InstOptions::kX86_ER      |
+                                       InstOptions::kX86_SAE     ;
+    if (Support::test(options, kAvx512Options)) {
+      // {er} and {sae} are both invalid if memory operand is used.
+      if (ASMJIT_UNLIKELY(Support::test(options, InstOptions::kX86_ER | InstOptions::kX86_SAE)))
+        goto InvalidEROrSAE;
+
+      x |= uint32_t(options & InstOptions::kX86_ZMask);     // [@.......|zLLbXaaa|Vvvvv..R|RXBmmmmm].
+    }
+
+    // If these bits are used then EVEX prefix is required.
+    constexpr uint32_t kEvexForce = 0x00000010u;            // [........|........|........|...x....].
+    constexpr uint32_t kEvexBits = 0x80DF8110u;             // [@.......|xx.xxxxx|x......x|...x....].
+
+    // Force EVEX prefix even in case the instruction has VEX encoding, because EVEX encoding is preferred. At the
+    // moment this is only required for AVX_VNNI instructions, which were added after AVX512_VNNI instructions. If
+    // such instruction doesn't specify prefix, EVEX (AVX512_VNNI) would be used by default.
+    if (commonInfo->preferEvex()) {
+      if ((x & kEvexBits) == 0 && !Support::test(options, InstOptions::kX86_Vex | InstOptions::kX86_Vex3)) {
+        x |= kEvexForce;
+      }
+    }
+
+    // Check if EVEX is required by checking bits in `x` :     [@.......|xx.xxxxx|x......x|...x....].
+    if (x & kEvexBits) {
+      uint32_t y = ((x << 4) & 0x00080000u) |               // [@.......|....V...|........|........].
+                   ((x >> 4) & 0x00000010u) ;               // [@.......|....V...|........|...R....].
+      x  = (x & 0x00FF78EFu) | y;                           // [........|zLLbVaaa|0vvvv000|RXBRmmmm].
+      x  = x << 8;                                          // [zLLbVaaa|0vvvv000|RBBRmmmm|00000000].
+      x |= (opcode >> kVSHR_W    ) & 0x00800000u;           // [zLLbVaaa|Wvvvv000|RBBRmmmm|00000000].
+      x |= (opcode >> kVSHR_PP_EW) & 0x00830000u;           // [zLLbVaaa|Wvvvv0pp|RBBRmmmm|00000000] (added PP and EVEX.W).
+                                                            //      _     ____    ____
+      x ^= 0x087CF000u | kX86ByteEvex;                      // [zLLbVaaa|Wvvvv1pp|RBBRmmmm|01100010].
+
+      if (x & 0x10000000u) {
+        // Broadcast support.
+        //
+        // 1. Verify our LL field is correct as broadcast changes the "size" of the source operand. For example if
+        //    a broadcasted operand is qword_ptr[X] {1to8} the source size becomes 64 and not 8 as the memory operand
+        //    would report.
+        //
+        // 2. Change the compressed displacement scale to either x2 (SHL1), x4 (SHL 2), or x8 (SHL 3) depending on
+        //    the broadcast unit/element size.
+        uint32_t broadcastUnitSize = commonInfo->broadcastSize();
+        uint32_t broadcastVectorSize = broadcastUnitSize << uint32_t(rmRel->as<Mem>().getBroadcast());
+
+        if (ASMJIT_UNLIKELY(broadcastUnitSize == 0))
+          goto InvalidBroadcast;
+
+        // LL was already shifted 8 bits right.
+        constexpr uint32_t kLLShift = 21 + 8;
+
+        uint32_t currentLL = x & (0x3u << kLLShift);
+        uint32_t broadcastLL = (Support::max<uint32_t>(Support::ctz(broadcastVectorSize), 4) - 4) << kLLShift;
+
+        if (broadcastLL > (2u << kLLShift))
+          goto InvalidBroadcast;
+
+        uint32_t newLL = Support::max(currentLL, broadcastLL);
+        x = (x & ~(uint32_t(0x3) << kLLShift)) | newLL;
+
+        opcode &=~uint32_t(Opcode::kCDSHL_Mask);
+        opcode |= Support::ctz(broadcastUnitSize) << Opcode::kCDSHL_Shift;
+      }
+      else {
+        // Add the compressed displacement 'SHF' to the opcode based on 'TTWLL'.
+        // The index to `x86CDisp8SHL` is composed as `CDTT[4:3] | W[2] | LL[1:0]`.
+        uint32_t TTWLL = ((opcode >> (Opcode::kCDTT_Shift - 3)) & 0x18) +
+                         ((opcode >> (Opcode::kW_Shift    - 2)) & 0x04) +
+                         ((x >> 29) & 0x3);
+        opcode += x86CDisp8SHL[TTWLL];
+      }
+
+      writer.emit32uLE(x);
+      writer.emit8(opcode.v);
+    }
+    else {
+      // Not EVEX, prepare `x` for VEX2 or VEX3:           x = [........|00L00000|0vvvv000|RXBmmmmm].
+      x |= ((opcode >> (kVSHR_W  + 8)) & 0x8000u) |         // [00000000|00L00000|Wvvvv000|RXBmmmmm].
+           ((opcode >> (kVSHR_PP + 8)) & 0x0300u) |         // [00000000|00L00000|Wvvvv0pp|RXBmmmmm].
+           ((x      >> 11            ) & 0x0400u) ;         // [00000000|00L00000|WvvvvLpp|RXBmmmmm].
+      x |= x86GetForceEvex3MaskInLastBit(options);          // [x0000000|00L00000|WvvvvLpp|RXBmmmmm].
+
+      // Clear a possible CDisp specified by EVEX.
+      opcode &= ~Opcode::kCDSHL_Mask;
+
+      // Check if VEX3 is required / forced:                   [x.......|........|x.......|.xxxxxx.].
+      if (x & 0x8000807Eu) {
+        uint32_t xorMsk = x86VEXPrefix[x & 0xF] | (opcode << 24);
+
+        // Clear all high bits.
+        x  = (x & 0xFFFF) << 8;                             // [00000000|WvvvvLpp|RXBmmmmm|00000000].
+                                                            //            ____    ___
+        x ^= xorMsk;                                        // [_OPCODE_|WvvvvLpp|RXBmmmmm|VEX3_XOP].
+        writer.emit32uLE(x);
+      }
+      else {
+        // 'mmmmm' must be '00001'.
+        ASMJIT_ASSERT((x & 0x1F) == 0x01);
+
+        x = ((x >> 8) ^ x) ^ 0xF9;
+        writer.emit8(kX86ByteVex2);
+        writer.emit8(x);
+        writer.emit8(opcode.v);
+      }
+    }
+  }
+
+  // MOD|SIB address.
+  if (!commonInfo->hasFlag(InstDB::InstFlags::kVsib))
+    goto EmitModSib;
+
+  // MOD|VSIB address without INDEX is invalid.
+  if (rmInfo & kX86MemInfo_Index)
+    goto EmitModVSib;
+  goto InvalidInstruction;
+
+  // Emit - Jmp/Jcc/Call
+  // -------------------
+
+EmitJmpCall:
+  {
+    // Emit REX prefix if asked for (64-bit only).
+    uint32_t rex = opcode.extractRex(options);
+    if (ASMJIT_UNLIKELY(x86IsRexInvalid(rex)))
+      goto InvalidRexPrefix;
+    rex &= ~kX86ByteInvalidRex & 0xFF;
+    writer.emit8If(rex | kX86ByteRex, rex != 0);
+
+    uint64_t ip = uint64_t(writer.offsetFrom(_bufferData));
+    uint32_t rel32 = 0;
+    uint32_t opCode8 = x86AltOpcodeOf(instInfo);
+
+    uint32_t inst8Size  = 1 + 1; //          OPCODE + REL8 .
+    uint32_t inst32Size = 1 + 4; // [PREFIX] OPCODE + REL32.
+
+    // Jcc instructions with 32-bit displacement use 0x0F prefix,
+    // other instructions don't. No other prefixes are used by X86.
+    ASMJIT_ASSERT((opCode8 & Opcode::kMM_Mask) == 0);
+    ASMJIT_ASSERT((opcode  & Opcode::kMM_Mask) == 0 ||
+                  (opcode  & Opcode::kMM_Mask) == Opcode::kMM_0F);
+
+    // Only one of these should be used at the same time.
+    inst32Size += uint32_t(opReg != 0);
+    inst32Size += uint32_t((opcode & Opcode::kMM_Mask) == Opcode::kMM_0F);
+
+    if (rmRel->isLabel()) {
+      label = _code->labelEntry(rmRel->as<Label>());
+      if (ASMJIT_UNLIKELY(!label))
+        goto InvalidLabel;
+
+      if (label->isBoundTo(_section)) {
+        // Label bound to the current section.
+        rel32 = uint32_t((label->offset() - ip - inst32Size) & 0xFFFFFFFFu);
+        goto EmitJmpCallRel;
+      }
+      else {
+        // Non-bound label or label bound to a different section.
+        if (opCode8 && (!opcode.v || Support::test(options, InstOptions::kShortForm))) {
+          writer.emit8(opCode8);
+
+          // Record DISP8 (non-bound label).
+          relOffset = -1;
+          relSize = 1;
+          goto EmitRel;
+        }
+        else {
+          // Refuse also 'short' prefix, if specified.
+          if (ASMJIT_UNLIKELY(!opcode.v || Support::test(options, InstOptions::kShortForm)))
+            goto InvalidDisplacement;
+
+          writer.emit8If(0x0F, (opcode & Opcode::kMM_Mask) != 0);// Emit 0F prefix.
+          writer.emit8(opcode.v);                                // Emit opcode.
+          writer.emit8If(x86EncodeMod(3, opReg, 0), opReg != 0); // Emit MOD.
+
+          // Record DISP32 (non-bound label).
+          relOffset = -4;
+          relSize = 4;
+          goto EmitRel;
+        }
+      }
+    }
+
+    if (rmRel->isImm()) {
+      uint64_t baseAddress = code()->baseAddress();
+      uint64_t jumpAddress = rmRel->as<Imm>().valueAs<uint64_t>();
+
+      // If the base-address is known calculate a relative displacement and check if it fits in 32 bits (which is
+      // always true in 32-bit mode). Emit relative displacement as it was a bound label if all checks are ok.
+      if (baseAddress != Globals::kNoBaseAddress) {
+        uint64_t rel64 = jumpAddress - (ip + baseAddress) - inst32Size;
+        if (Environment::is32Bit(arch()) || Support::isInt32(int64_t(rel64))) {
+          rel32 = uint32_t(rel64 & 0xFFFFFFFFu);
+          goto EmitJmpCallRel;
+        }
+        else {
+          // Relative displacement exceeds 32-bits - relocator can only insert trampoline for jmp/call, but not
+          // for jcc/jecxz.
+          if (ASMJIT_UNLIKELY(!x86IsJmpOrCall(instId)))
+            goto InvalidDisplacement;
+        }
+      }
+
+      err = _code->newRelocEntry(&re, RelocType::kAbsToRel);
+      if (ASMJIT_UNLIKELY(err))
+        goto Failed;
+
+      re->_sourceOffset = offset();
+      re->_sourceSectionId = _section->id();
+      re->_payload = jumpAddress;
+
+      if (ASMJIT_LIKELY(opcode.v)) {
+        // 64-bit: Emit REX prefix so the instruction can be patched later. REX prefix does nothing if not patched,
+        // but allows to patch the instruction to use MOD/M and to point to a memory where the final 64-bit address
+        // is stored.
+        if (Environment::is64Bit(arch()) && x86IsJmpOrCall(instId)) {
+          if (!rex)
+            writer.emit8(kX86ByteRex);
+
+          err = _code->addAddressToAddressTable(jumpAddress);
+          if (ASMJIT_UNLIKELY(err))
+            goto Failed;
+
+          re->_relocType = RelocType::kX64AddressEntry;
+        }
+
+        writer.emit8If(0x0F, (opcode & Opcode::kMM_Mask) != 0);  // Emit 0F prefix.
+        writer.emit8(opcode.v);                                  // Emit opcode.
+        writer.emit8If(x86EncodeMod(3, opReg, 0), opReg != 0);   // Emit MOD.
+        re->_format.resetToSimpleValue(OffsetType::kSignedOffset, 4);
+        re->_format.setLeadingAndTrailingSize(writer.offsetFrom(_bufferPtr), immSize);
+        writer.emit32uLE(0);                                     // Emit DISP32.
+      }
+      else {
+        writer.emit8(opCode8);                                   // Emit opcode.
+        re->_format.resetToSimpleValue(OffsetType::kSignedOffset, 1);
+        re->_format.setLeadingAndTrailingSize(writer.offsetFrom(_bufferPtr), immSize);
+        writer.emit8(0);                                         // Emit DISP8 (zero).
+      }
+      goto EmitDone;
+    }
+
+    // Not Label|Imm -> Invalid.
+    goto InvalidInstruction;
+
+    // Emit jmp/call with relative displacement known at assembly-time. Decide between 8-bit and 32-bit displacement
+    // encoding. Some instructions only allow either 8-bit or 32-bit encoding, others allow both encodings.
+EmitJmpCallRel:
+    if (Support::isInt8(int32_t(rel32 + inst32Size - inst8Size)) && opCode8 && !Support::test(options, InstOptions::kLongForm)) {
+      options |= InstOptions::kShortForm;
+      writer.emit8(opCode8);                                     // Emit opcode
+      writer.emit8(rel32 + inst32Size - inst8Size);              // Emit DISP8.
+      goto EmitDone;
+    }
+    else {
+      if (ASMJIT_UNLIKELY(!opcode.v || Support::test(options, InstOptions::kShortForm)))
+        goto InvalidDisplacement;
+
+      options &= ~InstOptions::kShortForm;
+      writer.emit8If(0x0F, (opcode & Opcode::kMM_Mask) != 0);    // Emit 0x0F prefix.
+      writer.emit8(opcode.v);                                    // Emit Opcode.
+      writer.emit8If(x86EncodeMod(3, opReg, 0), opReg != 0);     // Emit MOD.
+      writer.emit32uLE(rel32);                                   // Emit DISP32.
+      goto EmitDone;
+    }
+  }
+
+  // Emit - Relative
+  // ---------------
+
+EmitRel:
+  {
+    ASMJIT_ASSERT(relSize == 1 || relSize == 4);
+
+    // Chain with label.
+    size_t offset = size_t(writer.offsetFrom(_bufferData));
+    OffsetFormat of;
+    of.resetToSimpleValue(OffsetType::kSignedOffset, relSize);
+
+    LabelLink* link = _code->newLabelLink(label, _section->id(), offset, relOffset, of);
+    if (ASMJIT_UNLIKELY(!link))
+      goto OutOfMemory;
+
+    if (re)
+      link->relocId = re->id();
+
+    // Emit dummy zeros, must be patched later when the reference becomes known.
+    writer.emitZeros(relSize);
+  }
+  writer.emitImmediate(uint64_t(immValue), immSize);
+
+  // Emit - Done
+  // -----------
+
+EmitDone:
+  if (Support::test(options, InstOptions::kReserved)) {
+#ifndef ASMJIT_NO_LOGGING
+    if (_logger)
+      EmitterUtils::logInstructionEmitted(this, instId, options, o0, o1, o2, opExt, relSize, immSize, writer.cursor());
+#endif
+  }
+
+  resetExtraReg();
+  resetInstOptions();
+  resetInlineComment();
+
+  writer.done(this);
+  return kErrorOk;
+
+  // Error Handler
+  // -------------
+
+#define ERROR_HANDLER(ERR) ERR: err = DebugUtils::errored(kError##ERR); goto Failed;
+  ERROR_HANDLER(OutOfMemory)
+  ERROR_HANDLER(InvalidLabel)
+  ERROR_HANDLER(InvalidInstruction)
+  ERROR_HANDLER(InvalidLockPrefix)
+  ERROR_HANDLER(InvalidXAcquirePrefix)
+  ERROR_HANDLER(InvalidXReleasePrefix)
+  ERROR_HANDLER(InvalidRepPrefix)
+  ERROR_HANDLER(InvalidRexPrefix)
+  ERROR_HANDLER(InvalidEROrSAE)
+  ERROR_HANDLER(InvalidAddress)
+  ERROR_HANDLER(InvalidAddressIndex)
+  ERROR_HANDLER(InvalidAddress64Bit)
+  ERROR_HANDLER(InvalidDisplacement)
+  ERROR_HANDLER(InvalidPhysId)
+  ERROR_HANDLER(InvalidSegment)
+  ERROR_HANDLER(InvalidImmediate)
+  ERROR_HANDLER(InvalidBroadcast)
+  ERROR_HANDLER(OperandSizeMismatch)
+  ERROR_HANDLER(AmbiguousOperandSize)
+  ERROR_HANDLER(NotConsecutiveRegs)
+#undef ERROR_HANDLER
+
+Failed:
+#ifndef ASMJIT_NO_LOGGING
+  return EmitterUtils::logInstructionFailed(this, err, instId, options, o0, o1, o2, opExt);
+#else
+  resetExtraReg();
+  resetInstOptions();
+  resetInlineComment();
+  return reportError(err);
+#endif
+}
+
+// x86::Assembler - Align
+// =====================
+
+Error Assembler::align(AlignMode alignMode, uint32_t alignment) { // Pads by alignUpDiff(offset(), alignment) bytes; fill depends on alignMode.
+  if (ASMJIT_UNLIKELY(!_code))
+    return reportError(DebugUtils::errored(kErrorNotInitialized));
+
+  if (ASMJIT_UNLIKELY(uint32_t(alignMode) > uint32_t(AlignMode::kMaxValue)))
+    return reportError(DebugUtils::errored(kErrorInvalidArgument));
+
+  if (alignment <= 1) // 0 and 1 need no padding; succeed without emitting or logging anything.
+    return kErrorOk;
+
+  if (ASMJIT_UNLIKELY(!Support::isPowerOf2(alignment) || alignment > Globals::kMaxAlignment)) // Must be a power of two within kMaxAlignment.
+    return reportError(DebugUtils::errored(kErrorInvalidArgument));
+
+  uint32_t i = uint32_t(Support::alignUpDiff<size_t>(offset(), alignment)); // `i` counts the padding bytes still to be emitted.
+  if (i > 0) {
+    CodeWriter writer(this);
+    ASMJIT_PROPAGATE(writer.ensureSpace(this, i)); // Request room for all `i` bytes before emitting any of them.
+
+    uint8_t pattern = 0x00; // Fill byte for the generic loop below (kZero keeps this default).
+    switch (alignMode) {
+      case AlignMode::kCode: {
+        if (hasEncodingOption(EncodingOptions::kOptimizedAlign)) {
+          // Intel 64 and IA-32 Architectures Software Developer's Manual - Volume 2B (NOP).
+          enum { kMaxNopSize = 9 };
+
+          static const uint8_t nopData[kMaxNopSize][kMaxNopSize] = { // Row k holds the (k + 1)-byte NOP; unused tail bytes are zero.
+            { 0x90 },
+            { 0x66, 0x90 },
+            { 0x0F, 0x1F, 0x00 },
+            { 0x0F, 0x1F, 0x40, 0x00 },
+            { 0x0F, 0x1F, 0x44, 0x00, 0x00 },
+            { 0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00 },
+            { 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00 },
+            { 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 },
+            { 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 }
+          };
+
+          do { // Safe as a do-while: the enclosing `if (i > 0)` guarantees at least one byte of padding.
+            uint32_t n = Support::min<uint32_t>(i, kMaxNopSize); // Use the longest NOP that still fits.
+            const uint8_t* src = nopData[n - 1];
+
+            i -= n;
+            do {
+              writer.emit8(*src++);
+            } while (--n);
+          } while (i); // Leaves i == 0, so the generic fill loop below emits nothing.
+        }
+
+        pattern = 0x90; // Single-byte NOP fill; only takes effect when optimized alignment is disabled.
+        break;
+      }
+
+      case AlignMode::kData:
+        pattern = 0xCC;
+        break;
+
+      case AlignMode::kZero:
+        // Pattern already set to zero.
+        break;
+    }
+
+    while (i) { // Fill whatever padding remains with the chosen byte.
+      writer.emit8(pattern);
+      i--;
+    }
+
+    writer.done(this);
+  }
+
+#ifndef ASMJIT_NO_LOGGING
+  if (_logger) { // Logged even when the offset was already aligned (i == 0).
+    StringTmp<128> sb;
+    sb.appendChars(' ', _logger->indentation(FormatIndentationGroup::kCode));
+    sb.appendFormat("align %u\n", alignment);
+    _logger->log(sb);
+  }
+#endif
+
+  return kErrorOk;
+}
+
+// x86::Assembler - Events
+// =======================
+
+Error Assembler::onAttach(CodeHolder* code) noexcept { // Sets up the arch-dependent state once the base class has attached.
+  Arch arch = code->arch();
+  ASMJIT_PROPAGATE(Base::onAttach(code)); // NOTE(review): presumably returns early with the error if the base attach fails - confirm.
+
+  if (Environment::is32Bit(arch)) {
+    // 32-bit architecture (X86): add kX86_InvalidRex to the forced instruction options and select the X86 override mask.
+    _forcedInstOptions |= InstOptions::kX86_InvalidRex;
+    _setAddressOverrideMask(kX86MemInfo_67H_X86);
+  }
+  else {
+    // 64-bit architecture (X64): make sure kX86_InvalidRex is not forced and select the X64 override mask.
+    _forcedInstOptions &= ~InstOptions::kX86_InvalidRex;
+    _setAddressOverrideMask(kX86MemInfo_67H_X64);
+  }
+
+  return kErrorOk;
+}
+
+Error Assembler::onDetach(CodeHolder* code) noexcept { // Reverts the state configured by onAttach(), then detaches the base.
+  _forcedInstOptions &= ~InstOptions::kX86_InvalidRex; // Clear the option that onAttach() forces for 32-bit targets.
+  _setAddressOverrideMask(0); // Reset the address-override mask chosen in onAttach().
+  return Base::onDetach(code);
+}
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // !ASMJIT_NO_X86
diff --git a/lib/lepton/asmjit/x86/x86assembler.h b/lib/lepton/asmjit/x86/x86assembler.h
new file mode 100644
index 0000000000..dbffae6289
--- /dev/null
+++ b/lib/lepton/asmjit/x86/x86assembler.h
@@ -0,0 +1,685 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_X86_X86ASSEMBLER_H_INCLUDED
+#define ASMJIT_X86_X86ASSEMBLER_H_INCLUDED
+
+#include "../core/assembler.h"
+#include "../x86/x86emitter.h"
+#include "../x86/x86operand.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(x86)
+
+//! \addtogroup asmjit_x86
+//! \{
+
+//! X86/X64 assembler implementation.
+//!
+//! x86::Assembler is a code emitter that emits machine code directly into the \ref CodeBuffer. The assembler is capable
+//! of targeting both 32-bit and 64-bit instruction sets, the instruction set can be configured through \ref CodeHolder.
+//!
+//! ### Basics
+//!
+//! The following example shows a basic use of `x86::Assembler`, how to generate a function that works in both 32-bit
+//! and 64-bit modes, and how to connect \ref JitRuntime, \ref CodeHolder, and `x86::Assembler`.
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//! #include <stdio.h>
+//!
+//! using namespace asmjit;
+//!
+//! // Signature of the generated function.
+//! typedef int (*SumFunc)(const int* arr, size_t count);
+//!
+//! int main() {
+//!   JitRuntime rt;                    // Create a runtime specialized for JIT.
+//!   CodeHolder code;                  // Create a CodeHolder.
+//!
+//!   code.init(rt.environment());      // Initialize code to match the JIT environment.
+//!   x86::Assembler a(&code);          // Create and attach x86::Assembler to code.
+//!
+//!   // Decide between 32-bit CDECL, WIN64, and SysV64 calling conventions:
+//!   //   32-BIT - passes all arguments by stack.
+//!   //   WIN64  - passes first 4 arguments by RCX, RDX, R8, and R9.
+//!   //   UNIX64 - passes first 6 arguments by RDI, RSI, RDX, RCX, R8, and R9.
+//!   x86::Gp arr, cnt;
+//!   x86::Gp sum = x86::eax;           // Use EAX as 'sum' as it's a return register.
+//!
+//!   if (ASMJIT_ARCH_BITS == 64) {
+//!   #if defined(_WIN32)
+//!     arr = x86::rcx;                 // First argument (array ptr).
+//!     cnt = x86::rdx;                 // Second argument (number of elements)
+//!   #else
+//!     arr = x86::rdi;                 // First argument (array ptr).
+//!     cnt = x86::rsi;                 // Second argument (number of elements)
+//!   #endif
+//!   }
+//!   else {
+//!     arr = x86::edx;                 // Use EDX to hold the array pointer.
+//!     cnt = x86::ecx;                 // Use ECX to hold the counter.
+//!     // Fetch first and second arguments from [ESP + 4] and [ESP + 8].
+//!     a.mov(arr, x86::ptr(x86::esp, 4));
+//!     a.mov(cnt, x86::ptr(x86::esp, 8));
+//!   }
+//!
+//!   Label Loop = a.newLabel();        // To construct the loop, we need some labels.
+//!   Label Exit = a.newLabel();
+//!
+//!   a.xor_(sum, sum);                 // Clear 'sum' register (shorter than 'mov').
+//!   a.test(cnt, cnt);                 // Border case:
+//!   a.jz(Exit);                       //   If 'cnt' is zero jump to 'Exit' now.
+//!
+//!   a.bind(Loop);                     // Start of a loop iteration.
+//!   a.add(sum, x86::dword_ptr(arr));  // Add int at [arr] to 'sum'.
+//!   a.add(arr, 4);                    // Increment 'arr' pointer.
+//!   a.dec(cnt);                       // Decrease 'cnt'.
+//!   a.jnz(Loop);                      // If not zero jump to 'Loop'.
+//!
+//!   a.bind(Exit);                     // Exit to handle the border case.
+//!   a.ret();                          // Return from function ('sum' == 'eax').
+//!   // ----> x86::Assembler is no longer needed from here and can be destroyed <----
+//!
+//!   SumFunc fn;
+//!   Error err = rt.add(&fn, &code);   // Add the generated code to the runtime.
+//!
+//!   if (err) return 1;                // Handle a possible error returned by AsmJit.
+//!   // ----> CodeHolder is no longer needed from here and can be destroyed <----
+//!
+//!   static const int array[6] = { 4, 8, 15, 16, 23, 42 };
+//!
+//!   int result = fn(array, 6);        // Execute the generated code.
+//!   printf("%d\n", result);           // Print sum of array (108).
+//!
+//!   rt.release(fn);                   // Explicitly remove the function from the runtime
+//!   return 0;                         // Everything successful...
+//! }
+//! ```
+//!
+//! The example should be self-explanatory. It shows how to work with labels, how to use operands, and how to emit
+//! instructions that can use different registers based on runtime selection. It implements 32-bit CDECL, WIN64,
+//! and SysV64 calling conventions and will work on most X86/X64 environments.
+//!
+//! Although functions prologs / epilogs can be implemented manually, AsmJit provides utilities that can be used
+//! to create function prologs and epilogs automatically, see \ref asmjit_function for more details.
+//!
+//! ### Instruction Validation
+//!
+//! Assembler prefers speed over strictness by default. The implementation checks the type of operands and fails
+//! if the signature of types is invalid, however, it does only basic checks regarding registers and their groups
+//! used in instructions. It's possible to pass operands that don't form any valid signature to the implementation
+//! and succeed. This is usually not a problem as Assembler provides typed API so operand types are normally checked
+//! by C++ compiler at compile time, however, Assembler is fully dynamic and its \ref emit() function can be called
+//! with any instruction id, options, and operands. Moreover, it's also possible to form instructions that will be
+//! accepted by the typed API, for example by calling `mov(x86::eax, x86::al)` - the C++ compiler won't see a problem
+//! as both EAX and AL are \ref Gp registers.
+//!
+//! To help with common mistakes AsmJit allows to activate instruction validation. This feature instruments
+//! the Assembler to call \ref InstAPI::validate() before it attempts to encode any instruction.
+//!
+//! The example below illustrates how validation can be turned on:
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//! #include <stdio.h>
+//!
+//! using namespace asmjit;
+//!
+//! int main(int argc, char* argv[]) {
+//!   JitRuntime rt;                    // Create a runtime specialized for JIT.
+//!   CodeHolder code;                  // Create a CodeHolder.
+//!
+//!   code.init(rt.environment());      // Initialize code to match the JIT environment.
+//!   x86::Assembler a(&code);          // Create and attach x86::Assembler to code.
+//!
+//!   // Enable strict validation.
+//!   a.addDiagnosticOptions(DiagnosticOptions::kValidateAssembler);
+//!
+//!   // Try to encode invalid or ill-formed instructions.
+//!   Error err;
+//!
+//!   // Invalid instruction.
+//!   err = a.mov(x86::eax, x86::al);
+//!   printf("Status: %s\n", DebugUtils::errorAsString(err));
+//!
+//!   // Invalid instruction.
+//!   err = a.emit(x86::Inst::kIdMovss, x86::eax, x86::xmm0);
+//!   printf("Status: %s\n", DebugUtils::errorAsString(err));
+//!
+//!   // Ambiguous operand size - the pointer requires size.
+//!   err = a.inc(x86::ptr(x86::rax));
+//!   printf("Status: %s\n", DebugUtils::errorAsString(err));
+//!
+//!   return 0;
+//! }
+//! ```
+//!
+//! ### Native Registers
+//!
+//! All emitters provide functions to construct machine-size registers depending on the target. This feature is
+//! for users that want to write code targeting both 32-bit and 64-bit architectures at the same time. In AsmJit
+//! terminology such registers have prefix `z`, so for example on X86 architecture the following native registers
+//! are provided:
+//!
+//!   - `zax` - mapped to either `eax` or `rax`
+//!   - `zbx` - mapped to either `ebx` or `rbx`
+//!   - `zcx` - mapped to either `ecx` or `rcx`
+//!   - `zdx` - mapped to either `edx` or `rdx`
+//!   - `zsp` - mapped to either `esp` or `rsp`
+//!   - `zbp` - mapped to either `ebp` or `rbp`
+//!   - `zsi` - mapped to either `esi` or `rsi`
+//!   - `zdi` - mapped to either `edi` or `rdi`
+//!
+//! They are accessible through \ref x86::Assembler, \ref x86::Builder, and \ref x86::Compiler. The example below
+//! illustrates how to use this feature:
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//! #include <stdio.h>
+//!
+//! using namespace asmjit;
+//!
+//! typedef int (*Func)(void);
+//!
+//! int main(int argc, char* argv[]) {
+//!   JitRuntime rt;                    // Create a runtime specialized for JIT.
+//!   CodeHolder code;                  // Create a CodeHolder.
+//!
+//!   code.init(rt.environment());      // Initialize code to match the JIT environment.
+//!   x86::Assembler a(&code);          // Create and attach x86::Assembler to code.
+//!
+//!   // Let's get these registers from x86::Assembler.
+//!   x86::Gp zbp = a.zbp();
+//!   x86::Gp zsp = a.zsp();
+//!
+//!   int stackSize = 32;
+//!
+//!   // Function prolog.
+//!   a.push(zbp);
+//!   a.mov(zbp, zsp);
+//!   a.sub(zsp, stackSize);
+//!
+//!   // ... emit some code (this just sets return value to zero) ...
+//!   a.xor_(x86::eax, x86::eax);
+//!
+//!   // Function epilog and return.
+//!   a.mov(zsp, zbp);
+//!   a.pop(zbp);
+//!   a.ret();
+//!
+//!   // To make the example complete let's call it.
+//!   Func fn;
+//!   Error err = rt.add(&fn, &code);   // Add the generated code to the runtime.
+//!   if (err) return 1;                // Handle a possible error returned by AsmJit.
+//!
+//!   int result = fn();                // Execute the generated code.
+//!   printf("%d\n", result);           // Print the resulting "0".
+//!
+//!   rt.release(fn);                   // Remove the function from the runtime.
+//!   return 0;
+//! }
+//! ```
+//!
+//! The example just returns `0`, but the function generated contains a standard prolog and epilog sequence and the
+//! function itself reserves 32 bytes of local stack. The advantage is clear - a single code-base can handle multiple
+//! targets easily. If you want to create a register of native size dynamically by specifying its id it's also possible:
+//!
+//! ```
+//! void example(x86::Assembler& a) {
+//!   x86::Gp zax = a.gpz(x86::Gp::kIdAx);
+//!   x86::Gp zbx = a.gpz(x86::Gp::kIdBx);
+//!   x86::Gp zcx = a.gpz(x86::Gp::kIdCx);
+//!   x86::Gp zdx = a.gpz(x86::Gp::kIdDx);
+//!
+//!   // You can also change register's id easily.
+//!   x86::Gp zsp = zax;
+//!   zsp.setId(4); // or x86::Gp::kIdSp.
+//! }
+//! ```
+//!
+//! ### Data Embedding
+//!
+//! x86::Assembler extends the standard \ref BaseAssembler with X86/X64 specific conventions that are often used by
+//! assemblers to embed data next to the code. The following functions can be used to embed data:
+//!
+//!   - \ref BaseAssembler::embedInt8() - embeds int8_t (portable naming).
+//!   - \ref BaseAssembler::embedUInt8() - embeds uint8_t (portable naming).
+//!   - \ref BaseAssembler::embedInt16() - embeds int16_t (portable naming).
+//!   - \ref BaseAssembler::embedUInt16() - embeds uint16_t (portable naming).
+//!   - \ref BaseAssembler::embedInt32() - embeds int32_t (portable naming).
+//!   - \ref BaseAssembler::embedUInt32() - embeds uint32_t (portable naming).
+//!   - \ref BaseAssembler::embedInt64() - embeds int64_t (portable naming).
+//!   - \ref BaseAssembler::embedUInt64() - embeds uint64_t (portable naming).
+//!   - \ref BaseAssembler::embedFloat() - embeds float (portable naming).
+//!   - \ref BaseAssembler::embedDouble() - embeds double (portable naming).
+//!
+//!   - \ref x86::Assembler::db() - embeds byte (8 bits) (x86 naming).
+//!   - \ref x86::Assembler::dw() - embeds word (16 bits) (x86 naming).
+//!   - \ref x86::Assembler::dd() - embeds dword (32 bits) (x86 naming).
+//!   - \ref x86::Assembler::dq() - embeds qword (64 bits) (x86 naming).
+//!
+//! The following example illustrates how embed works:
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//! using namespace asmjit;
+//!
+//! void embedData(x86::Assembler& a) {
+//!   a.db(0xFF);         // Embeds 0xFF byte.
+//!   a.dw(0xFF00);       // Embeds 0xFF00 word (little-endian).
+//!   a.dd(0xFF000000);   // Embeds 0xFF000000 dword (little-endian).
+//!   a.embedFloat(0.4f); // Embeds 0.4f (32-bit float, little-endian).
+//! }
+//! ```
+//!
+//! Sometimes it's required to read the data that is embedded after code. This can be done through
+//! \ref Label as shown below:
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//! using namespace asmjit;
+//!
+//! void embedData(x86::Assembler& a, const Label& L_Data) {
+//!   x86::Gp addr = a.zax();  // EAX or RAX.
+//!   x86::Gp val = x86::edi;  // Where to store some value...
+//!
+//!   // Approach 1 - Load the address to register through LEA. This approach
+//!   //              is flexible as the address can be then manipulated, for
+//!   //              example if you have a data array, which would need index.
+//!   a.lea(addr, L_Data);     // Loads the address of the label to EAX or RAX.
+//!   a.mov(val, x86::dword_ptr(addr));
+//!
+//!   // Approach 2 - Load the data directly by using L_Data in address. It's
+//!   //              worth noting that this doesn't work with indexes in X64
+//!   //              mode. It will use absolute address in 32-bit mode and
+//!   //              relative address (RIP) in 64-bit mode.
+//!   a.mov(val, x86::dword_ptr(L_Data));
+//! }
+//! ```
+//!
+//! ### Label Embedding
+//!
+//! It's also possible to embed labels. In general AsmJit provides the following options:
+//!
+//!   - \ref BaseEmitter::embedLabel() - Embeds absolute address of a label. This is target dependent and would
+//!     embed either 32-bit or 64-bit data that embeds absolute label address. This kind of embedding cannot be
+//!     used in a position independent code.
+//!
+//!   - \ref BaseEmitter::embedLabelDelta() - Embeds a difference between two labels. The size of the difference
+//!     can be specified so it's possible to embed 8-bit, 16-bit, 32-bit, and 64-bit difference, which is sufficient
+//!     for most purposes.
+//!
+//! The following example demonstrates how to embed labels and their differences:
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//! using namespace asmjit;
+//!
+//! void embedLabel(x86::Assembler& a, const Label& L_Data) {
+//!   // [1] Embed L_Data - the size of the data will be dependent on the target.
+//!   a.embedLabel(L_Data);
+//!
+//!   // [2] Embed a 32-bit difference of two labels.
+//!   Label L_Here = a.newLabel();
+//!   a.bind(L_Here);
+//!   // Embeds int32_t(L_Data - L_Here).
+//!   a.embedLabelDelta(L_Data, L_Here, 4);
+//! }
+//! ```
+//!
+//! ### Using FuncFrame and FuncDetail with x86::Assembler
+//!
+//! The example below demonstrates how \ref FuncFrame and \ref FuncDetail can be used together with \ref x86::Assembler
+//! to generate a function that will use platform dependent calling conventions automatically depending on the target:
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//! #include <stdio.h>
+//!
+//! using namespace asmjit;
+//!
+//! typedef void (*SumIntsFunc)(int* dst, const int* a, const int* b);
+//!
+//! int main(int argc, char* argv[]) {
+//!   JitRuntime rt;                    // Create JIT Runtime.
+//!   CodeHolder code;                  // Create a CodeHolder.
+//!
+//!   code.init(rt.environment());      // Initialize code to match the JIT environment.
+//!   x86::Assembler a(&code);          // Create and attach x86::Assembler to code.
+//!
+//!   // Decide which registers will be mapped to function arguments. Try changing
+//!   // registers of dst, src_a, and src_b and see what happens in function's
+//!   // prolog and epilog.
+//!   x86::Gp dst   = a.zax();
+//!   x86::Gp src_a = a.zcx();
+//!   x86::Gp src_b = a.zdx();
+//!
+//!   x86::Xmm vec0 = x86::xmm0;
+//!   x86::Xmm vec1 = x86::xmm1;
+//!
+//!   // Create/initialize FuncDetail and FuncFrame.
+//!   FuncDetail func;
+//!   func.init(FuncSignatureT<void, int*, const int*, const int*>(CallConvId::kHost));
+//!
+//!   FuncFrame frame;
+//!   frame.init(func);
+//!
+//!   // Make XMM0 and XMM1 dirty - RegGroup::kVec describes XMM|YMM|ZMM registers.
+//!   frame.setDirtyRegs(RegGroup::kVec, IntUtils::mask(0, 1));
+//!
+//!   // Alternatively, if you don't want to use register masks you can pass BaseReg
+//!   // to addDirtyRegs(). The following code would add both xmm0 and xmm1.
+//!   frame.addDirtyRegs(x86::xmm0, x86::xmm1);
+//!
+//!   FuncArgsAssignment args(&func);   // Create arguments assignment context.
+//!   args.assignAll(dst, src_a, src_b);// Assign our registers to arguments.
+//!   args.updateFrameInfo(frame);      // Reflect our args in FuncFrame.
+//!   frame.finalize();                 // Finalize the FuncFrame (updates it).
+//!
+//!   a.emitProlog(frame);              // Emit function prolog.
+//!   a.emitArgsAssignment(frame, args);// Assign arguments to registers.
+//!   a.movdqu(vec0, x86::ptr(src_a));  // Load 4 ints from [src_a] to XMM0.
+//!   a.movdqu(vec1, x86::ptr(src_b));  // Load 4 ints from [src_b] to XMM1.
+//!   a.paddd(vec0, vec1);              // Add 4 ints in XMM1 to XMM0.
+//!   a.movdqu(x86::ptr(dst), vec0);    // Store the result to [dst].
+//!   a.emitEpilog(frame);              // Emit function epilog and return.
+//!
+//!   SumIntsFunc fn;
+//!   Error err = rt.add(&fn, &code);   // Add the generated code to the runtime.
+//!   if (err) return 1;                // Handle a possible error case.
+//!
+//!   // Execute the generated function.
+//!   int inA[4] = { 4, 3, 2, 1 };
+//!   int inB[4] = { 1, 5, 2, 8 };
+//!   int out[4];
+//!   fn(out, inA, inB);
+//!
+//!   // Prints {5 8 4 9}
+//!   printf("{%d %d %d %d}\n", out[0], out[1], out[2], out[3]);
+//!
+//!   rt.release(fn);
+//!   return 0;
+//! }
+//! ```
+//!
+//! ### Using x86::Assembler as Code-Patcher
+//!
+//! This is an advanced topic that is sometimes unavoidable. AsmJit by default appends machine code it generates
+//! into a \ref CodeBuffer, however, it also allows to set the offset in \ref CodeBuffer explicitly and to overwrite
+//! its content. This technique is extremely dangerous as X86 instructions have variable length (see below), so you
+//! should in general only patch code to change instruction's immediate values or some other details not known at
+//! the time the instruction was emitted. A typical scenario that requires code-patching is when you start emitting
+//! function and you don't know how much stack you want to reserve for it.
+//!
+//! Before we go further it's important to introduce instruction options, because they can help with code-patching
+//! (and not only patching, but that will be explained in AVX-512 section):
+//!
+//!   - Many general-purpose instructions (especially arithmetic ones) on X86 have multiple encodings - in AsmJit
+//!     this is usually called 'short form' and 'long form'.
+//!
+//!   - AsmJit always tries to use 'short form' as it makes the resulting machine-code smaller, which is always
+//!     good - this decision is used by majority of assemblers out there.
+//!
+//!   - AsmJit allows to override the default decision by using `short_()` and `long_()` instruction options to force
+//!     short or long form, respectively. The most useful is `long_()` as it basically forces AsmJit to always emit
+//!     the longest form. The `short_()` is not that useful as it's automatic (except jumps to non-bound labels). Note
+//!     that the underscore after each function name avoids collision with built-in C++ types.
+//!
+//! To illustrate what short form and long form means in binary let's assume we want to emit "add esp, 16" instruction,
+//! which has two possible binary encodings:
+//!
+//!   - `83C410` - This is a short form aka `short add esp, 16` - You can see opcode byte (0x83), MOD/RM byte (0xC4)
+//!     and an 8-bit immediate value representing `16`.
+//!
+//!   - `81C410000000` - This is a long form aka `long add esp, 16` - You can see a different opcode byte (0x81), the
+//!     same Mod/RM byte (0xC4) and a 32-bit immediate in little-endian representing `16`.
+//!
+//! It should be obvious that patching an existing instruction into an instruction having a different size may create
+//! various problems. So it's recommended to be careful and to only patch instructions into instructions having the
+//! same size. The example below demonstrates how instruction options can be used to guarantee the size of an
+//! instruction by forcing the assembler to use long-form encoding:
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//! #include <stdio.h>
+//!
+//! using namespace asmjit;
+//!
+//! typedef int (*Func)(void);
+//!
+//! int main(int argc, char* argv[]) {
+//!   JitRuntime rt;                    // Create a runtime specialized for JIT.
+//!   CodeHolder code;                  // Create a CodeHolder.
+//!
+//!   code.init(rt.environment());      // Initialize code to match the JIT environment.
+//!   x86::Assembler a(&code);          // Create and attach x86::Assembler to code.
+//!
+//!   // Let's get these registers from x86::Assembler.
+//!   x86::Gp zbp = a.zbp();
+//!   x86::Gp zsp = a.zsp();
+//!
+//!   // Function prolog.
+//!   a.push(zbp);
+//!   a.mov(zbp, zsp);
+//!
+//!   // This is where we are gonna patch the code later, so let's get the offset
+//!   // (the current location) from the beginning of the code-buffer.
+//!   size_t patchOffset = a.offset();
+//!   // Let's just emit 'sub zsp, 0' for now, but don't forget to use LONG form.
+//!   a.long_().sub(zsp, 0);
+//!
+//!   // ... emit some code (this just sets return value to zero) ...
+//!   a.xor_(x86::eax, x86::eax);
+//!
+//!   // Function epilog and return.
+//!   a.mov(zsp, zbp);
+//!   a.pop(zbp);
+//!   a.ret();
+//!
+//!   // Now we know how much stack size we want to reserve. I have chosen 128
+//!   // bytes on purpose as it's encodable only in long form that we have used.
+//!
+//!   int stackSize = 128;              // Number of bytes to reserve on the stack.
+//!   a.setOffset(patchOffset);         // Move the current cursor to `patchOffset`.
+//!   a.long_().sub(zsp, stackSize);    // Patch the code; don't forget to use LONG form.
+//!
+//!   // Now the code is ready to be called
+//!   Func fn;
+//!   Error err = rt.add(&fn, &code);   // Add the generated code to the runtime.
+//!   if (err) return 1;                // Handle a possible error returned by AsmJit.
+//!
+//!   int result = fn();                // Execute the generated code.
+//!   printf("%d\n", result);           // Print the resulting "0".
+//!
+//!   rt.release(fn);                   // Remove the function from the runtime.
+//!   return 0;
+//! }
+//! ```
+//!
+//! If you run the example it will just work, because both instructions have the same size. As an experiment you can
+//! try removing `long_()` form to see what happens when wrong code is generated.
+//!
+//! ### Code Patching and REX Prefix
+//!
+//! In 64-bit mode there is one more thing to worry about when patching code: REX prefix. It's a single byte prefix
+//! designed to address registers with ids from 8 to 15 and to override the default width of operation from 32 to 64
+//! bits. AsmJit, like other assemblers, only emits REX prefix when it's necessary. If the patched code only changes
+//! the immediate value as shown in the previous example then there is nothing to worry about as it doesn't change
+//! the logic behind emitting REX prefix, however, if the patched code changes register id or overrides the operation
+//! width then it's important to take care of REX prefix as well.
+//!
+//! AsmJit contains another instruction option that controls (forces) REX prefix - `rex()`. If you use it the
+//! instruction emitted will always use REX prefix even when it's encodable without it. The following list contains
+//! some instructions and their binary representations to illustrate when it's emitted:
+//!
+//!   - `__83C410` - `add esp, 16`     - 32-bit operation in 64-bit mode doesn't require REX prefix.
+//!   - `4083C410` - `rex add esp, 16` - 32-bit operation in 64-bit mode with forced REX prefix (0x40).
+//!   - `4883C410` - `add rsp, 16`     - 64-bit operation in 64-bit mode requires REX prefix (0x48).
+//!   - `4183C410` - `add r12d, 16`    - 32-bit operation in 64-bit mode using R12D requires REX prefix (0x41).
+//!   - `4983C410` - `add r12, 16`     - 64-bit operation in 64-bit mode using R12 requires REX prefix (0x49).
+//!
+//! ### More Prefixes
+//!
+//! X86 architecture is known for its prefixes. AsmJit supports all prefixes
+//! that can affect how the instruction is encoded:
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//!
+//! using namespace asmjit;
+//!
+//! void prefixesExample(x86::Assembler& a) {
+//!   // Lock prefix for implementing atomics:
+//!   //   lock add dword ptr [dst], 1
+//!   a.lock().add(x86::dword_ptr(dst), 1);
+//!
+//!   // Similarly, XAcquire/XRelease prefixes are also available:
+//!   //   xacquire add dword ptr [dst], 1
+//!   a.xacquire().add(x86::dword_ptr(dst), 1);
+//!
+//!   // Rep prefix (see also repe/repz and repne/repnz):
+//!   //   rep movs byte ptr [dst], byte ptr [src]
+//!   a.rep().movs(x86::byte_ptr(dst), x86::byte_ptr(src));
+//!
+//!   // Forcing REX prefix in 64-bit mode.
+//!   //   rex mov eax, 1
+//!   a.rex().mov(x86::eax, 1);
+//!
+//!   // AVX instruction without forced prefix uses the shortest encoding:
+//!   //   vaddpd xmm0, xmm1, xmm2 -> [C5|F1|58|C2]
+//!   a.vaddpd(x86::xmm0, x86::xmm1, x86::xmm2);
+//!
+//!   // Forcing VEX3 prefix (AVX):
+//!   //   vex3 vaddpd xmm0, xmm1, xmm2 -> [C4|E1|71|58|C2]
+//!   a.vex3().vaddpd(x86::xmm0, x86::xmm1, x86::xmm2);
+//!
+//!   // Forcing EVEX prefix (AVX512):
+//!   //   evex vaddpd xmm0, xmm1, xmm2 -> [62|F1|F5|08|58|C2]
+//!   a.evex().vaddpd(x86::xmm0, x86::xmm1, x86::xmm2);
+//!
+//!   // Some instructions accept prefixes not originally intended to:
+//!   //   rep ret
+//!   a.rep().ret();
+//! }
+//! ```
+//!
+//! It's important to understand that prefixes are part of instruction options. When a member function that involves
+//! adding a prefix is called the prefix is combined with existing instruction options, which will affect the next
+//! instruction generated.
+//!
+//! ### Generating AVX512 Code
+//!
+//! x86::Assembler can generate AVX512+ code including the use of opmask registers. Opmask can be specified through
+//! \ref x86::Assembler::k() function, which stores it as an extra register, which will be used by the next
+//! instruction. AsmJit uses the same concept for manipulating instruction options as well.
+//!
+//! The following AVX512 features are supported:
+//!
+//!   - Opmask selector {k} and zeroing {z}.
+//!   - Rounding modes {rn|rd|ru|rz} and suppress-all-exceptions {sae} option.
+//!   - AVX512 broadcasts {1toN}.
+//!
+//! The following example demonstrates how AVX512 features can be used:
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//!
+//! using namespace asmjit;
+//!
+//! void generateAVX512Code(x86::Assembler& a) {
+//!   using namespace x86;
+//!
+//!   // Opmask Selectors
+//!   // ----------------
+//!   //
+//!   //   - Opmask / zeroing is part of the instruction options / extraReg.
+//!   //   - k(reg) is like {kreg} in Intel syntax.
+//!   //   - z() is like {z} in Intel syntax.
+//!
+//!   // vaddpd zmm0 {k1} {z}, zmm1, zmm2
+//!   a.k(k1).z().vaddpd(zmm0, zmm1, zmm2);
+//!
+//!   // Memory Broadcasts
+//!   // -----------------
+//!   //
+//!   //   - Broadcast data is part of memory operand.
+//!   //   - Use x86::Mem::_1toN(), which returns a new x86::Mem operand.
+//!
+//!   // vaddpd zmm0 {k1} {z}, zmm1, [rcx] {1to8}
+//!   a.k(k1).z().vaddpd(zmm0, zmm1, x86::mem(rcx)._1to8());
+//!
+//!   // Embedded Rounding & Suppress-All-Exceptions
+//!   // -------------------------------------------
+//!   //
+//!   //   - Rounding mode and {sae} are part of instruction options.
+//!   //   - Use sae() to enable exception suppression.
+//!   //   - Use rn_sae(), rd_sae(), ru_sae(), and rz_sae() - to enable rounding.
+//!   //   - Embedded rounding implicitly sets {sae} as well, that's why the API
+//!   //     also has the `_sae` suffix, to make it clear.
+//!
+//!   // vcmppd k1, zmm1, zmm2, 0x00 {sae}
+//!   a.sae().vcmppd(k1, zmm1, zmm2, 0);
+//!
+//!   // vaddpd zmm0, zmm1, zmm2 {rz}
+//!   a.rz_sae().vaddpd(zmm0, zmm1, zmm2);
+//! }
+//! ```
+class ASMJIT_VIRTAPI Assembler
+  : public BaseAssembler,
+    public EmitterImplicitT<Assembler> {
+public:
+  ASMJIT_NONCOPYABLE(Assembler)
+  typedef BaseAssembler Base; //!< Alias of the base class.
+
+  //! \name Construction & Destruction
+  //! \{
+
+  ASMJIT_API explicit Assembler(CodeHolder* code = nullptr) noexcept; //!< Creates the assembler; \p code is optional (defaults to `nullptr`).
+  ASMJIT_API virtual ~Assembler() noexcept;
+
+  //! \}
+
+  //! \cond INTERNAL
+  //! \name Internal
+  //! \{
+
+  // NOTE: x86::Assembler uses _privateData to store 'address-override' bit that is used to decide whether to emit
+  // address-override (67H) prefix based on the memory BASE+INDEX registers. It's either `kX86MemInfo_67H_X86` or
+  // `kX86MemInfo_67H_X64`.
+  inline uint32_t _addressOverrideMask() const noexcept { return _privateData; }
+  inline void _setAddressOverrideMask(uint32_t m) noexcept { _privateData = m; }
+
+  //! \}
+  //! \endcond
+  //! \cond INTERNAL
+  //! \name Emit
+  //! \{
+
+  ASMJIT_API Error _emit(InstId instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_* opExt) override;
+
+  //! \}
+  //! \endcond
+
+  //! \name Align
+  //! \{
+
+  ASMJIT_API Error align(AlignMode alignMode, uint32_t alignment) override;
+
+  //! \}
+
+  //! \name Events
+  //! \{
+
+  ASMJIT_API Error onAttach(CodeHolder* code) noexcept override;
+  ASMJIT_API Error onDetach(CodeHolder* code) noexcept override;
+
+  //! \}
+};
+
+//! \}
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // ASMJIT_X86_X86ASSEMBLER_H_INCLUDED
diff --git a/lib/lepton/asmjit/x86/x86builder.cpp b/lib/lepton/asmjit/x86/x86builder.cpp
new file mode 100644
index 0000000000..a27948b65d
--- /dev/null
+++ b/lib/lepton/asmjit/x86/x86builder.cpp
@@ -0,0 +1,52 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#if !defined(ASMJIT_NO_X86) && !defined(ASMJIT_NO_BUILDER)
+
+#include "../x86/x86assembler.h"
+#include "../x86/x86builder.h"
+#include "../x86/x86emithelper_p.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(x86)
+
+// x86::Builder - Construction & Destruction
+// =========================================
+
+Builder::Builder(CodeHolder* code) noexcept : BaseBuilder() {
+  _archMask = (uint64_t(1) << uint32_t(Arch::kX86)) | // One bit per Arch enum value: set the X86 bit...
+              (uint64_t(1) << uint32_t(Arch::kX64)) ; // ...and the X64 bit.
+  assignEmitterFuncs(this); // NOTE(review): helper defined elsewhere; presumably installs the x86 emitter callbacks - confirm.
+
+  if (code) // Attaching is optional: a null `code` leaves this builder unattached.
+    code->attach(this);
+}
+Builder::~Builder() noexcept {} // Empty body: no explicit cleanup is performed here.
+
+// x86::Builder - Events
+// =====================
+
+Error Builder::onAttach(CodeHolder* code) noexcept {
+  return Base::onAttach(code); // No x86-specific work: forwards to Base (BaseBuilder) and returns its Error.
+}
+
+Error Builder::onDetach(CodeHolder* code) noexcept {
+  return Base::onDetach(code); // No x86-specific work: forwards to Base (BaseBuilder) and returns its Error.
+}
+
+// x86::Builder - Finalize
+// =======================
+
+Error Builder::finalize() {
+  ASMJIT_PROPAGATE(runPasses()); // Run the passes first; ASMJIT_PROPAGATE presumably returns early on error (macro defined elsewhere).
+  Assembler a(_code); // Local x86::Assembler constructed from this builder's `_code`.
+  a.addEncodingOptions(encodingOptions()); // Hand this builder's encoding options to the assembler...
+  a.addDiagnosticOptions(diagnosticOptions()); // ...and its diagnostic options as well.
+  return serializeTo(&a); // Serialize the builder's content to the assembler; its Error is the result.
+}
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // !ASMJIT_NO_X86 && !ASMJIT_NO_BUILDER
diff --git a/lib/lepton/asmjit/x86/x86builder.h b/lib/lepton/asmjit/x86/x86builder.h
new file mode 100644
index 0000000000..f3bb11a0ca
--- /dev/null
+++ b/lib/lepton/asmjit/x86/x86builder.h
@@ -0,0 +1,351 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_X86_X86BUILDER_H_INCLUDED
+#define ASMJIT_X86_X86BUILDER_H_INCLUDED
+
+#include "../core/api-config.h"
+#ifndef ASMJIT_NO_BUILDER
+
+#include "../core/builder.h"
+#include "../x86/x86emitter.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(x86)
+
+//! \addtogroup asmjit_x86
+//! \{
+
+//! X86/X64 builder implementation.
+//!
+//! The code representation used by \ref BaseBuilder is compatible with everything AsmJit provides. Each instruction
+//! is stored as \ref InstNode, which contains instruction id, options, and operands. Each instruction emitted will
+//! create a new \ref InstNode instance and add it to the current cursor in the double-linked list of nodes. Since
+//! the instruction stream used by \ref BaseBuilder can be manipulated, we can rewrite the SumInts example from
+//! \ref asmjit_assembler into the following:
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//! #include <stdio.h>
+//!
+//! using namespace asmjit;
+//!
+//! typedef void (*SumIntsFunc)(int* dst, const int* a, const int* b);
+//!
+//! // Small helper function to print the current content of `builder`.
+//! static void dumpCode(BaseBuilder& builder, const char* phase) {
+//!   String sb;
+//!   builder.dump(sb);
+//!   printf("%s:\n%s\n", phase, sb.data());
+//! }
+//!
+//! int main() {
+//!   JitRuntime rt;                    // Create JIT Runtime.
+//!   CodeHolder code;                  // Create a CodeHolder.
+//!
+//!   code.init(rt.environment());      // Initialize code to match the JIT environment.
+//!   x86::Builder cb(&code);           // Create and attach x86::Builder to `code`.
+//!
+//!   // Decide which registers will be mapped to function arguments. Try changing registers
+//!   // of `dst`, `srcA`, and `srcB` and see what happens in function's prolog and epilog.
+//!   x86::Gp dst = cb.zax();
+//!   x86::Gp srcA = cb.zcx();
+//!   x86::Gp srcB = cb.zdx();
+//!
+//!   x86::Xmm vec0 = x86::xmm0;
+//!   x86::Xmm vec1 = x86::xmm1;
+//!
+//!   // Create and initialize `FuncDetail`.
+//!   FuncDetail func;
+//!   func.init(FuncSignatureT<void, int*, const int*, const int*>(CallConvId::kHost));
+//!
+//!   // Remember prolog insertion point.
+//!   BaseNode* prologInsertionPoint = cb.cursor();
+//!
+//!   // Emit function body:
+//!   cb.movdqu(vec0, x86::ptr(srcA));  // Load 4 ints from [srcA] to XMM0.
+//!   cb.movdqu(vec1, x86::ptr(srcB));  // Load 4 ints from [srcB] to XMM1.
+//!   cb.paddd(vec0, vec1);             // Add 4 ints in XMM1 to XMM0.
+//!   cb.movdqu(x86::ptr(dst), vec0);   // Store the result to [dst].
+//!
+//!   // Remember epilog insertion point.
+//!   BaseNode* epilogInsertionPoint = cb.cursor();
+//!
+//!   // Let's see what we have now.
+//!   dumpCode(cb, "Raw Function");
+//!
+//!   // Now, after we emitted the function body, we can insert the prolog, arguments
+//!   // allocation, and epilog. This is not possible when using pure x86::Assembler.
+//!   FuncFrame frame;
+//!   frame.init(func);
+//!
+//!   // Make XMM0 and XMM1 dirty; RegGroup::kVec describes XMM|YMM|ZMM registers.
+//!   frame.setDirtyRegs(RegGroup::kVec, Support::bitMask(0, 1));
+//!
+//!   FuncArgsAssignment args(&func);   // Create arguments assignment context.
+//!   args.assignAll(dst, srcA, srcB);  // Assign our registers to arguments.
+//!   args.updateFrame(frame);          // Reflect our args in FuncFrame.
+//!   frame.finalize();                 // Finalize the FuncFrame (updates it).
+//!
+//!   // Insert function prolog and allocate arguments to registers.
+//!   cb.setCursor(prologInsertionPoint);
+//!   cb.emitProlog(frame);
+//!   cb.emitArgsAssignment(frame, args);
+//!
+//!   // Insert function epilog.
+//!   cb.setCursor(epilogInsertionPoint);
+//!   cb.emitEpilog(frame);
+//!
+//!   // Let's see how the function's prolog and epilog look.
+//!   dumpCode(cb, "Prolog & Epilog");
+//!
+//!   // IMPORTANT: Builder requires finalize() to be called to serialize its
+//!   // code to the Assembler (it automatically creates one if not attached).
+//!   cb.finalize();
+//!
+//!   SumIntsFunc fn;
+//!   Error err = rt.add(&fn, &code);   // Add the generated code to the runtime.
+//!   if (err) return 1;                // Handle a possible error case.
+//!
+//!   // Execute the generated function.
+//!   int inA[4] = { 4, 3, 2, 1 };
+//!   int inB[4] = { 1, 5, 2, 8 };
+//!   int out[4];
+//!   fn(out, inA, inB);
+//!
+//!   // Prints {5 8 4 9}
+//!   printf("{%d %d %d %d}\n", out[0], out[1], out[2], out[3]);
+//!
+//!   rt.release(fn);                   // Explicitly remove the function from the runtime.
+//!   return 0;
+//! }
+//! ```
+//!
+//! When the example is executed it should output the following (this one using AMD64-SystemV ABI):
+//!
+//! ```
+//! Raw Function:
+//! movdqu xmm0, [rcx]
+//! movdqu xmm1, [rdx]
+//! paddd xmm0, xmm1
+//! movdqu [rax], xmm0
+//!
+//! Prolog & Epilog:
+//! mov rax, rdi
+//! mov rcx, rsi
+//! movdqu xmm0, [rcx]
+//! movdqu xmm1, [rdx]
+//! paddd xmm0, xmm1
+//! movdqu [rax], xmm0
+//! ret
+//!
+//! {5 8 4 9}
+//! ```
+//!
+//! The number of use-cases of \ref BaseBuilder is not limited and highly depends on your creativity and experience.
+//! The previous example can be easily improved to collect all dirty registers inside the function programmatically
+//! and to pass them to \ref FuncFrame::setDirtyRegs().
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//!
+//! using namespace asmjit;
+//!
+//! // NOTE: This function doesn't cover all possible constructs. It ignores instructions that write
+//! // to implicit registers that are not part of the operand list. It also counts read-only registers.
+//! // Real implementation would be a bit more complicated, but still relatively easy to implement.
+//! static void collectDirtyRegs(const BaseNode* first,
+//!                              const BaseNode* last,
+//!                              Support::Array<RegMask, Globals::kNumVirtGroups>& regMask) {
+//!   const BaseNode* node = first;
+//!   while (node) {
+//!     if (node->actsAsInst()) {
+//!       const InstNode* inst = node->as<InstNode>();
+//!       const Operand* opArray = inst->operands();
+//!
+//!       for (uint32_t i = 0, opCount = inst->opCount(); i < opCount; i++) {
+//!         const Operand& op = opArray[i];
+//!         if (op.isReg()) {
+//!           const x86::Reg& reg = op.as<x86::Reg>();
+//!           if (reg.group() <= RegGroup::kMaxVirt) {
+//!             regMask[reg.group()] |= 1u << reg.id();
+//!           }
+//!         }
+//!       }
+//!     }
+//!
+//!     if (node == last)
+//!       break;
+//!     node = node->next();
+//!   }
+//! }
+//!
+//! static void setDirtyRegsOfFuncFrame(const x86::Builder& builder, FuncFrame& frame) {
+//!   Support::Array<RegMask, Globals::kNumVirtGroups> regMask {};
+//!   collectDirtyRegs(builder.firstNode(), builder.lastNode(), regMask);
+//!
+//!   // X86/X64 ABIs only require to save GP/XMM registers:
+//!   frame.setDirtyRegs(RegGroup::kGp, regMask[RegGroup::kGp]);
+//!   frame.setDirtyRegs(RegGroup::kVec, regMask[RegGroup::kVec]);
+//! }
+//! ```
+//!
+//! ### Casting Between Various Emitters
+//!
+//! Even though \ref BaseAssembler and \ref BaseBuilder provide the same interface as defined by \ref BaseEmitter their
+//! platform-dependent variants like \ref x86::Assembler and \ref x86::Builder cannot be interchanged or cast to each
+//! other by using a C++ `static_cast<>`. The main reason is the inheritance graph of these classes is different and
+//! cast-incompatible, as illustrated below:
+//!
+//! ```
+//!                                             +--------------+      +=========================+
+//!                    +----------------------->| x86::Emitter |<--+--# x86::EmitterImplicitT<> #<--+
+//!                    |                        +--------------+   |  +=========================+   |
+//!                    |                           (abstract)      |           (mixin)              |
+//!                    |   +--------------+     +~~~~~~~~~~~~~~+   |                                |
+//!                    +-->| BaseAssembler|---->|x86::Assembler|<--+                                |
+//!                    |   +--------------+     +~~~~~~~~~~~~~~+   |                                |
+//!                    |      (abstract)            (final)        |                                |
+//! +===============+  |   +--------------+     +~~~~~~~~~~~~~~+   |                                |
+//! #  BaseEmitter  #--+-->|  BaseBuilder |--+->| x86::Builder |<--+                                |
+//! +===============+      +--------------+  |  +~~~~~~~~~~~~~~+                                    |
+//!    (abstract)             (abstract)     |      (final)                                         |
+//!                    +---------------------+                                                      |
+//!                    |                                                                            |
+//!                    |   +--------------+     +~~~~~~~~~~~~~~+      +=========================+   |
+//!                    +-->| BaseCompiler |---->| x86::Compiler|<-----# x86::EmitterExplicitT<> #---+
+//!                        +--------------+     +~~~~~~~~~~~~~~+      +=========================+
+//!                           (abstract)            (final)                   (mixin)
+//! ```
+//!
+//! The graph basically shows that it's not possible to cast between \ref x86::Assembler and \ref x86::Builder.
+//! However, since both share the base interface (\ref BaseEmitter) it's possible to cast them to a class that
+//! cannot be instantiated, but defines the same interface - the class is called \ref x86::Emitter and was
+//! introduced to make it possible to write a function that can emit to both \ref x86::Assembler and \ref
+//! x86::Builder. Note that \ref x86::Emitter cannot be created, it's abstract and has private constructors and
+//! destructors; it was only designed to be cast to and used as an interface.
+//!
+//! Each architecture-specific emitter implements a member function called
+//! `as<arch::Emitter>()`, which casts the instance to the architecture
+//! specific emitter as illustrated below:
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//!
+//! using namespace asmjit;
+//!
+//! static void emitSomething(x86::Emitter* e) {
+//!   e->mov(x86::eax, x86::ebx);
+//! }
+//!
+//! static void assemble(CodeHolder& code, bool useAsm) {
+//!   if (useAsm) {
+//!     x86::Assembler assembler(&code);
+//!     emitSomething(assembler.as<x86::Emitter>());
+//!   }
+//!   else {
+//!     x86::Builder builder(&code);
+//!     emitSomething(builder.as<x86::Emitter>());
+//!
+//!     // NOTE: Builder requires `finalize()` to be called to serialize its
+//!     // content to Assembler (it automatically creates one if not attached).
+//!     builder.finalize();
+//!   }
+//! }
+//! ```
+//!
+//! The example above shows how to create a function that can emit code to either \ref x86::Assembler or \ref
+//! x86::Builder through \ref x86::Emitter, which provides emitter-neutral functionality. \ref x86::Emitter,
+//! however, doesn't provide any emitter-specific functionality like `setCursor()`.
+//!
+//! ### Code Injection and Manipulation
+//!
+//! \ref BaseBuilder emitter stores its nodes in a double-linked list, which makes it easy to manipulate that
+//! list during the code generation or afterwards. Each node is always emitted next to the current cursor and
+//! the cursor is advanced to that newly emitted node. The cursor can be retrieved and changed by \ref
+//! BaseBuilder::cursor() and \ref BaseBuilder::setCursor(), respectively.
+//!
+//! The example below demonstrates how to remember a node and inject something
+//! next to it.
+//!
+//! ```
+//! static void example(x86::Builder& builder) {
+//!   // Emit something, after it returns the cursor would point at the last
+//!   // emitted node.
+//!   builder.mov(x86::rax, x86::rdx); // [1]
+//!
+//!   // We can retrieve the node.
+//!   BaseNode* node = builder.cursor();
+//!
+//!   // Change the instruction we just emitted, just for fun...
+//!   if (node->isInst()) {
+//!     InstNode* inst = node->as<InstNode>();
+//!     // Changes the operand at index 1 to RCX.
+//!     inst->setOp(1, x86::rcx);
+//!   }
+//!
+//!   // ------------------------- Generate Some Code -------------------------
+//!   builder.add(x86::rax, x86::rdx); // [2]
+//!   builder.shr(x86::rax, 3);        // [3]
+//!   // ----------------------------------------------------------------------
+//!
+//!   // Now, we know where our node is, and we can simply change the cursor
+//!   // and start emitting something after it. The setCursor() function
+//!   // returns the previous cursor, and it's always a good practice to remember
+//!   // it, because you never know if you are not already injecting the code
+//!   // somewhere else...
+//!   BaseNode* oldCursor = builder.setCursor(node);
+//!
+//!   builder.mul(x86::rax, 8);        // [4]
+//!
+//!   // Restore the cursor
+//!   builder.setCursor(oldCursor);
+//! }
+//! ```
+//!
+//! The function above would actually emit the following:
+//!
+//! ```
+//! mov rax, rcx ; [1] Patched at the beginning.
+//! mul rax, 8   ; [4] Injected.
+//! add rax, rdx ; [2] Followed [1] initially.
+//! shr rax, 3   ; [3] Follows [2].
+//! ```
+class ASMJIT_VIRTAPI Builder
+  : public BaseBuilder,
+    public EmitterImplicitT<Builder> {
+public:
+  ASMJIT_NONCOPYABLE(Builder)
+  typedef BaseBuilder Base; //!< Alias of the base class; the out-of-line definitions forward to it via `Base::`.
+
+  //! \name Construction & Destruction
+  //! \{
+
+  ASMJIT_API explicit Builder(CodeHolder* code = nullptr) noexcept; //!< Creates the builder and attaches it to \p code when it is non-null.
+  ASMJIT_API virtual ~Builder() noexcept;
+
+  //! \}
+
+  //! \name Events
+  //! \{
+
+  ASMJIT_API Error onAttach(CodeHolder* code) noexcept override; //!< Forwards to the base class implementation.
+  ASMJIT_API Error onDetach(CodeHolder* code) noexcept override; //!< Forwards to the base class implementation.
+
+  //! \}
+
+  //! \name Finalize
+  //! \{
+
+  ASMJIT_API Error finalize() override; //!< Runs the passes, then serializes the content to a temporary x86::Assembler.
+
+  //! \}
+};
+
+//! \}
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // !ASMJIT_NO_BUILDER
+#endif // ASMJIT_X86_X86BUILDER_H_INCLUDED
diff --git a/lib/lepton/asmjit/x86/x86compiler.cpp b/lib/lepton/asmjit/x86/x86compiler.cpp
new file mode 100644
index 0000000000..04d0980707
--- /dev/null
+++ b/lib/lepton/asmjit/x86/x86compiler.cpp
@@ -0,0 +1,61 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#if !defined(ASMJIT_NO_X86) && !defined(ASMJIT_NO_COMPILER)
+
+#include "../x86/x86assembler.h"
+#include "../x86/x86compiler.h"
+#include "../x86/x86instapi_p.h"
+#include "../x86/x86rapass_p.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(x86)
+
+// x86::Compiler - Construction & Destruction
+// ==========================================
+
+Compiler::Compiler(CodeHolder* code) noexcept : BaseCompiler() {
+  _archMask = (uint64_t(1) << uint32_t(Arch::kX86)) | // One bit per Arch enum value: set the X86 bit...
+              (uint64_t(1) << uint32_t(Arch::kX64)) ; // ...and the X64 bit.
+  assignEmitterFuncs(this); // NOTE(review): helper defined elsewhere; presumably installs the x86 emitter callbacks - confirm.
+
+  if (code) // Attaching is optional: a null `code` leaves this compiler unattached.
+    code->attach(this);
+}
+Compiler::~Compiler() noexcept {} // Empty body: no explicit cleanup is performed here.
+
+// x86::Compiler - Events
+// ======================
+
+Error Compiler::onAttach(CodeHolder* code) noexcept {
+  ASMJIT_PROPAGATE(Base::onAttach(code)); // Base attach first; ASMJIT_PROPAGATE presumably returns early on error (macro defined elsewhere).
+  Error err = addPassT<X86RAPass>(); // Add an X86RAPass instance to this compiler's passes.
+
+  if (ASMJIT_UNLIKELY(err)) {
+    onDetach(code); // The pass could not be added: detach again before reporting the failure.
+    return err;
+  }
+
+  return kErrorOk;
+}
+
+Error Compiler::onDetach(CodeHolder* code) noexcept {
+  return Base::onDetach(code); // No x86-specific work: forwards to the base class and returns its Error.
+}
+
+// x86::Compiler - Finalize
+// ========================
+
+Error Compiler::finalize() {
+  ASMJIT_PROPAGATE(runPasses()); // Run the passes first; ASMJIT_PROPAGATE presumably returns early on error (macro defined elsewhere).
+  Assembler a(_code); // Local x86::Assembler constructed from this compiler's `_code`.
+  a.addEncodingOptions(encodingOptions()); // Hand this compiler's encoding options to the assembler...
+  a.addDiagnosticOptions(diagnosticOptions()); // ...and its diagnostic options as well.
+  return serializeTo(&a); // Serialize the compiler's content to the assembler; its Error is the result.
+}
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // !ASMJIT_NO_X86 && !ASMJIT_NO_COMPILER
diff --git a/lib/lepton/asmjit/x86/x86compiler.h b/lib/lepton/asmjit/x86/x86compiler.h
new file mode 100644
index 0000000000..d89aea0251
--- /dev/null
+++ b/lib/lepton/asmjit/x86/x86compiler.h
@@ -0,0 +1,721 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_X86_X86COMPILER_H_INCLUDED
+#define ASMJIT_X86_X86COMPILER_H_INCLUDED
+
+#include "../core/api-config.h"
+#ifndef ASMJIT_NO_COMPILER
+
+#include "../core/compiler.h"
+#include "../core/type.h"
+#include "../x86/x86emitter.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(x86)
+
+//! \addtogroup asmjit_x86
+//! \{
+
+//! X86/X64 compiler implementation.
+//!
+//! ### Compiler Basics
+//!
+//! The first \ref x86::Compiler example shows how to generate a function that simply returns an integer value. It's
+//! an analogy to the first Assembler example:
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//! #include <stdio.h>
+//!
+//! using namespace asmjit;
+//!
+//! // Signature of the generated function.
+//! typedef int (*Func)(void);
+//!
+//! int main() {
+//!   JitRuntime rt;                    // Runtime specialized for JIT code execution.
+//!   CodeHolder code;                  // Holds code and relocation information.
+//!
+//!   code.init(rt.environment());      // Initialize code to match the JIT environment.
+//!   x86::Compiler cc(&code);          // Create and attach x86::Compiler to code.
+//!
+//!   cc.addFunc(FuncSignatureT<int>());// Begin a function of `int fn(void)` signature.
+//!
+//!   x86::Gp vReg = cc.newGpd();       // Create a 32-bit general purpose register.
+//!   cc.mov(vReg, 1);                  // Move one to our virtual register `vReg`.
+//!   cc.ret(vReg);                     // Return `vReg` from the function.
+//!
+//!   cc.endFunc();                     // End of the function body.
+//!   cc.finalize();                    // Translate and assemble the whole 'cc' content.
+//!   // ----> x86::Compiler is no longer needed from here and can be destroyed <----
+//!
+//!   Func fn;
+//!   Error err = rt.add(&fn, &code);   // Add the generated code to the runtime.
+//!   if (err) return 1;                // Handle a possible error returned by AsmJit.
+//!   // ----> CodeHolder is no longer needed from here and can be destroyed <----
+//!
+//!   int result = fn();                // Execute the generated code.
+//!   printf("%d\n", result);           // Print the resulting "1".
+//!
+//!   rt.release(fn);                   // Explicitly remove the function from the runtime.
+//!   return 0;
+//! }
+//! ```
+//!
+//! The \ref BaseCompiler::addFunc() and \ref BaseCompiler::endFunc() functions are used to define the function and
+//! its end. Both must be called per function, but the body doesn't have to be generated in sequence. An example of
+//! generating two functions will be shown later. The next example shows more complicated code that contains a loop
+//! and generates a simple memory copy function that uses `uint32_t` items:
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//! #include <stdio.h>
+//!
+//! using namespace asmjit;
+//!
+//! // Signature of the generated function.
+//! typedef void (*MemCpy32)(uint32_t* dst, const uint32_t* src, size_t count);
+//!
+//! int main() {
+//!   JitRuntime rt;                    // Runtime specialized for JIT code execution.
+//!   CodeHolder code;                  // Holds code and relocation information.
+//!
+//!   code.init(rt.environment());      // Initialize code to match the JIT environment.
+//!   x86::Compiler cc(&code);          // Create and attach x86::Compiler to code.
+//!
+//!   FuncNode* funcNode = cc.addFunc(  // Begin the function of the following signature:
+//!     FuncSignatureT<void,            //   Return value - void      (no return value).
+//!       uint32_t*,                    //   1st argument - uint32_t* (machine reg-size).
+//!       const uint32_t*,              //   2nd argument - uint32_t* (machine reg-size).
+//!       size_t>());                   //   3rd argument - size_t    (machine reg-size).
+//!
+//!   Label L_Loop = cc.newLabel();     // Start of the loop.
+//!   Label L_Exit = cc.newLabel();     // Used to exit early.
+//!
+//!   x86::Gp dst = cc.newIntPtr("dst");// Create `dst` register (destination pointer).
+//!   x86::Gp src = cc.newIntPtr("src");// Create `src` register (source pointer).
+//!   x86::Gp i = cc.newUIntPtr("i");   // Create `i` register (loop counter).
+//!
+//!   funcNode->setArg(0, dst);         // Assign `dst` argument.
+//!   funcNode->setArg(1, src);         // Assign `src` argument.
+//!   funcNode->setArg(2, i);           // Assign `i` argument.
+//!
+//!   cc.test(i, i);                    // Early exit if length is zero.
+//!   cc.jz(L_Exit);
+//!
+//!   cc.bind(L_Loop);                  // Bind the beginning of the loop here.
+//!
+//!   x86::Gp tmp = cc.newInt32("tmp"); // Copy a single dword (4 bytes).
+//!   cc.mov(tmp, x86::dword_ptr(src)); // Load DWORD from [src] address.
+//!   cc.mov(x86::dword_ptr(dst), tmp); // Store DWORD to [dst] address.
+//!
+//!   cc.add(src, 4);                   // Increment `src`.
+//!   cc.add(dst, 4);                   // Increment `dst`.
+//!
+//!   cc.dec(i);                        // Loop while `i` is non-zero.
+//!   cc.jnz(L_Loop);
+//!
+//!   cc.bind(L_Exit);                  // Label used by early exit.
+//!   cc.endFunc();                     // End of the function body.
+//!
+//!   cc.finalize();                    // Translate and assemble the whole 'cc' content.
+//!   // ----> x86::Compiler is no longer needed from here and can be destroyed <----
+//!
+//!   // Add the generated code to the runtime.
+//!   MemCpy32 memcpy32;
+//!   Error err = rt.add(&memcpy32, &code);
+//!
+//!   // Handle a possible error returned by AsmJit.
+//!   if (err)
+//!     return 1;
+//!   // ----> CodeHolder is no longer needed from here and can be destroyed <----
+//!
+//!   // Test the generated code.
+//!   uint32_t input[6] = { 1, 2, 3, 5, 8, 13 };
+//!   uint32_t output[6];
+//!   memcpy32(output, input, 6);
+//!
+//!   for (uint32_t i = 0; i < 6; i++)
+//!     printf("%d\n", output[i]);
+//!
+//!   rt.release(memcpy32);
+//!   return 0;
+//! }
+//! ```
+//!
+//! ### AVX and AVX-512
+//!
+//! AVX and AVX-512 code generation must be explicitly enabled via \ref FuncFrame to work properly. If it's not set up
+//! correctly then Prolog & Epilog would use SSE instead of AVX instructions to work with SIMD registers. In addition,
+//! Compiler requires AVX-512 to be explicitly enabled via \ref FuncFrame in order to use all 32 SIMD registers.
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//! #include <stdio.h>
+//!
+//! using namespace asmjit;
+//!
+//! // Signature of the generated function.
+//! typedef void (*Func)(void*);
+//!
+//! int main() {
+//!   JitRuntime rt;                    // Runtime specialized for JIT code execution.
+//!   CodeHolder code;                  // Holds code and relocation information.
+//!
+//!   code.init(rt.environment());      // Initialize code to match the JIT environment.
+//!   x86::Compiler cc(&code);          // Create and attach x86::Compiler to code.
+//!
+//!   FuncNode* funcNode = cc.addFunc(FuncSignatureT<void, void*>());
+//!
+//!   // Use the following to enable AVX and/or AVX-512.
+//!   funcNode->frame().setAvxEnabled();
+//!   funcNode->frame().setAvx512Enabled();
+//!
+//!   // Do something with the input pointer.
+//!   x86::Gp addr = cc.newIntPtr("addr");
+//!   x86::Zmm vreg = cc.newZmm("vreg");
+//!
+//!   funcNode->setArg(0, addr);
+//!
+//!   cc.vmovdqu32(vreg, x86::ptr(addr));
+//!   cc.vpaddq(vreg, vreg, vreg);
+//!   cc.vmovdqu32(x86::ptr(addr), vreg);
+//!
+//!   cc.endFunc();                     // End of the function body.
+//!   cc.finalize();                    // Translate and assemble the whole 'cc' content.
+//!   // ----> x86::Compiler is no longer needed from here and can be destroyed <----
+//!
+//!   Func fn;
+//!   Error err = rt.add(&fn, &code);   // Add the generated code to the runtime.
+//!   if (err) return 1;                // Handle a possible error returned by AsmJit.
+//!   // ----> CodeHolder is no longer needed from here and can be destroyed <----
+//!
+//!   // Execute the generated code and print some output.
+//!   uint64_t data[] = { 1, 2, 3, 4, 5, 6, 7, 8 };
+//!   fn(data);
+//!   printf("%llu\n", (unsigned long long)data[0]);
+//!
+//!   rt.release(fn);                   // Explicitly remove the function from the runtime.
+//!   return 0;
+//! }
+//! ```
+//!
+//! ### Recursive Functions
+//!
+//! It's possible to create more functions by using the same \ref x86::Compiler instance and make links between them.
+//! In such case it's important to keep the pointer to \ref FuncNode.
+//!
+//! The example below creates a simple Fibonacci function that calls itself recursively:
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//! #include <stdio.h>
+//!
+//! using namespace asmjit;
+//!
+//! // Signature of the generated function.
+//! typedef uint32_t (*Fibonacci)(uint32_t x);
+//!
+//! int main() {
+//!   JitRuntime rt;                    // Runtime specialized for JIT code execution.
+//!   CodeHolder code;                  // Holds code and relocation information.
+//!
+//!   code.init(rt.environment());      // Initialize code to match the JIT environment.
+//!   x86::Compiler cc(&code);          // Create and attach x86::Compiler to code.
+//!
+//!   FuncNode* funcNode = cc.addFunc(  // Begin of the Fibonacci function, addFunc()
+//!     FuncSignatureT<int, int>());    // Returns a pointer to the FuncNode node.
+//!
+//!   Label L_Exit = cc.newLabel();     // Exit label.
+//!   x86::Gp x = cc.newUInt32();       // Function x argument.
+//!   x86::Gp y = cc.newUInt32();       // Temporary.
+//!
+//!   funcNode->setArg(0, x);
+//!
+//!   cc.cmp(x, 3);                     // Return x if less than 3.
+//!   cc.jb(L_Exit);
+//!
+//!   cc.mov(y, x);                     // Make copy of the original x.
+//!   cc.dec(x);                        // Decrease x.
+//!
+//!   InvokeNode* invokeNode;           // Function invocation:
+//!   cc.invoke(&invokeNode,            //   - InvokeNode (output).
+//!     funcNode->label(),              //   - Function address or Label.
+//!     FuncSignatureT<int, int>());    //   - Function signature.
+//!
+//!   invokeNode->setArg(0, x);         // Assign x as the first argument.
+//!   invokeNode->setRet(0, x);         // Assign x as a return value as well.
+//!
+//!   cc.add(x, y);                     // Combine the return value with y.
+//!
+//!   cc.bind(L_Exit);
+//!   cc.ret(x);                        // Return x.
+//!   cc.endFunc();                     // End of the function body.
+//!
+//!   cc.finalize();                    // Translate and assemble the whole 'cc' content.
+//!   // ----> x86::Compiler is no longer needed from here and can be destroyed <----
+//!
+//!   Fibonacci fib;
+//!   Error err = rt.add(&fib, &code);  // Add the generated code to the runtime.
+//!   if (err) return 1;                // Handle a possible error returned by AsmJit.
+//!   // ----> CodeHolder is no longer needed from here and can be destroyed <----
+//!
+//!   // Test the generated code.
+//!   printf("Fib(%u) -> %u\n", 8, fib(8));
+//!
+//!   rt.release(fib);
+//!   return 0;
+//! }
+//! ```
+//!
+//! ### Stack Management
+//!
+//! Function's stack-frame is managed automatically, which is used by the register allocator to spill virtual
+//! registers. It also provides an interface to allocate user-defined block of the stack, which can be used as
+//! a temporary storage by the generated function. In the following example a stack of 256 bytes size is allocated,
+//! filled by bytes starting from 0 to 255 and then iterated again to sum all the values.
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//! #include <stdio.h>
+//!
+//! using namespace asmjit;
+//!
+//! // Signature of the generated function.
+//! typedef int (*Func)(void);
+//!
+//! int main() {
+//!   JitRuntime rt;                    // Runtime specialized for JIT code execution.
+//!   CodeHolder code;                  // Holds code and relocation information.
+//!
+//!   code.init(rt.environment());      // Initialize code to match the JIT environment.
+//!   x86::Compiler cc(&code);          // Create and attach x86::Compiler to code.
+//!
+//!   cc.addFunc(FuncSignatureT<int>());// Create a function that returns int.
+//!
+//!   x86::Gp p = cc.newIntPtr("p");
+//!   x86::Gp i = cc.newIntPtr("i");
+//!
+//!   // Allocate 256 bytes on the stack aligned to 4 bytes.
+//!   x86::Mem stack = cc.newStack(256, 4);
+//!
+//!   x86::Mem stackIdx(stack);         // Copy of stack with i added.
+//!   stackIdx.setIndex(i);             // stackIdx <- stack[i].
+//!   stackIdx.setSize(1);              // stackIdx <- byte ptr stack[i].
+//!
+//!   // Load a stack address to `p`. This step is purely optional and shows
+//!   // that `lea` is useful to load a memory operands address (even absolute)
+//!   // to a general purpose register.
+//!   cc.lea(p, stack);
+//!
+//!   // Clear i (xor is a C++ keyword, hence 'xor_' is used instead).
+//!   cc.xor_(i, i);
+//!
+//!   Label L1 = cc.newLabel();
+//!   Label L2 = cc.newLabel();
+//!
+//!   cc.bind(L1);                      // First loop, fill the stack.
+//!   cc.mov(stackIdx, i.r8());         // stack[i] = uint8_t(i).
+//!
+//!   cc.inc(i);                        // i++;
+//!   cc.cmp(i, 256);                   // if (i < 256)
+//!   cc.jb(L1);                        //   goto L1;
+//!
+//!   // Second loop, sum all bytes stored in `stack`.
+//!   x86::Gp sum = cc.newInt32("sum");
+//!   x86::Gp val = cc.newInt32("val");
+//!
+//!   cc.xor_(i, i);
+//!   cc.xor_(sum, sum);
+//!
+//!   cc.bind(L2);
+//!
+//!   cc.movzx(val, stackIdx);          // val = uint32_t(stack[i]);
+//!   cc.add(sum, val);                 // sum += val;
+//!
+//!   cc.inc(i);                        // i++;
+//!   cc.cmp(i, 256);                   // if (i < 256)
+//!   cc.jb(L2);                        //   goto L2;
+//!
+//!   cc.ret(sum);                      // Return the `sum` of all values.
+//!   cc.endFunc();                     // End of the function body.
+//!
+//!   cc.finalize();                    // Translate and assemble the whole 'cc' content.
+//!   // ----> x86::Compiler is no longer needed from here and can be destroyed <----
+//!
+//!   Func func;
+//!   Error err = rt.add(&func, &code); // Add the generated code to the runtime.
+//!   if (err) return 1;                // Handle a possible error returned by AsmJit.
+//!   // ----> CodeHolder is no longer needed from here and can be destroyed <----
+//!
+//!   printf("Func() -> %d\n", func()); // Test the generated code.
+//!
+//!   rt.release(func);
+//!   return 0;
+//! }
+//! ```
+//!
+//! ### Constant Pool
+//!
+//! Compiler provides two constant pools for a general purpose code generation:
+//!
+//!   - Local constant pool - Part of \ref FuncNode, can be only used by a single function and added after the
+//!     function epilog sequence (after `ret` instruction).
+//!
+//!   - Global constant pool - Part of \ref BaseCompiler, flushed at the end of the generated code by \ref
+//!     BaseEmitter::finalize().
+//!
+//! The example below illustrates how a built-in constant pool can be used:
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//!
+//! using namespace asmjit;
+//!
+//! static void exampleUseOfConstPool(x86::Compiler& cc) {
+//!   cc.addFunc(FuncSignatureT<int>());
+//!
+//!   x86::Gp v0 = cc.newGpd("v0");
+//!   x86::Gp v1 = cc.newGpd("v1");
+//!
+//!   x86::Mem c0 = cc.newInt32Const(ConstPoolScope::kLocal, 200);
+//!   x86::Mem c1 = cc.newInt32Const(ConstPoolScope::kLocal, 33);
+//!
+//!   cc.mov(v0, c0);
+//!   cc.mov(v1, c1);
+//!   cc.add(v0, v1);
+//!
+//!   cc.ret(v0);
+//!   cc.endFunc();
+//! }
+//! ```
+//!
+//! ### Jump Tables
+//!
+//! x86::Compiler supports `jmp` instruction with reg/mem operand, which is a commonly used pattern to implement
+//! indirect jumps within a function, for example to implement a `switch()` statement in programming languages.
+//! By default AsmJit assumes that every basic block can be a possible jump target as it's unable to deduce targets
+//! from instruction's operands. This is a very pessimistic default that should be avoided if possible as it's costly
+//! and very unfriendly to liveness analysis and register allocation.
+//!
+//! Instead of relying on such pessimistic default behavior, let's use \ref JumpAnnotation to annotate a jump where
+//! all targets are known:
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//!
+//! using namespace asmjit;
+//!
+//! static void exampleUseOfIndirectJump(x86::Compiler& cc) {
+//!   FuncNode* funcNode = cc.addFunc(FuncSignatureT<float, float, float, uint32_t>(CallConvId::kHost));
+//!
+//!   // Function arguments
+//!   x86::Xmm a = cc.newXmmSs("a");
+//!   x86::Xmm b = cc.newXmmSs("b");
+//!   x86::Gp op = cc.newUInt32("op");
+//!
+//!   x86::Gp target = cc.newIntPtr("target");
+//!   x86::Gp offset = cc.newIntPtr("offset");
+//!
+//!   Label L_Table = cc.newLabel();
+//!   Label L_Add = cc.newLabel();
+//!   Label L_Sub = cc.newLabel();
+//!   Label L_Mul = cc.newLabel();
+//!   Label L_Div = cc.newLabel();
+//!   Label L_End = cc.newLabel();
+//!
+//!   funcNode->setArg(0, a);
+//!   funcNode->setArg(1, b);
+//!   funcNode->setArg(2, op);
+//!
+//!   // Jump annotation is a building block that allows to annotate all possible targets where `jmp()` can
+//!   // jump. It then drives the CFG construction and liveness analysis, which impacts register allocation.
+//!   JumpAnnotation* annotation = cc.newJumpAnnotation();
+//!   annotation->addLabel(L_Add);
+//!   annotation->addLabel(L_Sub);
+//!   annotation->addLabel(L_Mul);
+//!   annotation->addLabel(L_Div);
+//!
+//!   // Most likely not the common indirect jump approach, but it
+//!   // doesn't really matter how the final address is calculated. The
+//!   // most important part is using JumpAnnotation with `jmp()`.
+//!   cc.lea(offset, x86::ptr(L_Table));
+//!   if (cc.is64Bit())
+//!     cc.movsxd(target, x86::dword_ptr(offset, op.cloneAs(offset), 2));
+//!   else
+//!     cc.mov(target, x86::dword_ptr(offset, op.cloneAs(offset), 2));
+//!   cc.add(target, offset);
+//!   cc.jmp(target, annotation);
+//!
+//!   // Acts like a switch() statement in C.
+//!   cc.bind(L_Add);
+//!   cc.addss(a, b);
+//!   cc.jmp(L_End);
+//!
+//!   cc.bind(L_Sub);
+//!   cc.subss(a, b);
+//!   cc.jmp(L_End);
+//!
+//!   cc.bind(L_Mul);
+//!   cc.mulss(a, b);
+//!   cc.jmp(L_End);
+//!
+//!   cc.bind(L_Div);
+//!   cc.divss(a, b);
+//!
+//!   cc.bind(L_End);
+//!   cc.ret(a);
+//!
+//!   cc.endFunc();
+//!
+//!   // Relative int32_t offsets of `L_XXX - L_Table`.
+//!   cc.bind(L_Table);
+//!   cc.embedLabelDelta(L_Add, L_Table, 4);
+//!   cc.embedLabelDelta(L_Sub, L_Table, 4);
+//!   cc.embedLabelDelta(L_Mul, L_Table, 4);
+//!   cc.embedLabelDelta(L_Div, L_Table, 4);
+//! }
+//! ```
+class ASMJIT_VIRTAPI Compiler
+  : public BaseCompiler,
+    public EmitterExplicitT<Compiler> {
+public:
+  ASMJIT_NONCOPYABLE(Compiler)
+  typedef BaseCompiler Base;
+
+  //! \name Construction & Destruction
+  //! \{
+
+  ASMJIT_API explicit Compiler(CodeHolder* code = nullptr) noexcept;
+  ASMJIT_API virtual ~Compiler() noexcept;
+
+  //! \}
+
+  //! \name Virtual Registers
+  //! \{
+
+#ifndef ASMJIT_NO_LOGGING
+# define ASMJIT_NEW_REG_FMT(OUT, PARAM, FORMAT, ARGS)                         \
+    _newRegFmt(&OUT, PARAM, FORMAT, ARGS)
+#else
+# define ASMJIT_NEW_REG_FMT(OUT, PARAM, FORMAT, ARGS)                         \
+    DebugUtils::unused(FORMAT);                                               \
+    DebugUtils::unused(std::forward<Args>(args)...);                          \
+    _newReg(&OUT, PARAM)
+#endif
+
+#define ASMJIT_NEW_REG_CUSTOM(FUNC, REG)                                      \
+    inline REG FUNC(TypeId typeId) {                                          \
+      REG reg(Globals::NoInit);                                               \
+      _newReg(&reg, typeId);                                                  \
+      return reg;                                                             \
+    }                                                                         \
+                                                                              \
+    template<typename... Args>                                                \
+    inline REG FUNC(TypeId typeId, const char* fmt, Args&&... args) {         \
+      REG reg(Globals::NoInit);                                               \
+      ASMJIT_NEW_REG_FMT(reg, typeId, fmt, std::forward<Args>(args)...);      \
+      return reg;                                                             \
+    }
+
+#define ASMJIT_NEW_REG_TYPED(FUNC, REG, TYPE_ID)                              \
+    inline REG FUNC() {                                                       \
+      REG reg(Globals::NoInit);                                               \
+      _newReg(&reg, TYPE_ID);                                                 \
+      return reg;                                                             \
+    }                                                                         \
+                                                                              \
+    template<typename... Args>                                                \
+    inline REG FUNC(const char* fmt, Args&&... args) {                        \
+      REG reg(Globals::NoInit);                                               \
+      ASMJIT_NEW_REG_FMT(reg, TYPE_ID, fmt, std::forward<Args>(args)...);     \
+      return reg;                                                             \
+    }
+  //! Creates a new register of type `RegT` via `_newReg()`, passing `ref` as the reference register.
+  template<typename RegT>
+  inline RegT newSimilarReg(const RegT& ref) {
+    RegT reg(Globals::NoInit);
+    _newReg(&reg, ref);
+    return reg;
+  }
+  //! \overload (the register name is built from `fmt` and `args` unless ASMJIT_NO_LOGGING is defined).
+  template<typename RegT, typename... Args>
+  inline RegT newSimilarReg(const RegT& ref, const char* fmt, Args&&... args) {
+    RegT reg(Globals::NoInit);
+    ASMJIT_NEW_REG_FMT(reg, ref, fmt, std::forward<Args>(args)...);
+    return reg;
+  }
+
+  ASMJIT_NEW_REG_CUSTOM(newReg    , Reg )
+  ASMJIT_NEW_REG_CUSTOM(newGp     , Gp  )
+  ASMJIT_NEW_REG_CUSTOM(newVec    , Vec )
+  ASMJIT_NEW_REG_CUSTOM(newK      , KReg)
+
+  ASMJIT_NEW_REG_TYPED(newInt8   , Gp  , TypeId::kInt8)
+  ASMJIT_NEW_REG_TYPED(newUInt8  , Gp  , TypeId::kUInt8)
+  ASMJIT_NEW_REG_TYPED(newInt16  , Gp  , TypeId::kInt16)
+  ASMJIT_NEW_REG_TYPED(newUInt16 , Gp  , TypeId::kUInt16)
+  ASMJIT_NEW_REG_TYPED(newInt32  , Gp  , TypeId::kInt32)
+  ASMJIT_NEW_REG_TYPED(newUInt32 , Gp  , TypeId::kUInt32)
+  ASMJIT_NEW_REG_TYPED(newInt64  , Gp  , TypeId::kInt64)
+  ASMJIT_NEW_REG_TYPED(newUInt64 , Gp  , TypeId::kUInt64)
+  ASMJIT_NEW_REG_TYPED(newIntPtr , Gp  , TypeId::kIntPtr)
+  ASMJIT_NEW_REG_TYPED(newUIntPtr, Gp  , TypeId::kUIntPtr)
+
+  ASMJIT_NEW_REG_TYPED(newGpb    , Gp  , TypeId::kUInt8)
+  ASMJIT_NEW_REG_TYPED(newGpw    , Gp  , TypeId::kUInt16)
+  ASMJIT_NEW_REG_TYPED(newGpd    , Gp  , TypeId::kUInt32)
+  ASMJIT_NEW_REG_TYPED(newGpq    , Gp  , TypeId::kUInt64)
+  ASMJIT_NEW_REG_TYPED(newGpz    , Gp  , TypeId::kUIntPtr)
+  ASMJIT_NEW_REG_TYPED(newXmm    , Xmm , TypeId::kInt32x4)
+  ASMJIT_NEW_REG_TYPED(newXmmSs  , Xmm , TypeId::kFloat32x1)
+  ASMJIT_NEW_REG_TYPED(newXmmSd  , Xmm , TypeId::kFloat64x1)
+  ASMJIT_NEW_REG_TYPED(newXmmPs  , Xmm , TypeId::kFloat32x4)
+  ASMJIT_NEW_REG_TYPED(newXmmPd  , Xmm , TypeId::kFloat64x2)
+  ASMJIT_NEW_REG_TYPED(newYmm    , Ymm , TypeId::kInt32x8)
+  ASMJIT_NEW_REG_TYPED(newYmmPs  , Ymm , TypeId::kFloat32x8)
+  ASMJIT_NEW_REG_TYPED(newYmmPd  , Ymm , TypeId::kFloat64x4)
+  ASMJIT_NEW_REG_TYPED(newZmm    , Zmm , TypeId::kInt32x16)
+  ASMJIT_NEW_REG_TYPED(newZmmPs  , Zmm , TypeId::kFloat32x16)
+  ASMJIT_NEW_REG_TYPED(newZmmPd  , Zmm , TypeId::kFloat64x8)
+  ASMJIT_NEW_REG_TYPED(newMm     , Mm  , TypeId::kMmx64)
+  ASMJIT_NEW_REG_TYPED(newKb     , KReg, TypeId::kMask8)
+  ASMJIT_NEW_REG_TYPED(newKw     , KReg, TypeId::kMask16)
+  ASMJIT_NEW_REG_TYPED(newKd     , KReg, TypeId::kMask32)
+  ASMJIT_NEW_REG_TYPED(newKq     , KReg, TypeId::kMask64)
+
+#undef ASMJIT_NEW_REG_TYPED
+#undef ASMJIT_NEW_REG_CUSTOM
+#undef ASMJIT_NEW_REG_FMT
+
+  //! \}
+
+  //! \name Stack
+  //! \{
+
+  //! Creates a new memory chunk allocated on the current function's stack.
+  inline Mem newStack(uint32_t size, uint32_t alignment, const char* name = nullptr) {
+    Mem m(Globals::NoInit);
+    _newStack(&m, size, alignment, name);
+    return m;
+  }
+
+  //! \}
+
+  //! \name Constants
+  //! \{
+
+  //! Put data to a constant-pool and get a memory reference to it.
+  inline Mem newConst(ConstPoolScope scope, const void* data, size_t size) {
+    Mem m(Globals::NoInit);
+    _newConst(&m, scope, data, size);
+    return m;
+  }
+
+  //! Put a BYTE `val` to a constant-pool.
+  inline Mem newByteConst(ConstPoolScope scope, uint8_t val) noexcept { return newConst(scope, &val, 1); }
+  //! Put a WORD `val` to a constant-pool.
+  inline Mem newWordConst(ConstPoolScope scope, uint16_t val) noexcept { return newConst(scope, &val, 2); }
+  //! Put a DWORD `val` to a constant-pool.
+  inline Mem newDWordConst(ConstPoolScope scope, uint32_t val) noexcept { return newConst(scope, &val, 4); }
+  //! Put a QWORD `val` to a constant-pool.
+  inline Mem newQWordConst(ConstPoolScope scope, uint64_t val) noexcept { return newConst(scope, &val, 8); }
+
+  //! Put a WORD `val` to a constant-pool.
+  inline Mem newInt16Const(ConstPoolScope scope, int16_t val) noexcept { return newConst(scope, &val, 2); }
+  //! Put a WORD `val` to a constant-pool.
+  inline Mem newUInt16Const(ConstPoolScope scope, uint16_t val) noexcept { return newConst(scope, &val, 2); }
+  //! Put a DWORD `val` to a constant-pool.
+  inline Mem newInt32Const(ConstPoolScope scope, int32_t val) noexcept { return newConst(scope, &val, 4); }
+  //! Put a DWORD `val` to a constant-pool.
+  inline Mem newUInt32Const(ConstPoolScope scope, uint32_t val) noexcept { return newConst(scope, &val, 4); }
+  //! Put a QWORD `val` to a constant-pool.
+  inline Mem newInt64Const(ConstPoolScope scope, int64_t val) noexcept { return newConst(scope, &val, 8); }
+  //! Put a QWORD `val` to a constant-pool.
+  inline Mem newUInt64Const(ConstPoolScope scope, uint64_t val) noexcept { return newConst(scope, &val, 8); }
+
+  //! Put a SP-FP `val` to a constant-pool.
+  inline Mem newFloatConst(ConstPoolScope scope, float val) noexcept { return newConst(scope, &val, 4); }
+  //! Put a DP-FP `val` to a constant-pool.
+  inline Mem newDoubleConst(ConstPoolScope scope, double val) noexcept { return newConst(scope, &val, 8); }
+
+  //! \}
+
+  //! \name Instruction Options
+  //! \{
+
+  //! Force the compiler to not follow the conditional or unconditional jump.
+  inline Compiler& unfollow() noexcept { addInstOptions(InstOptions::kUnfollow); return *this; }
+  //! Tell the compiler that the destination variable will be overwritten.
+  inline Compiler& overwrite() noexcept { addInstOptions(InstOptions::kOverwrite); return *this; }
+
+  //! \}
+
+  //! \name Function Call & Ret Intrinsics
+  //! \{
+
+  //! Invoke a function call without `target` type enforcement.
+  inline Error invoke_(InvokeNode** out, const Operand_& target, const FuncSignature& signature) {
+    return addInvokeNode(out, Inst::kIdCall, target, signature);
+  }
+
+  //! Invoke a function call of the given `target` and `signature` and store the added node to `out`.
+  //!
+  //! Creates a new \ref InvokeNode, initializes all the necessary members to match the given function `signature`,
+  //! adds the node to the compiler, and stores its pointer to `out`. The operation is atomic, if anything fails
+  //! nullptr is stored in `out` and error code is returned.
+  inline Error invoke(InvokeNode** out, const Gp& target, const FuncSignature& signature) { return invoke_(out, target, signature); }
+  //! \overload
+  inline Error invoke(InvokeNode** out, const Mem& target, const FuncSignature& signature) { return invoke_(out, target, signature); }
+  //! \overload
+  inline Error invoke(InvokeNode** out, const Label& target, const FuncSignature& signature) { return invoke_(out, target, signature); }
+  //! \overload
+  inline Error invoke(InvokeNode** out, const Imm& target, const FuncSignature& signature) { return invoke_(out, target, signature); }
+  //! \overload
+  inline Error invoke(InvokeNode** out, uint64_t target, const FuncSignature& signature) { return invoke_(out, Imm(int64_t(target)), signature); }
+
+  //! Return from function.
+  inline Error ret() { return addRet(Operand(), Operand()); }
+  //! \overload
+  inline Error ret(const BaseReg& o0) { return addRet(o0, Operand()); }
+  //! \overload
+  inline Error ret(const BaseReg& o0, const BaseReg& o1) { return addRet(o0, o1); }
+
+  //! \}
+
+  //! \name Jump Tables Support
+  //! \{
+
+  using EmitterExplicitT<Compiler>::jmp;
+
+  //! Adds a jump to the given `target` with the provided jump `annotation`.
+  inline Error jmp(const BaseReg& target, JumpAnnotation* annotation) { return emitAnnotatedJump(Inst::kIdJmp, target, annotation); }
+  //! \overload
+  inline Error jmp(const BaseMem& target, JumpAnnotation* annotation) { return emitAnnotatedJump(Inst::kIdJmp, target, annotation); }
+
+  //! \}
+
+  //! \name Events
+  //! \{
+
+  ASMJIT_API Error onAttach(CodeHolder* code) noexcept override;
+  ASMJIT_API Error onDetach(CodeHolder* code) noexcept override;
+
+  //! \}
+
+  //! \name Finalize
+  //! \{
+
+  ASMJIT_API Error finalize() override;
+
+  //! \}
+};
+
+//! \}
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // !ASMJIT_NO_COMPILER
+#endif // ASMJIT_X86_X86COMPILER_H_INCLUDED
diff --git a/lib/lepton/asmjit/x86/x86emithelper.cpp b/lib/lepton/asmjit/x86/x86emithelper.cpp
new file mode 100644
index 0000000000..b541c048b0
--- /dev/null
+++ b/lib/lepton/asmjit/x86/x86emithelper.cpp
@@ -0,0 +1,619 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#if !defined(ASMJIT_NO_X86)
+
+#include "../core/formatter.h"
+#include "../core/funcargscontext_p.h"
+#include "../core/string.h"
+#include "../core/support.h"
+#include "../core/type.h"
+#include "../core/radefs_p.h"
+#include "../x86/x86emithelper_p.h"
+#include "../x86/x86emitter.h"
+#include "../x86/x86formatter_p.h"
+#include "../x86/x86instapi_p.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(x86)
+
+// x86::EmitHelper - Utilities
+// ===========================
+
+static inline uint32_t getXmmMovInst(const FuncFrame& frame) {
+  // Aligned moves are used only when `frame.hasAlignedVecSR()` is set; VEX forms are used when AVX is enabled.
+  if (frame.hasAlignedVecSR())
+    return frame.isAvxEnabled() ? Inst::kIdVmovaps : Inst::kIdMovaps;
+
+  return frame.isAvxEnabled() ? Inst::kIdVmovups : Inst::kIdMovups;
+}
+
+//! Maps `size` (1, 2, 4 or 8) to the matching 'kmov?' instruction id; any other size yields `Inst::kIdNone`.
+static inline uint32_t kmovInstFromSize(uint32_t size) noexcept {
+  if (size == 1) return Inst::kIdKmovb;
+  if (size == 2) return Inst::kIdKmovw;
+  if (size == 4) return Inst::kIdKmovd;
+  if (size == 8) return Inst::kIdKmovq;
+
+  // No mask move exists for this size.
+  return Inst::kIdNone;
+}
+
+static inline uint32_t makeCastOp(TypeId dst, TypeId src) noexcept {
+  return uint32_t(src) | (uint32_t(dst) << 8); // Packs the (dst, src) pair into a single comparable key.
+}
+
+// x86::EmitHelper - Emit Reg Move
+// ===============================
+
+ASMJIT_FAVOR_SIZE Error EmitHelper::emitRegMove(
+  const Operand_& dst_,
+  const Operand_& src_, TypeId typeId, const char* comment) {
+  // Emits one move of a `typeId` value from `src_` to `dst_`; the instruction depends on `typeId` and on which side is memory.
+  // Invalid or abstract TypeIds are not allowed.
+  ASMJIT_ASSERT(TypeUtils::isValid(typeId) && !TypeUtils::isAbstract(typeId));
+
+  Operand dst(dst_); // Local copies - signatures and memory sizes are patched below.
+  Operand src(src_);
+
+  InstId instId = Inst::kIdNone;
+  uint32_t memFlags = 0;
+  uint32_t overrideMemSize = 0; // When non-zero, forced onto memory operands right before emitting.
+
+  enum MemFlags : uint32_t {
+    kDstMem = 0x1,
+    kSrcMem = 0x2
+  };
+
+  // Detect memory operands and patch them to have the same size as the register. BaseCompiler always sets memory size
+  // of allocs and spills, so it shouldn't be really necessary, however, after this function was separated from Compiler
+  // it's better to make sure that the size is always specified, as we can use 'movzx' and 'movsx' that rely on it.
+  if (dst.isMem()) { memFlags |= kDstMem; dst.as<Mem>().setSize(src.size()); }
+  if (src.isMem()) { memFlags |= kSrcMem; src.as<Mem>().setSize(dst.size()); }
+
+  switch (typeId) {
+    case TypeId::kInt8:
+    case TypeId::kUInt8:
+    case TypeId::kInt16:
+    case TypeId::kUInt16:
+      // Special case - 'movzx' load.
+      if (memFlags & kSrcMem) {
+        instId = Inst::kIdMovzx;
+        dst.setSignature(Reg::signatureOfT<RegType::kX86_Gpd>());
+        break;
+      }
+
+      if (!memFlags) {
+        // Change both destination and source registers to GPD (safer, no dependencies).
+        dst.setSignature(Reg::signatureOfT<RegType::kX86_Gpd>());
+        src.setSignature(Reg::signatureOfT<RegType::kX86_Gpd>());
+      }
+      ASMJIT_FALLTHROUGH; // 8/16-bit stores and reg-reg moves share the plain 'mov' below.
+
+    case TypeId::kInt32:
+    case TypeId::kUInt32:
+    case TypeId::kInt64:
+    case TypeId::kUInt64:
+      instId = Inst::kIdMov;
+      break;
+
+    case TypeId::kMmx32:
+      instId = Inst::kIdMovd;
+      if (memFlags) break; // Memory form keeps 'movd'; reg-reg falls through to 'movq'.
+      ASMJIT_FALLTHROUGH;
+
+    case TypeId::kMmx64 : instId = Inst::kIdMovq ; break;
+    case TypeId::kMask8 : instId = Inst::kIdKmovb; break;
+    case TypeId::kMask16: instId = Inst::kIdKmovw; break;
+    case TypeId::kMask32: instId = Inst::kIdKmovd; break;
+    case TypeId::kMask64: instId = Inst::kIdKmovq; break;
+
+    default: {
+      // Remaining TypeIds are handled as vector types, keyed by their scalar element type.
+      TypeId scalarTypeId = TypeUtils::scalarOf(typeId);
+      if (TypeUtils::isVec32(typeId) && memFlags) {
+        overrideMemSize = 4;
+        if (scalarTypeId == TypeId::kFloat32)
+          instId = _avxEnabled ? Inst::kIdVmovss : Inst::kIdMovss;
+        else
+          instId = _avxEnabled ? Inst::kIdVmovd : Inst::kIdMovd;
+        break;
+      }
+
+      if (TypeUtils::isVec64(typeId) && memFlags) {
+        overrideMemSize = 8;
+        if (scalarTypeId == TypeId::kFloat64)
+          instId = _avxEnabled ? Inst::kIdVmovsd : Inst::kIdMovsd;
+        else
+          instId = _avxEnabled ? Inst::kIdVmovq : Inst::kIdMovq;
+        break;
+      }
+
+      // Everything else uses an aligned full-register move picked by element type.
+      if (scalarTypeId == TypeId::kFloat32)
+        instId = _avxEnabled ? Inst::kIdVmovaps : Inst::kIdMovaps;
+      else if (scalarTypeId == TypeId::kFloat64)
+        instId = _avxEnabled ? Inst::kIdVmovapd : Inst::kIdMovapd;
+      else if (!_avx512Enabled)
+        instId = _avxEnabled ? Inst::kIdVmovdqa : Inst::kIdMovdqa;
+      else
+        instId = Inst::kIdVmovdqa32;
+      break;
+    }
+  }
+
+  if (!instId) // No instruction was selected above.
+    return DebugUtils::errored(kErrorInvalidState);
+
+  if (overrideMemSize) {
+    if (dst.isMem()) dst.as<Mem>().setSize(overrideMemSize);
+    if (src.isMem()) src.as<Mem>().setSize(overrideMemSize);
+  }
+
+  _emitter->setInlineComment(comment);
+  return _emitter->emit(instId, dst, src);
+}
+
+// x86::EmitHelper - Emit Arg Move
+// ===============================
+
+ASMJIT_FAVOR_SIZE Error EmitHelper::emitArgMove(
+  const BaseReg& dst_, TypeId dstTypeId,
+  const Operand_& src_, TypeId srcTypeId, const char* comment) {
+  // Moves `src_` (of `srcTypeId`) into register `dst_` (of `dstTypeId`), extending or converting when the types differ.
+  // Deduce optional `dstTypeId`, which may be `TypeId::kVoid` in some cases.
+  if (dstTypeId == TypeId::kVoid) {
+    const ArchTraits& archTraits = ArchTraits::byArch(_emitter->arch());
+    dstTypeId = archTraits.regTypeToTypeId(dst_.type());
+  }
+
+  // Invalid or abstract TypeIds are not allowed.
+  ASMJIT_ASSERT(TypeUtils::isValid(dstTypeId) && !TypeUtils::isAbstract(dstTypeId));
+  ASMJIT_ASSERT(TypeUtils::isValid(srcTypeId) && !TypeUtils::isAbstract(srcTypeId));
+
+  Reg dst(dst_.as<Reg>());
+  Operand src(src_);
+
+  uint32_t dstSize = TypeUtils::sizeOf(dstTypeId);
+  uint32_t srcSize = TypeUtils::sizeOf(srcTypeId);
+
+  InstId instId = Inst::kIdNone;
+
+  // Not a real loop, just 'break' is nicer than 'goto'.
+  for (;;) {
+    if (TypeUtils::isInt(dstTypeId)) {
+      if (TypeUtils::isInt(srcTypeId)) {
+        instId = Inst::kIdMovsx;
+        uint32_t castOp = makeCastOp(dstTypeId, srcTypeId);
+
+        // Sign extend by using 'movsx'.
+        if (castOp == makeCastOp(TypeId::kInt16, TypeId::kInt8 ) ||
+            castOp == makeCastOp(TypeId::kInt32, TypeId::kInt8 ) ||
+            castOp == makeCastOp(TypeId::kInt32, TypeId::kInt16) ||
+            castOp == makeCastOp(TypeId::kInt64, TypeId::kInt8 ) ||
+            castOp == makeCastOp(TypeId::kInt64, TypeId::kInt16))
+          break;
+
+        // Sign extend by using 'movsxd'.
+        instId = Inst::kIdMovsxd;
+        if (castOp == makeCastOp(TypeId::kInt64, TypeId::kInt32))
+          break;
+      }
+
+      if (TypeUtils::isInt(srcTypeId) || src_.isMem()) {
+        // Zero extend by using 'movzx' or 'mov'.
+        if (dstSize <= 4 && srcSize < 4) {
+          instId = Inst::kIdMovzx;
+          dst.setSignature(Reg::signatureOfT<RegType::kX86_Gpd>());
+        }
+        else {
+          // We should have caught all possibilities where `srcSize` is less than 4, so we don't have to worry
+          // about 'movzx' anymore. Minimum size is enough to determine if we want 32-bit or 64-bit move.
+          instId = Inst::kIdMov;
+          srcSize = Support::min(srcSize, dstSize);
+
+          dst.setSignature(srcSize == 4 ? Reg::signatureOfT<RegType::kX86_Gpd>()
+                                        : Reg::signatureOfT<RegType::kX86_Gpq>());
+          if (src.isReg())
+            src.setSignature(dst.signature());
+        }
+        break;
+      }
+
+      // NOTE: The previous branch caught all memory sources, from here it's always register to register conversion,
+      // so catch the remaining cases.
+      srcSize = Support::min(srcSize, dstSize);
+
+      if (TypeUtils::isMmx(srcTypeId)) {
+        // 64-bit move.
+        instId = Inst::kIdMovq;
+        if (srcSize == 8)
+          break;
+
+        // 32-bit move.
+        instId = Inst::kIdMovd;
+        dst.setSignature(Reg::signatureOfT<RegType::kX86_Gpd>());
+        break;
+      }
+
+      if (TypeUtils::isMask(srcTypeId)) {
+        instId = kmovInstFromSize(srcSize);
+        dst.setSignature(srcSize <= 4 ? Reg::signatureOfT<RegType::kX86_Gpd>()
+                                      : Reg::signatureOfT<RegType::kX86_Gpq>());
+        break;
+      }
+
+      if (TypeUtils::isVec(srcTypeId)) {
+        // 64-bit move.
+        instId = _avxEnabled ? Inst::kIdVmovq : Inst::kIdMovq;
+        if (srcSize == 8)
+          break;
+
+        // 32-bit move.
+        instId = _avxEnabled ? Inst::kIdVmovd : Inst::kIdMovd;
+        dst.setSignature(Reg::signatureOfT<RegType::kX86_Gpd>());
+        break;
+      }
+    }
+
+    if (TypeUtils::isMmx(dstTypeId)) {
+      instId = Inst::kIdMovq;
+      srcSize = Support::min(srcSize, dstSize);
+
+      if (TypeUtils::isInt(srcTypeId) || src.isMem()) {
+        // 64-bit move.
+        if (srcSize == 8)
+          break;
+
+        // 32-bit move.
+        instId = Inst::kIdMovd;
+        if (src.isReg())
+          src.setSignature(Reg::signatureOfT<RegType::kX86_Gpd>());
+        break;
+      }
+
+      if (TypeUtils::isMmx(srcTypeId))
+        break;
+
+      // This will hurt if AVX is enabled.
+      instId = Inst::kIdMovdq2q;
+      if (TypeUtils::isVec(srcTypeId))
+        break;
+    }
+
+    if (TypeUtils::isMask(dstTypeId)) {
+      srcSize = Support::min(srcSize, dstSize);
+
+      if (TypeUtils::isInt(srcTypeId) || TypeUtils::isMask(srcTypeId) || src.isMem()) {
+        instId = kmovInstFromSize(srcSize);
+        if (Reg::isGp(src) && srcSize <= 4)
+          src.setSignature(Reg::signatureOfT<RegType::kX86_Gpd>());
+        break;
+      }
+    }
+
+    if (TypeUtils::isVec(dstTypeId)) {
+      // By default set destination to XMM, will be set to YMM|ZMM if needed.
+      dst.setSignature(Reg::signatureOfT<RegType::kX86_Xmm>());
+
+      // This will hurt if AVX is enabled.
+      if (Reg::isMm(src)) {
+        // 64-bit move.
+        instId = Inst::kIdMovq2dq;
+        break;
+      }
+
+      // Argument conversion between float and double elements.
+      TypeId dstScalarId = TypeUtils::scalarOf(dstTypeId);
+      TypeId srcScalarId = TypeUtils::scalarOf(srcTypeId);
+      // NOTE(review): scalar VEX forms (vcvtsd2ss/vcvtss2sd) take three operands - confirm the two-operand emit below is accepted.
+      if (dstScalarId == TypeId::kFloat32 && srcScalarId == TypeId::kFloat64) {
+        srcSize = Support::min(dstSize * 2, srcSize);
+        dstSize = srcSize / 2;
+        // Narrowing (double -> float): scalar 'cvtsd2ss' or packed 'cvtpd2ps'; the destination is half the source size.
+        if (srcSize <= 8)
+          instId = _avxEnabled ? Inst::kIdVcvtsd2ss : Inst::kIdCvtsd2ss;
+        else
+          instId = _avxEnabled ? Inst::kIdVcvtpd2ps : Inst::kIdCvtpd2ps;
+
+        if (dstSize == 32)
+          dst.setSignature(Reg::signatureOfT<RegType::kX86_Ymm>());
+        if (src.isReg())
+          src.setSignature(Reg::signatureOfVecBySize(srcSize));
+        break;
+      }
+
+      if (dstScalarId == TypeId::kFloat64 && srcScalarId == TypeId::kFloat32) {
+        srcSize = Support::min(dstSize, srcSize * 2) / 2;
+        dstSize = srcSize * 2;
+        // Widening (float -> double): scalar 'cvtss2sd' or packed 'cvtps2pd'; the destination is twice the source size.
+        if (srcSize <= 4)
+          instId = _avxEnabled ? Inst::kIdVcvtss2sd : Inst::kIdCvtss2sd;
+        else
+          instId = _avxEnabled ? Inst::kIdVcvtps2pd : Inst::kIdCvtps2pd;
+
+        dst.setSignature(Reg::signatureOfVecBySize(dstSize));
+        if (src.isReg() && srcSize >= 32)
+          src.setSignature(Reg::signatureOfT<RegType::kX86_Ymm>());
+        break;
+      }
+
+      srcSize = Support::min(srcSize, dstSize);
+      if (Reg::isGp(src) || src.isMem()) {
+        // 32-bit move.
+        if (srcSize <= 4) {
+          instId = _avxEnabled ? Inst::kIdVmovd : Inst::kIdMovd;
+          if (src.isReg())
+            src.setSignature(Reg::signatureOfT<RegType::kX86_Gpd>());
+          break;
+        }
+
+        // 64-bit move.
+        if (srcSize == 8) {
+          instId = _avxEnabled ? Inst::kIdVmovq : Inst::kIdMovq;
+          break;
+        }
+      }
+
+      if (Reg::isVec(src) || src.isMem()) {
+        instId = _avxEnabled ? Inst::kIdVmovaps : Inst::kIdMovaps;
+
+        if (src.isMem() && srcSize < _emitter->environment().stackAlignment())
+          instId = _avxEnabled ? Inst::kIdVmovups : Inst::kIdMovups;
+
+        OperandSignature signature = Reg::signatureOfVecBySize(srcSize);
+        dst.setSignature(signature);
+        if (src.isReg())
+          src.setSignature(signature);
+        break;
+      }
+    }
+
+    // No (dst, src) combination matched.
+    return DebugUtils::errored(kErrorInvalidState);
+  }
+
+  if (src.isMem())
+    src.as<Mem>().setSize(srcSize);
+
+  _emitter->setInlineComment(comment);
+  return _emitter->emit(instId, dst, src);
+}
+
+Error EmitHelper::emitRegSwap(
+  const BaseReg& a,
+  const BaseReg& b, const char* comment) {
+
+  // Only GP <-> GP swaps are supported (emitted as 'xchg'); anything else is reported as an invalid state.
+  if (!(a.isGp() && b.isGp()))
+    return DebugUtils::errored(kErrorInvalidState);
+
+  _emitter->setInlineComment(comment);
+  return _emitter->emit(Inst::kIdXchg, a, b);
+}
+
+// x86::EmitHelper - Emit Prolog & Epilog
+// ======================================
+
+static inline void X86Internal_setupSaveRestoreInfo(RegGroup group, const FuncFrame& frame, Reg& xReg, uint32_t& xInst, uint32_t& xSize) noexcept {
+  // Selects the template register, move instruction and per-register slot size used to save/restore `group`.
+  // Groups other than Vec, K and MM leave all three outputs untouched.
+  if (group == RegGroup::kVec) {
+    xReg = xmm(0);
+    xInst = getXmmMovInst(frame);
+  }
+  else if (group == RegGroup::kX86_K) {
+    xReg = k(0);
+    xInst = Inst::kIdKmovq;
+  }
+  else if (group == RegGroup::kX86_MM) {
+    xReg = mm(0);
+    xInst = Inst::kIdMovq;
+  }
+  else {
+    return;
+  }
+
+  xSize = xReg.size();
+}
+//! Emits the prolog described by `frame`: frame-pointer setup, GP pushes, stack alignment/adjustment, extra register saves.
+ASMJIT_FAVOR_SIZE Error EmitHelper::emitProlog(const FuncFrame& frame) {
+  Emitter* emitter = _emitter->as<Emitter>();
+  uint32_t gpSaved = frame.savedRegs(RegGroup::kGp);
+
+  Gp zsp = emitter->zsp();   // ESP|RSP register.
+  Gp zbp = emitter->zbp();   // EBP|RBP register.
+  Gp gpReg = zsp;            // General purpose register (temporary).
+  Gp saReg = zsp;            // Stack-arguments base pointer.
+
+  // Emit: 'push zbp'
+  //       'mov  zbp, zsp'.
+  if (frame.hasPreservedFP()) {
+    gpSaved &= ~Support::bitMask(Gp::kIdBp); // zbp is pushed right here, so drop it from the generic push list.
+    ASMJIT_PROPAGATE(emitter->push(zbp));
+    ASMJIT_PROPAGATE(emitter->mov(zbp, zsp));
+  }
+
+  // Emit: 'push gp' sequence.
+  {
+    Support::BitWordIterator<RegMask> it(gpSaved);
+    while (it.hasNext()) {
+      gpReg.setId(it.next());
+      ASMJIT_PROPAGATE(emitter->push(gpReg));
+    }
+  }
+
+  // Emit: 'mov saReg, zsp' (or 'mov saReg, zbp' when the frame pointer is preserved).
+  uint32_t saRegId = frame.saRegId();
+  if (saRegId != BaseReg::kIdBad && saRegId != Gp::kIdSp) {
+    saReg.setId(saRegId);
+    if (frame.hasPreservedFP()) {
+      if (saRegId != Gp::kIdBp) // When saReg is zbp itself no move is emitted.
+        ASMJIT_PROPAGATE(emitter->mov(saReg, zbp));
+    }
+    else {
+      ASMJIT_PROPAGATE(emitter->mov(saReg, zsp));
+    }
+  }
+
+  // Emit: 'and zsp, -StackAlignment'.
+  if (frame.hasDynamicAlignment()) {
+    ASMJIT_PROPAGATE(emitter->and_(zsp, -int32_t(frame.finalStackAlignment())));
+  }
+
+  // Emit: 'sub zsp, StackAdjustment'.
+  if (frame.hasStackAdjustment()) {
+    ASMJIT_PROPAGATE(emitter->sub(zsp, frame.stackAdjustment()));
+  }
+
+  // Emit: 'mov [zsp + DAOffset], saReg'.
+  if (frame.hasDynamicAlignment() && frame.hasDAOffset()) {
+    Mem saMem = ptr(zsp, int32_t(frame.daOffset()));
+    ASMJIT_PROPAGATE(emitter->mov(saMem, saReg));
+  }
+
+  // Emit 'movxxx [zsp + X], {[x|y|z]mm, k}'.
+  {
+    Reg xReg;
+    Mem xBase = ptr(zsp, int32_t(frame.extraRegSaveOffset()));
+
+    uint32_t xInst; // Set by X86Internal_setupSaveRestoreInfo() before first use.
+    uint32_t xSize; // Set by X86Internal_setupSaveRestoreInfo() before first use.
+
+    for (RegGroup group : Support::EnumValues<RegGroup, RegGroup(1), RegGroup::kMaxVirt>{}) {
+      Support::BitWordIterator<RegMask> it(frame.savedRegs(group));
+      if (it.hasNext()) {
+        X86Internal_setupSaveRestoreInfo(group, frame, xReg, xInst, xSize);
+        do {
+          xReg.setId(it.next());
+          ASMJIT_PROPAGATE(emitter->emit(xInst, xBase, xReg));
+          xBase.addOffsetLo32(int32_t(xSize)); // Advance to the next save slot.
+        } while (it.hasNext());
+      }
+    }
+  }
+
+  return kErrorOk;
+}
+//! Emits the epilog described by `frame`: extra register restores, cleanup, stack restore, GP pops and the final 'ret'.
+ASMJIT_FAVOR_SIZE Error EmitHelper::emitEpilog(const FuncFrame& frame) {
+  Emitter* emitter = _emitter->as<Emitter>();
+
+  uint32_t i;     // Working copy of the saved-GP mask, shifted left while popping.
+  uint32_t regId; // Register id currently tested by the pop loop.
+
+  uint32_t registerSize = emitter->registerSize();
+  uint32_t gpSaved = frame.savedRegs(RegGroup::kGp);
+
+  Gp zsp = emitter->zsp();   // ESP|RSP register.
+  Gp zbp = emitter->zbp();   // EBP|RBP register.
+  Gp gpReg = emitter->zsp(); // General purpose register (temporary).
+
+  // Don't emit 'pop zbp' in the pop sequence, this case is handled separately.
+  if (frame.hasPreservedFP())
+    gpSaved &= ~Support::bitMask(Gp::kIdBp);
+
+  // Emit 'movxxx {[x|y|z]mm, k}, [zsp + X]'.
+  {
+    Reg xReg;
+    Mem xBase = ptr(zsp, int32_t(frame.extraRegSaveOffset()));
+
+    uint32_t xInst; // Set by X86Internal_setupSaveRestoreInfo() before first use.
+    uint32_t xSize; // Set by X86Internal_setupSaveRestoreInfo() before first use.
+
+    for (RegGroup group : Support::EnumValues<RegGroup, RegGroup(1), RegGroup::kMaxVirt>{}) {
+      Support::BitWordIterator<RegMask> it(frame.savedRegs(group));
+      if (it.hasNext()) {
+        X86Internal_setupSaveRestoreInfo(group, frame, xReg, xInst, xSize);
+        do {
+          xReg.setId(it.next());
+          ASMJIT_PROPAGATE(emitter->emit(xInst, xReg, xBase));
+          xBase.addOffsetLo32(int32_t(xSize)); // Advance to the next save slot.
+        } while (it.hasNext());
+      }
+    }
+  }
+
+  // Emit 'emms' and/or 'vzeroupper'.
+  if (frame.hasMmxCleanup()) ASMJIT_PROPAGATE(emitter->emms());
+  if (frame.hasAvxCleanup()) ASMJIT_PROPAGATE(emitter->vzeroupper());
+
+  if (frame.hasPreservedFP()) {
+    // Emit 'mov zsp, zbp' or 'lea zsp, [zbp - x]'
+    int32_t count = int32_t(frame.pushPopSaveSize() - registerSize); // Push/pop area size minus one register slot.
+    if (!count)
+      ASMJIT_PROPAGATE(emitter->mov(zsp, zbp));
+    else
+      ASMJIT_PROPAGATE(emitter->lea(zsp, ptr(zbp, -count)));
+  }
+  else {
+    if (frame.hasDynamicAlignment() && frame.hasDAOffset()) {
+      // Emit 'mov zsp, [zsp + DsaSlot]'.
+      Mem saMem = ptr(zsp, int32_t(frame.daOffset()));
+      ASMJIT_PROPAGATE(emitter->mov(zsp, saMem));
+    }
+    else if (frame.hasStackAdjustment()) {
+      // Emit 'add zsp, StackAdjustment'.
+      ASMJIT_PROPAGATE(emitter->add(zsp, int32_t(frame.stackAdjustment())));
+    }
+  }
+
+  // Emit 'pop gp' sequence - walks register ids 15 down to 0, testing bit 15 of the shifted mask at each step.
+  if (gpSaved) {
+    i = gpSaved;
+    regId = 16;
+
+    do {
+      regId--;
+      if (i & 0x8000) {
+        gpReg.setId(regId);
+        ASMJIT_PROPAGATE(emitter->pop(gpReg));
+      }
+      i <<= 1;
+    } while (regId != 0);
+  }
+
+  // Emit 'pop zbp'.
+  if (frame.hasPreservedFP())
+    ASMJIT_PROPAGATE(emitter->pop(zbp));
+
+  // Emit 'ret' or 'ret x'.
+  if (frame.hasCalleeStackCleanup())
+    ASMJIT_PROPAGATE(emitter->emit(Inst::kIdRet, int(frame.calleeStackCleanup())));
+  else
+    ASMJIT_PROPAGATE(emitter->emit(Inst::kIdRet));
+
+  return kErrorOk;
+}
+
+static Error ASMJIT_CDECL Emitter_emitProlog(BaseEmitter* emitter, const FuncFrame& frame) {
+  // Builds a temporary x86 helper configured from the frame's AVX/AVX-512 flags and forwards to it.
+  return EmitHelper(emitter, frame.isAvxEnabled(), frame.isAvx512Enabled()).emitProlog(frame);
+}
+
+static Error ASMJIT_CDECL Emitter_emitEpilog(BaseEmitter* emitter, const FuncFrame& frame) {
+  // Builds a temporary x86 helper configured from the frame's AVX/AVX-512 flags and forwards to it.
+  return EmitHelper(emitter, frame.isAvxEnabled(), frame.isAvx512Enabled()).emitEpilog(frame);
+}
+
+static Error ASMJIT_CDECL Emitter_emitArgsAssignment(BaseEmitter* emitter, const FuncFrame& frame, const FuncArgsAssignment& args) {
+  // Builds a temporary x86 helper configured from the frame's AVX/AVX-512 flags and forwards to it.
+  return EmitHelper(emitter, frame.isAvxEnabled(), frame.isAvx512Enabled()).emitArgsAssignment(frame, args);
+}
+
+void assignEmitterFuncs(BaseEmitter* emitter) {
+  // Installs the x86 callbacks; the logging and validation hooks are set only when those features are compiled in.
+  auto& funcs = emitter->_funcs;
+  funcs.emitProlog = Emitter_emitProlog;
+  funcs.emitEpilog = Emitter_emitEpilog;
+  funcs.emitArgsAssignment = Emitter_emitArgsAssignment;
+#ifndef ASMJIT_NO_LOGGING
+  funcs.formatInstruction = FormatterInternal::formatInstruction;
+#endif
+#ifndef ASMJIT_NO_VALIDATION
+  funcs.validate = InstInternal::validate;
+#endif
+}
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // !ASMJIT_NO_X86
diff --git a/lib/lepton/asmjit/x86/x86emithelper_p.h b/lib/lepton/asmjit/x86/x86emithelper_p.h
new file mode 100644
index 0000000000..e71d9afe7d
--- /dev/null
+++ b/lib/lepton/asmjit/x86/x86emithelper_p.h
@@ -0,0 +1,60 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_X86_X86EMITHELPER_P_H_INCLUDED
+#define ASMJIT_X86_X86EMITHELPER_P_H_INCLUDED
+
+#include "../core/api-config.h"
+
+#include "../core/emithelper_p.h"
+#include "../core/func.h"
+#include "../x86/x86emitter.h"
+#include "../x86/x86operand.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(x86)
+
+//! \cond INTERNAL
+//! \addtogroup asmjit_x86
+//! \{
+
+static inline RegType vecTypeIdToRegType(TypeId typeId) noexcept {
+  if (uint32_t(typeId) <= uint32_t(TypeId::_kVec128End)) return RegType::kX86_Xmm; // Up to the 128-bit range end.
+  return uint32_t(typeId) <= uint32_t(TypeId::_kVec256End) ? RegType::kX86_Ymm : RegType::kX86_Zmm;
+}
+//! x86 specialization of `BaseEmitHelper` - emits register/argument moves, swaps and function prolog/epilog.
+class EmitHelper : public BaseEmitHelper {
+public:
+  bool _avxEnabled;    // True when AVX or AVX-512 was requested (see constructor).
+  bool _avx512Enabled; // True when AVX-512 was requested.
+  //! Creates a helper bound to `emitter`; enabling AVX-512 also enables AVX.
+  inline explicit EmitHelper(BaseEmitter* emitter = nullptr, bool avxEnabled = false, bool avx512Enabled = false) noexcept
+    : BaseEmitHelper(emitter),
+      _avxEnabled(avxEnabled || avx512Enabled),
+      _avx512Enabled(avx512Enabled) {}
+
+  Error emitRegMove(
+    const Operand_& dst_,
+    const Operand_& src_, TypeId typeId, const char* comment = nullptr) override;
+
+  Error emitArgMove(
+    const BaseReg& dst_, TypeId dstTypeId,
+    const Operand_& src_, TypeId srcTypeId, const char* comment = nullptr) override;
+
+  Error emitRegSwap(
+    const BaseReg& a,
+    const BaseReg& b, const char* comment = nullptr) override;
+  // Non-virtual x86-only entry points; installed on emitters through assignEmitterFuncs().
+  Error emitProlog(const FuncFrame& frame);
+  Error emitEpilog(const FuncFrame& frame);
+};
+
+void assignEmitterFuncs(BaseEmitter* emitter);
+
+//! \}
+//! \endcond
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // ASMJIT_X86_X86EMITHELPER_P_H_INCLUDED
diff --git a/lib/lepton/asmjit/x86/x86emitter.h b/lib/lepton/asmjit/x86/x86emitter.h
new file mode 100644
index 0000000000..1f85dec4fd
--- /dev/null
+++ b/lib/lepton/asmjit/x86/x86emitter.h
@@ -0,0 +1,4315 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_X86_X86EMITTER_H_INCLUDED
+#define ASMJIT_X86_X86EMITTER_H_INCLUDED
+
+#include "../core/emitter.h"
+#include "../core/support.h"
+#include "../x86/x86globals.h"
+#include "../x86/x86operand.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(x86)
+// Method generators: NAME is the emitted method, ID the `Inst::kId` suffix, T0..T5 the operand types; each forwards to `_emitI()`.
+#define ASMJIT_INST_0x(NAME, ID) \
+  inline Error NAME() { return _emitter()->_emitI(Inst::kId##ID); }
+
+#define ASMJIT_INST_1x(NAME, ID, T0) \
+  inline Error NAME(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID, o0); }
+// One-operand conditional family: NAME(cc, o0) maps `cc` through CONV, plus one NAME<suffix>() method per condition suffix.
+#define ASMJIT_INST_1c(NAME, ID, CONV, T0) \
+  inline Error NAME(CondCode cc, const T0& o0) { return _emitter()->_emitI(CONV(cc), o0); } \
+  inline Error NAME##a(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##a, o0); } \
+  inline Error NAME##ae(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##ae, o0); } \
+  inline Error NAME##b(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##b, o0); } \
+  inline Error NAME##be(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##be, o0); } \
+  inline Error NAME##c(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##c, o0); } \
+  inline Error NAME##e(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##e, o0); } \
+  inline Error NAME##g(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##g, o0); } \
+  inline Error NAME##ge(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##ge, o0); } \
+  inline Error NAME##l(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##l, o0); } \
+  inline Error NAME##le(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##le, o0); } \
+  inline Error NAME##na(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##na, o0); } \
+  inline Error NAME##nae(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##nae, o0); } \
+  inline Error NAME##nb(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##nb, o0); } \
+  inline Error NAME##nbe(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##nbe, o0); } \
+  inline Error NAME##nc(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##nc, o0); } \
+  inline Error NAME##ne(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##ne, o0); } \
+  inline Error NAME##ng(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##ng, o0); } \
+  inline Error NAME##nge(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##nge, o0); } \
+  inline Error NAME##nl(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##nl, o0); } \
+  inline Error NAME##nle(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##nle, o0); } \
+  inline Error NAME##no(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##no, o0); } \
+  inline Error NAME##np(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##np, o0); } \
+  inline Error NAME##ns(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##ns, o0); } \
+  inline Error NAME##nz(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##nz, o0); } \
+  inline Error NAME##o(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##o, o0); } \
+  inline Error NAME##p(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##p, o0); } \
+  inline Error NAME##pe(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##pe, o0); } \
+  inline Error NAME##po(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##po, o0); } \
+  inline Error NAME##s(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##s, o0); } \
+  inline Error NAME##z(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##z, o0); }
+
+#define ASMJIT_INST_2x(NAME, ID, T0, T1) \
+  inline Error NAME(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID, o0, o1); }
+// Two-operand conditional family: same expansion scheme as ASMJIT_INST_1c, with operands (o0, o1).
+#define ASMJIT_INST_2c(NAME, ID, CONV, T0, T1) \
+  inline Error NAME(CondCode cc, const T0& o0, const T1& o1) { return _emitter()->_emitI(CONV(cc), o0, o1); } \
+  inline Error NAME##a(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##a, o0, o1); } \
+  inline Error NAME##ae(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##ae, o0, o1); } \
+  inline Error NAME##b(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##b, o0, o1); } \
+  inline Error NAME##be(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##be, o0, o1); } \
+  inline Error NAME##c(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##c, o0, o1); } \
+  inline Error NAME##e(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##e, o0, o1); } \
+  inline Error NAME##g(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##g, o0, o1); } \
+  inline Error NAME##ge(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##ge, o0, o1); } \
+  inline Error NAME##l(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##l, o0, o1); } \
+  inline Error NAME##le(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##le, o0, o1); } \
+  inline Error NAME##na(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##na, o0, o1); } \
+  inline Error NAME##nae(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##nae, o0, o1); } \
+  inline Error NAME##nb(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##nb, o0, o1); } \
+  inline Error NAME##nbe(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##nbe, o0, o1); } \
+  inline Error NAME##nc(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##nc, o0, o1); } \
+  inline Error NAME##ne(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##ne, o0, o1); } \
+  inline Error NAME##ng(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##ng, o0, o1); } \
+  inline Error NAME##nge(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##nge, o0, o1); } \
+  inline Error NAME##nl(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##nl, o0, o1); } \
+  inline Error NAME##nle(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##nle, o0, o1); } \
+  inline Error NAME##no(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##no, o0, o1); } \
+  inline Error NAME##np(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##np, o0, o1); } \
+  inline Error NAME##ns(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##ns, o0, o1); } \
+  inline Error NAME##nz(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##nz, o0, o1); } \
+  inline Error NAME##o(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##o, o0, o1); } \
+  inline Error NAME##p(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##p, o0, o1); } \
+  inline Error NAME##pe(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##pe, o0, o1); } \
+  inline Error NAME##po(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##po, o0, o1); } \
+  inline Error NAME##s(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##s, o0, o1); } \
+  inline Error NAME##z(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##z, o0, o1); }
+
+#define ASMJIT_INST_3x(NAME, ID, T0, T1, T2) \
+  inline Error NAME(const T0& o0, const T1& o1, const T2& o2) { return _emitter()->_emitI(Inst::kId##ID, o0, o1, o2); }
+
+#define ASMJIT_INST_4x(NAME, ID, T0, T1, T2, T3) \
+  inline Error NAME(const T0& o0, const T1& o1, const T2& o2, const T3& o3) { return _emitter()->_emitI(Inst::kId##ID, o0, o1, o2, o3); }
+
+#define ASMJIT_INST_5x(NAME, ID, T0, T1, T2, T3, T4) \
+  inline Error NAME(const T0& o0, const T1& o1, const T2& o2, const T3& o3, const T4& o4) { return _emitter()->_emitI(Inst::kId##ID, o0, o1, o2, o3, o4); }
+
+#define ASMJIT_INST_6x(NAME, ID, T0, T1, T2, T3, T4, T5) \
+  inline Error NAME(const T0& o0, const T1& o1, const T2& o2, const T3& o3, const T4& o4, const T5& o5) { return _emitter()->_emitI(Inst::kId##ID, o0, o1, o2, o3, o4, o5); }
+
+//! \addtogroup asmjit_x86
+//! \{
+
+//! Emitter (X86 - explicit).
+template<typename This>
+struct EmitterExplicitT {
+  //! \cond
+
+  // These typedefs are used to describe implicit operands passed explicitly.
+  typedef Gp Gp_AL;
+  typedef Gp Gp_AH;
+  typedef Gp Gp_CL;
+  typedef Gp Gp_AX;
+  typedef Gp Gp_DX;
+
+  typedef Gp Gp_EAX;
+  typedef Gp Gp_EBX;
+  typedef Gp Gp_ECX;
+  typedef Gp Gp_EDX;
+
+  typedef Gp Gp_RAX;
+  typedef Gp Gp_RBX;
+  typedef Gp Gp_RCX;
+  typedef Gp Gp_RDX;
+
+  typedef Gp Gp_ZAX;
+  typedef Gp Gp_ZBX;
+  typedef Gp Gp_ZCX;
+  typedef Gp Gp_ZDX;
+
+  typedef Mem DS_ZAX; // ds:[zax]
+  typedef Mem DS_ZDI; // ds:[zdi]
+  typedef Mem ES_ZDI; // es:[zdi]
+  typedef Mem DS_ZSI; // ds:[zsi]
+
+  typedef Xmm XMM0;
+
+  // These two are unfortunately reported by the sanitizer. We know what we do, however, the sanitizer doesn't.
+  // I have tried to use reinterpret_cast instead, but that would generate bad code when compiled by MSC.
+  ASMJIT_ATTRIBUTE_NO_SANITIZE_UNDEF inline This* _emitter() noexcept { return static_cast<This*>(this); }
+  ASMJIT_ATTRIBUTE_NO_SANITIZE_UNDEF inline const This* _emitter() const noexcept { return static_cast<const This*>(this); }
+
+  //! \endcond
+
+  //! \name Native Registers
+  //! \{
+
+  //! Returns the native GP register (GPD or GPQ, depending on the emitter's architecture) that has the given `id`.
+  inline Gp gpz(uint32_t id) const noexcept { return Gp(_emitter()->_gpSignature, id); }
+  // Fixed-id shortcuts: each one returns `gpz()` of the matching `Gp::kId*` register id.
+  inline Gp zax() const noexcept { return gpz(Gp::kIdAx); }
+  inline Gp zcx() const noexcept { return gpz(Gp::kIdCx); }
+  inline Gp zdx() const noexcept { return gpz(Gp::kIdDx); }
+  inline Gp zbx() const noexcept { return gpz(Gp::kIdBx); }
+  inline Gp zsp() const noexcept { return gpz(Gp::kIdSp); }
+  inline Gp zbp() const noexcept { return gpz(Gp::kIdBp); }
+  inline Gp zsi() const noexcept { return gpz(Gp::kIdSi); }
+  inline Gp zdi() const noexcept { return gpz(Gp::kIdDi); }
+
+  //! \}
+
+  //! \name Native Pointers
+  //! \{
+
+  //! Creates a target-dependent memory operand whose base register id is `baseId`, using the given `off` and `size`.
+  inline Mem ptr_base(uint32_t baseId, int32_t off = 0, uint32_t size = 0) const noexcept {
+    return Mem(OperandSignature::fromOpType(OperandType::kMem) |
+               OperandSignature::fromMemBaseType(_emitter()->_gpSignature.regType()) | // base type = emitter's GP register type
+               OperandSignature::fromSize(size),
+               baseId, 0, off);
+  }
+  // Shortcuts: each one calls `ptr_base()` with the fixed `Gp::kId*` id of the named register.
+  inline Mem ptr_zax(int32_t off = 0, uint32_t size = 0) const noexcept { return ptr_base(Gp::kIdAx, off, size); }
+  inline Mem ptr_zcx(int32_t off = 0, uint32_t size = 0) const noexcept { return ptr_base(Gp::kIdCx, off, size); }
+  inline Mem ptr_zdx(int32_t off = 0, uint32_t size = 0) const noexcept { return ptr_base(Gp::kIdDx, off, size); }
+  inline Mem ptr_zbx(int32_t off = 0, uint32_t size = 0) const noexcept { return ptr_base(Gp::kIdBx, off, size); }
+  inline Mem ptr_zsp(int32_t off = 0, uint32_t size = 0) const noexcept { return ptr_base(Gp::kIdSp, off, size); }
+  inline Mem ptr_zbp(int32_t off = 0, uint32_t size = 0) const noexcept { return ptr_base(Gp::kIdBp, off, size); }
+  inline Mem ptr_zsi(int32_t off = 0, uint32_t size = 0) const noexcept { return ptr_base(Gp::kIdSi, off, size); }
+  inline Mem ptr_zdi(int32_t off = 0, uint32_t size = 0) const noexcept { return ptr_base(Gp::kIdDi, off, size); }
+
+  //! Creates a memory operand sized to the emitter's native register size (`registerSize()`), i.e. `intptr_t`.
+  inline Mem intptr_ptr(const Gp& base, int32_t offset = 0) const noexcept {
+    const uint32_t ptrSize = _emitter()->registerSize();
+    return Mem(base, offset, ptrSize);
+  }
+  //! \overload
+  inline Mem intptr_ptr(const Gp& base, const Gp& index, uint32_t shift = 0, int32_t offset = 0) const noexcept {
+    const uint32_t ptrSize = _emitter()->registerSize();
+    return Mem(base, index, shift, offset, ptrSize);
+  }
+  //! \overload
+  inline Mem intptr_ptr(const Gp& base, const Vec& index, uint32_t shift = 0, int32_t offset = 0) const noexcept {
+    const uint32_t ptrSize = _emitter()->registerSize();
+    return Mem(base, index, shift, offset, ptrSize);
+  }
+  //! \overload
+  inline Mem intptr_ptr(const Label& base, int32_t offset = 0) const noexcept {
+    const uint32_t ptrSize = _emitter()->registerSize();
+    return Mem(base, offset, ptrSize);
+  }
+  //! \overload
+  inline Mem intptr_ptr(const Label& base, const Gp& index, uint32_t shift, int32_t offset = 0) const noexcept {
+    const uint32_t ptrSize = _emitter()->registerSize();
+    return Mem(base, index, shift, offset, ptrSize);
+  }
+  //! \overload
+  inline Mem intptr_ptr(const Label& base, const Vec& index, uint32_t shift, int32_t offset = 0) const noexcept {
+    const uint32_t ptrSize = _emitter()->registerSize();
+    return Mem(base, index, shift, offset, ptrSize);
+  }
+  //! \overload
+  inline Mem intptr_ptr(const Rip& rip, int32_t offset = 0) const noexcept {
+    const uint32_t ptrSize = _emitter()->registerSize();
+    return Mem(rip, offset, ptrSize);
+  }
+  //! \overload
+  inline Mem intptr_ptr(uint64_t base) const noexcept {
+    const uint32_t ptrSize = _emitter()->registerSize();
+    return Mem(base, ptrSize);
+  }
+  //! \overload
+  inline Mem intptr_ptr(uint64_t base, const Gp& index, uint32_t shift = 0) const noexcept {
+    const uint32_t ptrSize = _emitter()->registerSize();
+    return Mem(base, index, shift, ptrSize);
+  }
+  //! Creates an `intptr_t` memory operand at address `base`, flagged with the absolute address type (`kAbs`).
+  inline Mem intptr_ptr_abs(uint64_t base) const noexcept {
+    uint32_t nativeGpSize = _emitter()->registerSize();
+    return Mem(base, nativeGpSize, OperandSignature::fromValue<Mem::kSignatureMemAddrTypeMask>(Mem::AddrType::kAbs));
+  }
+  //! \overload
+  inline Mem intptr_ptr_abs(uint64_t base, const Gp& index, uint32_t shift = 0) const noexcept {
+    uint32_t nativeGpSize = _emitter()->registerSize(); // operand size is the native register size
+    return Mem(base, index, shift, nativeGpSize, OperandSignature::fromValue<Mem::kSignatureMemAddrTypeMask>(Mem::AddrType::kAbs));
+  }
+
+  //! \}
+
+  //! \name Embed
+  //! \{
+
+  //! Embeds 8-bit integer data `x`; forwards `x` and `repeatCount` to `embedUInt8()`.
+  inline Error db(uint8_t x, size_t repeatCount = 1) { return _emitter()->embedUInt8(x, repeatCount); }
+  //! Embeds 16-bit integer data `x`; forwards `x` and `repeatCount` to `embedUInt16()`.
+  inline Error dw(uint16_t x, size_t repeatCount = 1) { return _emitter()->embedUInt16(x, repeatCount); }
+  //! Embeds 32-bit integer data `x`; forwards `x` and `repeatCount` to `embedUInt32()`.
+  inline Error dd(uint32_t x, size_t repeatCount = 1) { return _emitter()->embedUInt32(x, repeatCount); }
+  //! Embeds 64-bit integer data `x`; forwards `x` and `repeatCount` to `embedUInt64()`.
+  inline Error dq(uint64_t x, size_t repeatCount = 1) { return _emitter()->embedUInt64(x, repeatCount); }
+
+  //! Embeds the bytes of the structure instance `x` by passing its address and `sizeof(T)` to `embed()`.
+  template<typename T>
+  inline Error dstruct(const T& x) { return _emitter()->embed(&x, uint32_t(sizeof(T))); }
+
+  //! \}
+
+protected:
+  //! \cond
+  inline This& _addInstOptions(InstOptions options) noexcept {
+    _emitter()->addInstOptions(options); // forward the options to the concrete emitter (`This`)
+    return *_emitter();                  // returned by reference, which lets callers chain further calls
+  }
+  //! \endcond
+
+public:
+  //! \name Short/Long Form Options
+  //! \{
+
+  //! Force short form of jmp/jcc instruction.
+  inline This& short_() noexcept { return _addInstOptions(InstOptions::kShortForm); }
+  //! Force long form of jmp/jcc instruction.
+  inline This& long_() noexcept { return _addInstOptions(InstOptions::kLongForm); }
+
+  //! \}
+
+  //! \name Encoding Options
+  //! \{
+
+  //! Prefer MOD/RM encoding when both MOD/RM and MOD/MR forms are applicable.
+  inline This& mod_rm() noexcept { return _addInstOptions(InstOptions::kX86_ModRM); }
+
+  //! Prefer MOD/MR encoding when both MOD/RM and MOD/MR forms are applicable.
+  inline This& mod_mr() noexcept { return _addInstOptions(InstOptions::kX86_ModMR); }
+
+  //! \}
+
+  //! \name Prefix Options
+  //! \{
+
+  //! Marks the condition as likely to be taken (only beneficial on P4).
+  inline This& taken() noexcept { return _addInstOptions(InstOptions::kTaken); }
+  //! Marks the condition as unlikely to be taken (only beneficial on P4).
+  inline This& notTaken() noexcept { return _addInstOptions(InstOptions::kNotTaken); }
+
+  //! Use LOCK prefix.
+  inline This& lock() noexcept { return _addInstOptions(InstOptions::kX86_Lock); }
+  //! Use XACQUIRE prefix.
+  inline This& xacquire() noexcept { return _addInstOptions(InstOptions::kX86_XAcquire); }
+  //! Use XRELEASE prefix.
+  inline This& xrelease() noexcept { return _addInstOptions(InstOptions::kX86_XRelease); }
+
+  //! Use BND/REPNE prefix.
+  //!
+  //! \note Sets the same `kX86_Repne` option as `repne()` / `repnz()`, but takes no count register.
+  inline This& bnd() noexcept { return _addInstOptions(InstOptions::kX86_Repne); }
+
+  //! Use REP prefix (also known as REPE/REPZ); `zcx` is stored in the emitter's `_extraReg`.
+  //!
+  //! \note This is the same as using `repe()` or `repz()` prefix.
+  inline This& rep(const Gp& zcx) noexcept {
+    _emitter()->_extraReg.init(zcx);
+    return _addInstOptions(InstOptions::kX86_Rep);
+  }
+
+  //! Use REP/REPE prefix.
+  //!
+  //! \note This is the same as using `rep()` or `repz()` prefix.
+  inline This& repe(const Gp& zcx) noexcept { return rep(zcx); }
+
+  //! Use REP/REPZ prefix.
+  //!
+  //! \note This is the same as using `rep()` or `repe()` prefix.
+  inline This& repz(const Gp& zcx) noexcept { return rep(zcx); }
+
+  //! Use REPNE prefix (also known as REPNZ); `zcx` is stored in the emitter's `_extraReg`.
+  //!
+  //! \note Sets the same option as `bnd()` and is the same as using `repnz()` prefix.
+  inline This& repne(const Gp& zcx) noexcept {
+    _emitter()->_extraReg.init(zcx);
+    return _addInstOptions(InstOptions::kX86_Repne);
+  }
+
+  //! Use REPNE/REPNZ prefix.
+  //!
+  //! \note This forwards to `repne()`.
+  inline This& repnz(const Gp& zcx) noexcept { return repne(zcx); }
+
+  //! \}
+
+  //! \name REX Options
+  //! \{
+
+  //! Force REX prefix to be emitted even when it's not needed (X86_64).
+  //!
+  //! \note Don't use when using high 8-bit registers as REX prefix makes them inaccessible and `x86::Assembler`
+  //! would fail to encode such instruction.
+  inline This& rex() noexcept { return _addInstOptions(InstOptions::kX86_Rex); }
+
+  //! Force REX.B prefix (X64) [It exists for special purposes only].
+  inline This& rex_b() noexcept { return _addInstOptions(InstOptions::kX86_OpCodeB); }
+  //! Force REX.X prefix (X64) [It exists for special purposes only].
+  inline This& rex_x() noexcept { return _addInstOptions(InstOptions::kX86_OpCodeX); }
+  //! Force REX.R prefix (X64) [It exists for special purposes only].
+  inline This& rex_r() noexcept { return _addInstOptions(InstOptions::kX86_OpCodeR); }
+  //! Force REX.W prefix (X64) [It exists for special purposes only].
+  inline This& rex_w() noexcept { return _addInstOptions(InstOptions::kX86_OpCodeW); }
+
+  //! \}
+
+  //! \name VEX and EVEX Options
+  //! \{
+
+  //! Use VEX prefix instead of EVEX prefix (useful to select AVX_VNNI instruction instead of AVX512_VNNI).
+  inline This& vex() noexcept { return _addInstOptions(InstOptions::kX86_Vex); }
+  //! Force 3-byte VEX prefix (AVX+).
+  inline This& vex3() noexcept { return _addInstOptions(InstOptions::kX86_Vex3); }
+  //! Force 4-byte EVEX prefix (AVX512+).
+  inline This& evex() noexcept { return _addInstOptions(InstOptions::kX86_Evex); }
+
+  //! \}
+
+  //! \name AVX-512 Options & Masking
+  //! \{
+
+  //! Use masking {k} (AVX512+); `kreg` is stored in the emitter's `_extraReg` and no instruction option is added.
+  inline This& k(const KReg& kreg) noexcept {
+    _emitter()->_extraReg.init(kreg);
+    return *_emitter();
+  }
+
+  //! Use zeroing instead of merging (AVX512+).
+  inline This& z() noexcept { return _addInstOptions(InstOptions::kX86_ZMask); }
+
+  //! Suppress all exceptions (AVX512+).
+  inline This& sae() noexcept { return _addInstOptions(InstOptions::kX86_SAE); }
+  //! Static rounding mode {rn} (round-to-nearest even) and {sae} (AVX512+).
+  inline This& rn_sae() noexcept { return _addInstOptions(InstOptions::kX86_ER | InstOptions::kX86_RN_SAE); }
+  //! Static rounding mode {rd} (round-down, toward -inf) and {sae} (AVX512+).
+  inline This& rd_sae() noexcept { return _addInstOptions(InstOptions::kX86_ER | InstOptions::kX86_RD_SAE); }
+  //! Static rounding mode {ru} (round-up, toward +inf) and {sae} (AVX512+).
+  inline This& ru_sae() noexcept { return _addInstOptions(InstOptions::kX86_ER | InstOptions::kX86_RU_SAE); }
+  //! Static rounding mode {rz} (round-toward-zero, truncate) and {sae} (AVX512+).
+  inline This& rz_sae() noexcept { return _addInstOptions(InstOptions::kX86_ER | InstOptions::kX86_RZ_SAE); }
+
+  //! \}
+
+  //! \name Core Instructions
+  //! \{
+
+  ASMJIT_INST_2x(adc, Adc, Gp, Gp)                                     // ANY
+  ASMJIT_INST_2x(adc, Adc, Gp, Mem)                                    // ANY
+  ASMJIT_INST_2x(adc, Adc, Gp, Imm)                                    // ANY
+  ASMJIT_INST_2x(adc, Adc, Mem, Gp)                                    // ANY
+  ASMJIT_INST_2x(adc, Adc, Mem, Imm)                                   // ANY
+  ASMJIT_INST_2x(add, Add, Gp, Gp)                                     // ANY
+  ASMJIT_INST_2x(add, Add, Gp, Mem)                                    // ANY
+  ASMJIT_INST_2x(add, Add, Gp, Imm)                                    // ANY
+  ASMJIT_INST_2x(add, Add, Mem, Gp)                                    // ANY
+  ASMJIT_INST_2x(add, Add, Mem, Imm)                                   // ANY
+  ASMJIT_INST_2x(and_, And, Gp, Gp)                                    // ANY
+  ASMJIT_INST_2x(and_, And, Gp, Mem)                                   // ANY
+  ASMJIT_INST_2x(and_, And, Gp, Imm)                                   // ANY
+  ASMJIT_INST_2x(and_, And, Mem, Gp)                                   // ANY
+  ASMJIT_INST_2x(and_, And, Mem, Imm)                                  // ANY
+  ASMJIT_INST_2x(bound, Bound, Gp, Mem)                                // X86
+  ASMJIT_INST_2x(bsf, Bsf, Gp, Gp)                                     // ANY
+  ASMJIT_INST_2x(bsf, Bsf, Gp, Mem)                                    // ANY
+  ASMJIT_INST_2x(bsr, Bsr, Gp, Gp)                                     // ANY
+  ASMJIT_INST_2x(bsr, Bsr, Gp, Mem)                                    // ANY
+  ASMJIT_INST_1x(bswap, Bswap, Gp)                                     // ANY
+  ASMJIT_INST_2x(bt, Bt, Gp, Gp)                                       // ANY
+  ASMJIT_INST_2x(bt, Bt, Gp, Imm)                                      // ANY
+  ASMJIT_INST_2x(bt, Bt, Mem, Gp)                                      // ANY
+  ASMJIT_INST_2x(bt, Bt, Mem, Imm)                                     // ANY
+  ASMJIT_INST_2x(btc, Btc, Gp, Gp)                                     // ANY
+  ASMJIT_INST_2x(btc, Btc, Gp, Imm)                                    // ANY
+  ASMJIT_INST_2x(btc, Btc, Mem, Gp)                                    // ANY
+  ASMJIT_INST_2x(btc, Btc, Mem, Imm)                                   // ANY
+  ASMJIT_INST_2x(btr, Btr, Gp, Gp)                                     // ANY
+  ASMJIT_INST_2x(btr, Btr, Gp, Imm)                                    // ANY
+  ASMJIT_INST_2x(btr, Btr, Mem, Gp)                                    // ANY
+  ASMJIT_INST_2x(btr, Btr, Mem, Imm)                                   // ANY
+  ASMJIT_INST_2x(bts, Bts, Gp, Gp)                                     // ANY
+  ASMJIT_INST_2x(bts, Bts, Gp, Imm)                                    // ANY
+  ASMJIT_INST_2x(bts, Bts, Mem, Gp)                                    // ANY
+  ASMJIT_INST_2x(bts, Bts, Mem, Imm)                                   // ANY
+  ASMJIT_INST_1x(cbw, Cbw, Gp_AX)                                      // ANY [EXPLICIT] AX      <- Sign Extend AL
+  ASMJIT_INST_2x(cdq, Cdq, Gp_EDX, Gp_EAX)                             // ANY [EXPLICIT] EDX:EAX <- Sign Extend EAX
+  ASMJIT_INST_1x(cdqe, Cdqe, Gp_EAX)                                   // X64 [EXPLICIT] RAX     <- Sign Extend EAX
+  ASMJIT_INST_2x(cqo, Cqo, Gp_RDX, Gp_RAX)                             // X64 [EXPLICIT] RDX:RAX <- Sign Extend RAX
+  ASMJIT_INST_2x(cwd, Cwd, Gp_DX, Gp_AX)                               // ANY [EXPLICIT] DX:AX   <- Sign Extend AX
+  ASMJIT_INST_1x(cwde, Cwde, Gp_EAX)                                   // ANY [EXPLICIT] EAX     <- Sign Extend AX
+  ASMJIT_INST_1x(call, Call, Gp)                                       // ANY
+  ASMJIT_INST_1x(call, Call, Mem)                                      // ANY
+  ASMJIT_INST_1x(call, Call, Label)                                    // ANY
+  ASMJIT_INST_1x(call, Call, Imm)                                      // ANY
+  ASMJIT_INST_2c(cmov, Cmov, Inst::cmovccFromCond, Gp, Gp)             // CMOV
+  ASMJIT_INST_2c(cmov, Cmov, Inst::cmovccFromCond, Gp, Mem)            // CMOV
+  ASMJIT_INST_2x(cmp, Cmp, Gp, Gp)                                     // ANY
+  ASMJIT_INST_2x(cmp, Cmp, Gp, Mem)                                    // ANY
+  ASMJIT_INST_2x(cmp, Cmp, Gp, Imm)                                    // ANY
+  ASMJIT_INST_2x(cmp, Cmp, Mem, Gp)                                    // ANY
+  ASMJIT_INST_2x(cmp, Cmp, Mem, Imm)                                   // ANY
+  ASMJIT_INST_2x(cmps, Cmps, DS_ZSI, ES_ZDI)                           // ANY [EXPLICIT]
+  ASMJIT_INST_3x(cmpxchg, Cmpxchg, Gp, Gp, Gp_ZAX)                     // I486 [EXPLICIT]
+  ASMJIT_INST_3x(cmpxchg, Cmpxchg, Mem, Gp, Gp_ZAX)                    // I486 [EXPLICIT]
+  ASMJIT_INST_5x(cmpxchg16b, Cmpxchg16b, Mem, Gp_RDX, Gp_RAX, Gp_RCX, Gp_RBX)  // CMPXCHG16B [EXPLICIT] m == RDX:RAX ? m <- RCX:RBX
+  ASMJIT_INST_5x(cmpxchg8b, Cmpxchg8b, Mem, Gp_EDX, Gp_EAX, Gp_ECX, Gp_EBX)    // CMPXCHG8B  [EXPLICIT] m == EDX:EAX ? m <- ECX:EBX
+  ASMJIT_INST_1x(dec, Dec, Gp)                                         // ANY
+  ASMJIT_INST_1x(dec, Dec, Mem)                                        // ANY
+  ASMJIT_INST_2x(div, Div, Gp, Gp)                                     // ANY [EXPLICIT]  AH[Rem]: AL[Quot] <- AX / r8
+  ASMJIT_INST_2x(div, Div, Gp, Mem)                                    // ANY [EXPLICIT]  AH[Rem]: AL[Quot] <- AX / m8
+  ASMJIT_INST_3x(div, Div, Gp, Gp, Gp)                                 // ANY [EXPLICIT] xDX[Rem]:xAX[Quot] <- xDX:xAX / r16|r32|r64
+  ASMJIT_INST_3x(div, Div, Gp, Gp, Mem)                                // ANY [EXPLICIT] xDX[Rem]:xAX[Quot] <- xDX:xAX / m16|m32|m64
+  ASMJIT_INST_2x(idiv, Idiv, Gp, Gp)                                   // ANY [EXPLICIT]  AH[Rem]: AL[Quot] <- AX / r8
+  ASMJIT_INST_2x(idiv, Idiv, Gp, Mem)                                  // ANY [EXPLICIT]  AH[Rem]: AL[Quot] <- AX / m8
+  ASMJIT_INST_3x(idiv, Idiv, Gp, Gp, Gp)                               // ANY [EXPLICIT] xDX[Rem]:xAX[Quot] <- xDX:xAX / r16|r32|r64
+  ASMJIT_INST_3x(idiv, Idiv, Gp, Gp, Mem)                              // ANY [EXPLICIT] xDX[Rem]:xAX[Quot] <- xDX:xAX / m16|m32|m64
+  ASMJIT_INST_2x(imul, Imul, Gp, Gp)                                   // ANY [EXPLICIT] AX <- AL * r8 | ra <- ra * rb
+  ASMJIT_INST_2x(imul, Imul, Gp, Mem)                                  // ANY [EXPLICIT] AX <- AL * m8 | ra <- ra * m16|m32|m64
+  ASMJIT_INST_2x(imul, Imul, Gp, Imm)                                  // ANY
+  ASMJIT_INST_3x(imul, Imul, Gp, Gp, Imm)                              // ANY
+  ASMJIT_INST_3x(imul, Imul, Gp, Mem, Imm)                             // ANY
+  ASMJIT_INST_3x(imul, Imul, Gp, Gp, Gp)                               // ANY [EXPLICIT] xDX:xAX <- xAX * r16|r32|r64
+  ASMJIT_INST_3x(imul, Imul, Gp, Gp, Mem)                              // ANY [EXPLICIT] xDX:xAX <- xAX * m16|m32|m64
+  ASMJIT_INST_1x(inc, Inc, Gp)                                         // ANY
+  ASMJIT_INST_1x(inc, Inc, Mem)                                        // ANY
+  ASMJIT_INST_1c(j, J, Inst::jccFromCond, Label)                       // ANY
+  ASMJIT_INST_1c(j, J, Inst::jccFromCond, Imm)                         // ANY
+  ASMJIT_INST_2x(jecxz, Jecxz, Gp, Label)                              // ANY [EXPLICIT] Short jump if CX/ECX/RCX is zero.
+  ASMJIT_INST_2x(jecxz, Jecxz, Gp, Imm)                                // ANY [EXPLICIT] Short jump if CX/ECX/RCX is zero.
+  ASMJIT_INST_1x(jmp, Jmp, Gp)                                         // ANY
+  ASMJIT_INST_1x(jmp, Jmp, Mem)                                        // ANY
+  ASMJIT_INST_1x(jmp, Jmp, Label)                                      // ANY
+  ASMJIT_INST_1x(jmp, Jmp, Imm)                                        // ANY
+  ASMJIT_INST_2x(lcall, Lcall, Imm, Imm)                               // ANY
+  ASMJIT_INST_1x(lcall, Lcall, Mem)                                    // ANY
+  ASMJIT_INST_2x(lea, Lea, Gp, Mem)                                    // ANY
+  ASMJIT_INST_2x(ljmp, Ljmp, Imm, Imm)                                 // ANY
+  ASMJIT_INST_1x(ljmp, Ljmp, Mem)                                      // ANY
+  ASMJIT_INST_2x(lods, Lods, Gp_ZAX, DS_ZSI)                           // ANY [EXPLICIT]
+  ASMJIT_INST_2x(loop, Loop, Gp_ZCX, Label)                            // ANY [EXPLICIT] Decrement xCX; short jump if xCX != 0.
+  ASMJIT_INST_2x(loop, Loop, Gp_ZCX, Imm)                              // ANY [EXPLICIT] Decrement xCX; short jump if xCX != 0.
+  ASMJIT_INST_2x(loope, Loope, Gp_ZCX, Label)                          // ANY [EXPLICIT] Decrement xCX; short jump if xCX != 0 && ZF == 1.
+  ASMJIT_INST_2x(loope, Loope, Gp_ZCX, Imm)                            // ANY [EXPLICIT] Decrement xCX; short jump if xCX != 0 && ZF == 1.
+  ASMJIT_INST_2x(loopne, Loopne, Gp_ZCX, Label)                        // ANY [EXPLICIT] Decrement xCX; short jump if xCX != 0 && ZF == 0.
+  ASMJIT_INST_2x(loopne, Loopne, Gp_ZCX, Imm)                          // ANY [EXPLICIT] Decrement xCX; short jump if xCX != 0 && ZF == 0.
+  ASMJIT_INST_2x(mov, Mov, Gp, Gp)                                     // ANY
+  ASMJIT_INST_2x(mov, Mov, Gp, Mem)                                    // ANY
+  ASMJIT_INST_2x(mov, Mov, Gp, Imm)                                    // ANY
+  ASMJIT_INST_2x(mov, Mov, Mem, Gp)                                    // ANY
+  ASMJIT_INST_2x(mov, Mov, Mem, Imm)                                   // ANY
+  ASMJIT_INST_2x(mov, Mov, Gp, CReg)                                   // ANY
+  ASMJIT_INST_2x(mov, Mov, CReg, Gp)                                   // ANY
+  ASMJIT_INST_2x(mov, Mov, Gp, DReg)                                   // ANY
+  ASMJIT_INST_2x(mov, Mov, DReg, Gp)                                   // ANY
+  ASMJIT_INST_2x(mov, Mov, Gp, SReg)                                   // ANY
+  ASMJIT_INST_2x(mov, Mov, Mem, SReg)                                  // ANY
+  ASMJIT_INST_2x(mov, Mov, SReg, Gp)                                   // ANY
+  ASMJIT_INST_2x(mov, Mov, SReg, Mem)                                  // ANY
+  ASMJIT_INST_2x(movabs, Movabs, Gp, Mem)                              // X64
+  ASMJIT_INST_2x(movabs, Movabs, Gp, Imm)                              // X64
+  ASMJIT_INST_2x(movabs, Movabs, Mem, Gp)                              // X64
+  ASMJIT_INST_2x(movnti, Movnti, Mem, Gp)                              // SSE2
+  ASMJIT_INST_2x(movs, Movs, ES_ZDI, DS_ZSI)                           // ANY [EXPLICIT]
+  ASMJIT_INST_2x(movsx, Movsx, Gp, Gp)                                 // ANY
+  ASMJIT_INST_2x(movsx, Movsx, Gp, Mem)                                // ANY
+  ASMJIT_INST_2x(movsxd, Movsxd, Gp, Gp)                               // X64
+  ASMJIT_INST_2x(movsxd, Movsxd, Gp, Mem)                              // X64
+  ASMJIT_INST_2x(movzx, Movzx, Gp, Gp)                                 // ANY
+  ASMJIT_INST_2x(movzx, Movzx, Gp, Mem)                                // ANY
+  ASMJIT_INST_2x(mul, Mul, Gp_AX, Gp)                                  // ANY [EXPLICIT] AX      <-  AL * r8
+  ASMJIT_INST_2x(mul, Mul, Gp_AX, Mem)                                 // ANY [EXPLICIT] AX      <-  AL * m8
+  ASMJIT_INST_3x(mul, Mul, Gp_ZDX, Gp_ZAX, Gp)                         // ANY [EXPLICIT] xDX:xAX <- xAX * r16|r32|r64
+  ASMJIT_INST_3x(mul, Mul, Gp_ZDX, Gp_ZAX, Mem)                        // ANY [EXPLICIT] xDX:xAX <- xAX * m16|m32|m64
+  ASMJIT_INST_1x(neg, Neg, Gp)                                         // ANY
+  ASMJIT_INST_1x(neg, Neg, Mem)                                        // ANY
+  ASMJIT_INST_0x(nop, Nop)                                             // ANY
+  ASMJIT_INST_1x(nop, Nop, Gp)                                         // ANY
+  ASMJIT_INST_1x(nop, Nop, Mem)                                        // ANY
+  ASMJIT_INST_2x(nop, Nop, Gp, Gp)                                     // ANY
+  ASMJIT_INST_2x(nop, Nop, Mem, Gp)                                    // ANY
+  ASMJIT_INST_1x(not_, Not, Gp)                                        // ANY
+  ASMJIT_INST_1x(not_, Not, Mem)                                       // ANY
+  ASMJIT_INST_2x(or_, Or, Gp, Gp)                                      // ANY
+  ASMJIT_INST_2x(or_, Or, Gp, Mem)                                     // ANY
+  ASMJIT_INST_2x(or_, Or, Gp, Imm)                                     // ANY
+  ASMJIT_INST_2x(or_, Or, Mem, Gp)                                     // ANY
+  ASMJIT_INST_2x(or_, Or, Mem, Imm)                                    // ANY
+  ASMJIT_INST_1x(pop, Pop, Gp)                                         // ANY
+  ASMJIT_INST_1x(pop, Pop, Mem)                                        // ANY
+  ASMJIT_INST_1x(pop, Pop, SReg)                                       // ANY
+  ASMJIT_INST_0x(popa, Popa)                                           // X86
+  ASMJIT_INST_0x(popad, Popad)                                         // X86
+  ASMJIT_INST_0x(popf, Popf)                                           // ANY
+  ASMJIT_INST_0x(popfd, Popfd)                                         // X86
+  ASMJIT_INST_0x(popfq, Popfq)                                         // X64
+  ASMJIT_INST_1x(push, Push, Gp)                                       // ANY
+  ASMJIT_INST_1x(push, Push, Mem)                                      // ANY
+  ASMJIT_INST_1x(push, Push, SReg)                                     // ANY
+  ASMJIT_INST_1x(push, Push, Imm)                                      // ANY
+  ASMJIT_INST_0x(pusha, Pusha)                                         // X86
+  ASMJIT_INST_0x(pushad, Pushad)                                       // X86
+  ASMJIT_INST_0x(pushf, Pushf)                                         // ANY
+  ASMJIT_INST_0x(pushfd, Pushfd)                                       // X86
+  ASMJIT_INST_0x(pushfq, Pushfq)                                       // X64
+  ASMJIT_INST_2x(rcl, Rcl, Gp, Gp_CL)                                  // ANY
+  ASMJIT_INST_2x(rcl, Rcl, Mem, Gp_CL)                                 // ANY
+  ASMJIT_INST_2x(rcl, Rcl, Gp, Imm)                                    // ANY
+  ASMJIT_INST_2x(rcl, Rcl, Mem, Imm)                                   // ANY
+  ASMJIT_INST_2x(rcr, Rcr, Gp, Gp_CL)                                  // ANY
+  ASMJIT_INST_2x(rcr, Rcr, Mem, Gp_CL)                                 // ANY
+  ASMJIT_INST_2x(rcr, Rcr, Gp, Imm)                                    // ANY
+  ASMJIT_INST_2x(rcr, Rcr, Mem, Imm)                                   // ANY
+  ASMJIT_INST_2x(rol, Rol, Gp, Gp_CL)                                  // ANY
+  ASMJIT_INST_2x(rol, Rol, Mem, Gp_CL)                                 // ANY
+  ASMJIT_INST_2x(rol, Rol, Gp, Imm)                                    // ANY
+  ASMJIT_INST_2x(rol, Rol, Mem, Imm)                                   // ANY
+  ASMJIT_INST_2x(ror, Ror, Gp, Gp_CL)                                  // ANY
+  ASMJIT_INST_2x(ror, Ror, Mem, Gp_CL)                                 // ANY
+  ASMJIT_INST_2x(ror, Ror, Gp, Imm)                                    // ANY
+  ASMJIT_INST_2x(ror, Ror, Mem, Imm)                                   // ANY
+  ASMJIT_INST_2x(sbb, Sbb, Gp, Gp)                                     // ANY
+  ASMJIT_INST_2x(sbb, Sbb, Gp, Mem)                                    // ANY
+  ASMJIT_INST_2x(sbb, Sbb, Gp, Imm)                                    // ANY
+  ASMJIT_INST_2x(sbb, Sbb, Mem, Gp)                                    // ANY
+  ASMJIT_INST_2x(sbb, Sbb, Mem, Imm)                                   // ANY
+  ASMJIT_INST_2x(sal, Sal, Gp, Gp_CL)                                  // ANY
+  ASMJIT_INST_2x(sal, Sal, Mem, Gp_CL)                                 // ANY
+  ASMJIT_INST_2x(sal, Sal, Gp, Imm)                                    // ANY
+  ASMJIT_INST_2x(sal, Sal, Mem, Imm)                                   // ANY
+  ASMJIT_INST_2x(sar, Sar, Gp, Gp_CL)                                  // ANY
+  ASMJIT_INST_2x(sar, Sar, Mem, Gp_CL)                                 // ANY
+  ASMJIT_INST_2x(sar, Sar, Gp, Imm)                                    // ANY
+  ASMJIT_INST_2x(sar, Sar, Mem, Imm)                                   // ANY
+  ASMJIT_INST_2x(scas, Scas, Gp_ZAX, ES_ZDI)                           // ANY [EXPLICIT]
+  ASMJIT_INST_1c(set, Set, Inst::setccFromCond, Gp)                    // ANY
+  ASMJIT_INST_1c(set, Set, Inst::setccFromCond, Mem)                   // ANY
+  ASMJIT_INST_2x(shl, Shl, Gp, Gp_CL)                                  // ANY
+  ASMJIT_INST_2x(shl, Shl, Mem, Gp_CL)                                 // ANY
+  ASMJIT_INST_2x(shl, Shl, Gp, Imm)                                    // ANY
+  ASMJIT_INST_2x(shl, Shl, Mem, Imm)                                   // ANY
+  ASMJIT_INST_2x(shr, Shr, Gp, Gp_CL)                                  // ANY
+  ASMJIT_INST_2x(shr, Shr, Mem, Gp_CL)                                 // ANY
+  ASMJIT_INST_2x(shr, Shr, Gp, Imm)                                    // ANY
+  ASMJIT_INST_2x(shr, Shr, Mem, Imm)                                   // ANY
+  ASMJIT_INST_3x(shld, Shld, Gp, Gp, Gp_CL)                            // ANY
+  ASMJIT_INST_3x(shld, Shld, Mem, Gp, Gp_CL)                           // ANY
+  ASMJIT_INST_3x(shld, Shld, Gp, Gp, Imm)                              // ANY
+  ASMJIT_INST_3x(shld, Shld, Mem, Gp, Imm)                             // ANY
+  ASMJIT_INST_3x(shrd, Shrd, Gp, Gp, Gp_CL)                            // ANY
+  ASMJIT_INST_3x(shrd, Shrd, Mem, Gp, Gp_CL)                           // ANY
+  ASMJIT_INST_3x(shrd, Shrd, Gp, Gp, Imm)                              // ANY
+  ASMJIT_INST_3x(shrd, Shrd, Mem, Gp, Imm)                             // ANY
+  ASMJIT_INST_2x(stos, Stos, ES_ZDI, Gp_ZAX)                           // ANY [EXPLICIT]
+  ASMJIT_INST_2x(sub, Sub, Gp, Gp)                                     // ANY
+  ASMJIT_INST_2x(sub, Sub, Gp, Mem)                                    // ANY
+  ASMJIT_INST_2x(sub, Sub, Gp, Imm)                                    // ANY
+  ASMJIT_INST_2x(sub, Sub, Mem, Gp)                                    // ANY
+  ASMJIT_INST_2x(sub, Sub, Mem, Imm)                                   // ANY
+  ASMJIT_INST_2x(test, Test, Gp, Gp)                                   // ANY
+  ASMJIT_INST_2x(test, Test, Gp, Imm)                                  // ANY
+  ASMJIT_INST_2x(test, Test, Mem, Gp)                                  // ANY
+  ASMJIT_INST_2x(test, Test, Mem, Imm)                                 // ANY
+  ASMJIT_INST_2x(ud0, Ud0, Gp, Gp)                                     // ANY
+  ASMJIT_INST_2x(ud0, Ud0, Gp, Mem)                                    // ANY
+  ASMJIT_INST_2x(ud1, Ud1, Gp, Gp)                                     // ANY
+  ASMJIT_INST_2x(ud1, Ud1, Gp, Mem)                                    // ANY
+  ASMJIT_INST_0x(ud2, Ud2)                                             // ANY
+  ASMJIT_INST_2x(xadd, Xadd, Gp, Gp)                                   // ANY
+  ASMJIT_INST_2x(xadd, Xadd, Mem, Gp)                                  // ANY
+  ASMJIT_INST_2x(xchg, Xchg, Gp, Gp)                                   // ANY
+  ASMJIT_INST_2x(xchg, Xchg, Mem, Gp)                                  // ANY
+  ASMJIT_INST_2x(xchg, Xchg, Gp, Mem)                                  // ANY
+  ASMJIT_INST_2x(xor_, Xor, Gp, Gp)                                    // ANY
+  ASMJIT_INST_2x(xor_, Xor, Gp, Mem)                                   // ANY
+  ASMJIT_INST_2x(xor_, Xor, Gp, Imm)                                   // ANY
+  ASMJIT_INST_2x(xor_, Xor, Mem, Gp)                                   // ANY
+  ASMJIT_INST_2x(xor_, Xor, Mem, Imm)                                  // ANY
+
+  //! \}
+
+  //! \name Deprecated 32-bit Instructions
+  //! \{
+
+  ASMJIT_INST_1x(aaa, Aaa, Gp)                                         // X86 [EXPLICIT]
+  ASMJIT_INST_2x(aad, Aad, Gp, Imm)                                    // X86 [EXPLICIT]
+  ASMJIT_INST_2x(aam, Aam, Gp, Imm)                                    // X86 [EXPLICIT]
+  ASMJIT_INST_1x(aas, Aas, Gp)                                         // X86 [EXPLICIT]
+  ASMJIT_INST_1x(daa, Daa, Gp)                                         // X86 [EXPLICIT]
+  ASMJIT_INST_1x(das, Das, Gp)                                         // X86 [EXPLICIT]
+
+  //! \}
+
+  //! \name ENTER/LEAVE Instructions
+  //! \{
+
+  ASMJIT_INST_2x(enter, Enter, Imm, Imm)                               // ANY
+  ASMJIT_INST_0x(leave, Leave)                                         // ANY
+
+  //! \}
+
+  //! \name IN/OUT Instructions
+  //! \{
+
+  // NOTE: For some reason Doxygen is messed up here and thinks we are in cond.
+  //! \endcond
+
+  ASMJIT_INST_2x(in, In, Gp_ZAX, Imm)                                  // ANY
+  ASMJIT_INST_2x(in, In, Gp_ZAX, Gp_DX)                                // ANY
+  ASMJIT_INST_2x(ins, Ins, ES_ZDI, Gp_DX)                              // ANY
+  ASMJIT_INST_2x(out, Out, Imm, Gp_ZAX)                                // ANY
+  ASMJIT_INST_2x(out, Out, Gp_DX, Gp_ZAX)                              // ANY
+  ASMJIT_INST_2x(outs, Outs, Gp_DX, DS_ZSI)                            // ANY
+
+  //! \}
+
+  //! \name Clear/Set CF/DF Instructions
+  //! \{
+
+  ASMJIT_INST_0x(clc, Clc)                                             // ANY
+  ASMJIT_INST_0x(cld, Cld)                                             // ANY
+  ASMJIT_INST_0x(cmc, Cmc)                                             // ANY
+  ASMJIT_INST_0x(stc, Stc)                                             // ANY
+  ASMJIT_INST_0x(std, Std)                                             // ANY
+
+  //! \}
+
+  //! \name LAHF/SAHF Instructions
+  //! \{
+
+  ASMJIT_INST_1x(lahf, Lahf, Gp_AH)                                    // LAHFSAHF [EXPLICIT] AH <- EFL
+  ASMJIT_INST_1x(sahf, Sahf, Gp_AH)                                    // LAHFSAHF [EXPLICIT] EFL <- AH
+
+  //! \}
+
+  //! \name ADX Instructions
+  //! \{
+
+  ASMJIT_INST_2x(adcx, Adcx, Gp, Gp)                                   // ADX
+  ASMJIT_INST_2x(adcx, Adcx, Gp, Mem)                                  // ADX
+  ASMJIT_INST_2x(adox, Adox, Gp, Gp)                                   // ADX
+  ASMJIT_INST_2x(adox, Adox, Gp, Mem)                                  // ADX
+
+  //! \}
+
+  //! \name LZCNT/POPCNT Instructions
+  //! \{
+
+  ASMJIT_INST_2x(lzcnt, Lzcnt, Gp, Gp)                                 // LZCNT
+  ASMJIT_INST_2x(lzcnt, Lzcnt, Gp, Mem)                                // LZCNT
+  ASMJIT_INST_2x(popcnt, Popcnt, Gp, Gp)                               // POPCNT
+  ASMJIT_INST_2x(popcnt, Popcnt, Gp, Mem)                              // POPCNT
+
+  //! \}
+
+  //! \name BMI Instructions
+  //! \{
+
+  ASMJIT_INST_3x(andn, Andn, Gp, Gp, Gp)                               // BMI
+  ASMJIT_INST_3x(andn, Andn, Gp, Gp, Mem)                              // BMI
+  ASMJIT_INST_3x(bextr, Bextr, Gp, Gp, Gp)                             // BMI
+  ASMJIT_INST_3x(bextr, Bextr, Gp, Mem, Gp)                            // BMI
+  ASMJIT_INST_2x(blsi, Blsi, Gp, Gp)                                   // BMI
+  ASMJIT_INST_2x(blsi, Blsi, Gp, Mem)                                  // BMI
+  ASMJIT_INST_2x(blsmsk, Blsmsk, Gp, Gp)                               // BMI
+  ASMJIT_INST_2x(blsmsk, Blsmsk, Gp, Mem)                              // BMI
+  ASMJIT_INST_2x(blsr, Blsr, Gp, Gp)                                   // BMI
+  ASMJIT_INST_2x(blsr, Blsr, Gp, Mem)                                  // BMI
+  ASMJIT_INST_2x(tzcnt, Tzcnt, Gp, Gp)                                 // BMI
+  ASMJIT_INST_2x(tzcnt, Tzcnt, Gp, Mem)                                // BMI
+
+  //! \}
+
+  //! \name BMI2 Instructions
+  //! \{
+
+  ASMJIT_INST_3x(bzhi, Bzhi, Gp, Gp, Gp)                               // BMI2
+  ASMJIT_INST_3x(bzhi, Bzhi, Gp, Mem, Gp)                              // BMI2
+  ASMJIT_INST_4x(mulx, Mulx, Gp, Gp, Gp, Gp_ZDX)                       // BMI2      [EXPLICIT]
+  ASMJIT_INST_4x(mulx, Mulx, Gp, Gp, Mem, Gp_ZDX)                      // BMI2      [EXPLICIT]
+  ASMJIT_INST_3x(pdep, Pdep, Gp, Gp, Gp)                               // BMI2
+  ASMJIT_INST_3x(pdep, Pdep, Gp, Gp, Mem)                              // BMI2
+  ASMJIT_INST_3x(pext, Pext, Gp, Gp, Gp)                               // BMI2
+  ASMJIT_INST_3x(pext, Pext, Gp, Gp, Mem)                              // BMI2
+  ASMJIT_INST_3x(rorx, Rorx, Gp, Gp, Imm)                              // BMI2
+  ASMJIT_INST_3x(rorx, Rorx, Gp, Mem, Imm)                             // BMI2
+  ASMJIT_INST_3x(sarx, Sarx, Gp, Gp, Gp)                               // BMI2
+  ASMJIT_INST_3x(sarx, Sarx, Gp, Mem, Gp)                              // BMI2
+  ASMJIT_INST_3x(shlx, Shlx, Gp, Gp, Gp)                               // BMI2
+  ASMJIT_INST_3x(shlx, Shlx, Gp, Mem, Gp)                              // BMI2
+  ASMJIT_INST_3x(shrx, Shrx, Gp, Gp, Gp)                               // BMI2
+  ASMJIT_INST_3x(shrx, Shrx, Gp, Mem, Gp)                              // BMI2
+
+  //! \}
+
+  //! \name TBM Instructions
+  //! \{
+
+  ASMJIT_INST_2x(blcfill, Blcfill, Gp, Gp)                             // TBM
+  ASMJIT_INST_2x(blcfill, Blcfill, Gp, Mem)                            // TBM
+  ASMJIT_INST_2x(blci, Blci, Gp, Gp)                                   // TBM
+  ASMJIT_INST_2x(blci, Blci, Gp, Mem)                                  // TBM
+  ASMJIT_INST_2x(blcic, Blcic, Gp, Gp)                                 // TBM
+  ASMJIT_INST_2x(blcic, Blcic, Gp, Mem)                                // TBM
+  ASMJIT_INST_2x(blcmsk, Blcmsk, Gp, Gp)                               // TBM
+  ASMJIT_INST_2x(blcmsk, Blcmsk, Gp, Mem)                              // TBM
+  ASMJIT_INST_2x(blcs, Blcs, Gp, Gp)                                   // TBM
+  ASMJIT_INST_2x(blcs, Blcs, Gp, Mem)                                  // TBM
+  ASMJIT_INST_2x(blsfill, Blsfill, Gp, Gp)                             // TBM
+  ASMJIT_INST_2x(blsfill, Blsfill, Gp, Mem)                            // TBM
+  ASMJIT_INST_2x(blsic, Blsic, Gp, Gp)                                 // TBM
+  ASMJIT_INST_2x(blsic, Blsic, Gp, Mem)                                // TBM
+  ASMJIT_INST_2x(t1mskc, T1mskc, Gp, Gp)                               // TBM
+  ASMJIT_INST_2x(t1mskc, T1mskc, Gp, Mem)                              // TBM
+  ASMJIT_INST_2x(tzmsk, Tzmsk, Gp, Gp)                                 // TBM
+  ASMJIT_INST_2x(tzmsk, Tzmsk, Gp, Mem)                                // TBM
+
+  //! \}
+
+  //! \name CRC32 Instructions (SSE4.2)
+  //! \{
+
+  ASMJIT_INST_2x(crc32, Crc32, Gp, Gp)                                 // SSE4_2
+  ASMJIT_INST_2x(crc32, Crc32, Gp, Mem)                                // SSE4_2
+
+  //! \}
+
+  //! \name MOVBE Instructions
+  //! \{
+
+  ASMJIT_INST_2x(movbe, Movbe, Gp, Mem)                                // MOVBE
+  ASMJIT_INST_2x(movbe, Movbe, Mem, Gp)                                // MOVBE
+
+  //! \}
+
+  //! \name MOVDIRI & MOVDIR64B Instructions
+  //! \{
+
+  ASMJIT_INST_2x(movdiri, Movdiri, Mem, Gp)                            // MOVDIRI
+  ASMJIT_INST_2x(movdir64b, Movdir64b, Mem, Mem)                       // MOVDIR64B
+
+  //! \}
+
+  //! \name MXCSR Instructions (SSE)
+  //! \{
+
+  ASMJIT_INST_1x(ldmxcsr, Ldmxcsr, Mem)                                // SSE
+  ASMJIT_INST_1x(stmxcsr, Stmxcsr, Mem)                                // SSE
+
+  //! \}
+
+  //! \name FENCE Instructions (SSE and SSE2)
+  //! \{
+
+  ASMJIT_INST_0x(lfence, Lfence)                                       // SSE2
+  ASMJIT_INST_0x(mfence, Mfence)                                       // SSE2
+  ASMJIT_INST_0x(sfence, Sfence)                                       // SSE
+
+  //! \}
+
+  //! \name PREFETCH Instructions
+  //! \{
+
+  ASMJIT_INST_1x(prefetch, Prefetch, Mem)                              // 3DNOW
+  ASMJIT_INST_1x(prefetchnta, Prefetchnta, Mem)                        // SSE
+  ASMJIT_INST_1x(prefetcht0, Prefetcht0, Mem)                          // SSE
+  ASMJIT_INST_1x(prefetcht1, Prefetcht1, Mem)                          // SSE
+  ASMJIT_INST_1x(prefetcht2, Prefetcht2, Mem)                          // SSE
+  ASMJIT_INST_1x(prefetchw, Prefetchw, Mem)                            // PREFETCHW
+  ASMJIT_INST_1x(prefetchwt1, Prefetchwt1, Mem)                        // PREFETCHWT1
+
+  //! \}
+
+  //! \name CPUID Instruction
+  //! \{
+
+  ASMJIT_INST_4x(cpuid, Cpuid, Gp_EAX, Gp_EBX, Gp_ECX, Gp_EDX)         // I486 [EXPLICIT] EAX:EBX:ECX:EDX <- CPUID[EAX:ECX]
+
+  //! \}
+
+  //! \name CacheLine Instructions
+  //! \{
+
+  ASMJIT_INST_1x(cldemote, Cldemote, Mem)                              // CLDEMOTE
+  ASMJIT_INST_1x(clflush, Clflush, Mem)                                // CLFLUSH
+  ASMJIT_INST_1x(clflushopt, Clflushopt, Mem)                          // CLFLUSH_OPT
+  ASMJIT_INST_1x(clwb, Clwb, Mem)                                      // CLWB
+  ASMJIT_INST_1x(clzero, Clzero, DS_ZAX)                               // CLZERO [EXPLICIT]
+
+  //! \}
+
+  //! \name SERIALIZE Instruction
+  //! \{
+
+  ASMJIT_INST_0x(serialize, Serialize)                                 // SERIALIZE
+
+  //! \}
+
+  //! \name RDPID Instruction
+  //! \{
+
+  ASMJIT_INST_1x(rdpid, Rdpid, Gp)                                     // RDPID
+
+  //! \}
+
+  //! \name RDPRU/RDPKRU Instructions
+  //! \{
+
+  ASMJIT_INST_3x(rdpru, Rdpru, Gp_EDX, Gp_EAX, Gp_ECX)                 // RDPRU     [EXPLICIT] EDX:EAX <- PRU[ECX]
+  ASMJIT_INST_3x(rdpkru, Rdpkru, Gp_EDX, Gp_EAX, Gp_ECX)               // RDPKRU    [EXPLICIT] EDX:EAX <- PKRU[ECX]
+
+  //! \}
+
+  //! \name RDTSC/RDTSCP Instructions
+  //! \{
+
+  ASMJIT_INST_2x(rdtsc, Rdtsc, Gp_EDX, Gp_EAX)                         // RDTSC     [EXPLICIT] EDX:EAX     <- Counter
+  ASMJIT_INST_3x(rdtscp, Rdtscp, Gp_EDX, Gp_EAX, Gp_ECX)               // RDTSCP    [EXPLICIT] EDX:EAX:ECX <- Counter
+
+  //! \}
+
+  //! \name Other User-Mode Instructions
+  //! \{
+
+  ASMJIT_INST_2x(arpl, Arpl, Gp, Gp)                                   // X86
+  ASMJIT_INST_2x(arpl, Arpl, Mem, Gp)                                  // X86
+  ASMJIT_INST_0x(cli, Cli)                                             // ANY
+  ASMJIT_INST_0x(getsec, Getsec)                                       // SMX
+  ASMJIT_INST_1x(int_, Int, Imm)                                       // ANY
+  ASMJIT_INST_0x(int3, Int3)                                           // ANY
+  ASMJIT_INST_0x(into, Into)                                           // ANY
+  ASMJIT_INST_2x(lar, Lar, Gp, Gp)                                     // ANY
+  ASMJIT_INST_2x(lar, Lar, Gp, Mem)                                    // ANY
+  ASMJIT_INST_2x(lds, Lds, Gp, Mem)                                    // X86
+  ASMJIT_INST_2x(les, Les, Gp, Mem)                                    // X86
+  ASMJIT_INST_2x(lfs, Lfs, Gp, Mem)                                    // ANY
+  ASMJIT_INST_2x(lgs, Lgs, Gp, Mem)                                    // ANY
+  ASMJIT_INST_2x(lsl, Lsl, Gp, Gp)                                     // ANY
+  ASMJIT_INST_2x(lsl, Lsl, Gp, Mem)                                    // ANY
+  ASMJIT_INST_2x(lss, Lss, Gp, Mem)                                    // ANY
+  ASMJIT_INST_0x(pause, Pause)                                         // SSE2
+  ASMJIT_INST_0x(rsm, Rsm)                                             // X86
+  ASMJIT_INST_1x(sgdt, Sgdt, Mem)                                      // ANY
+  ASMJIT_INST_1x(sidt, Sidt, Mem)                                      // ANY
+  ASMJIT_INST_1x(sldt, Sldt, Gp)                                       // ANY
+  ASMJIT_INST_1x(sldt, Sldt, Mem)                                      // ANY
+  ASMJIT_INST_1x(smsw, Smsw, Gp)                                       // ANY
+  ASMJIT_INST_1x(smsw, Smsw, Mem)                                      // ANY
+  ASMJIT_INST_0x(sti, Sti)                                             // ANY
+  ASMJIT_INST_1x(str, Str, Gp)                                         // ANY
+  ASMJIT_INST_1x(str, Str, Mem)                                        // ANY
+  ASMJIT_INST_1x(verr, Verr, Gp)                                       // ANY
+  ASMJIT_INST_1x(verr, Verr, Mem)                                      // ANY
+  ASMJIT_INST_1x(verw, Verw, Gp)                                       // ANY
+  ASMJIT_INST_1x(verw, Verw, Mem)                                      // ANY
+
+  //! \}
+
+  //! \name FSGSBASE Instructions
+  //! \{
+
+  ASMJIT_INST_1x(rdfsbase, Rdfsbase, Gp)                               // FSGSBASE
+  ASMJIT_INST_1x(rdgsbase, Rdgsbase, Gp)                               // FSGSBASE
+  ASMJIT_INST_1x(wrfsbase, Wrfsbase, Gp)                               // FSGSBASE
+  ASMJIT_INST_1x(wrgsbase, Wrgsbase, Gp)                               // FSGSBASE
+
+  //! \}
+
+  //! \name FXSR Instructions
+  //! \{
+
+  ASMJIT_INST_1x(fxrstor, Fxrstor, Mem)                                // FXSR
+  ASMJIT_INST_1x(fxrstor64, Fxrstor64, Mem)                            // FXSR
+  ASMJIT_INST_1x(fxsave, Fxsave, Mem)                                  // FXSR
+  ASMJIT_INST_1x(fxsave64, Fxsave64, Mem)                              // FXSR
+
+  //! \}
+
+  //! \name XSAVE Instructions
+  //! \{
+
+  ASMJIT_INST_3x(xgetbv, Xgetbv, Gp_EDX, Gp_EAX, Gp_ECX)               // XSAVE     [EXPLICIT] EDX:EAX <- XCR[ECX]
+  ASMJIT_INST_3x(xrstor, Xrstor, Mem, Gp_EDX, Gp_EAX)                  // XSAVE     [EXPLICIT]
+  ASMJIT_INST_3x(xrstor64, Xrstor64, Mem, Gp_EDX, Gp_EAX)              // XSAVE+X64 [EXPLICIT]
+  ASMJIT_INST_3x(xrstors, Xrstors, Mem, Gp_EDX, Gp_EAX)                // XSAVE     [EXPLICIT]
+  ASMJIT_INST_3x(xrstors64, Xrstors64, Mem, Gp_EDX, Gp_EAX)            // XSAVE+X64 [EXPLICIT]
+  ASMJIT_INST_3x(xsave, Xsave, Mem, Gp_EDX, Gp_EAX)                    // XSAVE     [EXPLICIT]
+  ASMJIT_INST_3x(xsave64, Xsave64, Mem, Gp_EDX, Gp_EAX)                // XSAVE+X64 [EXPLICIT]
+  ASMJIT_INST_3x(xsavec, Xsavec, Mem, Gp_EDX, Gp_EAX)                  // XSAVE     [EXPLICIT]
+  ASMJIT_INST_3x(xsavec64, Xsavec64, Mem, Gp_EDX, Gp_EAX)              // XSAVE+X64 [EXPLICIT]
+  ASMJIT_INST_3x(xsaveopt, Xsaveopt, Mem, Gp_EDX, Gp_EAX)              // XSAVE     [EXPLICIT]
+  ASMJIT_INST_3x(xsaveopt64, Xsaveopt64, Mem, Gp_EDX, Gp_EAX)          // XSAVE+X64 [EXPLICIT]
+  ASMJIT_INST_3x(xsaves, Xsaves, Mem, Gp_EDX, Gp_EAX)                  // XSAVE     [EXPLICIT]
+  ASMJIT_INST_3x(xsaves64, Xsaves64, Mem, Gp_EDX, Gp_EAX)              // XSAVE+X64 [EXPLICIT]
+
+  //! \}
+
+  //! \name MPX Extensions
+  //! \{
+
+  ASMJIT_INST_2x(bndcl, Bndcl, Bnd, Gp)                                // MPX
+  ASMJIT_INST_2x(bndcl, Bndcl, Bnd, Mem)                               // MPX
+  ASMJIT_INST_2x(bndcn, Bndcn, Bnd, Gp)                                // MPX
+  ASMJIT_INST_2x(bndcn, Bndcn, Bnd, Mem)                               // MPX
+  ASMJIT_INST_2x(bndcu, Bndcu, Bnd, Gp)                                // MPX
+  ASMJIT_INST_2x(bndcu, Bndcu, Bnd, Mem)                               // MPX
+  ASMJIT_INST_2x(bndldx, Bndldx, Bnd, Mem)                             // MPX
+  ASMJIT_INST_2x(bndmk, Bndmk, Bnd, Mem)                               // MPX
+  ASMJIT_INST_2x(bndmov, Bndmov, Bnd, Bnd)                             // MPX
+  ASMJIT_INST_2x(bndmov, Bndmov, Bnd, Mem)                             // MPX
+  ASMJIT_INST_2x(bndmov, Bndmov, Mem, Bnd)                             // MPX
+  ASMJIT_INST_2x(bndstx, Bndstx, Mem, Bnd)                             // MPX
+
+  //! \}
+
+  //! \name MONITORX Instructions
+  //! \{
+
+  ASMJIT_INST_3x(monitorx, Monitorx, Mem, Gp, Gp)                      // MONITORX
+  ASMJIT_INST_3x(mwaitx, Mwaitx, Gp, Gp, Gp)                           // MONITORX
+
+  //! \}
+
+  //! \name MCOMMIT Instruction
+  //! \{
+
+  ASMJIT_INST_0x(mcommit, Mcommit)                                     // MCOMMIT
+
+  //! \}
+
+  //! \name PTWRITE Instruction
+  //! \{
+
+  ASMJIT_INST_1x(ptwrite, Ptwrite, Gp)                                 // PTWRITE
+  ASMJIT_INST_1x(ptwrite, Ptwrite, Mem)                                // PTWRITE
+
+  //! \}
+
+  //! \name ENQCMD Instructions
+  //! \{
+
+  ASMJIT_INST_2x(enqcmd, Enqcmd, Mem, Mem)                             // ENQCMD
+  ASMJIT_INST_2x(enqcmds, Enqcmds, Mem, Mem)                           // ENQCMD
+
+  //! \}
+
+  //! \name WAITPKG Instructions
+  //! \{
+
+  ASMJIT_INST_3x(tpause, Tpause, Gp, Gp, Gp)                           // WAITPKG
+  ASMJIT_INST_1x(umonitor, Umonitor, Mem)                              // WAITPKG
+  ASMJIT_INST_3x(umwait, Umwait, Gp, Gp, Gp)                           // WAITPKG
+
+  //! \}
+
+  //! \name RDRAND & RDSEED Instructions
+  //! \{
+
+  ASMJIT_INST_1x(rdrand, Rdrand, Gp)                                   // RDRAND
+  ASMJIT_INST_1x(rdseed, Rdseed, Gp)                                   // RDSEED
+
+  //! \}
+
+  //! \name LWP Instructions
+  //! \{
+
+  ASMJIT_INST_1x(llwpcb, Llwpcb, Gp)                                   // LWP
+  ASMJIT_INST_3x(lwpins, Lwpins, Gp, Gp, Imm)                          // LWP
+  ASMJIT_INST_3x(lwpins, Lwpins, Gp, Mem, Imm)                         // LWP
+  ASMJIT_INST_3x(lwpval, Lwpval, Gp, Gp, Imm)                          // LWP
+  ASMJIT_INST_3x(lwpval, Lwpval, Gp, Mem, Imm)                         // LWP
+  ASMJIT_INST_1x(slwpcb, Slwpcb, Gp)                                   // LWP
+
+  //! \}
+
+  //! \name RTM & TSX Instructions
+  //! \{
+
+  ASMJIT_INST_1x(xabort, Xabort, Imm)                                  // RTM
+  ASMJIT_INST_1x(xbegin, Xbegin, Label)                                // RTM
+  ASMJIT_INST_1x(xbegin, Xbegin, Imm)                                  // RTM
+  ASMJIT_INST_0x(xend, Xend)                                           // RTM
+  ASMJIT_INST_0x(xtest, Xtest)                                         // TSX
+
+  //! \}
+
+  //! \name TSXLDTRK Instructions
+  //! \{
+
+  ASMJIT_INST_0x(xresldtrk, Xresldtrk)                                 // TSXLDTRK
+  ASMJIT_INST_0x(xsusldtrk, Xsusldtrk)                                 // TSXLDTRK
+
+  //! \}
+
+  //! \name CET-IBT Instructions
+  //! \{
+
+  ASMJIT_INST_0x(endbr32, Endbr32)                                     // CET_IBT
+  ASMJIT_INST_0x(endbr64, Endbr64)                                     // CET_IBT
+
+  //! \}
+
+  //! \name CET-SS Instructions
+  //! \{
+
+  ASMJIT_INST_1x(clrssbsy, Clrssbsy, Mem)                              // CET_SS
+  ASMJIT_INST_0x(setssbsy, Setssbsy)                                   // CET_SS
+
+  ASMJIT_INST_1x(rstorssp, Rstorssp, Mem)                              // CET_SS
+  ASMJIT_INST_0x(saveprevssp, Saveprevssp)                             // CET_SS
+
+  ASMJIT_INST_1x(incsspd, Incsspd, Gp)                                 // CET_SS
+  ASMJIT_INST_1x(incsspq, Incsspq, Gp)                                 // CET_SS
+  ASMJIT_INST_1x(rdsspd, Rdsspd, Gp)                                   // CET_SS
+  ASMJIT_INST_1x(rdsspq, Rdsspq, Gp)                                   // CET_SS
+  ASMJIT_INST_2x(wrssd, Wrssd, Gp, Gp)                                 // CET_SS
+  ASMJIT_INST_2x(wrssd, Wrssd, Mem, Gp)                                // CET_SS
+  ASMJIT_INST_2x(wrssq, Wrssq, Gp, Gp)                                 // CET_SS
+  ASMJIT_INST_2x(wrssq, Wrssq, Mem, Gp)                                // CET_SS
+  ASMJIT_INST_2x(wrussd, Wrussd, Gp, Gp)                               // CET_SS
+  ASMJIT_INST_2x(wrussd, Wrussd, Mem, Gp)                              // CET_SS
+  ASMJIT_INST_2x(wrussq, Wrussq, Gp, Gp)                               // CET_SS
+  ASMJIT_INST_2x(wrussq, Wrussq, Mem, Gp)                              // CET_SS
+
+  //! \}
+
+  //! \name HRESET Instructions
+  //! \{
+
+  ASMJIT_INST_2x(hreset, Hreset, Imm, Gp)                              // HRESET
+
+  //! \}
+
+  //! \name UINTR Instructions
+  //! \{
+
+  ASMJIT_INST_0x(clui, Clui)                                           // UINTR
+  ASMJIT_INST_1x(senduipi, Senduipi, Gp)                               // UINTR
+  ASMJIT_INST_0x(testui, Testui)                                       // UINTR
+  ASMJIT_INST_0x(stui, Stui)                                           // UINTR
+  ASMJIT_INST_0x(uiret, Uiret)                                         // UINTR
+
+  //! \}
+
+  //! \name Core Privileged Instructions
+  //! \{
+
+  ASMJIT_INST_0x(clts, Clts)                                           // ANY
+  ASMJIT_INST_0x(hlt, Hlt)                                             // ANY
+  ASMJIT_INST_0x(invd, Invd)                                           // ANY
+  ASMJIT_INST_1x(invlpg, Invlpg, Mem)                                  // ANY
+  ASMJIT_INST_2x(invpcid, Invpcid, Gp, Mem)                            // ANY
+  ASMJIT_INST_1x(lgdt, Lgdt, Mem)                                      // ANY
+  ASMJIT_INST_1x(lidt, Lidt, Mem)                                      // ANY
+  ASMJIT_INST_1x(lldt, Lldt, Gp)                                       // ANY
+  ASMJIT_INST_1x(lldt, Lldt, Mem)                                      // ANY
+  ASMJIT_INST_1x(lmsw, Lmsw, Gp)                                       // ANY
+  ASMJIT_INST_1x(lmsw, Lmsw, Mem)                                      // ANY
+  ASMJIT_INST_1x(ltr, Ltr, Gp)                                         // ANY
+  ASMJIT_INST_1x(ltr, Ltr, Mem)                                        // ANY
+  ASMJIT_INST_3x(rdmsr, Rdmsr, Gp_EDX, Gp_EAX, Gp_ECX)                 // MSR       [EXPLICIT] EDX:EAX <- MSR[ECX]
+  ASMJIT_INST_3x(rdpmc, Rdpmc, Gp_EDX, Gp_EAX, Gp_ECX)                 // ANY       [EXPLICIT] EDX:EAX <- PMC[ECX]
+  ASMJIT_INST_0x(swapgs, Swapgs)                                       // X64
+  ASMJIT_INST_0x(wbinvd, Wbinvd)                                       // ANY
+  ASMJIT_INST_0x(wbnoinvd, Wbnoinvd)                                   // WBNOINVD
+  ASMJIT_INST_3x(wrmsr, Wrmsr, Gp_EDX, Gp_EAX, Gp_ECX)                 // MSR       [EXPLICIT] EDX:EAX  -> MSR[ECX]
+  ASMJIT_INST_3x(xsetbv, Xsetbv, Gp_EDX, Gp_EAX, Gp_ECX)               // XSAVE     [EXPLICIT] XCR[ECX] <- EDX:EAX
+
+  //! \}
+
+  //! \name MONITOR Instructions (Privileged)
+  //! \{
+
+  ASMJIT_INST_3x(monitor, Monitor, Mem, Gp, Gp)                        // MONITOR
+  ASMJIT_INST_2x(mwait, Mwait, Gp, Gp)                                 // MONITOR
+
+  //! \}
+
+  //! \name SMAP Instructions (Privileged)
+  //! \{
+
+  ASMJIT_INST_0x(clac, Clac)                                           // SMAP
+  ASMJIT_INST_0x(stac, Stac)                                           // SMAP
+
+  //! \}
+
+  //! \name SKINIT Instructions (Privileged)
+  //! \{
+
+  ASMJIT_INST_1x(skinit, Skinit, Gp)                                   // SKINIT    [EXPLICIT] <eax>
+  ASMJIT_INST_0x(stgi, Stgi)                                           // SKINIT
+
+  //! \}
+
+  //! \name SNP Instructions (Privileged)
+  //! \{
+
+  ASMJIT_INST_0x(psmash, Psmash)                                       // SNP
+  ASMJIT_INST_0x(pvalidate, Pvalidate)                                 // SNP
+  ASMJIT_INST_0x(rmpadjust, Rmpadjust)                                 // SNP
+  ASMJIT_INST_0x(rmpupdate, Rmpupdate)                                 // SNP
+
+  //! \}
+
+  //! \name VMX Instructions (All privileged except vmfunc)
+  //! \{
+
+  ASMJIT_INST_2x(invept, Invept, Gp, Mem)                              // VMX
+  ASMJIT_INST_2x(invvpid, Invvpid, Gp, Mem)                            // VMX
+  ASMJIT_INST_0x(vmcall, Vmcall)                                       // VMX
+  ASMJIT_INST_1x(vmclear, Vmclear, Mem)                                // VMX
+  ASMJIT_INST_0x(vmfunc, Vmfunc)                                       // VMX
+  ASMJIT_INST_0x(vmlaunch, Vmlaunch)                                   // VMX
+  ASMJIT_INST_1x(vmptrld, Vmptrld, Mem)                                // VMX
+  ASMJIT_INST_1x(vmptrst, Vmptrst, Mem)                                // VMX
+  ASMJIT_INST_2x(vmread, Vmread, Gp, Gp)                               // VMX
+  ASMJIT_INST_2x(vmread, Vmread, Mem, Gp)                              // VMX
+  ASMJIT_INST_0x(vmresume, Vmresume)                                   // VMX
+  ASMJIT_INST_2x(vmwrite, Vmwrite, Gp, Mem)                            // VMX
+  ASMJIT_INST_2x(vmwrite, Vmwrite, Gp, Gp)                             // VMX
+  ASMJIT_INST_1x(vmxon, Vmxon, Mem)                                    // VMX
+
+  //! \}
+
+  //! \name SVM Instructions (All privileged except vmmcall)
+  //! \{
+
+  ASMJIT_INST_0x(clgi, Clgi)                                           // SVM
+  ASMJIT_INST_2x(invlpga, Invlpga, Gp, Gp)                             // SVM       [EXPLICIT] <eax|rax, ecx>
+  ASMJIT_INST_1x(vmload, Vmload, Gp)                                   // SVM       [EXPLICIT] <zax>
+  ASMJIT_INST_0x(vmmcall, Vmmcall)                                     // SVM
+  ASMJIT_INST_1x(vmrun, Vmrun, Gp)                                     // SVM       [EXPLICIT] <zax>
+  ASMJIT_INST_1x(vmsave, Vmsave, Gp)                                   // SVM       [EXPLICIT] <zax>
+
+  //! \}
+
+  //! \name FPU Instructions
+  //! \{
+
+  ASMJIT_INST_0x(f2xm1, F2xm1)                                         // FPU
+  ASMJIT_INST_0x(fabs, Fabs)                                           // FPU
+  ASMJIT_INST_2x(fadd, Fadd, St, St)                                   // FPU
+  ASMJIT_INST_1x(fadd, Fadd, Mem)                                      // FPU
+  ASMJIT_INST_1x(faddp, Faddp, St)                                     // FPU
+  ASMJIT_INST_0x(faddp, Faddp)                                         // FPU
+  ASMJIT_INST_1x(fbld, Fbld, Mem)                                      // FPU
+  ASMJIT_INST_1x(fbstp, Fbstp, Mem)                                    // FPU
+  ASMJIT_INST_0x(fchs, Fchs)                                           // FPU
+  ASMJIT_INST_0x(fclex, Fclex)                                         // FPU
+  ASMJIT_INST_1x(fcmovb, Fcmovb, St)                                   // FPU
+  ASMJIT_INST_1x(fcmovbe, Fcmovbe, St)                                 // FPU
+  ASMJIT_INST_1x(fcmove, Fcmove, St)                                   // FPU
+  ASMJIT_INST_1x(fcmovnb, Fcmovnb, St)                                 // FPU
+  ASMJIT_INST_1x(fcmovnbe, Fcmovnbe, St)                               // FPU
+  ASMJIT_INST_1x(fcmovne, Fcmovne, St)                                 // FPU
+  ASMJIT_INST_1x(fcmovnu, Fcmovnu, St)                                 // FPU
+  ASMJIT_INST_1x(fcmovu, Fcmovu, St)                                   // FPU
+  ASMJIT_INST_1x(fcom, Fcom, St)                                       // FPU
+  ASMJIT_INST_0x(fcom, Fcom)                                           // FPU
+  ASMJIT_INST_1x(fcom, Fcom, Mem)                                      // FPU
+  ASMJIT_INST_1x(fcomp, Fcomp, St)                                     // FPU
+  ASMJIT_INST_0x(fcomp, Fcomp)                                         // FPU
+  ASMJIT_INST_1x(fcomp, Fcomp, Mem)                                    // FPU
+  ASMJIT_INST_0x(fcompp, Fcompp)                                       // FPU
+  ASMJIT_INST_1x(fcomi, Fcomi, St)                                     // FPU
+  ASMJIT_INST_1x(fcomip, Fcomip, St)                                   // FPU
+  ASMJIT_INST_0x(fcos, Fcos)                                           // FPU
+  ASMJIT_INST_0x(fdecstp, Fdecstp)                                     // FPU
+  ASMJIT_INST_2x(fdiv, Fdiv, St, St)                                   // FPU
+  ASMJIT_INST_1x(fdiv, Fdiv, Mem)                                      // FPU
+  ASMJIT_INST_1x(fdivp, Fdivp, St)                                     // FPU
+  ASMJIT_INST_0x(fdivp, Fdivp)                                         // FPU
+  ASMJIT_INST_2x(fdivr, Fdivr, St, St)                                 // FPU
+  ASMJIT_INST_1x(fdivr, Fdivr, Mem)                                    // FPU
+  ASMJIT_INST_1x(fdivrp, Fdivrp, St)                                   // FPU
+  ASMJIT_INST_0x(fdivrp, Fdivrp)                                       // FPU
+  ASMJIT_INST_1x(ffree, Ffree, St)                                     // FPU
+  ASMJIT_INST_1x(fiadd, Fiadd, Mem)                                    // FPU
+  ASMJIT_INST_1x(ficom, Ficom, Mem)                                    // FPU
+  ASMJIT_INST_1x(ficomp, Ficomp, Mem)                                  // FPU
+  ASMJIT_INST_1x(fidiv, Fidiv, Mem)                                    // FPU
+  ASMJIT_INST_1x(fidivr, Fidivr, Mem)                                  // FPU
+  ASMJIT_INST_1x(fild, Fild, Mem)                                      // FPU
+  ASMJIT_INST_1x(fimul, Fimul, Mem)                                    // FPU
+  ASMJIT_INST_0x(fincstp, Fincstp)                                     // FPU
+  ASMJIT_INST_0x(finit, Finit)                                         // FPU
+  ASMJIT_INST_1x(fisub, Fisub, Mem)                                    // FPU
+  ASMJIT_INST_1x(fisubr, Fisubr, Mem)                                  // FPU
+  ASMJIT_INST_0x(fninit, Fninit)                                       // FPU
+  ASMJIT_INST_1x(fist, Fist, Mem)                                      // FPU
+  ASMJIT_INST_1x(fistp, Fistp, Mem)                                    // FPU
+  ASMJIT_INST_1x(fisttp, Fisttp, Mem)                                  // FPU+SSE3
+  ASMJIT_INST_1x(fld, Fld, Mem)                                        // FPU
+  ASMJIT_INST_1x(fld, Fld, St)                                         // FPU
+  ASMJIT_INST_0x(fld1, Fld1)                                           // FPU
+  ASMJIT_INST_0x(fldl2t, Fldl2t)                                       // FPU
+  ASMJIT_INST_0x(fldl2e, Fldl2e)                                       // FPU
+  ASMJIT_INST_0x(fldpi, Fldpi)                                         // FPU
+  ASMJIT_INST_0x(fldlg2, Fldlg2)                                       // FPU
+  ASMJIT_INST_0x(fldln2, Fldln2)                                       // FPU
+  ASMJIT_INST_0x(fldz, Fldz)                                           // FPU
+  ASMJIT_INST_1x(fldcw, Fldcw, Mem)                                    // FPU
+  ASMJIT_INST_1x(fldenv, Fldenv, Mem)                                  // FPU
+  ASMJIT_INST_2x(fmul, Fmul, St, St)                                   // FPU
+  ASMJIT_INST_1x(fmul, Fmul, Mem)                                      // FPU
+  ASMJIT_INST_1x(fmulp, Fmulp, St)                                     // FPU
+  ASMJIT_INST_0x(fmulp, Fmulp)                                         // FPU
+  ASMJIT_INST_0x(fnclex, Fnclex)                                       // FPU
+  ASMJIT_INST_0x(fnop, Fnop)                                           // FPU
+  ASMJIT_INST_1x(fnsave, Fnsave, Mem)                                  // FPU
+  ASMJIT_INST_1x(fnstenv, Fnstenv, Mem)                                // FPU
+  ASMJIT_INST_1x(fnstcw, Fnstcw, Mem)                                  // FPU
+  ASMJIT_INST_0x(fpatan, Fpatan)                                       // FPU
+  ASMJIT_INST_0x(fprem, Fprem)                                         // FPU
+  ASMJIT_INST_0x(fprem1, Fprem1)                                       // FPU
+  ASMJIT_INST_0x(fptan, Fptan)                                         // FPU
+  ASMJIT_INST_0x(frndint, Frndint)                                     // FPU
+  ASMJIT_INST_1x(frstor, Frstor, Mem)                                  // FPU
+  ASMJIT_INST_1x(fsave, Fsave, Mem)                                    // FPU
+  ASMJIT_INST_0x(fscale, Fscale)                                       // FPU
+  ASMJIT_INST_0x(fsin, Fsin)                                           // FPU
+  ASMJIT_INST_0x(fsincos, Fsincos)                                     // FPU
+  ASMJIT_INST_0x(fsqrt, Fsqrt)                                         // FPU
+  ASMJIT_INST_1x(fst, Fst, Mem)                                        // FPU
+  ASMJIT_INST_1x(fst, Fst, St)                                         // FPU
+  ASMJIT_INST_1x(fstp, Fstp, Mem)                                      // FPU
+  ASMJIT_INST_1x(fstp, Fstp, St)                                       // FPU
+  ASMJIT_INST_1x(fstcw, Fstcw, Mem)                                    // FPU
+  ASMJIT_INST_1x(fstenv, Fstenv, Mem)                                  // FPU
+  ASMJIT_INST_2x(fsub, Fsub, St, St)                                   // FPU
+  ASMJIT_INST_1x(fsub, Fsub, Mem)                                      // FPU
+  ASMJIT_INST_1x(fsubp, Fsubp, St)                                     // FPU
+  ASMJIT_INST_0x(fsubp, Fsubp)                                         // FPU
+  ASMJIT_INST_2x(fsubr, Fsubr, St, St)                                 // FPU
+  ASMJIT_INST_1x(fsubr, Fsubr, Mem)                                    // FPU
+  ASMJIT_INST_1x(fsubrp, Fsubrp, St)                                   // FPU
+  ASMJIT_INST_0x(fsubrp, Fsubrp)                                       // FPU
+  ASMJIT_INST_0x(ftst, Ftst)                                           // FPU
+  ASMJIT_INST_1x(fucom, Fucom, St)                                     // FPU
+  ASMJIT_INST_0x(fucom, Fucom)                                         // FPU
+  ASMJIT_INST_1x(fucomi, Fucomi, St)                                   // FPU
+  ASMJIT_INST_1x(fucomip, Fucomip, St)                                 // FPU
+  ASMJIT_INST_1x(fucomp, Fucomp, St)                                   // FPU
+  ASMJIT_INST_0x(fucomp, Fucomp)                                       // FPU
+  ASMJIT_INST_0x(fucompp, Fucompp)                                     // FPU
+  ASMJIT_INST_0x(fwait, Fwait)                                         // FPU
+  ASMJIT_INST_0x(fxam, Fxam)                                           // FPU
+  ASMJIT_INST_1x(fxch, Fxch, St)                                       // FPU
+  ASMJIT_INST_0x(fxtract, Fxtract)                                     // FPU
+  ASMJIT_INST_0x(fyl2x, Fyl2x)                                         // FPU
+  ASMJIT_INST_0x(fyl2xp1, Fyl2xp1)                                     // FPU
+  ASMJIT_INST_1x(fstsw, Fstsw, Gp)                                     // FPU
+  ASMJIT_INST_1x(fstsw, Fstsw, Mem)                                    // FPU
+  ASMJIT_INST_1x(fnstsw, Fnstsw, Gp)                                   // FPU
+  ASMJIT_INST_1x(fnstsw, Fnstsw, Mem)                                  // FPU
+
+  //! \}
+
+  //! \name MMX & SSE+ Instructions
+  //! \{
+
+  ASMJIT_INST_2x(addpd, Addpd, Xmm, Xmm)                               // SSE2
+  ASMJIT_INST_2x(addpd, Addpd, Xmm, Mem)                               // SSE2
+  ASMJIT_INST_2x(addps, Addps, Xmm, Xmm)                               // SSE
+  ASMJIT_INST_2x(addps, Addps, Xmm, Mem)                               // SSE
+  ASMJIT_INST_2x(addsd, Addsd, Xmm, Xmm)                               // SSE2
+  ASMJIT_INST_2x(addsd, Addsd, Xmm, Mem)                               // SSE2
+  ASMJIT_INST_2x(addss, Addss, Xmm, Xmm)                               // SSE
+  ASMJIT_INST_2x(addss, Addss, Xmm, Mem)                               // SSE
+  ASMJIT_INST_2x(addsubpd, Addsubpd, Xmm, Xmm)                         // SSE3
+  ASMJIT_INST_2x(addsubpd, Addsubpd, Xmm, Mem)                         // SSE3
+  ASMJIT_INST_2x(addsubps, Addsubps, Xmm, Xmm)                         // SSE3
+  ASMJIT_INST_2x(addsubps, Addsubps, Xmm, Mem)                         // SSE3
+  ASMJIT_INST_2x(andnpd, Andnpd, Xmm, Xmm)                             // SSE2
+  ASMJIT_INST_2x(andnpd, Andnpd, Xmm, Mem)                             // SSE2
+  ASMJIT_INST_2x(andnps, Andnps, Xmm, Xmm)                             // SSE
+  ASMJIT_INST_2x(andnps, Andnps, Xmm, Mem)                             // SSE
+  ASMJIT_INST_2x(andpd, Andpd, Xmm, Xmm)                               // SSE2
+  ASMJIT_INST_2x(andpd, Andpd, Xmm, Mem)                               // SSE2
+  ASMJIT_INST_2x(andps, Andps, Xmm, Xmm)                               // SSE
+  ASMJIT_INST_2x(andps, Andps, Xmm, Mem)                               // SSE
+  ASMJIT_INST_3x(blendpd, Blendpd, Xmm, Xmm, Imm)                      // SSE4_1
+  ASMJIT_INST_3x(blendpd, Blendpd, Xmm, Mem, Imm)                      // SSE4_1
+  ASMJIT_INST_3x(blendps, Blendps, Xmm, Xmm, Imm)                      // SSE4_1
+  ASMJIT_INST_3x(blendps, Blendps, Xmm, Mem, Imm)                      // SSE4_1
+  ASMJIT_INST_3x(blendvpd, Blendvpd, Xmm, Xmm, XMM0)                   // SSE4_1 [EXPLICIT]
+  ASMJIT_INST_3x(blendvpd, Blendvpd, Xmm, Mem, XMM0)                   // SSE4_1 [EXPLICIT]
+  ASMJIT_INST_3x(blendvps, Blendvps, Xmm, Xmm, XMM0)                   // SSE4_1 [EXPLICIT]
+  ASMJIT_INST_3x(blendvps, Blendvps, Xmm, Mem, XMM0)                   // SSE4_1 [EXPLICIT]
+  ASMJIT_INST_3x(cmppd, Cmppd, Xmm, Xmm, Imm)                          // SSE2
+  ASMJIT_INST_3x(cmppd, Cmppd, Xmm, Mem, Imm)                          // SSE2
+  ASMJIT_INST_3x(cmpps, Cmpps, Xmm, Xmm, Imm)                          // SSE
+  ASMJIT_INST_3x(cmpps, Cmpps, Xmm, Mem, Imm)                          // SSE
+  ASMJIT_INST_3x(cmpsd, Cmpsd, Xmm, Xmm, Imm)                          // SSE2
+  ASMJIT_INST_3x(cmpsd, Cmpsd, Xmm, Mem, Imm)                          // SSE2
+  ASMJIT_INST_3x(cmpss, Cmpss, Xmm, Xmm, Imm)                          // SSE
+  ASMJIT_INST_3x(cmpss, Cmpss, Xmm, Mem, Imm)                          // SSE
+  ASMJIT_INST_2x(comisd, Comisd, Xmm, Xmm)                             // SSE2
+  ASMJIT_INST_2x(comisd, Comisd, Xmm, Mem)                             // SSE2
+  ASMJIT_INST_2x(comiss, Comiss, Xmm, Xmm)                             // SSE
+  ASMJIT_INST_2x(comiss, Comiss, Xmm, Mem)                             // SSE
+  ASMJIT_INST_2x(cvtdq2pd, Cvtdq2pd, Xmm, Xmm)                         // SSE2
+  ASMJIT_INST_2x(cvtdq2pd, Cvtdq2pd, Xmm, Mem)                         // SSE2
+  ASMJIT_INST_2x(cvtdq2ps, Cvtdq2ps, Xmm, Xmm)                         // SSE2
+  ASMJIT_INST_2x(cvtdq2ps, Cvtdq2ps, Xmm, Mem)                         // SSE2
+  ASMJIT_INST_2x(cvtpd2dq, Cvtpd2dq, Xmm, Xmm)                         // SSE2
+  ASMJIT_INST_2x(cvtpd2dq, Cvtpd2dq, Xmm, Mem)                         // SSE2
+  ASMJIT_INST_2x(cvtpd2pi, Cvtpd2pi, Mm, Xmm)                          // SSE2
+  ASMJIT_INST_2x(cvtpd2pi, Cvtpd2pi, Mm, Mem)                          // SSE2
+  ASMJIT_INST_2x(cvtpd2ps, Cvtpd2ps, Xmm, Xmm)                         // SSE2
+  ASMJIT_INST_2x(cvtpd2ps, Cvtpd2ps, Xmm, Mem)                         // SSE2
+  ASMJIT_INST_2x(cvtpi2pd, Cvtpi2pd, Xmm, Mm)                          // SSE2
+  ASMJIT_INST_2x(cvtpi2pd, Cvtpi2pd, Xmm, Mem)                         // SSE2
+  ASMJIT_INST_2x(cvtpi2ps, Cvtpi2ps, Xmm, Mm)                          // SSE
+  ASMJIT_INST_2x(cvtpi2ps, Cvtpi2ps, Xmm, Mem)                         // SSE
+  ASMJIT_INST_2x(cvtps2dq, Cvtps2dq, Xmm, Xmm)                         // SSE2
+  ASMJIT_INST_2x(cvtps2dq, Cvtps2dq, Xmm, Mem)                         // SSE2
+  ASMJIT_INST_2x(cvtps2pd, Cvtps2pd, Xmm, Xmm)                         // SSE2
+  ASMJIT_INST_2x(cvtps2pd, Cvtps2pd, Xmm, Mem)                         // SSE2
+  ASMJIT_INST_2x(cvtps2pi, Cvtps2pi, Mm, Xmm)                          // SSE
+  ASMJIT_INST_2x(cvtps2pi, Cvtps2pi, Mm, Mem)                          // SSE
+  ASMJIT_INST_2x(cvtsd2si, Cvtsd2si, Gp, Xmm)                          // SSE2
+  ASMJIT_INST_2x(cvtsd2si, Cvtsd2si, Gp, Mem)                          // SSE2
+  ASMJIT_INST_2x(cvtsd2ss, Cvtsd2ss, Xmm, Xmm)                         // SSE2
+  ASMJIT_INST_2x(cvtsd2ss, Cvtsd2ss, Xmm, Mem)                         // SSE2
+  ASMJIT_INST_2x(cvtsi2sd, Cvtsi2sd, Xmm, Gp)                          // SSE2
+  ASMJIT_INST_2x(cvtsi2sd, Cvtsi2sd, Xmm, Mem)                         // SSE2
+  ASMJIT_INST_2x(cvtsi2ss, Cvtsi2ss, Xmm, Gp)                          // SSE
+  ASMJIT_INST_2x(cvtsi2ss, Cvtsi2ss, Xmm, Mem)                         // SSE
+  ASMJIT_INST_2x(cvtss2sd, Cvtss2sd, Xmm, Xmm)                         // SSE2
+  ASMJIT_INST_2x(cvtss2sd, Cvtss2sd, Xmm, Mem)                         // SSE2
+  ASMJIT_INST_2x(cvtss2si, Cvtss2si, Gp, Xmm)                          // SSE
+  ASMJIT_INST_2x(cvtss2si, Cvtss2si, Gp, Mem)                          // SSE
+  ASMJIT_INST_2x(cvttpd2pi, Cvttpd2pi, Mm, Xmm)                        // SSE2
+  ASMJIT_INST_2x(cvttpd2pi, Cvttpd2pi, Mm, Mem)                        // SSE2
+  ASMJIT_INST_2x(cvttpd2dq, Cvttpd2dq, Xmm, Xmm)                       // SSE2
+  ASMJIT_INST_2x(cvttpd2dq, Cvttpd2dq, Xmm, Mem)                       // SSE2
+  ASMJIT_INST_2x(cvttps2dq, Cvttps2dq, Xmm, Xmm)                       // SSE2
+  ASMJIT_INST_2x(cvttps2dq, Cvttps2dq, Xmm, Mem)                       // SSE2
+  ASMJIT_INST_2x(cvttps2pi, Cvttps2pi, Mm, Xmm)                        // SSE
+  ASMJIT_INST_2x(cvttps2pi, Cvttps2pi, Mm, Mem)                        // SSE
+  ASMJIT_INST_2x(cvttsd2si, Cvttsd2si, Gp, Xmm)                        // SSE2
+  ASMJIT_INST_2x(cvttsd2si, Cvttsd2si, Gp, Mem)                        // SSE2
+  ASMJIT_INST_2x(cvttss2si, Cvttss2si, Gp, Xmm)                        // SSE
+  ASMJIT_INST_2x(cvttss2si, Cvttss2si, Gp, Mem)                        // SSE
+  ASMJIT_INST_2x(divpd, Divpd, Xmm, Xmm)                               // SSE2
+  ASMJIT_INST_2x(divpd, Divpd, Xmm, Mem)                               // SSE2
+  ASMJIT_INST_2x(divps, Divps, Xmm, Xmm)                               // SSE
+  ASMJIT_INST_2x(divps, Divps, Xmm, Mem)                               // SSE
+  ASMJIT_INST_2x(divsd, Divsd, Xmm, Xmm)                               // SSE2
+  ASMJIT_INST_2x(divsd, Divsd, Xmm, Mem)                               // SSE2
+  ASMJIT_INST_2x(divss, Divss, Xmm, Xmm)                               // SSE
+  ASMJIT_INST_2x(divss, Divss, Xmm, Mem)                               // SSE
+  ASMJIT_INST_3x(dppd, Dppd, Xmm, Xmm, Imm)                            // SSE4_1
+  ASMJIT_INST_3x(dppd, Dppd, Xmm, Mem, Imm)                            // SSE4_1
+  ASMJIT_INST_3x(dpps, Dpps, Xmm, Xmm, Imm)                            // SSE4_1
+  ASMJIT_INST_3x(dpps, Dpps, Xmm, Mem, Imm)                            // SSE4_1
+  ASMJIT_INST_3x(extractps, Extractps, Gp, Xmm, Imm)                   // SSE4_1
+  ASMJIT_INST_3x(extractps, Extractps, Mem, Xmm, Imm)                  // SSE4_1
+  ASMJIT_INST_2x(extrq, Extrq, Xmm, Xmm)                               // SSE4A
+  ASMJIT_INST_3x(extrq, Extrq, Xmm, Imm, Imm)                          // SSE4A
+  ASMJIT_INST_2x(haddpd, Haddpd, Xmm, Xmm)                             // SSE3
+  ASMJIT_INST_2x(haddpd, Haddpd, Xmm, Mem)                             // SSE3
+  ASMJIT_INST_2x(haddps, Haddps, Xmm, Xmm)                             // SSE3
+  ASMJIT_INST_2x(haddps, Haddps, Xmm, Mem)                             // SSE3
+  ASMJIT_INST_2x(hsubpd, Hsubpd, Xmm, Xmm)                             // SSE3
+  ASMJIT_INST_2x(hsubpd, Hsubpd, Xmm, Mem)                             // SSE3
+  ASMJIT_INST_2x(hsubps, Hsubps, Xmm, Xmm)                             // SSE3
+  ASMJIT_INST_2x(hsubps, Hsubps, Xmm, Mem)                             // SSE3
+  ASMJIT_INST_3x(insertps, Insertps, Xmm, Xmm, Imm)                    // SSE4_1
+  ASMJIT_INST_3x(insertps, Insertps, Xmm, Mem, Imm)                    // SSE4_1
+  ASMJIT_INST_2x(insertq, Insertq, Xmm, Xmm)                           // SSE4A
+  ASMJIT_INST_4x(insertq, Insertq, Xmm, Xmm, Imm, Imm)                 // SSE4A
+  ASMJIT_INST_2x(lddqu, Lddqu, Xmm, Mem)                               // SSE3
+  ASMJIT_INST_3x(maskmovq, Maskmovq, Mm, Mm, DS_ZDI)                   // SSE  [EXPLICIT]
+  ASMJIT_INST_3x(maskmovdqu, Maskmovdqu, Xmm, Xmm, DS_ZDI)             // SSE2 [EXPLICIT]
+  ASMJIT_INST_2x(maxpd, Maxpd, Xmm, Xmm)                               // SSE2
+  ASMJIT_INST_2x(maxpd, Maxpd, Xmm, Mem)                               // SSE2
+  ASMJIT_INST_2x(maxps, Maxps, Xmm, Xmm)                               // SSE
+  ASMJIT_INST_2x(maxps, Maxps, Xmm, Mem)                               // SSE
+  ASMJIT_INST_2x(maxsd, Maxsd, Xmm, Xmm)                               // SSE2
+  ASMJIT_INST_2x(maxsd, Maxsd, Xmm, Mem)                               // SSE2
+  ASMJIT_INST_2x(maxss, Maxss, Xmm, Xmm)                               // SSE
+  ASMJIT_INST_2x(maxss, Maxss, Xmm, Mem)                               // SSE
+  ASMJIT_INST_2x(minpd, Minpd, Xmm, Xmm)                               // SSE2
+  ASMJIT_INST_2x(minpd, Minpd, Xmm, Mem)                               // SSE2
+  ASMJIT_INST_2x(minps, Minps, Xmm, Xmm)                               // SSE
+  ASMJIT_INST_2x(minps, Minps, Xmm, Mem)                               // SSE
+  ASMJIT_INST_2x(minsd, Minsd, Xmm, Xmm)                               // SSE2
+  ASMJIT_INST_2x(minsd, Minsd, Xmm, Mem)                               // SSE2
+  ASMJIT_INST_2x(minss, Minss, Xmm, Xmm)                               // SSE
+  ASMJIT_INST_2x(minss, Minss, Xmm, Mem)                               // SSE
+  ASMJIT_INST_2x(movapd, Movapd, Xmm, Xmm)                             // SSE2
+  ASMJIT_INST_2x(movapd, Movapd, Xmm, Mem)                             // SSE2
+  ASMJIT_INST_2x(movapd, Movapd, Mem, Xmm)                             // SSE2
+  ASMJIT_INST_2x(movaps, Movaps, Xmm, Xmm)                             // SSE
+  ASMJIT_INST_2x(movaps, Movaps, Xmm, Mem)                             // SSE
+  ASMJIT_INST_2x(movaps, Movaps, Mem, Xmm)                             // SSE
+  ASMJIT_INST_2x(movd, Movd, Mem, Mm)                                  // MMX
+  ASMJIT_INST_2x(movd, Movd, Mem, Xmm)                                 // SSE2
+  ASMJIT_INST_2x(movd, Movd, Gp, Mm)                                   // MMX
+  ASMJIT_INST_2x(movd, Movd, Gp, Xmm)                                  // SSE2
+  ASMJIT_INST_2x(movd, Movd, Mm, Mem)                                  // MMX
+  ASMJIT_INST_2x(movd, Movd, Xmm, Mem)                                 // SSE2
+  ASMJIT_INST_2x(movd, Movd, Mm, Gp)                                   // MMX
+  ASMJIT_INST_2x(movd, Movd, Xmm, Gp)                                  // SSE2
+  ASMJIT_INST_2x(movddup, Movddup, Xmm, Xmm)                           // SSE3
+  ASMJIT_INST_2x(movddup, Movddup, Xmm, Mem)                           // SSE3
+  ASMJIT_INST_2x(movdq2q, Movdq2q, Mm, Xmm)                            // SSE2
+  ASMJIT_INST_2x(movdqa, Movdqa, Xmm, Xmm)                             // SSE2
+  ASMJIT_INST_2x(movdqa, Movdqa, Xmm, Mem)                             // SSE2
+  ASMJIT_INST_2x(movdqa, Movdqa, Mem, Xmm)                             // SSE2
+  ASMJIT_INST_2x(movdqu, Movdqu, Xmm, Xmm)                             // SSE2
+  ASMJIT_INST_2x(movdqu, Movdqu, Xmm, Mem)                             // SSE2
+  ASMJIT_INST_2x(movdqu, Movdqu, Mem, Xmm)                             // SSE2
+  ASMJIT_INST_2x(movhlps, Movhlps, Xmm, Xmm)                           // SSE
+  ASMJIT_INST_2x(movhpd, Movhpd, Xmm, Mem)                             // SSE2
+  ASMJIT_INST_2x(movhpd, Movhpd, Mem, Xmm)                             // SSE2
+  ASMJIT_INST_2x(movhps, Movhps, Xmm, Mem)                             // SSE
+  ASMJIT_INST_2x(movhps, Movhps, Mem, Xmm)                             // SSE
+  ASMJIT_INST_2x(movlhps, Movlhps, Xmm, Xmm)                           // SSE
+  ASMJIT_INST_2x(movlpd, Movlpd, Xmm, Mem)                             // SSE2
+  ASMJIT_INST_2x(movlpd, Movlpd, Mem, Xmm)                             // SSE2
+  ASMJIT_INST_2x(movlps, Movlps, Xmm, Mem)                             // SSE
+  ASMJIT_INST_2x(movlps, Movlps, Mem, Xmm)                             // SSE
+  ASMJIT_INST_2x(movmskps, Movmskps, Gp, Xmm)                          // SSE
+  ASMJIT_INST_2x(movmskpd, Movmskpd, Gp, Xmm)                          // SSE2
+  ASMJIT_INST_2x(movntdq, Movntdq, Mem, Xmm)                           // SSE2
+  ASMJIT_INST_2x(movntdqa, Movntdqa, Xmm, Mem)                         // SSE4_1
+  ASMJIT_INST_2x(movntpd, Movntpd, Mem, Xmm)                           // SSE2
+  ASMJIT_INST_2x(movntps, Movntps, Mem, Xmm)                           // SSE
+  ASMJIT_INST_2x(movntsd, Movntsd, Mem, Xmm)                           // SSE4A
+  ASMJIT_INST_2x(movntss, Movntss, Mem, Xmm)                           // SSE4A
+  ASMJIT_INST_2x(movntq, Movntq, Mem, Mm)                              // SSE
+  ASMJIT_INST_2x(movq, Movq, Mm, Mm)                                   // MMX
+  ASMJIT_INST_2x(movq, Movq, Xmm, Xmm)                                 // SSE2
+  ASMJIT_INST_2x(movq, Movq, Mem, Mm)                                  // MMX
+  ASMJIT_INST_2x(movq, Movq, Mem, Xmm)                                 // SSE2
+  ASMJIT_INST_2x(movq, Movq, Mm, Mem)                                  // MMX
+  ASMJIT_INST_2x(movq, Movq, Xmm, Mem)                                 // SSE2
+  ASMJIT_INST_2x(movq, Movq, Gp, Mm)                                   // MMX
+  ASMJIT_INST_2x(movq, Movq, Gp, Xmm)                                  // SSE2+X64.
+  ASMJIT_INST_2x(movq, Movq, Mm, Gp)                                   // MMX
+  ASMJIT_INST_2x(movq, Movq, Xmm, Gp)                                  // SSE2+X64.
+  ASMJIT_INST_2x(movq2dq, Movq2dq, Xmm, Mm)                            // SSE2
+  ASMJIT_INST_2x(movsd, Movsd, Xmm, Xmm)                               // SSE2
+  ASMJIT_INST_2x(movsd, Movsd, Xmm, Mem)                               // SSE2
+  ASMJIT_INST_2x(movsd, Movsd, Mem, Xmm)                               // SSE2
+  ASMJIT_INST_2x(movshdup, Movshdup, Xmm, Xmm)                         // SSE3
+  ASMJIT_INST_2x(movshdup, Movshdup, Xmm, Mem)                         // SSE3
+  ASMJIT_INST_2x(movsldup, Movsldup, Xmm, Xmm)                         // SSE3
+  ASMJIT_INST_2x(movsldup, Movsldup, Xmm, Mem)                         // SSE3
+  ASMJIT_INST_2x(movss, Movss, Xmm, Xmm)                               // SSE
+  ASMJIT_INST_2x(movss, Movss, Xmm, Mem)                               // SSE
+  ASMJIT_INST_2x(movss, Movss, Mem, Xmm)                               // SSE
+  ASMJIT_INST_2x(movupd, Movupd, Xmm, Xmm)                             // SSE2
+  ASMJIT_INST_2x(movupd, Movupd, Xmm, Mem)                             // SSE2
+  ASMJIT_INST_2x(movupd, Movupd, Mem, Xmm)                             // SSE2
+  ASMJIT_INST_2x(movups, Movups, Xmm, Xmm)                             // SSE
+  ASMJIT_INST_2x(movups, Movups, Xmm, Mem)                             // SSE
+  ASMJIT_INST_2x(movups, Movups, Mem, Xmm)                             // SSE
+  ASMJIT_INST_3x(mpsadbw, Mpsadbw, Xmm, Xmm, Imm)                      // SSE4_1
+  ASMJIT_INST_3x(mpsadbw, Mpsadbw, Xmm, Mem, Imm)                      // SSE4_1
+  ASMJIT_INST_2x(mulpd, Mulpd, Xmm, Xmm)                               // SSE2
+  ASMJIT_INST_2x(mulpd, Mulpd, Xmm, Mem)                               // SSE2
+  ASMJIT_INST_2x(mulps, Mulps, Xmm, Xmm)                               // SSE
+  ASMJIT_INST_2x(mulps, Mulps, Xmm, Mem)                               // SSE
+  ASMJIT_INST_2x(mulsd, Mulsd, Xmm, Xmm)                               // SSE2
+  ASMJIT_INST_2x(mulsd, Mulsd, Xmm, Mem)                               // SSE2
+  ASMJIT_INST_2x(mulss, Mulss, Xmm, Xmm)                               // SSE
+  ASMJIT_INST_2x(mulss, Mulss, Xmm, Mem)                               // SSE
+  ASMJIT_INST_2x(orpd, Orpd, Xmm, Xmm)                                 // SSE2
+  ASMJIT_INST_2x(orpd, Orpd, Xmm, Mem)                                 // SSE2
+  ASMJIT_INST_2x(orps, Orps, Xmm, Xmm)                                 // SSE
+  ASMJIT_INST_2x(orps, Orps, Xmm, Mem)                                 // SSE
+  ASMJIT_INST_2x(packssdw, Packssdw, Mm, Mm)                           // MMX
+  ASMJIT_INST_2x(packssdw, Packssdw, Mm, Mem)                          // MMX
+  ASMJIT_INST_2x(packssdw, Packssdw, Xmm, Xmm)                         // SSE2
+  ASMJIT_INST_2x(packssdw, Packssdw, Xmm, Mem)                         // SSE2
+  ASMJIT_INST_2x(packsswb, Packsswb, Mm, Mm)                           // MMX
+  ASMJIT_INST_2x(packsswb, Packsswb, Mm, Mem)                          // MMX
+  ASMJIT_INST_2x(packsswb, Packsswb, Xmm, Xmm)                         // SSE2
+  ASMJIT_INST_2x(packsswb, Packsswb, Xmm, Mem)                         // SSE2
+  ASMJIT_INST_2x(packusdw, Packusdw, Xmm, Xmm)                         // SSE4_1
+  ASMJIT_INST_2x(packusdw, Packusdw, Xmm, Mem)                         // SSE4_1
+  ASMJIT_INST_2x(packuswb, Packuswb, Mm, Mm)                           // MMX
+  ASMJIT_INST_2x(packuswb, Packuswb, Mm, Mem)                          // MMX
+  ASMJIT_INST_2x(packuswb, Packuswb, Xmm, Xmm)                         // SSE2
+  ASMJIT_INST_2x(packuswb, Packuswb, Xmm, Mem)                         // SSE2
+  ASMJIT_INST_2x(pabsb, Pabsb, Mm, Mm)                                 // SSSE3
+  ASMJIT_INST_2x(pabsb, Pabsb, Mm, Mem)                                // SSSE3
+  ASMJIT_INST_2x(pabsb, Pabsb, Xmm, Xmm)                               // SSSE3
+  ASMJIT_INST_2x(pabsb, Pabsb, Xmm, Mem)                               // SSSE3
+  ASMJIT_INST_2x(pabsd, Pabsd, Mm, Mm)                                 // SSSE3
+  ASMJIT_INST_2x(pabsd, Pabsd, Mm, Mem)                                // SSSE3
+  ASMJIT_INST_2x(pabsd, Pabsd, Xmm, Xmm)                               // SSSE3
+  ASMJIT_INST_2x(pabsd, Pabsd, Xmm, Mem)                               // SSSE3
+  ASMJIT_INST_2x(pabsw, Pabsw, Mm, Mm)                                 // SSSE3
+  ASMJIT_INST_2x(pabsw, Pabsw, Mm, Mem)                                // SSSE3
+  ASMJIT_INST_2x(pabsw, Pabsw, Xmm, Xmm)                               // SSSE3
+  ASMJIT_INST_2x(pabsw, Pabsw, Xmm, Mem)                               // SSSE3
+  ASMJIT_INST_2x(paddb, Paddb, Mm, Mm)                                 // MMX
+  ASMJIT_INST_2x(paddb, Paddb, Mm, Mem)                                // MMX
+  ASMJIT_INST_2x(paddb, Paddb, Xmm, Xmm)                               // SSE2
+  ASMJIT_INST_2x(paddb, Paddb, Xmm, Mem)                               // SSE2
+  ASMJIT_INST_2x(paddd, Paddd, Mm, Mm)                                 // MMX
+  ASMJIT_INST_2x(paddd, Paddd, Mm, Mem)                                // MMX
+  ASMJIT_INST_2x(paddd, Paddd, Xmm, Xmm)                               // SSE2
+  ASMJIT_INST_2x(paddd, Paddd, Xmm, Mem)                               // SSE2
+  ASMJIT_INST_2x(paddq, Paddq, Mm, Mm)                                 // SSE2
+  ASMJIT_INST_2x(paddq, Paddq, Mm, Mem)                                // SSE2
+  ASMJIT_INST_2x(paddq, Paddq, Xmm, Xmm)                               // SSE2
+  ASMJIT_INST_2x(paddq, Paddq, Xmm, Mem)                               // SSE2
+  ASMJIT_INST_2x(paddsb, Paddsb, Mm, Mm)                               // MMX
+  ASMJIT_INST_2x(paddsb, Paddsb, Mm, Mem)                              // MMX
+  ASMJIT_INST_2x(paddsb, Paddsb, Xmm, Xmm)                             // SSE2
+  ASMJIT_INST_2x(paddsb, Paddsb, Xmm, Mem)                             // SSE2
+  ASMJIT_INST_2x(paddsw, Paddsw, Mm, Mm)                               // MMX
+  ASMJIT_INST_2x(paddsw, Paddsw, Mm, Mem)                              // MMX
+  ASMJIT_INST_2x(paddsw, Paddsw, Xmm, Xmm)                             // SSE2
+  ASMJIT_INST_2x(paddsw, Paddsw, Xmm, Mem)                             // SSE2
+  ASMJIT_INST_2x(paddusb, Paddusb, Mm, Mm)                             // MMX
+  ASMJIT_INST_2x(paddusb, Paddusb, Mm, Mem)                            // MMX
+  ASMJIT_INST_2x(paddusb, Paddusb, Xmm, Xmm)                           // SSE2
+  ASMJIT_INST_2x(paddusb, Paddusb, Xmm, Mem)                           // SSE2
+  ASMJIT_INST_2x(paddusw, Paddusw, Mm, Mm)                             // MMX
+  ASMJIT_INST_2x(paddusw, Paddusw, Mm, Mem)                            // MMX
+  ASMJIT_INST_2x(paddusw, Paddusw, Xmm, Xmm)                           // SSE2
+  ASMJIT_INST_2x(paddusw, Paddusw, Xmm, Mem)                           // SSE2
+  ASMJIT_INST_2x(paddw, Paddw, Mm, Mm)                                 // MMX
+  ASMJIT_INST_2x(paddw, Paddw, Mm, Mem)                                // MMX
+  ASMJIT_INST_2x(paddw, Paddw, Xmm, Xmm)                               // SSE2
+  ASMJIT_INST_2x(paddw, Paddw, Xmm, Mem)                               // SSE2
+  ASMJIT_INST_3x(palignr, Palignr, Mm, Mm, Imm)                        // SSSE3
+  ASMJIT_INST_3x(palignr, Palignr, Mm, Mem, Imm)                       // SSSE3
+  ASMJIT_INST_3x(palignr, Palignr, Xmm, Xmm, Imm)                      // SSSE3
+  ASMJIT_INST_3x(palignr, Palignr, Xmm, Mem, Imm)                      // SSSE3
+  ASMJIT_INST_2x(pand, Pand, Mm, Mm)                                   // MMX
+  ASMJIT_INST_2x(pand, Pand, Mm, Mem)                                  // MMX
+  ASMJIT_INST_2x(pand, Pand, Xmm, Xmm)                                 // SSE2
+  ASMJIT_INST_2x(pand, Pand, Xmm, Mem)                                 // SSE2
+  ASMJIT_INST_2x(pandn, Pandn, Mm, Mm)                                 // MMX
+  ASMJIT_INST_2x(pandn, Pandn, Mm, Mem)                                // MMX
+  ASMJIT_INST_2x(pandn, Pandn, Xmm, Xmm)                               // SSE2
+  ASMJIT_INST_2x(pandn, Pandn, Xmm, Mem)                               // SSE2
+  ASMJIT_INST_2x(pavgb, Pavgb, Mm, Mm)                                 // SSE
+  ASMJIT_INST_2x(pavgb, Pavgb, Mm, Mem)                                // SSE
+  ASMJIT_INST_2x(pavgb, Pavgb, Xmm, Xmm)                               // SSE2
+  ASMJIT_INST_2x(pavgb, Pavgb, Xmm, Mem)                               // SSE2
+  ASMJIT_INST_2x(pavgw, Pavgw, Mm, Mm)                                 // SSE
+  ASMJIT_INST_2x(pavgw, Pavgw, Mm, Mem)                                // SSE
+  ASMJIT_INST_2x(pavgw, Pavgw, Xmm, Xmm)                               // SSE2
+  ASMJIT_INST_2x(pavgw, Pavgw, Xmm, Mem)                               // SSE2
+  ASMJIT_INST_3x(pblendvb, Pblendvb, Xmm, Xmm, XMM0)                   // SSE4_1 [EXPLICIT]
+  ASMJIT_INST_3x(pblendvb, Pblendvb, Xmm, Mem, XMM0)                   // SSE4_1 [EXPLICIT]
+  ASMJIT_INST_3x(pblendw, Pblendw, Xmm, Xmm, Imm)                      // SSE4_1
+  ASMJIT_INST_3x(pblendw, Pblendw, Xmm, Mem, Imm)                      // SSE4_1
+  ASMJIT_INST_3x(pclmulqdq, Pclmulqdq, Xmm, Xmm, Imm)                  // PCLMULQDQ.
+  ASMJIT_INST_3x(pclmulqdq, Pclmulqdq, Xmm, Mem, Imm)                  // PCLMULQDQ.
+  ASMJIT_INST_6x(pcmpestri, Pcmpestri, Xmm, Xmm, Imm, Gp_ECX, Gp_EAX, Gp_EDX) // SSE4_2 [EXPLICIT]
+  ASMJIT_INST_6x(pcmpestri, Pcmpestri, Xmm, Mem, Imm, Gp_ECX, Gp_EAX, Gp_EDX) // SSE4_2 [EXPLICIT]
+  ASMJIT_INST_6x(pcmpestrm, Pcmpestrm, Xmm, Xmm, Imm, XMM0, Gp_EAX, Gp_EDX)   // SSE4_2 [EXPLICIT]
+  ASMJIT_INST_6x(pcmpestrm, Pcmpestrm, Xmm, Mem, Imm, XMM0, Gp_EAX, Gp_EDX)   // SSE4_2 [EXPLICIT]
+  ASMJIT_INST_2x(pcmpeqb, Pcmpeqb, Mm, Mm)                             // MMX
+  ASMJIT_INST_2x(pcmpeqb, Pcmpeqb, Mm, Mem)                            // MMX
+  ASMJIT_INST_2x(pcmpeqb, Pcmpeqb, Xmm, Xmm)                           // SSE2
+  ASMJIT_INST_2x(pcmpeqb, Pcmpeqb, Xmm, Mem)                           // SSE2
+  ASMJIT_INST_2x(pcmpeqd, Pcmpeqd, Mm, Mm)                             // MMX
+  ASMJIT_INST_2x(pcmpeqd, Pcmpeqd, Mm, Mem)                            // MMX
+  ASMJIT_INST_2x(pcmpeqd, Pcmpeqd, Xmm, Xmm)                           // SSE2
+  ASMJIT_INST_2x(pcmpeqd, Pcmpeqd, Xmm, Mem)                           // SSE2
+  ASMJIT_INST_2x(pcmpeqq, Pcmpeqq, Xmm, Xmm)                           // SSE4_1
+  ASMJIT_INST_2x(pcmpeqq, Pcmpeqq, Xmm, Mem)                           // SSE4_1
+  ASMJIT_INST_2x(pcmpeqw, Pcmpeqw, Mm, Mm)                             // MMX
+  ASMJIT_INST_2x(pcmpeqw, Pcmpeqw, Mm, Mem)                            // MMX
+  ASMJIT_INST_2x(pcmpeqw, Pcmpeqw, Xmm, Xmm)                           // SSE2
+  ASMJIT_INST_2x(pcmpeqw, Pcmpeqw, Xmm, Mem)                           // SSE2
+  ASMJIT_INST_2x(pcmpgtb, Pcmpgtb, Mm, Mm)                             // MMX
+  ASMJIT_INST_2x(pcmpgtb, Pcmpgtb, Mm, Mem)                            // MMX
+  ASMJIT_INST_2x(pcmpgtb, Pcmpgtb, Xmm, Xmm)                           // SSE2
+  ASMJIT_INST_2x(pcmpgtb, Pcmpgtb, Xmm, Mem)                           // SSE2
+  ASMJIT_INST_2x(pcmpgtd, Pcmpgtd, Mm, Mm)                             // MMX
+  ASMJIT_INST_2x(pcmpgtd, Pcmpgtd, Mm, Mem)                            // MMX
+  ASMJIT_INST_2x(pcmpgtd, Pcmpgtd, Xmm, Xmm)                           // SSE2
+  ASMJIT_INST_2x(pcmpgtd, Pcmpgtd, Xmm, Mem)                           // SSE2
+  ASMJIT_INST_2x(pcmpgtq, Pcmpgtq, Xmm, Xmm)                           // SSE4_2.
+  ASMJIT_INST_2x(pcmpgtq, Pcmpgtq, Xmm, Mem)                           // SSE4_2.
+  ASMJIT_INST_2x(pcmpgtw, Pcmpgtw, Mm, Mm)                             // MMX
+  ASMJIT_INST_2x(pcmpgtw, Pcmpgtw, Mm, Mem)                            // MMX
+  ASMJIT_INST_2x(pcmpgtw, Pcmpgtw, Xmm, Xmm)                           // SSE2
+  ASMJIT_INST_2x(pcmpgtw, Pcmpgtw, Xmm, Mem)                           // SSE2
+  ASMJIT_INST_4x(pcmpistri, Pcmpistri, Xmm, Xmm, Imm, Gp_ECX)          // SSE4_2 [EXPLICIT]
+  ASMJIT_INST_4x(pcmpistri, Pcmpistri, Xmm, Mem, Imm, Gp_ECX)          // SSE4_2 [EXPLICIT]
+  ASMJIT_INST_4x(pcmpistrm, Pcmpistrm, Xmm, Xmm, Imm, XMM0)            // SSE4_2 [EXPLICIT]
+  ASMJIT_INST_4x(pcmpistrm, Pcmpistrm, Xmm, Mem, Imm, XMM0)            // SSE4_2 [EXPLICIT]
+  ASMJIT_INST_3x(pextrb, Pextrb, Gp, Xmm, Imm)                         // SSE4_1
+  ASMJIT_INST_3x(pextrb, Pextrb, Mem, Xmm, Imm)                        // SSE4_1
+  ASMJIT_INST_3x(pextrd, Pextrd, Gp, Xmm, Imm)                         // SSE4_1
+  ASMJIT_INST_3x(pextrd, Pextrd, Mem, Xmm, Imm)                        // SSE4_1
+  ASMJIT_INST_3x(pextrq, Pextrq, Gp, Xmm, Imm)                         // SSE4_1
+  ASMJIT_INST_3x(pextrq, Pextrq, Mem, Xmm, Imm)                        // SSE4_1
+  ASMJIT_INST_3x(pextrw, Pextrw, Gp, Mm, Imm)                          // SSE
+  ASMJIT_INST_3x(pextrw, Pextrw, Gp, Xmm, Imm)                         // SSE2
+  ASMJIT_INST_3x(pextrw, Pextrw, Mem, Xmm, Imm)                        // SSE4_1
+  ASMJIT_INST_2x(phaddd, Phaddd, Mm, Mm)                               // SSSE3
+  ASMJIT_INST_2x(phaddd, Phaddd, Mm, Mem)                              // SSSE3
+  ASMJIT_INST_2x(phaddd, Phaddd, Xmm, Xmm)                             // SSSE3
+  ASMJIT_INST_2x(phaddd, Phaddd, Xmm, Mem)                             // SSSE3
+  ASMJIT_INST_2x(phaddsw, Phaddsw, Mm, Mm)                             // SSSE3
+  ASMJIT_INST_2x(phaddsw, Phaddsw, Mm, Mem)                            // SSSE3
+  ASMJIT_INST_2x(phaddsw, Phaddsw, Xmm, Xmm)                           // SSSE3
+  ASMJIT_INST_2x(phaddsw, Phaddsw, Xmm, Mem)                           // SSSE3
+  ASMJIT_INST_2x(phaddw, Phaddw, Mm, Mm)                               // SSSE3
+  ASMJIT_INST_2x(phaddw, Phaddw, Mm, Mem)                              // SSSE3
+  ASMJIT_INST_2x(phaddw, Phaddw, Xmm, Xmm)                             // SSSE3
+  ASMJIT_INST_2x(phaddw, Phaddw, Xmm, Mem)                             // SSSE3
+  ASMJIT_INST_2x(phminposuw, Phminposuw, Xmm, Xmm)                     // SSE4_1
+  ASMJIT_INST_2x(phminposuw, Phminposuw, Xmm, Mem)                     // SSE4_1
+  ASMJIT_INST_2x(phsubd, Phsubd, Mm, Mm)                               // SSSE3
+  ASMJIT_INST_2x(phsubd, Phsubd, Mm, Mem)                              // SSSE3
+  ASMJIT_INST_2x(phsubd, Phsubd, Xmm, Xmm)                             // SSSE3
+  ASMJIT_INST_2x(phsubd, Phsubd, Xmm, Mem)                             // SSSE3
+  ASMJIT_INST_2x(phsubsw, Phsubsw, Mm, Mm)                             // SSSE3
+  ASMJIT_INST_2x(phsubsw, Phsubsw, Mm, Mem)                            // SSSE3
+  ASMJIT_INST_2x(phsubsw, Phsubsw, Xmm, Xmm)                           // SSSE3
+  ASMJIT_INST_2x(phsubsw, Phsubsw, Xmm, Mem)                           // SSSE3
+  ASMJIT_INST_2x(phsubw, Phsubw, Mm, Mm)                               // SSSE3
+  ASMJIT_INST_2x(phsubw, Phsubw, Mm, Mem)                              // SSSE3
+  ASMJIT_INST_2x(phsubw, Phsubw, Xmm, Xmm)                             // SSSE3
+  ASMJIT_INST_2x(phsubw, Phsubw, Xmm, Mem)                             // SSSE3
+  ASMJIT_INST_3x(pinsrb, Pinsrb, Xmm, Gp, Imm)                         // SSE4_1
+  ASMJIT_INST_3x(pinsrb, Pinsrb, Xmm, Mem, Imm)                        // SSE4_1
+  ASMJIT_INST_3x(pinsrd, Pinsrd, Xmm, Gp, Imm)                         // SSE4_1
+  ASMJIT_INST_3x(pinsrd, Pinsrd, Xmm, Mem, Imm)                        // SSE4_1
+  ASMJIT_INST_3x(pinsrq, Pinsrq, Xmm, Gp, Imm)                         // SSE4_1
+  ASMJIT_INST_3x(pinsrq, Pinsrq, Xmm, Mem, Imm)                        // SSE4_1
+  ASMJIT_INST_3x(pinsrw, Pinsrw, Mm, Gp, Imm)                          // SSE
+  ASMJIT_INST_3x(pinsrw, Pinsrw, Mm, Mem, Imm)                         // SSE
+  ASMJIT_INST_3x(pinsrw, Pinsrw, Xmm, Gp, Imm)                         // SSE2
+  ASMJIT_INST_3x(pinsrw, Pinsrw, Xmm, Mem, Imm)                        // SSE2
+  ASMJIT_INST_2x(pmaddubsw, Pmaddubsw, Mm, Mm)                         // SSSE3
+  ASMJIT_INST_2x(pmaddubsw, Pmaddubsw, Mm, Mem)                        // SSSE3
+  ASMJIT_INST_2x(pmaddubsw, Pmaddubsw, Xmm, Xmm)                       // SSSE3
+  ASMJIT_INST_2x(pmaddubsw, Pmaddubsw, Xmm, Mem)                       // SSSE3
+  ASMJIT_INST_2x(pmaddwd, Pmaddwd, Mm, Mm)                             // MMX
+  ASMJIT_INST_2x(pmaddwd, Pmaddwd, Mm, Mem)                            // MMX
+  ASMJIT_INST_2x(pmaddwd, Pmaddwd, Xmm, Xmm)                           // SSE2
+  ASMJIT_INST_2x(pmaddwd, Pmaddwd, Xmm, Mem)                           // SSE2
+  ASMJIT_INST_2x(pmaxsb, Pmaxsb, Xmm, Xmm)                             // SSE4_1
+  ASMJIT_INST_2x(pmaxsb, Pmaxsb, Xmm, Mem)                             // SSE4_1
+  ASMJIT_INST_2x(pmaxsd, Pmaxsd, Xmm, Xmm)                             // SSE4_1
+  ASMJIT_INST_2x(pmaxsd, Pmaxsd, Xmm, Mem)                             // SSE4_1
+  ASMJIT_INST_2x(pmaxsw, Pmaxsw, Mm, Mm)                               // SSE
+  ASMJIT_INST_2x(pmaxsw, Pmaxsw, Mm, Mem)                              // SSE
+  ASMJIT_INST_2x(pmaxsw, Pmaxsw, Xmm, Xmm)                             // SSE2
+  ASMJIT_INST_2x(pmaxsw, Pmaxsw, Xmm, Mem)                             // SSE2
+  ASMJIT_INST_2x(pmaxub, Pmaxub, Mm, Mm)                               // SSE
+  ASMJIT_INST_2x(pmaxub, Pmaxub, Mm, Mem)                              // SSE
+  ASMJIT_INST_2x(pmaxub, Pmaxub, Xmm, Xmm)                             // SSE2
+  ASMJIT_INST_2x(pmaxub, Pmaxub, Xmm, Mem)                             // SSE2
+  ASMJIT_INST_2x(pmaxud, Pmaxud, Xmm, Xmm)                             // SSE4_1
+  ASMJIT_INST_2x(pmaxud, Pmaxud, Xmm, Mem)                             // SSE4_1
+  ASMJIT_INST_2x(pmaxuw, Pmaxuw, Xmm, Xmm)                             // SSE4_1
+  ASMJIT_INST_2x(pmaxuw, Pmaxuw, Xmm, Mem)                             // SSE4_1
+  ASMJIT_INST_2x(pminsb, Pminsb, Xmm, Xmm)                             // SSE4_1
+  ASMJIT_INST_2x(pminsb, Pminsb, Xmm, Mem)                             // SSE4_1
+  ASMJIT_INST_2x(pminsd, Pminsd, Xmm, Xmm)                             // SSE4_1
+  ASMJIT_INST_2x(pminsd, Pminsd, Xmm, Mem)                             // SSE4_1
+  ASMJIT_INST_2x(pminsw, Pminsw, Mm, Mm)                               // SSE
+  ASMJIT_INST_2x(pminsw, Pminsw, Mm, Mem)                              // SSE
+  ASMJIT_INST_2x(pminsw, Pminsw, Xmm, Xmm)                             // SSE2
+  ASMJIT_INST_2x(pminsw, Pminsw, Xmm, Mem)                             // SSE2
+  ASMJIT_INST_2x(pminub, Pminub, Mm, Mm)                               // SSE
+  ASMJIT_INST_2x(pminub, Pminub, Mm, Mem)                              // SSE
+  ASMJIT_INST_2x(pminub, Pminub, Xmm, Xmm)                             // SSE2
+  ASMJIT_INST_2x(pminub, Pminub, Xmm, Mem)                             // SSE2
+  ASMJIT_INST_2x(pminud, Pminud, Xmm, Xmm)                             // SSE4_1
+  ASMJIT_INST_2x(pminud, Pminud, Xmm, Mem)                             // SSE4_1
+  ASMJIT_INST_2x(pminuw, Pminuw, Xmm, Xmm)                             // SSE4_1
+  ASMJIT_INST_2x(pminuw, Pminuw, Xmm, Mem)                             // SSE4_1
+  ASMJIT_INST_2x(pmovmskb, Pmovmskb, Gp, Mm)                           // SSE
+  ASMJIT_INST_2x(pmovmskb, Pmovmskb, Gp, Xmm)                          // SSE2
+  ASMJIT_INST_2x(pmovsxbd, Pmovsxbd, Xmm, Xmm)                         // SSE4_1
+  ASMJIT_INST_2x(pmovsxbd, Pmovsxbd, Xmm, Mem)                         // SSE4_1
+  ASMJIT_INST_2x(pmovsxbq, Pmovsxbq, Xmm, Xmm)                         // SSE4_1
+  ASMJIT_INST_2x(pmovsxbq, Pmovsxbq, Xmm, Mem)                         // SSE4_1
+  ASMJIT_INST_2x(pmovsxbw, Pmovsxbw, Xmm, Xmm)                         // SSE4_1
+  ASMJIT_INST_2x(pmovsxbw, Pmovsxbw, Xmm, Mem)                         // SSE4_1
+  ASMJIT_INST_2x(pmovsxdq, Pmovsxdq, Xmm, Xmm)                         // SSE4_1
+  ASMJIT_INST_2x(pmovsxdq, Pmovsxdq, Xmm, Mem)                         // SSE4_1
+  ASMJIT_INST_2x(pmovsxwd, Pmovsxwd, Xmm, Xmm)                         // SSE4_1
+  ASMJIT_INST_2x(pmovsxwd, Pmovsxwd, Xmm, Mem)                         // SSE4_1
+  ASMJIT_INST_2x(pmovsxwq, Pmovsxwq, Xmm, Xmm)                         // SSE4_1
+  ASMJIT_INST_2x(pmovsxwq, Pmovsxwq, Xmm, Mem)                         // SSE4_1
+  ASMJIT_INST_2x(pmovzxbd, Pmovzxbd, Xmm, Xmm)                         // SSE4_1
+  ASMJIT_INST_2x(pmovzxbd, Pmovzxbd, Xmm, Mem)                         // SSE4_1
+  ASMJIT_INST_2x(pmovzxbq, Pmovzxbq, Xmm, Xmm)                         // SSE4_1
+  ASMJIT_INST_2x(pmovzxbq, Pmovzxbq, Xmm, Mem)                         // SSE4_1
+  ASMJIT_INST_2x(pmovzxbw, Pmovzxbw, Xmm, Xmm)                         // SSE4_1
+  ASMJIT_INST_2x(pmovzxbw, Pmovzxbw, Xmm, Mem)                         // SSE4_1
+  ASMJIT_INST_2x(pmovzxdq, Pmovzxdq, Xmm, Xmm)                         // SSE4_1
+  ASMJIT_INST_2x(pmovzxdq, Pmovzxdq, Xmm, Mem)                         // SSE4_1
+  ASMJIT_INST_2x(pmovzxwd, Pmovzxwd, Xmm, Xmm)                         // SSE4_1
+  ASMJIT_INST_2x(pmovzxwd, Pmovzxwd, Xmm, Mem)                         // SSE4_1
+  ASMJIT_INST_2x(pmovzxwq, Pmovzxwq, Xmm, Xmm)                         // SSE4_1
+  ASMJIT_INST_2x(pmovzxwq, Pmovzxwq, Xmm, Mem)                         // SSE4_1
+  ASMJIT_INST_2x(pmuldq, Pmuldq, Xmm, Xmm)                             // SSE4_1
+  ASMJIT_INST_2x(pmuldq, Pmuldq, Xmm, Mem)                             // SSE4_1
+  ASMJIT_INST_2x(pmulhrsw, Pmulhrsw, Mm, Mm)                           // SSSE3
+  ASMJIT_INST_2x(pmulhrsw, Pmulhrsw, Mm, Mem)                          // SSSE3
+  ASMJIT_INST_2x(pmulhrsw, Pmulhrsw, Xmm, Xmm)                         // SSSE3
+  ASMJIT_INST_2x(pmulhrsw, Pmulhrsw, Xmm, Mem)                         // SSSE3
+  ASMJIT_INST_2x(pmulhw, Pmulhw, Mm, Mm)                               // MMX
+  ASMJIT_INST_2x(pmulhw, Pmulhw, Mm, Mem)                              // MMX
+  ASMJIT_INST_2x(pmulhw, Pmulhw, Xmm, Xmm)                             // SSE2
+  ASMJIT_INST_2x(pmulhw, Pmulhw, Xmm, Mem)                             // SSE2
+  ASMJIT_INST_2x(pmulhuw, Pmulhuw, Mm, Mm)                             // SSE
+  ASMJIT_INST_2x(pmulhuw, Pmulhuw, Mm, Mem)                            // SSE
+  ASMJIT_INST_2x(pmulhuw, Pmulhuw, Xmm, Xmm)                           // SSE2
+  ASMJIT_INST_2x(pmulhuw, Pmulhuw, Xmm, Mem)                           // SSE2
+  ASMJIT_INST_2x(pmulld, Pmulld, Xmm, Xmm)                             // SSE4_1
+  ASMJIT_INST_2x(pmulld, Pmulld, Xmm, Mem)                             // SSE4_1
+  ASMJIT_INST_2x(pmullw, Pmullw, Mm, Mm)                               // MMX
+  ASMJIT_INST_2x(pmullw, Pmullw, Mm, Mem)                              // MMX
+  ASMJIT_INST_2x(pmullw, Pmullw, Xmm, Xmm)                             // SSE2
+  ASMJIT_INST_2x(pmullw, Pmullw, Xmm, Mem)                             // SSE2
+  ASMJIT_INST_2x(pmuludq, Pmuludq, Mm, Mm)                             // SSE2
+  ASMJIT_INST_2x(pmuludq, Pmuludq, Mm, Mem)                            // SSE2
+  ASMJIT_INST_2x(pmuludq, Pmuludq, Xmm, Xmm)                           // SSE2
+  ASMJIT_INST_2x(pmuludq, Pmuludq, Xmm, Mem)                           // SSE2
+  ASMJIT_INST_2x(por, Por, Mm, Mm)                                     // MMX
+  ASMJIT_INST_2x(por, Por, Mm, Mem)                                    // MMX
+  ASMJIT_INST_2x(por, Por, Xmm, Xmm)                                   // SSE2
+  ASMJIT_INST_2x(por, Por, Xmm, Mem)                                   // SSE2
+  ASMJIT_INST_2x(psadbw, Psadbw, Mm, Mm)                               // SSE
+  ASMJIT_INST_2x(psadbw, Psadbw, Mm, Mem)                              // SSE
+  ASMJIT_INST_2x(psadbw, Psadbw, Xmm, Xmm)                             // SSE2
+  ASMJIT_INST_2x(psadbw, Psadbw, Xmm, Mem)                             // SSE2
+  ASMJIT_INST_2x(pslld, Pslld, Mm, Mm)                                 // MMX
+  ASMJIT_INST_2x(pslld, Pslld, Mm, Mem)                                // MMX
+  ASMJIT_INST_2x(pslld, Pslld, Mm, Imm)                                // MMX
+  ASMJIT_INST_2x(pslld, Pslld, Xmm, Xmm)                               // SSE2
+  ASMJIT_INST_2x(pslld, Pslld, Xmm, Mem)                               // SSE2
+  ASMJIT_INST_2x(pslld, Pslld, Xmm, Imm)                               // SSE2
+  ASMJIT_INST_2x(pslldq, Pslldq, Xmm, Imm)                             // SSE2
+  ASMJIT_INST_2x(psllq, Psllq, Mm, Mm)                                 // MMX
+  ASMJIT_INST_2x(psllq, Psllq, Mm, Mem)                                // MMX
+  ASMJIT_INST_2x(psllq, Psllq, Mm, Imm)                                // MMX
+  ASMJIT_INST_2x(psllq, Psllq, Xmm, Xmm)                               // SSE2
+  ASMJIT_INST_2x(psllq, Psllq, Xmm, Mem)                               // SSE2
+  ASMJIT_INST_2x(psllq, Psllq, Xmm, Imm)                               // SSE2
+  ASMJIT_INST_2x(psllw, Psllw, Mm, Mm)                                 // MMX
+  ASMJIT_INST_2x(psllw, Psllw, Mm, Mem)                                // MMX
+  ASMJIT_INST_2x(psllw, Psllw, Mm, Imm)                                // MMX
+  ASMJIT_INST_2x(psllw, Psllw, Xmm, Xmm)                               // SSE2
+  ASMJIT_INST_2x(psllw, Psllw, Xmm, Mem)                               // SSE2
+  ASMJIT_INST_2x(psllw, Psllw, Xmm, Imm)                               // SSE2
+  ASMJIT_INST_2x(psrad, Psrad, Mm, Mm)                                 // MMX
+  ASMJIT_INST_2x(psrad, Psrad, Mm, Mem)                                // MMX
+  ASMJIT_INST_2x(psrad, Psrad, Mm, Imm)                                // MMX
+  ASMJIT_INST_2x(psrad, Psrad, Xmm, Xmm)                               // SSE2
+  ASMJIT_INST_2x(psrad, Psrad, Xmm, Mem)                               // SSE2
+  ASMJIT_INST_2x(psrad, Psrad, Xmm, Imm)                               // SSE2
+  ASMJIT_INST_2x(psraw, Psraw, Mm, Mm)                                 // MMX
+  ASMJIT_INST_2x(psraw, Psraw, Mm, Mem)                                // MMX
+  ASMJIT_INST_2x(psraw, Psraw, Mm, Imm)                                // MMX
+  ASMJIT_INST_2x(psraw, Psraw, Xmm, Xmm)                               // SSE2
+  ASMJIT_INST_2x(psraw, Psraw, Xmm, Mem)                               // SSE2
+  ASMJIT_INST_2x(psraw, Psraw, Xmm, Imm)                               // SSE2
+  ASMJIT_INST_2x(pshufb, Pshufb, Mm, Mm)                               // SSSE3
+  ASMJIT_INST_2x(pshufb, Pshufb, Mm, Mem)                              // SSSE3
+  ASMJIT_INST_2x(pshufb, Pshufb, Xmm, Xmm)                             // SSSE3
+  ASMJIT_INST_2x(pshufb, Pshufb, Xmm, Mem)                             // SSSE3
+  ASMJIT_INST_3x(pshufd, Pshufd, Xmm, Xmm, Imm)                        // SSE2
+  ASMJIT_INST_3x(pshufd, Pshufd, Xmm, Mem, Imm)                        // SSE2
+  ASMJIT_INST_3x(pshufhw, Pshufhw, Xmm, Xmm, Imm)                      // SSE2
+  ASMJIT_INST_3x(pshufhw, Pshufhw, Xmm, Mem, Imm)                      // SSE2
+  ASMJIT_INST_3x(pshuflw, Pshuflw, Xmm, Xmm, Imm)                      // SSE2
+  ASMJIT_INST_3x(pshuflw, Pshuflw, Xmm, Mem, Imm)                      // SSE2
+  ASMJIT_INST_3x(pshufw, Pshufw, Mm, Mm, Imm)                          // SSE
+  ASMJIT_INST_3x(pshufw, Pshufw, Mm, Mem, Imm)                         // SSE
+  ASMJIT_INST_2x(psignb, Psignb, Mm, Mm)                               // SSSE3
+  ASMJIT_INST_2x(psignb, Psignb, Mm, Mem)                              // SSSE3
+  ASMJIT_INST_2x(psignb, Psignb, Xmm, Xmm)                             // SSSE3
+  ASMJIT_INST_2x(psignb, Psignb, Xmm, Mem)                             // SSSE3
+  ASMJIT_INST_2x(psignd, Psignd, Mm, Mm)                               // SSSE3
+  ASMJIT_INST_2x(psignd, Psignd, Mm, Mem)                              // SSSE3
+  ASMJIT_INST_2x(psignd, Psignd, Xmm, Xmm)                             // SSSE3
+  ASMJIT_INST_2x(psignd, Psignd, Xmm, Mem)                             // SSSE3
+  ASMJIT_INST_2x(psignw, Psignw, Mm, Mm)                               // SSSE3
+  ASMJIT_INST_2x(psignw, Psignw, Mm, Mem)                              // SSSE3
+  ASMJIT_INST_2x(psignw, Psignw, Xmm, Xmm)                             // SSSE3
+  ASMJIT_INST_2x(psignw, Psignw, Xmm, Mem)                             // SSSE3
+  ASMJIT_INST_2x(psrld, Psrld, Mm, Mm)                                 // MMX
+  ASMJIT_INST_2x(psrld, Psrld, Mm, Mem)                                // MMX
+  ASMJIT_INST_2x(psrld, Psrld, Mm, Imm)                                // MMX
+  ASMJIT_INST_2x(psrld, Psrld, Xmm, Xmm)                               // SSE2
+  ASMJIT_INST_2x(psrld, Psrld, Xmm, Mem)                               // SSE2
+  ASMJIT_INST_2x(psrld, Psrld, Xmm, Imm)                               // SSE2
+  ASMJIT_INST_2x(psrldq, Psrldq, Xmm, Imm)                             // SSE2
+  ASMJIT_INST_2x(psrlq, Psrlq, Mm, Mm)                                 // MMX
+  ASMJIT_INST_2x(psrlq, Psrlq, Mm, Mem)                                // MMX
+  ASMJIT_INST_2x(psrlq, Psrlq, Mm, Imm)                                // MMX
+  ASMJIT_INST_2x(psrlq, Psrlq, Xmm, Xmm)                               // SSE2
+  ASMJIT_INST_2x(psrlq, Psrlq, Xmm, Mem)                               // SSE2
+  ASMJIT_INST_2x(psrlq, Psrlq, Xmm, Imm)                               // SSE2
+  ASMJIT_INST_2x(psrlw, Psrlw, Mm, Mm)                                 // MMX
+  ASMJIT_INST_2x(psrlw, Psrlw, Mm, Mem)                                // MMX
+  ASMJIT_INST_2x(psrlw, Psrlw, Mm, Imm)                                // MMX
+  ASMJIT_INST_2x(psrlw, Psrlw, Xmm, Xmm)                               // SSE2
+  ASMJIT_INST_2x(psrlw, Psrlw, Xmm, Mem)                               // SSE2
+  ASMJIT_INST_2x(psrlw, Psrlw, Xmm, Imm)                               // SSE2
+  ASMJIT_INST_2x(psubb, Psubb, Mm, Mm)                                 // MMX
+  ASMJIT_INST_2x(psubb, Psubb, Mm, Mem)                                // MMX
+  ASMJIT_INST_2x(psubb, Psubb, Xmm, Xmm)                               // SSE2
+  ASMJIT_INST_2x(psubb, Psubb, Xmm, Mem)                               // SSE2
+  ASMJIT_INST_2x(psubd, Psubd, Mm, Mm)                                 // MMX
+  ASMJIT_INST_2x(psubd, Psubd, Mm, Mem)                                // MMX
+  ASMJIT_INST_2x(psubd, Psubd, Xmm, Xmm)                               // SSE2
+  ASMJIT_INST_2x(psubd, Psubd, Xmm, Mem)                               // SSE2
+  ASMJIT_INST_2x(psubq, Psubq, Mm, Mm)                                 // SSE2
+  ASMJIT_INST_2x(psubq, Psubq, Mm, Mem)                                // SSE2
+  ASMJIT_INST_2x(psubq, Psubq, Xmm, Xmm)                               // SSE2
+  ASMJIT_INST_2x(psubq, Psubq, Xmm, Mem)                               // SSE2
+  ASMJIT_INST_2x(psubsb, Psubsb, Mm, Mm)                               // MMX
+  ASMJIT_INST_2x(psubsb, Psubsb, Mm, Mem)                              // MMX
+  ASMJIT_INST_2x(psubsb, Psubsb, Xmm, Xmm)                             // SSE2
+  ASMJIT_INST_2x(psubsb, Psubsb, Xmm, Mem)                             // SSE2
+  ASMJIT_INST_2x(psubsw, Psubsw, Mm, Mm)                               // MMX
+  ASMJIT_INST_2x(psubsw, Psubsw, Mm, Mem)                              // MMX
+  ASMJIT_INST_2x(psubsw, Psubsw, Xmm, Xmm)                             // SSE2
+  ASMJIT_INST_2x(psubsw, Psubsw, Xmm, Mem)                             // SSE2
+  ASMJIT_INST_2x(psubusb, Psubusb, Mm, Mm)                             // MMX
+  ASMJIT_INST_2x(psubusb, Psubusb, Mm, Mem)                            // MMX
+  ASMJIT_INST_2x(psubusb, Psubusb, Xmm, Xmm)                           // SSE2
+  ASMJIT_INST_2x(psubusb, Psubusb, Xmm, Mem)                           // SSE2
+  ASMJIT_INST_2x(psubusw, Psubusw, Mm, Mm)                             // MMX
+  ASMJIT_INST_2x(psubusw, Psubusw, Mm, Mem)                            // MMX
+  ASMJIT_INST_2x(psubusw, Psubusw, Xmm, Xmm)                           // SSE2
+  ASMJIT_INST_2x(psubusw, Psubusw, Xmm, Mem)                           // SSE2
+  ASMJIT_INST_2x(psubw, Psubw, Mm, Mm)                                 // MMX
+  ASMJIT_INST_2x(psubw, Psubw, Mm, Mem)                                // MMX
+  ASMJIT_INST_2x(psubw, Psubw, Xmm, Xmm)                               // SSE2
+  ASMJIT_INST_2x(psubw, Psubw, Xmm, Mem)                               // SSE2
+  ASMJIT_INST_2x(ptest, Ptest, Xmm, Xmm)                               // SSE4_1
+  ASMJIT_INST_2x(ptest, Ptest, Xmm, Mem)                               // SSE4_1
+  ASMJIT_INST_2x(punpckhbw, Punpckhbw, Mm, Mm)                         // MMX
+  ASMJIT_INST_2x(punpckhbw, Punpckhbw, Mm, Mem)                        // MMX
+  ASMJIT_INST_2x(punpckhbw, Punpckhbw, Xmm, Xmm)                       // SSE2
+  ASMJIT_INST_2x(punpckhbw, Punpckhbw, Xmm, Mem)                       // SSE2
+  ASMJIT_INST_2x(punpckhdq, Punpckhdq, Mm, Mm)                         // MMX
+  ASMJIT_INST_2x(punpckhdq, Punpckhdq, Mm, Mem)                        // MMX
+  ASMJIT_INST_2x(punpckhdq, Punpckhdq, Xmm, Xmm)                       // SSE2
+  ASMJIT_INST_2x(punpckhdq, Punpckhdq, Xmm, Mem)                       // SSE2
+  ASMJIT_INST_2x(punpckhqdq, Punpckhqdq, Xmm, Xmm)                     // SSE2
+  ASMJIT_INST_2x(punpckhqdq, Punpckhqdq, Xmm, Mem)                     // SSE2
+  ASMJIT_INST_2x(punpckhwd, Punpckhwd, Mm, Mm)                         // MMX
+  ASMJIT_INST_2x(punpckhwd, Punpckhwd, Mm, Mem)                        // MMX
+  ASMJIT_INST_2x(punpckhwd, Punpckhwd, Xmm, Xmm)                       // SSE2
+  ASMJIT_INST_2x(punpckhwd, Punpckhwd, Xmm, Mem)                       // SSE2
+  ASMJIT_INST_2x(punpcklbw, Punpcklbw, Mm, Mm)                         // MMX
+  ASMJIT_INST_2x(punpcklbw, Punpcklbw, Mm, Mem)                        // MMX
+  ASMJIT_INST_2x(punpcklbw, Punpcklbw, Xmm, Xmm)                       // SSE2
+  ASMJIT_INST_2x(punpcklbw, Punpcklbw, Xmm, Mem)                       // SSE2
+  ASMJIT_INST_2x(punpckldq, Punpckldq, Mm, Mm)                         // MMX
+  ASMJIT_INST_2x(punpckldq, Punpckldq, Mm, Mem)                        // MMX
+  ASMJIT_INST_2x(punpckldq, Punpckldq, Xmm, Xmm)                       // SSE2
+  ASMJIT_INST_2x(punpckldq, Punpckldq, Xmm, Mem)                       // SSE2
+  ASMJIT_INST_2x(punpcklqdq, Punpcklqdq, Xmm, Xmm)                     // SSE2
+  ASMJIT_INST_2x(punpcklqdq, Punpcklqdq, Xmm, Mem)                     // SSE2
+  ASMJIT_INST_2x(punpcklwd, Punpcklwd, Mm, Mm)                         // MMX
+  ASMJIT_INST_2x(punpcklwd, Punpcklwd, Mm, Mem)                        // MMX
+  ASMJIT_INST_2x(punpcklwd, Punpcklwd, Xmm, Xmm)                       // SSE2
+  ASMJIT_INST_2x(punpcklwd, Punpcklwd, Xmm, Mem)                       // SSE2
+  ASMJIT_INST_2x(pxor, Pxor, Mm, Mm)                                   // MMX
+  ASMJIT_INST_2x(pxor, Pxor, Mm, Mem)                                  // MMX
+  ASMJIT_INST_2x(pxor, Pxor, Xmm, Xmm)                                 // SSE2
+  ASMJIT_INST_2x(pxor, Pxor, Xmm, Mem)                                 // SSE2
+  ASMJIT_INST_2x(rcpps, Rcpps, Xmm, Xmm)                               // SSE
+  ASMJIT_INST_2x(rcpps, Rcpps, Xmm, Mem)                               // SSE
+  ASMJIT_INST_2x(rcpss, Rcpss, Xmm, Xmm)                               // SSE
+  ASMJIT_INST_2x(rcpss, Rcpss, Xmm, Mem)                               // SSE
+  ASMJIT_INST_3x(roundpd, Roundpd, Xmm, Xmm, Imm)                      // SSE4_1
+  ASMJIT_INST_3x(roundpd, Roundpd, Xmm, Mem, Imm)                      // SSE4_1
+  ASMJIT_INST_3x(roundps, Roundps, Xmm, Xmm, Imm)                      // SSE4_1
+  ASMJIT_INST_3x(roundps, Roundps, Xmm, Mem, Imm)                      // SSE4_1
+  ASMJIT_INST_3x(roundsd, Roundsd, Xmm, Xmm, Imm)                      // SSE4_1
+  ASMJIT_INST_3x(roundsd, Roundsd, Xmm, Mem, Imm)                      // SSE4_1
+  ASMJIT_INST_3x(roundss, Roundss, Xmm, Xmm, Imm)                      // SSE4_1
+  ASMJIT_INST_3x(roundss, Roundss, Xmm, Mem, Imm)                      // SSE4_1
+  ASMJIT_INST_2x(rsqrtps, Rsqrtps, Xmm, Xmm)                           // SSE
+  ASMJIT_INST_2x(rsqrtps, Rsqrtps, Xmm, Mem)                           // SSE
+  ASMJIT_INST_2x(rsqrtss, Rsqrtss, Xmm, Xmm)                           // SSE
+  ASMJIT_INST_2x(rsqrtss, Rsqrtss, Xmm, Mem)                           // SSE
+  ASMJIT_INST_3x(shufpd, Shufpd, Xmm, Xmm, Imm)                        // SSE2
+  ASMJIT_INST_3x(shufpd, Shufpd, Xmm, Mem, Imm)                        // SSE2
+  ASMJIT_INST_3x(shufps, Shufps, Xmm, Xmm, Imm)                        // SSE
+  ASMJIT_INST_3x(shufps, Shufps, Xmm, Mem, Imm)                        // SSE
+  ASMJIT_INST_2x(sqrtpd, Sqrtpd, Xmm, Xmm)                             // SSE2
+  ASMJIT_INST_2x(sqrtpd, Sqrtpd, Xmm, Mem)                             // SSE2
+  ASMJIT_INST_2x(sqrtps, Sqrtps, Xmm, Xmm)                             // SSE
+  ASMJIT_INST_2x(sqrtps, Sqrtps, Xmm, Mem)                             // SSE
+  ASMJIT_INST_2x(sqrtsd, Sqrtsd, Xmm, Xmm)                             // SSE2
+  ASMJIT_INST_2x(sqrtsd, Sqrtsd, Xmm, Mem)                             // SSE2
+  ASMJIT_INST_2x(sqrtss, Sqrtss, Xmm, Xmm)                             // SSE
+  ASMJIT_INST_2x(sqrtss, Sqrtss, Xmm, Mem)                             // SSE
+  ASMJIT_INST_2x(subpd, Subpd, Xmm, Xmm)                               // SSE2
+  ASMJIT_INST_2x(subpd, Subpd, Xmm, Mem)                               // SSE2
+  ASMJIT_INST_2x(subps, Subps, Xmm, Xmm)                               // SSE
+  ASMJIT_INST_2x(subps, Subps, Xmm, Mem)                               // SSE
+  ASMJIT_INST_2x(subsd, Subsd, Xmm, Xmm)                               // SSE2
+  ASMJIT_INST_2x(subsd, Subsd, Xmm, Mem)                               // SSE2
+  ASMJIT_INST_2x(subss, Subss, Xmm, Xmm)                               // SSE
+  ASMJIT_INST_2x(subss, Subss, Xmm, Mem)                               // SSE
+  ASMJIT_INST_2x(ucomisd, Ucomisd, Xmm, Xmm)                           // SSE2
+  ASMJIT_INST_2x(ucomisd, Ucomisd, Xmm, Mem)                           // SSE2
+  ASMJIT_INST_2x(ucomiss, Ucomiss, Xmm, Xmm)                           // SSE
+  ASMJIT_INST_2x(ucomiss, Ucomiss, Xmm, Mem)                           // SSE
+  ASMJIT_INST_2x(unpckhpd, Unpckhpd, Xmm, Xmm)                         // SSE2
+  ASMJIT_INST_2x(unpckhpd, Unpckhpd, Xmm, Mem)                         // SSE2
+  ASMJIT_INST_2x(unpckhps, Unpckhps, Xmm, Xmm)                         // SSE
+  ASMJIT_INST_2x(unpckhps, Unpckhps, Xmm, Mem)                         // SSE
+  ASMJIT_INST_2x(unpcklpd, Unpcklpd, Xmm, Xmm)                         // SSE2
+  ASMJIT_INST_2x(unpcklpd, Unpcklpd, Xmm, Mem)                         // SSE2
+  ASMJIT_INST_2x(unpcklps, Unpcklps, Xmm, Xmm)                         // SSE
+  ASMJIT_INST_2x(unpcklps, Unpcklps, Xmm, Mem)                         // SSE
+  ASMJIT_INST_2x(xorpd, Xorpd, Xmm, Xmm)                               // SSE2
+  ASMJIT_INST_2x(xorpd, Xorpd, Xmm, Mem)                               // SSE2
+  ASMJIT_INST_2x(xorps, Xorps, Xmm, Xmm)                               // SSE
+  ASMJIT_INST_2x(xorps, Xorps, Xmm, Mem)                               // SSE
+
+  //! \}
+
+  //! \name 3DNOW and GEODE Instructions (Deprecated)
+  //! \{
+
+  ASMJIT_INST_2x(pavgusb, Pavgusb, Mm, Mm)                             // 3DNOW
+  ASMJIT_INST_2x(pavgusb, Pavgusb, Mm, Mem)                            // 3DNOW
+  ASMJIT_INST_2x(pf2id, Pf2id, Mm, Mm)                                 // 3DNOW
+  ASMJIT_INST_2x(pf2id, Pf2id, Mm, Mem)                                // 3DNOW
+  ASMJIT_INST_2x(pf2iw, Pf2iw, Mm, Mm)                                 // 3DNOW
+  ASMJIT_INST_2x(pf2iw, Pf2iw, Mm, Mem)                                // 3DNOW
+  ASMJIT_INST_2x(pfacc, Pfacc, Mm, Mm)                                 // 3DNOW
+  ASMJIT_INST_2x(pfacc, Pfacc, Mm, Mem)                                // 3DNOW
+  ASMJIT_INST_2x(pfadd, Pfadd, Mm, Mm)                                 // 3DNOW
+  ASMJIT_INST_2x(pfadd, Pfadd, Mm, Mem)                                // 3DNOW
+  ASMJIT_INST_2x(pfcmpeq, Pfcmpeq, Mm, Mm)                             // 3DNOW
+  ASMJIT_INST_2x(pfcmpeq, Pfcmpeq, Mm, Mem)                            // 3DNOW
+  ASMJIT_INST_2x(pfcmpge, Pfcmpge, Mm, Mm)                             // 3DNOW
+  ASMJIT_INST_2x(pfcmpge, Pfcmpge, Mm, Mem)                            // 3DNOW
+  ASMJIT_INST_2x(pfcmpgt, Pfcmpgt, Mm, Mm)                             // 3DNOW
+  ASMJIT_INST_2x(pfcmpgt, Pfcmpgt, Mm, Mem)                            // 3DNOW
+  ASMJIT_INST_2x(pfmax, Pfmax, Mm, Mm)                                 // 3DNOW
+  ASMJIT_INST_2x(pfmax, Pfmax, Mm, Mem)                                // 3DNOW
+  ASMJIT_INST_2x(pfmin, Pfmin, Mm, Mm)                                 // 3DNOW
+  ASMJIT_INST_2x(pfmin, Pfmin, Mm, Mem)                                // 3DNOW
+  ASMJIT_INST_2x(pfmul, Pfmul, Mm, Mm)                                 // 3DNOW
+  ASMJIT_INST_2x(pfmul, Pfmul, Mm, Mem)                                // 3DNOW
+  ASMJIT_INST_2x(pfnacc, Pfnacc, Mm, Mm)                               // 3DNOW
+  ASMJIT_INST_2x(pfnacc, Pfnacc, Mm, Mem)                              // 3DNOW
+  ASMJIT_INST_2x(pfpnacc, Pfpnacc, Mm, Mm)                             // 3DNOW
+  ASMJIT_INST_2x(pfpnacc, Pfpnacc, Mm, Mem)                            // 3DNOW
+  ASMJIT_INST_2x(pfrcp, Pfrcp, Mm, Mm)                                 // 3DNOW
+  ASMJIT_INST_2x(pfrcp, Pfrcp, Mm, Mem)                                // 3DNOW
+  ASMJIT_INST_2x(pfrcpit1, Pfrcpit1, Mm, Mm)                           // 3DNOW
+  ASMJIT_INST_2x(pfrcpit1, Pfrcpit1, Mm, Mem)                          // 3DNOW
+  ASMJIT_INST_2x(pfrcpit2, Pfrcpit2, Mm, Mm)                           // 3DNOW
+  ASMJIT_INST_2x(pfrcpit2, Pfrcpit2, Mm, Mem)                          // 3DNOW
+  ASMJIT_INST_2x(pfrcpv, Pfrcpv, Mm, Mm)                               // GEODE
+  ASMJIT_INST_2x(pfrcpv, Pfrcpv, Mm, Mem)                              // GEODE
+  ASMJIT_INST_2x(pfrsqit1, Pfrsqit1, Mm, Mm)                           // 3DNOW
+  ASMJIT_INST_2x(pfrsqit1, Pfrsqit1, Mm, Mem)                          // 3DNOW
+  ASMJIT_INST_2x(pfrsqrt, Pfrsqrt, Mm, Mm)                             // 3DNOW
+  ASMJIT_INST_2x(pfrsqrt, Pfrsqrt, Mm, Mem)                            // 3DNOW
+  ASMJIT_INST_2x(pfrsqrtv, Pfrsqrtv, Mm, Mm)                           // GEODE
+  ASMJIT_INST_2x(pfrsqrtv, Pfrsqrtv, Mm, Mem)                          // GEODE
+  ASMJIT_INST_2x(pfsub, Pfsub, Mm, Mm)                                 // 3DNOW
+  ASMJIT_INST_2x(pfsub, Pfsub, Mm, Mem)                                // 3DNOW
+  ASMJIT_INST_2x(pfsubr, Pfsubr, Mm, Mm)                               // 3DNOW
+  ASMJIT_INST_2x(pfsubr, Pfsubr, Mm, Mem)                              // 3DNOW
+  ASMJIT_INST_2x(pi2fd, Pi2fd, Mm, Mm)                                 // 3DNOW
+  ASMJIT_INST_2x(pi2fd, Pi2fd, Mm, Mem)                                // 3DNOW
+  ASMJIT_INST_2x(pi2fw, Pi2fw, Mm, Mm)                                 // 3DNOW
+  ASMJIT_INST_2x(pi2fw, Pi2fw, Mm, Mem)                                // 3DNOW
+  ASMJIT_INST_2x(pmulhrw, Pmulhrw, Mm, Mm)                             // 3DNOW
+  ASMJIT_INST_2x(pmulhrw, Pmulhrw, Mm, Mem)                            // 3DNOW
+  ASMJIT_INST_2x(pswapd, Pswapd, Mm, Mm)                               // 3DNOW
+  ASMJIT_INST_2x(pswapd, Pswapd, Mm, Mem)                              // 3DNOW
+
+  //! \}
+
+  //! \name EMMS/FEMMS Instructions
+  //! \{
+
+  ASMJIT_INST_0x(emms, Emms)                                           // MMX
+  ASMJIT_INST_0x(femms, Femms)                                         // 3DNOW
+
+  //! \}
+
+  //! \name AESNI Instructions
+  //! \{
+
+  ASMJIT_INST_2x(aesdec, Aesdec, Xmm, Xmm)                             // AESNI
+  ASMJIT_INST_2x(aesdec, Aesdec, Xmm, Mem)                             // AESNI
+  ASMJIT_INST_2x(aesdeclast, Aesdeclast, Xmm, Xmm)                     // AESNI
+  ASMJIT_INST_2x(aesdeclast, Aesdeclast, Xmm, Mem)                     // AESNI
+  ASMJIT_INST_2x(aesenc, Aesenc, Xmm, Xmm)                             // AESNI
+  ASMJIT_INST_2x(aesenc, Aesenc, Xmm, Mem)                             // AESNI
+  ASMJIT_INST_2x(aesenclast, Aesenclast, Xmm, Xmm)                     // AESNI
+  ASMJIT_INST_2x(aesenclast, Aesenclast, Xmm, Mem)                     // AESNI
+  ASMJIT_INST_2x(aesimc, Aesimc, Xmm, Xmm)                             // AESNI
+  ASMJIT_INST_2x(aesimc, Aesimc, Xmm, Mem)                             // AESNI
+  ASMJIT_INST_3x(aeskeygenassist, Aeskeygenassist, Xmm, Xmm, Imm)      // AESNI
+  ASMJIT_INST_3x(aeskeygenassist, Aeskeygenassist, Xmm, Mem, Imm)      // AESNI
+
+  //! \}
+
+  //! \name SHA Instructions
+  //! \{
+
+  ASMJIT_INST_2x(sha1msg1, Sha1msg1, Xmm, Xmm)                         // SHA
+  ASMJIT_INST_2x(sha1msg1, Sha1msg1, Xmm, Mem)                         // SHA
+  ASMJIT_INST_2x(sha1msg2, Sha1msg2, Xmm, Xmm)                         // SHA
+  ASMJIT_INST_2x(sha1msg2, Sha1msg2, Xmm, Mem)                         // SHA
+  ASMJIT_INST_2x(sha1nexte, Sha1nexte, Xmm, Xmm)                       // SHA
+  ASMJIT_INST_2x(sha1nexte, Sha1nexte, Xmm, Mem)                       // SHA
+  ASMJIT_INST_3x(sha1rnds4, Sha1rnds4, Xmm, Xmm, Imm)                  // SHA
+  ASMJIT_INST_3x(sha1rnds4, Sha1rnds4, Xmm, Mem, Imm)                  // SHA
+  ASMJIT_INST_2x(sha256msg1, Sha256msg1, Xmm, Xmm)                     // SHA
+  ASMJIT_INST_2x(sha256msg1, Sha256msg1, Xmm, Mem)                     // SHA
+  ASMJIT_INST_2x(sha256msg2, Sha256msg2, Xmm, Xmm)                     // SHA
+  ASMJIT_INST_2x(sha256msg2, Sha256msg2, Xmm, Mem)                     // SHA
+  ASMJIT_INST_3x(sha256rnds2, Sha256rnds2, Xmm, Xmm, XMM0)             // SHA [EXPLICIT]
+  ASMJIT_INST_3x(sha256rnds2, Sha256rnds2, Xmm, Mem, XMM0)             // SHA [EXPLICIT]
+
+  //! \}
+
+  //! \name GFNI Instructions
+  //! \{
+
+  // NOTE: Doxygen gets confused here and believes it is still inside a \cond block, hence the explicit \endcond.
+  //! \endcond
+
+  ASMJIT_INST_3x(gf2p8affineinvqb, Gf2p8affineinvqb, Xmm, Xmm, Imm)    // GFNI
+  ASMJIT_INST_3x(gf2p8affineinvqb, Gf2p8affineinvqb, Xmm, Mem, Imm)    // GFNI
+  ASMJIT_INST_3x(gf2p8affineqb, Gf2p8affineqb, Xmm, Xmm, Imm)          // GFNI
+  ASMJIT_INST_3x(gf2p8affineqb, Gf2p8affineqb, Xmm, Mem, Imm)          // GFNI
+  ASMJIT_INST_2x(gf2p8mulb, Gf2p8mulb, Xmm, Xmm)                       // GFNI
+  ASMJIT_INST_2x(gf2p8mulb, Gf2p8mulb, Xmm, Mem)                       // GFNI
+
+  //! \}
+
+  //! \name AVX, FMA, and AVX512 Instructions
+  //! \{
+
+  ASMJIT_INST_3x(kaddb, Kaddb, KReg, KReg, KReg)                       // AVX512_DQ
+  ASMJIT_INST_3x(kaddd, Kaddd, KReg, KReg, KReg)                       // AVX512_BW
+  ASMJIT_INST_3x(kaddq, Kaddq, KReg, KReg, KReg)                       // AVX512_BW
+  ASMJIT_INST_3x(kaddw, Kaddw, KReg, KReg, KReg)                       // AVX512_DQ
+  ASMJIT_INST_3x(kandb, Kandb, KReg, KReg, KReg)                       // AVX512_DQ
+  ASMJIT_INST_3x(kandd, Kandd, KReg, KReg, KReg)                       // AVX512_BW
+  ASMJIT_INST_3x(kandnb, Kandnb, KReg, KReg, KReg)                     // AVX512_DQ
+  ASMJIT_INST_3x(kandnd, Kandnd, KReg, KReg, KReg)                     // AVX512_BW
+  ASMJIT_INST_3x(kandnq, Kandnq, KReg, KReg, KReg)                     // AVX512_BW
+  ASMJIT_INST_3x(kandnw, Kandnw, KReg, KReg, KReg)                     // AVX512_F
+  ASMJIT_INST_3x(kandq, Kandq, KReg, KReg, KReg)                       // AVX512_BW
+  ASMJIT_INST_3x(kandw, Kandw, KReg, KReg, KReg)                       // AVX512_F
+  ASMJIT_INST_2x(kmovb, Kmovb, KReg, KReg)                             // AVX512_DQ
+  ASMJIT_INST_2x(kmovb, Kmovb, KReg, Mem)                              // AVX512_DQ
+  ASMJIT_INST_2x(kmovb, Kmovb, KReg, Gp)                               // AVX512_DQ
+  ASMJIT_INST_2x(kmovb, Kmovb, Mem, KReg)                              // AVX512_DQ
+  ASMJIT_INST_2x(kmovb, Kmovb, Gp, KReg)                               // AVX512_DQ
+  ASMJIT_INST_2x(kmovd, Kmovd, KReg, KReg)                             // AVX512_BW
+  ASMJIT_INST_2x(kmovd, Kmovd, KReg, Mem)                              // AVX512_BW
+  ASMJIT_INST_2x(kmovd, Kmovd, KReg, Gp)                               // AVX512_BW
+  ASMJIT_INST_2x(kmovd, Kmovd, Mem, KReg)                              // AVX512_BW
+  ASMJIT_INST_2x(kmovd, Kmovd, Gp, KReg)                               // AVX512_BW
+  ASMJIT_INST_2x(kmovq, Kmovq, KReg, KReg)                             // AVX512_BW
+  ASMJIT_INST_2x(kmovq, Kmovq, KReg, Mem)                              // AVX512_BW
+  ASMJIT_INST_2x(kmovq, Kmovq, KReg, Gp)                               // AVX512_BW
+  ASMJIT_INST_2x(kmovq, Kmovq, Mem, KReg)                              // AVX512_BW
+  ASMJIT_INST_2x(kmovq, Kmovq, Gp, KReg)                               // AVX512_BW
+  ASMJIT_INST_2x(kmovw, Kmovw, KReg, KReg)                             // AVX512_F
+  ASMJIT_INST_2x(kmovw, Kmovw, KReg, Mem)                              // AVX512_F
+  ASMJIT_INST_2x(kmovw, Kmovw, KReg, Gp)                               // AVX512_F
+  ASMJIT_INST_2x(kmovw, Kmovw, Mem, KReg)                              // AVX512_F
+  ASMJIT_INST_2x(kmovw, Kmovw, Gp, KReg)                               // AVX512_F
+  ASMJIT_INST_2x(knotb, Knotb, KReg, KReg)                             // AVX512_DQ
+  ASMJIT_INST_2x(knotd, Knotd, KReg, KReg)                             // AVX512_BW
+  ASMJIT_INST_2x(knotq, Knotq, KReg, KReg)                             // AVX512_BW
+  ASMJIT_INST_2x(knotw, Knotw, KReg, KReg)                             // AVX512_F
+  ASMJIT_INST_3x(korb, Korb, KReg, KReg, KReg)                         // AVX512_DQ
+  ASMJIT_INST_3x(kord, Kord, KReg, KReg, KReg)                         // AVX512_BW
+  ASMJIT_INST_3x(korq, Korq, KReg, KReg, KReg)                         // AVX512_BW
+  ASMJIT_INST_2x(kortestb, Kortestb, KReg, KReg)                       // AVX512_DQ
+  ASMJIT_INST_2x(kortestd, Kortestd, KReg, KReg)                       // AVX512_BW
+  ASMJIT_INST_2x(kortestq, Kortestq, KReg, KReg)                       // AVX512_BW
+  ASMJIT_INST_2x(kortestw, Kortestw, KReg, KReg)                       // AVX512_F
+  ASMJIT_INST_3x(korw, Korw, KReg, KReg, KReg)                         // AVX512_F
+  ASMJIT_INST_3x(kshiftlb, Kshiftlb, KReg, KReg, Imm)                  // AVX512_DQ
+  ASMJIT_INST_3x(kshiftld, Kshiftld, KReg, KReg, Imm)                  // AVX512_BW
+  ASMJIT_INST_3x(kshiftlq, Kshiftlq, KReg, KReg, Imm)                  // AVX512_BW
+  ASMJIT_INST_3x(kshiftlw, Kshiftlw, KReg, KReg, Imm)                  // AVX512_F
+  ASMJIT_INST_3x(kshiftrb, Kshiftrb, KReg, KReg, Imm)                  // AVX512_DQ
+  ASMJIT_INST_3x(kshiftrd, Kshiftrd, KReg, KReg, Imm)                  // AVX512_BW
+  ASMJIT_INST_3x(kshiftrq, Kshiftrq, KReg, KReg, Imm)                  // AVX512_BW
+  ASMJIT_INST_3x(kshiftrw, Kshiftrw, KReg, KReg, Imm)                  // AVX512_F
+  ASMJIT_INST_2x(ktestb, Ktestb, KReg, KReg)                           // AVX512_DQ
+  ASMJIT_INST_2x(ktestd, Ktestd, KReg, KReg)                           // AVX512_BW
+  ASMJIT_INST_2x(ktestq, Ktestq, KReg, KReg)                           // AVX512_BW
+  ASMJIT_INST_2x(ktestw, Ktestw, KReg, KReg)                           // AVX512_DQ
+  ASMJIT_INST_3x(kunpckbw, Kunpckbw, KReg, KReg, KReg)                 // AVX512_F
+  ASMJIT_INST_3x(kunpckdq, Kunpckdq, KReg, KReg, KReg)                 // AVX512_BW
+  ASMJIT_INST_3x(kunpckwd, Kunpckwd, KReg, KReg, KReg)                 // AVX512_BW
+  ASMJIT_INST_3x(kxnorb, Kxnorb, KReg, KReg, KReg)                     // AVX512_DQ
+  ASMJIT_INST_3x(kxnord, Kxnord, KReg, KReg, KReg)                     // AVX512_BW
+  ASMJIT_INST_3x(kxnorq, Kxnorq, KReg, KReg, KReg)                     // AVX512_BW
+  ASMJIT_INST_3x(kxnorw, Kxnorw, KReg, KReg, KReg)                     // AVX512_F
+  ASMJIT_INST_3x(kxorb, Kxorb, KReg, KReg, KReg)                       // AVX512_DQ
+  ASMJIT_INST_3x(kxord, Kxord, KReg, KReg, KReg)                       // AVX512_BW
+  ASMJIT_INST_3x(kxorq, Kxorq, KReg, KReg, KReg)                       // AVX512_BW
+  ASMJIT_INST_3x(kxorw, Kxorw, KReg, KReg, KReg)                       // AVX512_F
+  ASMJIT_INST_6x(v4fmaddps, V4fmaddps, Zmm, Zmm, Zmm, Zmm, Zmm, Mem)   // AVX512_4FMAPS{kz}
+  ASMJIT_INST_6x(v4fmaddss, V4fmaddss, Xmm, Xmm, Xmm, Xmm, Xmm, Mem)   // AVX512_4FMAPS{kz}
+  ASMJIT_INST_6x(v4fnmaddps, V4fnmaddps, Zmm, Zmm, Zmm, Zmm, Zmm, Mem) // AVX512_4FMAPS{kz}
+  ASMJIT_INST_6x(v4fnmaddss, V4fnmaddss, Xmm, Xmm, Xmm, Xmm, Xmm, Mem) // AVX512_4FMAPS{kz}
+  ASMJIT_INST_3x(vaddpd, Vaddpd, Vec, Vec, Vec)                        // AVX  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vaddpd, Vaddpd, Vec, Vec, Mem)                        // AVX  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vaddps, Vaddps, Vec, Vec, Vec)                        // AVX  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vaddps, Vaddps, Vec, Vec, Mem)                        // AVX  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vaddsd, Vaddsd, Xmm, Xmm, Xmm)                        // AVX  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vaddsd, Vaddsd, Xmm, Xmm, Mem)                        // AVX  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vaddss, Vaddss, Xmm, Xmm, Xmm)                        // AVX  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vaddss, Vaddss, Xmm, Xmm, Mem)                        // AVX  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vaddsubpd, Vaddsubpd, Vec, Vec, Vec)                  // AVX
+  ASMJIT_INST_3x(vaddsubpd, Vaddsubpd, Vec, Vec, Mem)                  // AVX
+  ASMJIT_INST_3x(vaddsubps, Vaddsubps, Vec, Vec, Vec)                  // AVX
+  ASMJIT_INST_3x(vaddsubps, Vaddsubps, Vec, Vec, Mem)                  // AVX
+  ASMJIT_INST_3x(vaesdec, Vaesdec, Vec, Vec, Vec)                      // AVX+AESNI VAES
+  ASMJIT_INST_3x(vaesdec, Vaesdec, Vec, Vec, Mem)                      // AVX+AESNI VAES
+  ASMJIT_INST_3x(vaesdeclast, Vaesdeclast, Vec, Vec, Vec)              // AVX+AESNI VAES
+  ASMJIT_INST_3x(vaesdeclast, Vaesdeclast, Vec, Vec, Mem)              // AVX+AESNI VAES
+  ASMJIT_INST_3x(vaesenc, Vaesenc, Vec, Vec, Vec)                      // AVX+AESNI VAES
+  ASMJIT_INST_3x(vaesenc, Vaesenc, Vec, Vec, Mem)                      // AVX+AESNI VAES
+  ASMJIT_INST_3x(vaesenclast, Vaesenclast, Vec, Vec, Vec)              // AVX+AESNI VAES
+  ASMJIT_INST_3x(vaesenclast, Vaesenclast, Vec, Vec, Mem)              // AVX+AESNI VAES
+  ASMJIT_INST_2x(vaesimc, Vaesimc, Xmm, Xmm)                           // AVX+AESNI
+  ASMJIT_INST_2x(vaesimc, Vaesimc, Xmm, Mem)                           // AVX+AESNI
+  ASMJIT_INST_3x(vaeskeygenassist, Vaeskeygenassist, Xmm, Xmm, Imm)    // AVX+AESNI
+  ASMJIT_INST_3x(vaeskeygenassist, Vaeskeygenassist, Xmm, Mem, Imm)    // AVX+AESNI
+  ASMJIT_INST_4x(valignd, Valignd, Vec, Vec, Vec, Imm)                 //      AVX512_F{kz|b32}
+  ASMJIT_INST_4x(valignd, Valignd, Vec, Vec, Mem, Imm)                 //      AVX512_F{kz|b32}
+  ASMJIT_INST_4x(valignq, Valignq, Vec, Vec, Vec, Imm)                 //      AVX512_F{kz|b64}
+  ASMJIT_INST_4x(valignq, Valignq, Vec, Vec, Mem, Imm)                 //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vandnpd, Vandnpd, Vec, Vec, Vec)                      // AVX  AVX512_DQ{kz|b64}
+  ASMJIT_INST_3x(vandnpd, Vandnpd, Vec, Vec, Mem)                      // AVX  AVX512_DQ{kz|b64}
+  ASMJIT_INST_3x(vandnps, Vandnps, Vec, Vec, Vec)                      // AVX  AVX512_DQ{kz|b32}
+  ASMJIT_INST_3x(vandnps, Vandnps, Vec, Vec, Mem)                      // AVX  AVX512_DQ{kz|b32}
+  ASMJIT_INST_3x(vandpd, Vandpd, Vec, Vec, Vec)                        // AVX  AVX512_DQ{kz|b64}
+  ASMJIT_INST_3x(vandpd, Vandpd, Vec, Vec, Mem)                        // AVX  AVX512_DQ{kz|b64}
+  ASMJIT_INST_3x(vandps, Vandps, Vec, Vec, Vec)                        // AVX  AVX512_DQ{kz|b32}
+  ASMJIT_INST_3x(vandps, Vandps, Vec, Vec, Mem)                        // AVX  AVX512_DQ{kz|b32}
+  ASMJIT_INST_3x(vblendmpd, Vblendmpd, Vec, Vec, Vec)                  //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vblendmpd, Vblendmpd, Vec, Vec, Mem)                  //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vblendmps, Vblendmps, Vec, Vec, Vec)                  //      AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vblendmps, Vblendmps, Vec, Vec, Mem)                  //      AVX512_F{kz|b32}
+  ASMJIT_INST_4x(vblendpd, Vblendpd, Vec, Vec, Vec, Imm)               // AVX
+  ASMJIT_INST_4x(vblendpd, Vblendpd, Vec, Vec, Mem, Imm)               // AVX
+  ASMJIT_INST_4x(vblendps, Vblendps, Vec, Vec, Vec, Imm)               // AVX
+  ASMJIT_INST_4x(vblendps, Vblendps, Vec, Vec, Mem, Imm)               // AVX
+  ASMJIT_INST_4x(vblendvpd, Vblendvpd, Vec, Vec, Vec, Vec)             // AVX
+  ASMJIT_INST_4x(vblendvpd, Vblendvpd, Vec, Vec, Mem, Vec)             // AVX
+  ASMJIT_INST_4x(vblendvps, Vblendvps, Vec, Vec, Vec, Vec)             // AVX
+  ASMJIT_INST_4x(vblendvps, Vblendvps, Vec, Vec, Mem, Vec)             // AVX
+  ASMJIT_INST_2x(vbroadcastf128, Vbroadcastf128, Vec, Mem)             // AVX
+  ASMJIT_INST_2x(vbroadcastf32x2, Vbroadcastf32x2, Vec, Vec)           //      AVX512_DQ{kz}
+  ASMJIT_INST_2x(vbroadcastf32x2, Vbroadcastf32x2, Vec, Mem)           //      AVX512_DQ{kz}
+  ASMJIT_INST_2x(vbroadcastf32x4, Vbroadcastf32x4, Vec, Mem)           //      AVX512_F{kz}
+  ASMJIT_INST_2x(vbroadcastf32x8, Vbroadcastf32x8, Vec, Mem)           //      AVX512_DQ{kz}
+  ASMJIT_INST_2x(vbroadcastf64x2, Vbroadcastf64x2, Vec, Mem)           //      AVX512_DQ{kz}
+  ASMJIT_INST_2x(vbroadcastf64x4, Vbroadcastf64x4, Vec, Mem)           //      AVX512_F{kz}
+  ASMJIT_INST_2x(vbroadcasti128, Vbroadcasti128, Vec, Mem)             // AVX2
+  ASMJIT_INST_2x(vbroadcasti32x2, Vbroadcasti32x2, Vec, Vec)           //      AVX512_DQ{kz}
+  ASMJIT_INST_2x(vbroadcasti32x2, Vbroadcasti32x2, Vec, Mem)           //      AVX512_DQ{kz}
+  ASMJIT_INST_2x(vbroadcasti32x4, Vbroadcasti32x4, Vec, Mem)           //      AVX512_F{kz}
+  ASMJIT_INST_2x(vbroadcasti32x8, Vbroadcasti32x8, Vec, Mem)           //      AVX512_DQ{kz}
+  ASMJIT_INST_2x(vbroadcasti64x2, Vbroadcasti64x2, Vec, Vec)           //      AVX512_DQ{kz}
+  ASMJIT_INST_2x(vbroadcasti64x2, Vbroadcasti64x2, Vec, Mem)           //      AVX512_DQ{kz}
+  ASMJIT_INST_2x(vbroadcasti64x4, Vbroadcasti64x4, Vec, Vec)           //      AVX512_F{kz}
+  ASMJIT_INST_2x(vbroadcasti64x4, Vbroadcasti64x4, Vec, Mem)           //      AVX512_F{kz}
+  ASMJIT_INST_2x(vbroadcastsd, Vbroadcastsd, Vec, Mem)                 // AVX  AVX512_F{kz}
+  ASMJIT_INST_2x(vbroadcastsd, Vbroadcastsd, Vec, Xmm)                 // AVX2 AVX512_F{kz}
+  ASMJIT_INST_2x(vbroadcastss, Vbroadcastss, Vec, Mem)                 // AVX  AVX512_F{kz}
+  ASMJIT_INST_2x(vbroadcastss, Vbroadcastss, Vec, Xmm)                 // AVX2 AVX512_F{kz}
+  ASMJIT_INST_4x(vcmppd, Vcmppd, Vec, Vec, Vec, Imm)                   // AVX
+  ASMJIT_INST_4x(vcmppd, Vcmppd, Vec, Vec, Mem, Imm)                   // AVX
+  ASMJIT_INST_4x(vcmppd, Vcmppd, KReg, Vec, Vec, Imm)                  //      AVX512_F{kz|b64}
+  ASMJIT_INST_4x(vcmppd, Vcmppd, KReg, Vec, Mem, Imm)                  //      AVX512_F{kz|b64}
+  ASMJIT_INST_4x(vcmpps, Vcmpps, Vec, Vec, Vec, Imm)                   // AVX
+  ASMJIT_INST_4x(vcmpps, Vcmpps, Vec, Vec, Mem, Imm)                   // AVX
+  ASMJIT_INST_4x(vcmpps, Vcmpps, KReg, Vec, Vec, Imm)                  //      AVX512_F{kz|b32}
+  ASMJIT_INST_4x(vcmpps, Vcmpps, KReg, Vec, Mem, Imm)                  //      AVX512_F{kz|b32}
+  ASMJIT_INST_4x(vcmpsd, Vcmpsd, Xmm, Xmm, Xmm, Imm)                   // AVX
+  ASMJIT_INST_4x(vcmpsd, Vcmpsd, Xmm, Xmm, Mem, Imm)                   // AVX
+  ASMJIT_INST_4x(vcmpsd, Vcmpsd, KReg, Xmm, Xmm, Imm)                  //      AVX512_F{kz|sae}
+  ASMJIT_INST_4x(vcmpsd, Vcmpsd, KReg, Xmm, Mem, Imm)                  //      AVX512_F{kz|sae}
+  ASMJIT_INST_4x(vcmpss, Vcmpss, Xmm, Xmm, Xmm, Imm)                   // AVX
+  ASMJIT_INST_4x(vcmpss, Vcmpss, Xmm, Xmm, Mem, Imm)                   // AVX
+  ASMJIT_INST_4x(vcmpss, Vcmpss, KReg, Xmm, Xmm, Imm)                  //      AVX512_F{kz|sae}
+  ASMJIT_INST_4x(vcmpss, Vcmpss, KReg, Xmm, Mem, Imm)                  //      AVX512_F{kz|sae}
+  ASMJIT_INST_2x(vcomisd, Vcomisd, Xmm, Xmm)                           // AVX  AVX512_F{sae}
+  ASMJIT_INST_2x(vcomisd, Vcomisd, Xmm, Mem)                           // AVX  AVX512_F{sae}
+  ASMJIT_INST_2x(vcomiss, Vcomiss, Xmm, Xmm)                           // AVX  AVX512_F{sae}
+  ASMJIT_INST_2x(vcomiss, Vcomiss, Xmm, Mem)                           // AVX  AVX512_F{sae}
+  ASMJIT_INST_2x(vcompresspd, Vcompresspd, Vec, Vec)                   //      AVX512_F{kz}
+  ASMJIT_INST_2x(vcompresspd, Vcompresspd, Mem, Vec)                   //      AVX512_F{kz}
+  ASMJIT_INST_2x(vcompressps, Vcompressps, Vec, Vec)                   //      AVX512_F{kz}
+  ASMJIT_INST_2x(vcompressps, Vcompressps, Mem, Vec)                   //      AVX512_F{kz}
+  ASMJIT_INST_2x(vcvtdq2pd, Vcvtdq2pd, Vec, Vec)                       // AVX  AVX512_F{kz|b32}
+  ASMJIT_INST_2x(vcvtdq2pd, Vcvtdq2pd, Vec, Mem)                       // AVX  AVX512_F{kz|b32}
+  ASMJIT_INST_2x(vcvtdq2ps, Vcvtdq2ps, Vec, Vec)                       // AVX  AVX512_F{kz|b32}
+  ASMJIT_INST_2x(vcvtdq2ps, Vcvtdq2ps, Vec, Mem)                       // AVX  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vcvtne2ps2bf16, Vcvtne2ps2bf16, Vec, Vec, Vec)        //      AVX512_BF16{kz|b32}
+  ASMJIT_INST_3x(vcvtne2ps2bf16, Vcvtne2ps2bf16, Vec, Vec, Mem)        //      AVX512_BF16{kz|b32}
+  ASMJIT_INST_2x(vcvtneps2bf16, Vcvtneps2bf16, Vec, Vec)               //      AVX512_BF16{kz|b32}
+  ASMJIT_INST_2x(vcvtneps2bf16, Vcvtneps2bf16, Vec, Mem)               //      AVX512_BF16{kz|b32}
+  ASMJIT_INST_2x(vcvtpd2dq, Vcvtpd2dq, Vec, Vec)                       // AVX  AVX512_F{kz|b64}
+  ASMJIT_INST_2x(vcvtpd2dq, Vcvtpd2dq, Vec, Mem)                       // AVX  AVX512_F{kz|b64}
+  ASMJIT_INST_2x(vcvtpd2ps, Vcvtpd2ps, Vec, Vec)                       // AVX  AVX512_F{kz|b64}
+  ASMJIT_INST_2x(vcvtpd2ps, Vcvtpd2ps, Vec, Mem)                       // AVX  AVX512_F{kz|b64}
+  ASMJIT_INST_2x(vcvtpd2qq, Vcvtpd2qq, Vec, Vec)                       //      AVX512_DQ{kz|b64}
+  ASMJIT_INST_2x(vcvtpd2qq, Vcvtpd2qq, Vec, Mem)                       //      AVX512_DQ{kz|b64}
+  ASMJIT_INST_2x(vcvtpd2udq, Vcvtpd2udq, Vec, Vec)                     //      AVX512_F{kz|b64}
+  ASMJIT_INST_2x(vcvtpd2udq, Vcvtpd2udq, Vec, Mem)                     //      AVX512_F{kz|b64}
+  ASMJIT_INST_2x(vcvtpd2uqq, Vcvtpd2uqq, Vec, Vec)                     //      AVX512_DQ{kz|b64}
+  ASMJIT_INST_2x(vcvtpd2uqq, Vcvtpd2uqq, Vec, Mem)                     //      AVX512_DQ{kz|b64}
+  ASMJIT_INST_2x(vcvtph2ps, Vcvtph2ps, Vec, Vec)                       // F16C AVX512_F{kz}
+  ASMJIT_INST_2x(vcvtph2ps, Vcvtph2ps, Vec, Mem)                       // F16C AVX512_F{kz}
+  ASMJIT_INST_2x(vcvtps2dq, Vcvtps2dq, Vec, Vec)                       // AVX  AVX512_F{kz|b32}
+  ASMJIT_INST_2x(vcvtps2dq, Vcvtps2dq, Vec, Mem)                       // AVX  AVX512_F{kz|b32}
+  ASMJIT_INST_2x(vcvtps2pd, Vcvtps2pd, Vec, Vec)                       // AVX  AVX512_F{kz|b32}
+  ASMJIT_INST_2x(vcvtps2pd, Vcvtps2pd, Vec, Mem)                       // AVX  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vcvtps2ph, Vcvtps2ph, Vec, Vec, Imm)                  // F16C AVX512_F{kz}
+  ASMJIT_INST_3x(vcvtps2ph, Vcvtps2ph, Mem, Vec, Imm)                  // F16C AVX512_F{kz}
+  ASMJIT_INST_2x(vcvtps2qq, Vcvtps2qq, Vec, Vec)                       //      AVX512_DQ{kz|b32}
+  ASMJIT_INST_2x(vcvtps2qq, Vcvtps2qq, Vec, Mem)                       //      AVX512_DQ{kz|b32}
+  ASMJIT_INST_2x(vcvtps2udq, Vcvtps2udq, Vec, Vec)                     //      AVX512_F{kz|b32}
+  ASMJIT_INST_2x(vcvtps2udq, Vcvtps2udq, Vec, Mem)                     //      AVX512_F{kz|b32}
+  ASMJIT_INST_2x(vcvtps2uqq, Vcvtps2uqq, Vec, Vec)                     //      AVX512_DQ{kz|b32}
+  ASMJIT_INST_2x(vcvtps2uqq, Vcvtps2uqq, Vec, Mem)                     //      AVX512_DQ{kz|b32}
+  ASMJIT_INST_2x(vcvtqq2pd, Vcvtqq2pd, Vec, Vec)                       //      AVX512_DQ{kz|b64}
+  ASMJIT_INST_2x(vcvtqq2pd, Vcvtqq2pd, Vec, Mem)                       //      AVX512_DQ{kz|b64}
+  ASMJIT_INST_2x(vcvtqq2ps, Vcvtqq2ps, Vec, Vec)                       //      AVX512_DQ{kz|b64}
+  ASMJIT_INST_2x(vcvtqq2ps, Vcvtqq2ps, Vec, Mem)                       //      AVX512_DQ{kz|b64}
+  ASMJIT_INST_2x(vcvtsd2si, Vcvtsd2si, Gp, Xmm)                        // AVX  AVX512_F{er}
+  ASMJIT_INST_2x(vcvtsd2si, Vcvtsd2si, Gp, Mem)                        // AVX  AVX512_F{er}
+  ASMJIT_INST_3x(vcvtsd2ss, Vcvtsd2ss, Xmm, Xmm, Xmm)                  // AVX  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vcvtsd2ss, Vcvtsd2ss, Xmm, Xmm, Mem)                  // AVX  AVX512_F{kz|er}
+  ASMJIT_INST_2x(vcvtsd2usi, Vcvtsd2usi, Gp, Xmm)                      //      AVX512_F{er}
+  ASMJIT_INST_2x(vcvtsd2usi, Vcvtsd2usi, Gp, Mem)                      //      AVX512_F{er}
+  ASMJIT_INST_3x(vcvtsi2sd, Vcvtsi2sd, Xmm, Xmm, Gp)                   // AVX  AVX512_F{er}
+  ASMJIT_INST_3x(vcvtsi2sd, Vcvtsi2sd, Xmm, Xmm, Mem)                  // AVX  AVX512_F{er}
+  ASMJIT_INST_3x(vcvtsi2ss, Vcvtsi2ss, Xmm, Xmm, Gp)                   // AVX  AVX512_F{er}
+  ASMJIT_INST_3x(vcvtsi2ss, Vcvtsi2ss, Xmm, Xmm, Mem)                  // AVX  AVX512_F{er}
+  ASMJIT_INST_3x(vcvtss2sd, Vcvtss2sd, Xmm, Xmm, Xmm)                  // AVX  AVX512_F{kz|sae}
+  ASMJIT_INST_3x(vcvtss2sd, Vcvtss2sd, Xmm, Xmm, Mem)                  // AVX  AVX512_F{kz|sae}
+  ASMJIT_INST_2x(vcvtss2si, Vcvtss2si, Gp, Xmm)                        // AVX  AVX512_F{er}
+  ASMJIT_INST_2x(vcvtss2si, Vcvtss2si, Gp, Mem)                        // AVX  AVX512_F{er}
+  ASMJIT_INST_2x(vcvtss2usi, Vcvtss2usi, Gp, Xmm)                      //      AVX512_F{er}
+  ASMJIT_INST_2x(vcvtss2usi, Vcvtss2usi, Gp, Mem)                      //      AVX512_F{er}
+  ASMJIT_INST_2x(vcvttpd2dq, Vcvttpd2dq, Vec, Vec)                     // AVX  AVX512_F{kz|b64}
+  ASMJIT_INST_2x(vcvttpd2dq, Vcvttpd2dq, Vec, Mem)                     // AVX  AVX512_F{kz|b64}
+  ASMJIT_INST_2x(vcvttpd2qq, Vcvttpd2qq, Vec, Vec)                     //      AVX512_DQ{kz|b64}
+  ASMJIT_INST_2x(vcvttpd2qq, Vcvttpd2qq, Vec, Mem)                     //      AVX512_DQ{kz|b64}
+  ASMJIT_INST_2x(vcvttpd2udq, Vcvttpd2udq, Vec, Vec)                   //      AVX512_F{kz|b64}
+  ASMJIT_INST_2x(vcvttpd2udq, Vcvttpd2udq, Vec, Mem)                   //      AVX512_F{kz|b64}
+  ASMJIT_INST_2x(vcvttpd2uqq, Vcvttpd2uqq, Vec, Vec)                   //      AVX512_DQ{kz|b64}
+  ASMJIT_INST_2x(vcvttpd2uqq, Vcvttpd2uqq, Vec, Mem)                   //      AVX512_DQ{kz|b64}
+  ASMJIT_INST_2x(vcvttps2dq, Vcvttps2dq, Vec, Vec)                     // AVX  AVX512_F{kz|b32}
+  ASMJIT_INST_2x(vcvttps2dq, Vcvttps2dq, Vec, Mem)                     // AVX  AVX512_F{kz|b32}
+  ASMJIT_INST_2x(vcvttps2qq, Vcvttps2qq, Vec, Vec)                     //      AVX512_DQ{kz|b32}
+  ASMJIT_INST_2x(vcvttps2qq, Vcvttps2qq, Vec, Mem)                     //      AVX512_DQ{kz|b32}
+  ASMJIT_INST_2x(vcvttps2udq, Vcvttps2udq, Vec, Vec)                   //      AVX512_F{kz|b32}
+  ASMJIT_INST_2x(vcvttps2udq, Vcvttps2udq, Vec, Mem)                   //      AVX512_F{kz|b32}
+  ASMJIT_INST_2x(vcvttps2uqq, Vcvttps2uqq, Vec, Vec)                   //      AVX512_DQ{kz|b32}
+  ASMJIT_INST_2x(vcvttps2uqq, Vcvttps2uqq, Vec, Mem)                   //      AVX512_DQ{kz|b32}
+  ASMJIT_INST_2x(vcvttsd2si, Vcvttsd2si, Gp, Xmm)                      // AVX  AVX512_F{sae}
+  ASMJIT_INST_2x(vcvttsd2si, Vcvttsd2si, Gp, Mem)                      // AVX  AVX512_F{sae}
+  ASMJIT_INST_2x(vcvttsd2usi, Vcvttsd2usi, Gp, Xmm)                    //      AVX512_F{sae}
+  ASMJIT_INST_2x(vcvttsd2usi, Vcvttsd2usi, Gp, Mem)                    //      AVX512_F{sae}
+  ASMJIT_INST_2x(vcvttss2si, Vcvttss2si, Gp, Xmm)                      // AVX  AVX512_F{sae}
+  ASMJIT_INST_2x(vcvttss2si, Vcvttss2si, Gp, Mem)                      // AVX  AVX512_F{sae}
+  ASMJIT_INST_2x(vcvttss2usi, Vcvttss2usi, Gp, Xmm)                    //      AVX512_F{sae}
+  ASMJIT_INST_2x(vcvttss2usi, Vcvttss2usi, Gp, Mem)                    //      AVX512_F{sae}
+  ASMJIT_INST_2x(vcvtudq2pd, Vcvtudq2pd, Vec, Vec)                     //      AVX512_F{kz|b32}
+  ASMJIT_INST_2x(vcvtudq2pd, Vcvtudq2pd, Vec, Mem)                     //      AVX512_F{kz|b32}
+  ASMJIT_INST_2x(vcvtudq2ps, Vcvtudq2ps, Vec, Vec)                     //      AVX512_F{kz|b32}
+  ASMJIT_INST_2x(vcvtudq2ps, Vcvtudq2ps, Vec, Mem)                     //      AVX512_F{kz|b32}
+  ASMJIT_INST_2x(vcvtuqq2pd, Vcvtuqq2pd, Vec, Vec)                     //      AVX512_DQ{kz|b64}
+  ASMJIT_INST_2x(vcvtuqq2pd, Vcvtuqq2pd, Vec, Mem)                     //      AVX512_DQ{kz|b64}
+  ASMJIT_INST_2x(vcvtuqq2ps, Vcvtuqq2ps, Vec, Vec)                     //      AVX512_DQ{kz|b64}
+  ASMJIT_INST_2x(vcvtuqq2ps, Vcvtuqq2ps, Vec, Mem)                     //      AVX512_DQ{kz|b64}
+  ASMJIT_INST_3x(vcvtusi2sd, Vcvtusi2sd, Xmm, Xmm, Gp)                 //      AVX512_F{er}
+  ASMJIT_INST_3x(vcvtusi2sd, Vcvtusi2sd, Xmm, Xmm, Mem)                //      AVX512_F{er}
+  ASMJIT_INST_3x(vcvtusi2ss, Vcvtusi2ss, Xmm, Xmm, Gp)                 //      AVX512_F{er}
+  ASMJIT_INST_3x(vcvtusi2ss, Vcvtusi2ss, Xmm, Xmm, Mem)                //      AVX512_F{er}
+  ASMJIT_INST_4x(vdbpsadbw, Vdbpsadbw, Vec, Vec, Vec, Imm)             //      AVX512_BW{kz}
+  ASMJIT_INST_4x(vdbpsadbw, Vdbpsadbw, Vec, Vec, Mem, Imm)             //      AVX512_BW{kz}
+  ASMJIT_INST_3x(vdivpd, Vdivpd, Vec, Vec, Vec)                        // AVX  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vdivpd, Vdivpd, Vec, Vec, Mem)                        // AVX  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vdivps, Vdivps, Vec, Vec, Vec)                        // AVX  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vdivps, Vdivps, Vec, Vec, Mem)                        // AVX  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vdivsd, Vdivsd, Xmm, Xmm, Xmm)                        // AVX  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vdivsd, Vdivsd, Xmm, Xmm, Mem)                        // AVX  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vdivss, Vdivss, Xmm, Xmm, Xmm)                        // AVX  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vdivss, Vdivss, Xmm, Xmm, Mem)                        // AVX  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vdpbf16ps, Vdpbf16ps, Vec, Vec, Vec)                  //      AVX512_BF16{kz|b32}
+  ASMJIT_INST_3x(vdpbf16ps, Vdpbf16ps, Vec, Vec, Mem)                  //      AVX512_BF16{kz|b32}
+  ASMJIT_INST_4x(vdppd, Vdppd, Vec, Vec, Vec, Imm)                     // AVX
+  ASMJIT_INST_4x(vdppd, Vdppd, Vec, Vec, Mem, Imm)                     // AVX
+  ASMJIT_INST_4x(vdpps, Vdpps, Vec, Vec, Vec, Imm)                     // AVX
+  ASMJIT_INST_4x(vdpps, Vdpps, Vec, Vec, Mem, Imm)                     // AVX
+  ASMJIT_INST_2x(vexp2pd, Vexp2pd, Vec, Vec)                           //      AVX512_ER{kz|sae|b64}
+  ASMJIT_INST_2x(vexp2pd, Vexp2pd, Vec, Mem)                           //      AVX512_ER{kz|sae|b64}
+  ASMJIT_INST_2x(vexp2ps, Vexp2ps, Vec, Vec)                           //      AVX512_ER{kz|sae|b32}
+  ASMJIT_INST_2x(vexp2ps, Vexp2ps, Vec, Mem)                           //      AVX512_ER{kz|sae|b32}
+  ASMJIT_INST_2x(vexpandpd, Vexpandpd, Vec, Vec)                       //      AVX512_F{kz}
+  ASMJIT_INST_2x(vexpandpd, Vexpandpd, Vec, Mem)                       //      AVX512_F{kz}
+  ASMJIT_INST_2x(vexpandps, Vexpandps, Vec, Vec)                       //      AVX512_F{kz}
+  ASMJIT_INST_2x(vexpandps, Vexpandps, Vec, Mem)                       //      AVX512_F{kz}
+  ASMJIT_INST_3x(vextractf128, Vextractf128, Vec, Vec, Imm)            // AVX
+  ASMJIT_INST_3x(vextractf128, Vextractf128, Mem, Vec, Imm)            // AVX
+  ASMJIT_INST_3x(vextractf32x4, Vextractf32x4, Vec, Vec, Imm)          //      AVX512_F{kz}
+  ASMJIT_INST_3x(vextractf32x4, Vextractf32x4, Mem, Vec, Imm)          //      AVX512_F{kz}
+  ASMJIT_INST_3x(vextractf32x8, Vextractf32x8, Vec, Vec, Imm)          //      AVX512_DQ{kz}
+  ASMJIT_INST_3x(vextractf32x8, Vextractf32x8, Mem, Vec, Imm)          //      AVX512_DQ{kz}
+  ASMJIT_INST_3x(vextractf64x2, Vextractf64x2, Vec, Vec, Imm)          //      AVX512_DQ{kz}
+  ASMJIT_INST_3x(vextractf64x2, Vextractf64x2, Mem, Vec, Imm)          //      AVX512_DQ{kz}
+  ASMJIT_INST_3x(vextractf64x4, Vextractf64x4, Vec, Vec, Imm)          //      AVX512_F{kz}
+  ASMJIT_INST_3x(vextractf64x4, Vextractf64x4, Mem, Vec, Imm)          //      AVX512_F{kz}
+  ASMJIT_INST_3x(vextracti128, Vextracti128, Vec, Vec, Imm)            // AVX2
+  ASMJIT_INST_3x(vextracti128, Vextracti128, Mem, Vec, Imm)            // AVX2
+  ASMJIT_INST_3x(vextracti32x4, Vextracti32x4, Vec, Vec, Imm)          //      AVX512_F{kz}
+  ASMJIT_INST_3x(vextracti32x4, Vextracti32x4, Mem, Vec, Imm)          //      AVX512_F{kz}
+  ASMJIT_INST_3x(vextracti32x8, Vextracti32x8, Vec, Vec, Imm)          //      AVX512_DQ{kz}
+  ASMJIT_INST_3x(vextracti32x8, Vextracti32x8, Mem, Vec, Imm)          //      AVX512_DQ{kz}
+  ASMJIT_INST_3x(vextracti64x2, Vextracti64x2, Vec, Vec, Imm)          //      AVX512_DQ{kz}
+  ASMJIT_INST_3x(vextracti64x2, Vextracti64x2, Mem, Vec, Imm)          //      AVX512_DQ{kz}
+  ASMJIT_INST_3x(vextracti64x4, Vextracti64x4, Vec, Vec, Imm)          //      AVX512_F{kz}
+  ASMJIT_INST_3x(vextracti64x4, Vextracti64x4, Mem, Vec, Imm)          //      AVX512_F{kz}
+  ASMJIT_INST_3x(vextractps, Vextractps, Gp, Xmm, Imm)                 // AVX  AVX512_F
+  ASMJIT_INST_3x(vextractps, Vextractps, Mem, Xmm, Imm)                // AVX  AVX512_F
+  ASMJIT_INST_4x(vfixupimmpd, Vfixupimmpd, Vec, Vec, Vec, Imm)         //      AVX512_F{kz|b64}
+  ASMJIT_INST_4x(vfixupimmpd, Vfixupimmpd, Vec, Vec, Mem, Imm)         //      AVX512_F{kz|b64}
+  ASMJIT_INST_4x(vfixupimmps, Vfixupimmps, Vec, Vec, Vec, Imm)         //      AVX512_F{kz|b32}
+  ASMJIT_INST_4x(vfixupimmps, Vfixupimmps, Vec, Vec, Mem, Imm)         //      AVX512_F{kz|b32}
+  ASMJIT_INST_4x(vfixupimmsd, Vfixupimmsd, Xmm, Xmm, Xmm, Imm)         //      AVX512_F{kz|sae}
+  ASMJIT_INST_4x(vfixupimmsd, Vfixupimmsd, Xmm, Xmm, Mem, Imm)         //      AVX512_F{kz|sae}
+  ASMJIT_INST_4x(vfixupimmss, Vfixupimmss, Xmm, Xmm, Xmm, Imm)         //      AVX512_F{kz|sae}
+  ASMJIT_INST_4x(vfixupimmss, Vfixupimmss, Xmm, Xmm, Mem, Imm)         //      AVX512_F{kz|sae}
+  ASMJIT_INST_3x(vfmadd132pd, Vfmadd132pd, Vec, Vec, Vec)              // FMA  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vfmadd132pd, Vfmadd132pd, Vec, Vec, Mem)              // FMA  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vfmadd132ps, Vfmadd132ps, Vec, Vec, Vec)              // FMA  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vfmadd132ps, Vfmadd132ps, Vec, Vec, Mem)              // FMA  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vfmadd132sd, Vfmadd132sd, Xmm, Xmm, Xmm)              // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfmadd132sd, Vfmadd132sd, Xmm, Xmm, Mem)              // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfmadd132ss, Vfmadd132ss, Xmm, Xmm, Xmm)              // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfmadd132ss, Vfmadd132ss, Xmm, Xmm, Mem)              // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfmadd213pd, Vfmadd213pd, Vec, Vec, Vec)              // FMA  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vfmadd213pd, Vfmadd213pd, Vec, Vec, Mem)              // FMA  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vfmadd213ps, Vfmadd213ps, Vec, Vec, Vec)              // FMA  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vfmadd213ps, Vfmadd213ps, Vec, Vec, Mem)              // FMA  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vfmadd213sd, Vfmadd213sd, Xmm, Xmm, Xmm)              // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfmadd213sd, Vfmadd213sd, Xmm, Xmm, Mem)              // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfmadd213ss, Vfmadd213ss, Xmm, Xmm, Xmm)              // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfmadd213ss, Vfmadd213ss, Xmm, Xmm, Mem)              // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfmadd231pd, Vfmadd231pd, Vec, Vec, Vec)              // FMA  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vfmadd231pd, Vfmadd231pd, Vec, Vec, Mem)              // FMA  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vfmadd231ps, Vfmadd231ps, Vec, Vec, Vec)              // FMA  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vfmadd231ps, Vfmadd231ps, Vec, Vec, Mem)              // FMA  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vfmadd231sd, Vfmadd231sd, Xmm, Xmm, Xmm)              // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfmadd231sd, Vfmadd231sd, Xmm, Xmm, Mem)              // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfmadd231ss, Vfmadd231ss, Xmm, Xmm, Xmm)              // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfmadd231ss, Vfmadd231ss, Xmm, Xmm, Mem)              // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfmaddsub132pd, Vfmaddsub132pd, Vec, Vec, Vec)        // FMA  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vfmaddsub132pd, Vfmaddsub132pd, Vec, Vec, Mem)        // FMA  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vfmaddsub132ps, Vfmaddsub132ps, Vec, Vec, Vec)        // FMA  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vfmaddsub132ps, Vfmaddsub132ps, Vec, Vec, Mem)        // FMA  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vfmaddsub213pd, Vfmaddsub213pd, Vec, Vec, Vec)        // FMA  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vfmaddsub213pd, Vfmaddsub213pd, Vec, Vec, Mem)        // FMA  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vfmaddsub213ps, Vfmaddsub213ps, Vec, Vec, Vec)        // FMA  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vfmaddsub213ps, Vfmaddsub213ps, Vec, Vec, Mem)        // FMA  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vfmaddsub231pd, Vfmaddsub231pd, Vec, Vec, Vec)        // FMA  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vfmaddsub231pd, Vfmaddsub231pd, Vec, Vec, Mem)        // FMA  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vfmaddsub231ps, Vfmaddsub231ps, Vec, Vec, Vec)        // FMA  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vfmaddsub231ps, Vfmaddsub231ps, Vec, Vec, Mem)        // FMA  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vfmsub132pd, Vfmsub132pd, Vec, Vec, Vec)              // FMA  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vfmsub132pd, Vfmsub132pd, Vec, Vec, Mem)              // FMA  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vfmsub132ps, Vfmsub132ps, Vec, Vec, Vec)              // FMA  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vfmsub132ps, Vfmsub132ps, Vec, Vec, Mem)              // FMA  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vfmsub132sd, Vfmsub132sd, Xmm, Xmm, Xmm)              // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfmsub132sd, Vfmsub132sd, Xmm, Xmm, Mem)              // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfmsub132ss, Vfmsub132ss, Xmm, Xmm, Xmm)              // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfmsub132ss, Vfmsub132ss, Xmm, Xmm, Mem)              // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfmsub213pd, Vfmsub213pd, Vec, Vec, Vec)              // FMA  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vfmsub213pd, Vfmsub213pd, Vec, Vec, Mem)              // FMA  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vfmsub213ps, Vfmsub213ps, Vec, Vec, Vec)              // FMA  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vfmsub213ps, Vfmsub213ps, Vec, Vec, Mem)              // FMA  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vfmsub213sd, Vfmsub213sd, Xmm, Xmm, Xmm)              // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfmsub213sd, Vfmsub213sd, Xmm, Xmm, Mem)              // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfmsub213ss, Vfmsub213ss, Xmm, Xmm, Xmm)              // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfmsub213ss, Vfmsub213ss, Xmm, Xmm, Mem)              // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfmsub231pd, Vfmsub231pd, Vec, Vec, Vec)              // FMA  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vfmsub231pd, Vfmsub231pd, Vec, Vec, Mem)              // FMA  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vfmsub231ps, Vfmsub231ps, Vec, Vec, Vec)              // FMA  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vfmsub231ps, Vfmsub231ps, Vec, Vec, Mem)              // FMA  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vfmsub231sd, Vfmsub231sd, Xmm, Xmm, Xmm)              // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfmsub231sd, Vfmsub231sd, Xmm, Xmm, Mem)              // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfmsub231ss, Vfmsub231ss, Xmm, Xmm, Xmm)              // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfmsub231ss, Vfmsub231ss, Xmm, Xmm, Mem)              // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfmsubadd132pd, Vfmsubadd132pd, Vec, Vec, Vec)        // FMA  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vfmsubadd132pd, Vfmsubadd132pd, Vec, Vec, Mem)        // FMA  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vfmsubadd132ps, Vfmsubadd132ps, Vec, Vec, Vec)        // FMA  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vfmsubadd132ps, Vfmsubadd132ps, Vec, Vec, Mem)        // FMA  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vfmsubadd213pd, Vfmsubadd213pd, Vec, Vec, Vec)        // FMA  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vfmsubadd213pd, Vfmsubadd213pd, Vec, Vec, Mem)        // FMA  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vfmsubadd213ps, Vfmsubadd213ps, Vec, Vec, Vec)        // FMA  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vfmsubadd213ps, Vfmsubadd213ps, Vec, Vec, Mem)        // FMA  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vfmsubadd231pd, Vfmsubadd231pd, Vec, Vec, Vec)        // FMA  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vfmsubadd231pd, Vfmsubadd231pd, Vec, Vec, Mem)        // FMA  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vfmsubadd231ps, Vfmsubadd231ps, Vec, Vec, Vec)        // FMA  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vfmsubadd231ps, Vfmsubadd231ps, Vec, Vec, Mem)        // FMA  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vfnmadd132pd, Vfnmadd132pd, Vec, Vec, Vec)            // FMA  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vfnmadd132pd, Vfnmadd132pd, Vec, Vec, Mem)            // FMA  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vfnmadd132ps, Vfnmadd132ps, Vec, Vec, Vec)            // FMA  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vfnmadd132ps, Vfnmadd132ps, Vec, Vec, Mem)            // FMA  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vfnmadd132sd, Vfnmadd132sd, Xmm, Xmm, Xmm)            // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfnmadd132sd, Vfnmadd132sd, Xmm, Xmm, Mem)            // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfnmadd132ss, Vfnmadd132ss, Xmm, Xmm, Xmm)            // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfnmadd132ss, Vfnmadd132ss, Xmm, Xmm, Mem)            // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfnmadd213pd, Vfnmadd213pd, Vec, Vec, Vec)            // FMA  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vfnmadd213pd, Vfnmadd213pd, Vec, Vec, Mem)            // FMA  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vfnmadd213ps, Vfnmadd213ps, Vec, Vec, Vec)            // FMA  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vfnmadd213ps, Vfnmadd213ps, Vec, Vec, Mem)            // FMA  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vfnmadd213sd, Vfnmadd213sd, Xmm, Xmm, Xmm)            // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfnmadd213sd, Vfnmadd213sd, Xmm, Xmm, Mem)            // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfnmadd213ss, Vfnmadd213ss, Xmm, Xmm, Xmm)            // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfnmadd213ss, Vfnmadd213ss, Xmm, Xmm, Mem)            // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfnmadd231pd, Vfnmadd231pd, Vec, Vec, Vec)            // FMA  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vfnmadd231pd, Vfnmadd231pd, Vec, Vec, Mem)            // FMA  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vfnmadd231ps, Vfnmadd231ps, Vec, Vec, Vec)            // FMA  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vfnmadd231ps, Vfnmadd231ps, Vec, Vec, Mem)            // FMA  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vfnmadd231sd, Vfnmadd231sd, Xmm, Xmm, Xmm)            // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfnmadd231sd, Vfnmadd231sd, Xmm, Xmm, Mem)            // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfnmadd231ss, Vfnmadd231ss, Xmm, Xmm, Xmm)            // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfnmadd231ss, Vfnmadd231ss, Xmm, Xmm, Mem)            // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfnmsub132pd, Vfnmsub132pd, Vec, Vec, Vec)            // FMA  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vfnmsub132pd, Vfnmsub132pd, Vec, Vec, Mem)            // FMA  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vfnmsub132ps, Vfnmsub132ps, Vec, Vec, Vec)            // FMA  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vfnmsub132ps, Vfnmsub132ps, Vec, Vec, Mem)            // FMA  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vfnmsub132sd, Vfnmsub132sd, Xmm, Xmm, Xmm)            // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfnmsub132sd, Vfnmsub132sd, Xmm, Xmm, Mem)            // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfnmsub132ss, Vfnmsub132ss, Xmm, Xmm, Xmm)            // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfnmsub132ss, Vfnmsub132ss, Xmm, Xmm, Mem)            // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfnmsub213pd, Vfnmsub213pd, Vec, Vec, Vec)            // FMA  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vfnmsub213pd, Vfnmsub213pd, Vec, Vec, Mem)            // FMA  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vfnmsub213ps, Vfnmsub213ps, Vec, Vec, Vec)            // FMA  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vfnmsub213ps, Vfnmsub213ps, Vec, Vec, Mem)            // FMA  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vfnmsub213sd, Vfnmsub213sd, Xmm, Xmm, Xmm)            // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfnmsub213sd, Vfnmsub213sd, Xmm, Xmm, Mem)            // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfnmsub213ss, Vfnmsub213ss, Xmm, Xmm, Xmm)            // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfnmsub213ss, Vfnmsub213ss, Xmm, Xmm, Mem)            // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfnmsub231pd, Vfnmsub231pd, Vec, Vec, Vec)            // FMA  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vfnmsub231pd, Vfnmsub231pd, Vec, Vec, Mem)            // FMA  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vfnmsub231ps, Vfnmsub231ps, Vec, Vec, Vec)            // FMA  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vfnmsub231ps, Vfnmsub231ps, Vec, Vec, Mem)            // FMA  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vfnmsub231sd, Vfnmsub231sd, Xmm, Xmm, Xmm)            // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfnmsub231sd, Vfnmsub231sd, Xmm, Xmm, Mem)            // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfnmsub231ss, Vfnmsub231ss, Xmm, Xmm, Xmm)            // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfnmsub231ss, Vfnmsub231ss, Xmm, Xmm, Mem)            // FMA  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vfpclasspd, Vfpclasspd, KReg, Vec, Imm)               //      AVX512_DQ{k|b64}
+  ASMJIT_INST_3x(vfpclasspd, Vfpclasspd, KReg, Mem, Imm)               //      AVX512_DQ{k|b64}
+  ASMJIT_INST_3x(vfpclassps, Vfpclassps, KReg, Vec, Imm)               //      AVX512_DQ{k|b32}
+  ASMJIT_INST_3x(vfpclassps, Vfpclassps, KReg, Mem, Imm)               //      AVX512_DQ{k|b32}
+  ASMJIT_INST_3x(vfpclasssd, Vfpclasssd, KReg, Xmm, Imm)               //      AVX512_DQ{k}
+  ASMJIT_INST_3x(vfpclasssd, Vfpclasssd, KReg, Mem, Imm)               //      AVX512_DQ{k}
+  ASMJIT_INST_3x(vfpclassss, Vfpclassss, KReg, Xmm, Imm)               //      AVX512_DQ{k}
+  ASMJIT_INST_3x(vfpclassss, Vfpclassss, KReg, Mem, Imm)               //      AVX512_DQ{k}
+  ASMJIT_INST_2x(vgatherdpd, Vgatherdpd, Vec, Mem)                     //      AVX512_F{k}
+  ASMJIT_INST_3x(vgatherdpd, Vgatherdpd, Vec, Mem, Vec)                // AVX2
+  ASMJIT_INST_2x(vgatherdps, Vgatherdps, Vec, Mem)                     //      AVX512_F{k}
+  ASMJIT_INST_3x(vgatherdps, Vgatherdps, Vec, Mem, Vec)                // AVX2
+  ASMJIT_INST_1x(vgatherpf0dpd, Vgatherpf0dpd, Mem)                    //      AVX512_PF{k}
+  ASMJIT_INST_1x(vgatherpf0dps, Vgatherpf0dps, Mem)                    //      AVX512_PF{k}
+  ASMJIT_INST_1x(vgatherpf0qpd, Vgatherpf0qpd, Mem)                    //      AVX512_PF{k}
+  ASMJIT_INST_1x(vgatherpf0qps, Vgatherpf0qps, Mem)                    //      AVX512_PF{k}
+  ASMJIT_INST_1x(vgatherpf1dpd, Vgatherpf1dpd, Mem)                    //      AVX512_PF{k}
+  ASMJIT_INST_1x(vgatherpf1dps, Vgatherpf1dps, Mem)                    //      AVX512_PF{k}
+  ASMJIT_INST_1x(vgatherpf1qpd, Vgatherpf1qpd, Mem)                    //      AVX512_PF{k}
+  ASMJIT_INST_1x(vgatherpf1qps, Vgatherpf1qps, Mem)                    //      AVX512_PF{k}
+  ASMJIT_INST_2x(vgatherqpd, Vgatherqpd, Vec, Mem)                     //      AVX512_F{k}
+  ASMJIT_INST_3x(vgatherqpd, Vgatherqpd, Vec, Mem, Vec)                // AVX2
+  ASMJIT_INST_2x(vgatherqps, Vgatherqps, Vec, Mem)                     //      AVX512_F{k}
+  ASMJIT_INST_3x(vgatherqps, Vgatherqps, Vec, Mem, Vec)                // AVX2
+  ASMJIT_INST_2x(vgetexppd, Vgetexppd, Vec, Vec)                       //      AVX512_F{kz|b64}
+  ASMJIT_INST_2x(vgetexppd, Vgetexppd, Vec, Mem)                       //      AVX512_F{kz|b64}
+  ASMJIT_INST_2x(vgetexpps, Vgetexpps, Vec, Vec)                       //      AVX512_F{kz|b32}
+  ASMJIT_INST_2x(vgetexpps, Vgetexpps, Vec, Mem)                       //      AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vgetexpsd, Vgetexpsd, Xmm, Xmm, Xmm)                  //      AVX512_F{kz|sae}
+  ASMJIT_INST_3x(vgetexpsd, Vgetexpsd, Xmm, Xmm, Mem)                  //      AVX512_F{kz|sae}
+  ASMJIT_INST_3x(vgetexpss, Vgetexpss, Xmm, Xmm, Xmm)                  //      AVX512_F{kz|sae}
+  ASMJIT_INST_3x(vgetexpss, Vgetexpss, Xmm, Xmm, Mem)                  //      AVX512_F{kz|sae}
+  ASMJIT_INST_3x(vgetmantpd, Vgetmantpd, Vec, Vec, Imm)                //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vgetmantpd, Vgetmantpd, Vec, Mem, Imm)                //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vgetmantps, Vgetmantps, Vec, Vec, Imm)                //      AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vgetmantps, Vgetmantps, Vec, Mem, Imm)                //      AVX512_F{kz|b32}
+  ASMJIT_INST_4x(vgetmantsd, Vgetmantsd, Xmm, Xmm, Xmm, Imm)           //      AVX512_F{kz|sae}
+  ASMJIT_INST_4x(vgetmantsd, Vgetmantsd, Xmm, Xmm, Mem, Imm)           //      AVX512_F{kz|sae}
+  ASMJIT_INST_4x(vgetmantss, Vgetmantss, Xmm, Xmm, Xmm, Imm)           //      AVX512_F{kz|sae}
+  ASMJIT_INST_4x(vgetmantss, Vgetmantss, Xmm, Xmm, Mem, Imm)           //      AVX512_F{kz|sae}
+  ASMJIT_INST_4x(vgf2p8affineinvqb, Vgf2p8affineinvqb,Vec,Vec,Vec,Imm) // AVX  AVX512_VL{kz} GFNI
+  ASMJIT_INST_4x(vgf2p8affineinvqb, Vgf2p8affineinvqb,Vec,Vec,Mem,Imm) // AVX  AVX512_VL{kz} GFNI
+  ASMJIT_INST_4x(vgf2p8affineqb, Vgf2p8affineqb, Vec, Vec, Vec, Imm)   // AVX  AVX512_VL{kz} GFNI
+  ASMJIT_INST_4x(vgf2p8affineqb, Vgf2p8affineqb, Vec, Vec, Mem, Imm)   // AVX  AVX512_VL{kz} GFNI
+  ASMJIT_INST_3x(vgf2p8mulb, Vgf2p8mulb, Vec, Vec, Vec)                // AVX  AVX512_VL{kz} GFNI
+  ASMJIT_INST_3x(vgf2p8mulb, Vgf2p8mulb, Vec, Vec, Mem)                // AVX  AVX512_VL{kz} GFNI
+  ASMJIT_INST_3x(vhaddpd, Vhaddpd, Vec, Vec, Vec)                      // AVX
+  ASMJIT_INST_3x(vhaddpd, Vhaddpd, Vec, Vec, Mem)                      // AVX
+  ASMJIT_INST_3x(vhaddps, Vhaddps, Vec, Vec, Vec)                      // AVX
+  ASMJIT_INST_3x(vhaddps, Vhaddps, Vec, Vec, Mem)                      // AVX
+  ASMJIT_INST_3x(vhsubpd, Vhsubpd, Vec, Vec, Vec)                      // AVX
+  ASMJIT_INST_3x(vhsubpd, Vhsubpd, Vec, Vec, Mem)                      // AVX
+  ASMJIT_INST_3x(vhsubps, Vhsubps, Vec, Vec, Vec)                      // AVX
+  ASMJIT_INST_3x(vhsubps, Vhsubps, Vec, Vec, Mem)                      // AVX
+  ASMJIT_INST_4x(vinsertf128, Vinsertf128, Vec, Vec, Vec, Imm)         // AVX
+  ASMJIT_INST_4x(vinsertf128, Vinsertf128, Vec, Vec, Mem, Imm)         // AVX
+  ASMJIT_INST_4x(vinsertf32x4, Vinsertf32x4, Vec, Vec, Vec, Imm)       //      AVX512_F{kz}
+  ASMJIT_INST_4x(vinsertf32x4, Vinsertf32x4, Vec, Vec, Mem, Imm)       //      AVX512_F{kz}
+  ASMJIT_INST_4x(vinsertf32x8, Vinsertf32x8, Vec, Vec, Vec, Imm)       //      AVX512_DQ{kz}
+  ASMJIT_INST_4x(vinsertf32x8, Vinsertf32x8, Vec, Vec, Mem, Imm)       //      AVX512_DQ{kz}
+  ASMJIT_INST_4x(vinsertf64x2, Vinsertf64x2, Vec, Vec, Vec, Imm)       //      AVX512_DQ{kz}
+  ASMJIT_INST_4x(vinsertf64x2, Vinsertf64x2, Vec, Vec, Mem, Imm)       //      AVX512_DQ{kz}
+  ASMJIT_INST_4x(vinsertf64x4, Vinsertf64x4, Vec, Vec, Vec, Imm)       //      AVX512_F{kz}
+  ASMJIT_INST_4x(vinsertf64x4, Vinsertf64x4, Vec, Vec, Mem, Imm)       //      AVX512_F{kz}
+  ASMJIT_INST_4x(vinserti128, Vinserti128, Vec, Vec, Vec, Imm)         // AVX2
+  ASMJIT_INST_4x(vinserti128, Vinserti128, Vec, Vec, Mem, Imm)         // AVX2
+  ASMJIT_INST_4x(vinserti32x4, Vinserti32x4, Vec, Vec, Vec, Imm)       //      AVX512_F{kz}
+  ASMJIT_INST_4x(vinserti32x4, Vinserti32x4, Vec, Vec, Mem, Imm)       //      AVX512_F{kz}
+  ASMJIT_INST_4x(vinserti32x8, Vinserti32x8, Vec, Vec, Vec, Imm)       //      AVX512_DQ{kz}
+  ASMJIT_INST_4x(vinserti32x8, Vinserti32x8, Vec, Vec, Mem, Imm)       //      AVX512_DQ{kz}
+  ASMJIT_INST_4x(vinserti64x2, Vinserti64x2, Vec, Vec, Vec, Imm)       //      AVX512_DQ{kz}
+  ASMJIT_INST_4x(vinserti64x2, Vinserti64x2, Vec, Vec, Mem, Imm)       //      AVX512_DQ{kz}
+  ASMJIT_INST_4x(vinserti64x4, Vinserti64x4, Vec, Vec, Vec, Imm)       //      AVX512_F{kz}
+  ASMJIT_INST_4x(vinserti64x4, Vinserti64x4, Vec, Vec, Mem, Imm)       //      AVX512_F{kz}
+  ASMJIT_INST_4x(vinsertps, Vinsertps, Xmm, Xmm, Xmm, Imm)             // AVX  AVX512_F
+  ASMJIT_INST_4x(vinsertps, Vinsertps, Xmm, Xmm, Mem, Imm)             // AVX  AVX512_F
+  ASMJIT_INST_2x(vlddqu, Vlddqu, Vec, Mem)                             // AVX
+  ASMJIT_INST_1x(vldmxcsr, Vldmxcsr, Mem)                              // AVX
+  ASMJIT_INST_3x(vmaskmovdqu, Vmaskmovdqu, Vec, Vec, DS_ZDI)           // AVX  [EXPLICIT]
+  ASMJIT_INST_3x(vmaskmovpd, Vmaskmovpd, Mem, Vec, Vec)                // AVX
+  ASMJIT_INST_3x(vmaskmovpd, Vmaskmovpd, Vec, Vec, Mem)                // AVX
+  ASMJIT_INST_3x(vmaskmovps, Vmaskmovps, Mem, Vec, Vec)                // AVX
+  ASMJIT_INST_3x(vmaskmovps, Vmaskmovps, Vec, Vec, Mem)                // AVX
+  ASMJIT_INST_3x(vmaxpd, Vmaxpd, Vec, Vec, Vec)                        // AVX  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vmaxpd, Vmaxpd, Vec, Vec, Mem)                        // AVX  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vmaxps, Vmaxps, Vec, Vec, Vec)                        // AVX  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vmaxps, Vmaxps, Vec, Vec, Mem)                        // AVX  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vmaxsd, Vmaxsd, Xmm, Xmm, Xmm)                        // AVX  AVX512_F{kz|sae}
+  ASMJIT_INST_3x(vmaxsd, Vmaxsd, Xmm, Xmm, Mem)                        // AVX  AVX512_F{kz|sae}
+  ASMJIT_INST_3x(vmaxss, Vmaxss, Xmm, Xmm, Xmm)                        // AVX  AVX512_F{kz|sae}
+  ASMJIT_INST_3x(vmaxss, Vmaxss, Xmm, Xmm, Mem)                        // AVX  AVX512_F{kz|sae}
+  ASMJIT_INST_3x(vminpd, Vminpd, Vec, Vec, Vec)                        // AVX  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vminpd, Vminpd, Vec, Vec, Mem)                        // AVX  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vminps, Vminps, Vec, Vec, Vec)                        // AVX  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vminps, Vminps, Vec, Vec, Mem)                        // AVX  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vminsd, Vminsd, Xmm, Xmm, Xmm)                        // AVX  AVX512_F{kz|sae}
+  ASMJIT_INST_3x(vminsd, Vminsd, Xmm, Xmm, Mem)                        // AVX  AVX512_F{kz|sae}
+  ASMJIT_INST_3x(vminss, Vminss, Xmm, Xmm, Xmm)                        // AVX  AVX512_F{kz|sae}
+  ASMJIT_INST_3x(vminss, Vminss, Xmm, Xmm, Mem)                        // AVX  AVX512_F{kz|sae}
+  ASMJIT_INST_2x(vmovapd, Vmovapd, Vec, Vec)                           // AVX  AVX512_F{kz}
+  ASMJIT_INST_2x(vmovapd, Vmovapd, Vec, Mem)                           // AVX  AVX512_F{kz}
+  ASMJIT_INST_2x(vmovapd, Vmovapd, Mem, Vec)                           // AVX  AVX512_F{kz}
+  ASMJIT_INST_2x(vmovaps, Vmovaps, Vec, Vec)                           // AVX  AVX512_F{kz}
+  ASMJIT_INST_2x(vmovaps, Vmovaps, Vec, Mem)                           // AVX  AVX512_F{kz}
+  ASMJIT_INST_2x(vmovaps, Vmovaps, Mem, Vec)                           // AVX  AVX512_F{kz}
+  ASMJIT_INST_2x(vmovd, Vmovd, Gp, Xmm)                                // AVX  AVX512_F
+  ASMJIT_INST_2x(vmovd, Vmovd, Mem, Xmm)                               // AVX  AVX512_F
+  ASMJIT_INST_2x(vmovd, Vmovd, Xmm, Gp)                                // AVX  AVX512_F
+  ASMJIT_INST_2x(vmovd, Vmovd, Xmm, Mem)                               // AVX  AVX512_F
+  ASMJIT_INST_2x(vmovddup, Vmovddup, Vec, Vec)                         // AVX  AVX512_F{kz}
+  ASMJIT_INST_2x(vmovddup, Vmovddup, Vec, Mem)                         // AVX  AVX512_F{kz}
+  ASMJIT_INST_2x(vmovdqa, Vmovdqa, Vec, Vec)                           // AVX
+  ASMJIT_INST_2x(vmovdqa, Vmovdqa, Vec, Mem)                           // AVX
+  ASMJIT_INST_2x(vmovdqa, Vmovdqa, Mem, Vec)                           // AVX
+  ASMJIT_INST_2x(vmovdqa32, Vmovdqa32, Vec, Vec)                       //      AVX512_F{kz}
+  ASMJIT_INST_2x(vmovdqa32, Vmovdqa32, Vec, Mem)                       //      AVX512_F{kz}
+  ASMJIT_INST_2x(vmovdqa32, Vmovdqa32, Mem, Vec)                       //      AVX512_F{kz}
+  ASMJIT_INST_2x(vmovdqa64, Vmovdqa64, Vec, Vec)                       //      AVX512_F{kz}
+  ASMJIT_INST_2x(vmovdqa64, Vmovdqa64, Vec, Mem)                       //      AVX512_F{kz}
+  ASMJIT_INST_2x(vmovdqa64, Vmovdqa64, Mem, Vec)                       //      AVX512_F{kz}
+  ASMJIT_INST_2x(vmovdqu, Vmovdqu, Vec, Vec)                           // AVX
+  ASMJIT_INST_2x(vmovdqu, Vmovdqu, Vec, Mem)                           // AVX
+  ASMJIT_INST_2x(vmovdqu, Vmovdqu, Mem, Vec)                           // AVX
+  ASMJIT_INST_2x(vmovdqu16, Vmovdqu16, Vec, Vec)                       //      AVX512_BW{kz}
+  ASMJIT_INST_2x(vmovdqu16, Vmovdqu16, Vec, Mem)                       //      AVX512_BW{kz}
+  ASMJIT_INST_2x(vmovdqu16, Vmovdqu16, Mem, Vec)                       //      AVX512_BW{kz}
+  ASMJIT_INST_2x(vmovdqu32, Vmovdqu32, Vec, Vec)                       //      AVX512_F{kz}
+  ASMJIT_INST_2x(vmovdqu32, Vmovdqu32, Vec, Mem)                       //      AVX512_F{kz}
+  ASMJIT_INST_2x(vmovdqu32, Vmovdqu32, Mem, Vec)                       //      AVX512_F{kz}
+  ASMJIT_INST_2x(vmovdqu64, Vmovdqu64, Vec, Vec)                       //      AVX512_F{kz}
+  ASMJIT_INST_2x(vmovdqu64, Vmovdqu64, Vec, Mem)                       //      AVX512_F{kz}
+  ASMJIT_INST_2x(vmovdqu64, Vmovdqu64, Mem, Vec)                       //      AVX512_F{kz}
+  ASMJIT_INST_2x(vmovdqu8, Vmovdqu8, Vec, Vec)                         //      AVX512_BW{kz}
+  ASMJIT_INST_2x(vmovdqu8, Vmovdqu8, Vec, Mem)                         //      AVX512_BW{kz}
+  ASMJIT_INST_2x(vmovdqu8, Vmovdqu8, Mem, Vec)                         //      AVX512_BW{kz}
+  ASMJIT_INST_3x(vmovhlps, Vmovhlps, Xmm, Xmm, Xmm)                    // AVX  AVX512_F
+  ASMJIT_INST_2x(vmovhpd, Vmovhpd, Mem, Xmm)                           // AVX  AVX512_F
+  ASMJIT_INST_3x(vmovhpd, Vmovhpd, Xmm, Xmm, Mem)                      // AVX  AVX512_F
+  ASMJIT_INST_2x(vmovhps, Vmovhps, Mem, Xmm)                           // AVX  AVX512_F
+  ASMJIT_INST_3x(vmovhps, Vmovhps, Xmm, Xmm, Mem)                      // AVX  AVX512_F
+  ASMJIT_INST_3x(vmovlhps, Vmovlhps, Xmm, Xmm, Xmm)                    // AVX  AVX512_F
+  ASMJIT_INST_2x(vmovlpd, Vmovlpd, Mem, Xmm)                           // AVX  AVX512_F
+  ASMJIT_INST_3x(vmovlpd, Vmovlpd, Xmm, Xmm, Mem)                      // AVX  AVX512_F
+  ASMJIT_INST_2x(vmovlps, Vmovlps, Mem, Xmm)                           // AVX  AVX512_F
+  ASMJIT_INST_3x(vmovlps, Vmovlps, Xmm, Xmm, Mem)                      // AVX  AVX512_F
+  ASMJIT_INST_2x(vmovmskpd, Vmovmskpd, Gp, Vec)                        // AVX
+  ASMJIT_INST_2x(vmovmskps, Vmovmskps, Gp, Vec)                        // AVX
+  ASMJIT_INST_2x(vmovntdq, Vmovntdq, Mem, Vec)                         // AVX+ AVX512_F
+  ASMJIT_INST_2x(vmovntdqa, Vmovntdqa, Vec, Mem)                       // AVX+ AVX512_F
+  ASMJIT_INST_2x(vmovntpd, Vmovntpd, Mem, Vec)                         // AVX  AVX512_F
+  ASMJIT_INST_2x(vmovntps, Vmovntps, Mem, Vec)                         // AVX  AVX512_F
+  ASMJIT_INST_2x(vmovq, Vmovq, Gp, Xmm)                                // AVX  AVX512_F
+  ASMJIT_INST_2x(vmovq, Vmovq, Mem, Xmm)                               // AVX  AVX512_F
+  ASMJIT_INST_2x(vmovq, Vmovq, Xmm, Mem)                               // AVX  AVX512_F
+  ASMJIT_INST_2x(vmovq, Vmovq, Xmm, Gp)                                // AVX  AVX512_F
+  ASMJIT_INST_2x(vmovq, Vmovq, Xmm, Xmm)                               // AVX  AVX512_F
+  ASMJIT_INST_2x(vmovsd, Vmovsd, Mem, Xmm)                             // AVX  AVX512_F
+  ASMJIT_INST_2x(vmovsd, Vmovsd, Xmm, Mem)                             // AVX  AVX512_F{kz}
+  ASMJIT_INST_3x(vmovsd, Vmovsd, Xmm, Xmm, Xmm)                        // AVX  AVX512_F{kz}
+  ASMJIT_INST_2x(vmovshdup, Vmovshdup, Vec, Vec)                       // AVX  AVX512_F{kz}
+  ASMJIT_INST_2x(vmovshdup, Vmovshdup, Vec, Mem)                       // AVX  AVX512_F{kz}
+  ASMJIT_INST_2x(vmovsldup, Vmovsldup, Vec, Vec)                       // AVX  AVX512_F{kz}
+  ASMJIT_INST_2x(vmovsldup, Vmovsldup, Vec, Mem)                       // AVX  AVX512_F{kz}
+  ASMJIT_INST_2x(vmovss, Vmovss, Mem, Xmm)                             // AVX  AVX512_F
+  ASMJIT_INST_2x(vmovss, Vmovss, Xmm, Mem)                             // AVX  AVX512_F{kz}
+  ASMJIT_INST_3x(vmovss, Vmovss, Xmm, Xmm, Xmm)                        // AVX  AVX512_F{kz}
+  ASMJIT_INST_2x(vmovupd, Vmovupd, Vec, Vec)                           // AVX  AVX512_F{kz}
+  ASMJIT_INST_2x(vmovupd, Vmovupd, Vec, Mem)                           // AVX  AVX512_F{kz}
+  ASMJIT_INST_2x(vmovupd, Vmovupd, Mem, Vec)                           // AVX  AVX512_F{kz}
+  ASMJIT_INST_2x(vmovups, Vmovups, Vec, Vec)                           // AVX  AVX512_F{kz}
+  ASMJIT_INST_2x(vmovups, Vmovups, Vec, Mem)                           // AVX  AVX512_F{kz}
+  ASMJIT_INST_2x(vmovups, Vmovups, Mem, Vec)                           // AVX  AVX512_F{kz}
+  ASMJIT_INST_4x(vmpsadbw, Vmpsadbw, Vec, Vec, Vec, Imm)               // AVX+
+  ASMJIT_INST_4x(vmpsadbw, Vmpsadbw, Vec, Vec, Mem, Imm)               // AVX+
+  ASMJIT_INST_3x(vmulpd, Vmulpd, Vec, Vec, Vec)                        // AVX  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vmulpd, Vmulpd, Vec, Vec, Mem)                        // AVX  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vmulps, Vmulps, Vec, Vec, Vec)                        // AVX  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vmulps, Vmulps, Vec, Vec, Mem)                        // AVX  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vmulsd, Vmulsd, Xmm, Xmm, Xmm)                        // AVX  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vmulsd, Vmulsd, Xmm, Xmm, Mem)                        // AVX  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vmulss, Vmulss, Xmm, Xmm, Xmm)                        // AVX  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vmulss, Vmulss, Xmm, Xmm, Mem)                        // AVX  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vorpd, Vorpd, Vec, Vec, Vec)                          // AVX  AVX512_DQ{kz|b64}
+  ASMJIT_INST_3x(vorpd, Vorpd, Vec, Vec, Mem)                          // AVX  AVX512_DQ{kz|b64}
+  ASMJIT_INST_3x(vorps, Vorps, Vec, Vec, Vec)                          // AVX  AVX512_DQ{kz|b32}
+  ASMJIT_INST_3x(vorps, Vorps, Vec, Vec, Mem)                          // AVX  AVX512_DQ{kz|b32}
+  ASMJIT_INST_4x(vp2intersectd, Vp2intersectd, KReg, KReg, Vec, Vec)   //      AVX512_VP2INTERSECT{kz}
+  ASMJIT_INST_4x(vp2intersectd, Vp2intersectd, KReg, KReg, Vec, Mem)   //      AVX512_VP2INTERSECT{kz}
+  ASMJIT_INST_4x(vp2intersectq, Vp2intersectq, KReg, KReg, Vec, Vec)   //      AVX512_VP2INTERSECT{kz}
+  ASMJIT_INST_4x(vp2intersectq, Vp2intersectq, KReg, KReg, Vec, Mem)   //      AVX512_VP2INTERSECT{kz}
+  ASMJIT_INST_6x(vp4dpwssd, Vp4dpwssd, Zmm, Zmm, Zmm, Zmm, Zmm, Mem)   //      AVX512_4VNNIW{kz}
+  ASMJIT_INST_6x(vp4dpwssds, Vp4dpwssds, Zmm, Zmm, Zmm, Zmm, Zmm, Mem) //      AVX512_4VNNIW{kz}
+  ASMJIT_INST_2x(vpabsb, Vpabsb, Vec, Vec)                             // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_2x(vpabsb, Vpabsb, Vec, Mem)                             // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_2x(vpabsd, Vpabsd, Vec, Vec)                             // AVX+ AVX512_F{kz}
+  ASMJIT_INST_2x(vpabsd, Vpabsd, Vec, Mem)                             // AVX+ AVX512_F{kz}
+  ASMJIT_INST_2x(vpabsq, Vpabsq, Vec, Vec)                             //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpabsq, Vpabsq, Vec, Mem)                             //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpabsw, Vpabsw, Vec, Vec)                             // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_2x(vpabsw, Vpabsw, Vec, Mem)                             // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpackssdw, Vpackssdw, Vec, Vec, Vec)                  // AVX+ AVX512_BW{kz|b32}
+  ASMJIT_INST_3x(vpackssdw, Vpackssdw, Vec, Vec, Mem)                  // AVX+ AVX512_BW{kz|b32}
+  ASMJIT_INST_3x(vpacksswb, Vpacksswb, Vec, Vec, Vec)                  // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpacksswb, Vpacksswb, Vec, Vec, Mem)                  // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpackusdw, Vpackusdw, Vec, Vec, Vec)                  // AVX+ AVX512_BW{kz|b32}
+  ASMJIT_INST_3x(vpackusdw, Vpackusdw, Vec, Vec, Mem)                  // AVX+ AVX512_BW{kz|b32}
+  ASMJIT_INST_3x(vpackuswb, Vpackuswb, Vec, Vec, Vec)                  // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpackuswb, Vpackuswb, Vec, Vec, Mem)                  // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpaddb, Vpaddb, Vec, Vec, Vec)                        // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpaddb, Vpaddb, Vec, Vec, Mem)                        // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpaddd, Vpaddd, Vec, Vec, Vec)                        // AVX+ AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpaddd, Vpaddd, Vec, Vec, Mem)                        // AVX+ AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpaddq, Vpaddq, Vec, Vec, Vec)                        // AVX+ AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpaddq, Vpaddq, Vec, Vec, Mem)                        // AVX+ AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpaddsb, Vpaddsb, Vec, Vec, Vec)                      // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpaddsb, Vpaddsb, Vec, Vec, Mem)                      // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpaddsw, Vpaddsw, Vec, Vec, Vec)                      // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpaddsw, Vpaddsw, Vec, Vec, Mem)                      // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpaddusb, Vpaddusb, Vec, Vec, Vec)                    // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpaddusb, Vpaddusb, Vec, Vec, Mem)                    // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpaddusw, Vpaddusw, Vec, Vec, Vec)                    // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpaddusw, Vpaddusw, Vec, Vec, Mem)                    // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpaddw, Vpaddw, Vec, Vec, Vec)                        // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpaddw, Vpaddw, Vec, Vec, Mem)                        // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_4x(vpalignr, Vpalignr, Vec, Vec, Vec, Imm)               // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_4x(vpalignr, Vpalignr, Vec, Vec, Mem, Imm)               // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpand, Vpand, Vec, Vec, Vec)                          // AVX+
+  ASMJIT_INST_3x(vpand, Vpand, Vec, Vec, Mem)                          // AVX+
+  ASMJIT_INST_3x(vpandd, Vpandd, Vec, Vec, Vec)                        //      AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpandd, Vpandd, Vec, Vec, Mem)                        //      AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpandn, Vpandn, Vec, Vec, Vec)                        // AVX+
+  ASMJIT_INST_3x(vpandn, Vpandn, Vec, Vec, Mem)                        // AVX+
+  ASMJIT_INST_3x(vpandnd, Vpandnd, Vec, Vec, Vec)                      //      AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpandnd, Vpandnd, Vec, Vec, Mem)                      //      AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpandnq, Vpandnq, Vec, Vec, Vec)                      //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpandnq, Vpandnq, Vec, Vec, Mem)                      //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpandq, Vpandq, Vec, Vec, Vec)                        //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpandq, Vpandq, Vec, Vec, Mem)                        //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpavgb, Vpavgb, Vec, Vec, Vec)                        // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpavgb, Vpavgb, Vec, Vec, Mem)                        // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpavgw, Vpavgw, Vec, Vec, Vec)                        // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpavgw, Vpavgw, Vec, Vec, Mem)                        // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_4x(vpblendd, Vpblendd, Vec, Vec, Vec, Imm)               // AVX2
+  ASMJIT_INST_4x(vpblendd, Vpblendd, Vec, Vec, Mem, Imm)               // AVX2
+  ASMJIT_INST_3x(vpblendmb, Vpblendmb, Vec, Vec, Vec)                  //      AVX512_BW{kz}
+  ASMJIT_INST_3x(vpblendmb, Vpblendmb, Vec, Vec, Mem)                  //      AVX512_BW{kz}
+  ASMJIT_INST_3x(vpblendmd, Vpblendmd, Vec, Vec, Vec)                  //      AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpblendmd, Vpblendmd, Vec, Vec, Mem)                  //      AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpblendmq, Vpblendmq, Vec, Vec, Vec)                  //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpblendmq, Vpblendmq, Vec, Vec, Mem)                  //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpblendmw, Vpblendmw, Vec, Vec, Vec)                  //      AVX512_BW{kz}
+  ASMJIT_INST_3x(vpblendmw, Vpblendmw, Vec, Vec, Mem)                  //      AVX512_BW{kz}
+  ASMJIT_INST_4x(vpblendvb, Vpblendvb, Vec, Vec, Vec, Vec)             // AVX+
+  ASMJIT_INST_4x(vpblendvb, Vpblendvb, Vec, Vec, Mem, Vec)             // AVX+
+  ASMJIT_INST_4x(vpblendw, Vpblendw, Vec, Vec, Vec, Imm)               // AVX+
+  ASMJIT_INST_4x(vpblendw, Vpblendw, Vec, Vec, Mem, Imm)               // AVX+
+  ASMJIT_INST_2x(vpbroadcastb, Vpbroadcastb, Vec, Vec)                 // AVX2 AVX512_BW{kz}
+  ASMJIT_INST_2x(vpbroadcastb, Vpbroadcastb, Vec, Mem)                 // AVX2 AVX512_BW{kz}
+  ASMJIT_INST_2x(vpbroadcastb, Vpbroadcastb, Vec, Gp)                  //      AVX512_BW{kz}
+  ASMJIT_INST_2x(vpbroadcastd, Vpbroadcastd, Vec, Vec)                 // AVX2 AVX512_F{kz}
+  ASMJIT_INST_2x(vpbroadcastd, Vpbroadcastd, Vec, Mem)                 // AVX2 AVX512_F{kz}
+  ASMJIT_INST_2x(vpbroadcastd, Vpbroadcastd, Vec, Gp)                  //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpbroadcastmb2q, Vpbroadcastmb2q, Vec, KReg)          //      AVX512_CD
+  ASMJIT_INST_2x(vpbroadcastmw2d, Vpbroadcastmw2d, Vec, KReg)          //      AVX512_CD
+  ASMJIT_INST_2x(vpbroadcastq, Vpbroadcastq, Vec, Vec)                 // AVX2 AVX512_F{kz}
+  ASMJIT_INST_2x(vpbroadcastq, Vpbroadcastq, Vec, Mem)                 // AVX2 AVX512_F{kz}
+  ASMJIT_INST_2x(vpbroadcastq, Vpbroadcastq, Vec, Gp)                  //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpbroadcastw, Vpbroadcastw, Vec, Vec)                 // AVX2 AVX512_BW{kz}
+  ASMJIT_INST_2x(vpbroadcastw, Vpbroadcastw, Vec, Mem)                 // AVX2 AVX512_BW{kz}
+  ASMJIT_INST_2x(vpbroadcastw, Vpbroadcastw, Vec, Gp)                  //      AVX512_BW{kz}
+  ASMJIT_INST_4x(vpclmulqdq, Vpclmulqdq, Vec, Vec, Vec, Imm)           // AVX  VPCLMULQDQ AVX512_F
+  ASMJIT_INST_4x(vpclmulqdq, Vpclmulqdq, Vec, Vec, Mem, Imm)           // AVX  VPCLMULQDQ AVX512_F
+  ASMJIT_INST_4x(vpcmpb, Vpcmpb, KReg, Vec, Vec, Imm)                  //      AVX512_BW{k}
+  ASMJIT_INST_4x(vpcmpb, Vpcmpb, KReg, Vec, Mem, Imm)                  //      AVX512_BW{k}
+  ASMJIT_INST_4x(vpcmpd, Vpcmpd, KReg, Vec, Vec, Imm)                  //      AVX512_F{k|b32}
+  ASMJIT_INST_4x(vpcmpd, Vpcmpd, KReg, Vec, Mem, Imm)                  //      AVX512_F{k|b32}
+  ASMJIT_INST_3x(vpcmpeqb, Vpcmpeqb, Vec, Vec, Vec)                    // AVX+
+  ASMJIT_INST_3x(vpcmpeqb, Vpcmpeqb, Vec, Vec, Mem)                    // AVX+
+  ASMJIT_INST_3x(vpcmpeqb, Vpcmpeqb, KReg, Vec, Vec)                   //      AVX512_BW{k}
+  ASMJIT_INST_3x(vpcmpeqb, Vpcmpeqb, KReg, Vec, Mem)                   //      AVX512_BW{k}
+  ASMJIT_INST_3x(vpcmpeqd, Vpcmpeqd, Vec, Vec, Vec)                    // AVX+
+  ASMJIT_INST_3x(vpcmpeqd, Vpcmpeqd, Vec, Vec, Mem)                    // AVX+
+  ASMJIT_INST_3x(vpcmpeqd, Vpcmpeqd, KReg, Vec, Vec)                   //      AVX512_F{k|b32}
+  ASMJIT_INST_3x(vpcmpeqd, Vpcmpeqd, KReg, Vec, Mem)                   //      AVX512_F{k|b32}
+  ASMJIT_INST_3x(vpcmpeqq, Vpcmpeqq, Vec, Vec, Vec)                    // AVX+
+  ASMJIT_INST_3x(vpcmpeqq, Vpcmpeqq, Vec, Vec, Mem)                    // AVX+
+  ASMJIT_INST_3x(vpcmpeqq, Vpcmpeqq, KReg, Vec, Vec)                   //      AVX512_F{k|b64}
+  ASMJIT_INST_3x(vpcmpeqq, Vpcmpeqq, KReg, Vec, Mem)                   //      AVX512_F{k|b64}
+  ASMJIT_INST_3x(vpcmpeqw, Vpcmpeqw, Vec, Vec, Vec)                    // AVX+
+  ASMJIT_INST_3x(vpcmpeqw, Vpcmpeqw, Vec, Vec, Mem)                    // AVX+
+  ASMJIT_INST_3x(vpcmpeqw, Vpcmpeqw, KReg, Vec, Vec)                   //      AVX512_BW{k}
+  ASMJIT_INST_3x(vpcmpeqw, Vpcmpeqw, KReg, Vec, Mem)                   //      AVX512_BW{k}
+  ASMJIT_INST_6x(vpcmpestri, Vpcmpestri, Vec, Vec, Imm, Gp_ECX, Gp_EAX, Gp_EDX) // AVX  [EXPLICIT]
+  ASMJIT_INST_6x(vpcmpestri, Vpcmpestri, Vec, Mem, Imm, Gp_ECX, Gp_EAX, Gp_EDX) // AVX  [EXPLICIT]
+  ASMJIT_INST_6x(vpcmpestrm, Vpcmpestrm, Vec, Vec, Imm, XMM0, Gp_EAX, Gp_EDX)   // AVX  [EXPLICIT]
+  ASMJIT_INST_6x(vpcmpestrm, Vpcmpestrm, Vec, Mem, Imm, XMM0, Gp_EAX, Gp_EDX)   // AVX  [EXPLICIT]
+  ASMJIT_INST_3x(vpcmpgtb, Vpcmpgtb, Vec, Vec, Vec)                    // AVX+
+  ASMJIT_INST_3x(vpcmpgtb, Vpcmpgtb, Vec, Vec, Mem)                    // AVX+
+  ASMJIT_INST_3x(vpcmpgtb, Vpcmpgtb, KReg, Vec, Vec)                   //      AVX512_BW{k}
+  ASMJIT_INST_3x(vpcmpgtb, Vpcmpgtb, KReg, Vec, Mem)                   //      AVX512_BW{k}
+  ASMJIT_INST_3x(vpcmpgtd, Vpcmpgtd, Vec, Vec, Vec)                    // AVX+
+  ASMJIT_INST_3x(vpcmpgtd, Vpcmpgtd, Vec, Vec, Mem)                    // AVX+
+  ASMJIT_INST_3x(vpcmpgtd, Vpcmpgtd, KReg, Vec, Vec)                   //      AVX512_F{k|b32}
+  ASMJIT_INST_3x(vpcmpgtd, Vpcmpgtd, KReg, Vec, Mem)                   //      AVX512_F{k|b32}
+  ASMJIT_INST_3x(vpcmpgtq, Vpcmpgtq, Vec, Vec, Vec)                    // AVX+
+  ASMJIT_INST_3x(vpcmpgtq, Vpcmpgtq, Vec, Vec, Mem)                    // AVX+
+  ASMJIT_INST_3x(vpcmpgtq, Vpcmpgtq, KReg, Vec, Vec)                   //      AVX512_F{k|b64}
+  ASMJIT_INST_3x(vpcmpgtq, Vpcmpgtq, KReg, Vec, Mem)                   //      AVX512_F{k|b64}
+  ASMJIT_INST_3x(vpcmpgtw, Vpcmpgtw, Vec, Vec, Vec)                    // AVX+
+  ASMJIT_INST_3x(vpcmpgtw, Vpcmpgtw, Vec, Vec, Mem)                    // AVX+
+  ASMJIT_INST_3x(vpcmpgtw, Vpcmpgtw, KReg, Vec, Vec)                   //      AVX512_BW{k}
+  ASMJIT_INST_3x(vpcmpgtw, Vpcmpgtw, KReg, Vec, Mem)                   //      AVX512_BW{k}
+  ASMJIT_INST_4x(vpcmpistri, Vpcmpistri, Vec, Vec, Imm, Gp_ECX)        // AVX  [EXPLICIT]
+  ASMJIT_INST_4x(vpcmpistri, Vpcmpistri, Vec, Mem, Imm, Gp_ECX)        // AVX  [EXPLICIT]
+  ASMJIT_INST_4x(vpcmpistrm, Vpcmpistrm, Vec, Vec, Imm, XMM0)          // AVX  [EXPLICIT]
+  ASMJIT_INST_4x(vpcmpistrm, Vpcmpistrm, Vec, Mem, Imm, XMM0)          // AVX  [EXPLICIT]
+  ASMJIT_INST_4x(vpcmpq, Vpcmpq, KReg, Vec, Vec, Imm)                  //      AVX512_F{k|b64}
+  ASMJIT_INST_4x(vpcmpq, Vpcmpq, KReg, Vec, Mem, Imm)                  //      AVX512_F{k|b64}
+  ASMJIT_INST_4x(vpcmpub, Vpcmpub, KReg, Vec, Vec, Imm)                //      AVX512_BW{k}
+  ASMJIT_INST_4x(vpcmpub, Vpcmpub, KReg, Vec, Mem, Imm)                //      AVX512_BW{k}
+  ASMJIT_INST_4x(vpcmpud, Vpcmpud, KReg, Vec, Vec, Imm)                //      AVX512_F{k|b32}
+  ASMJIT_INST_4x(vpcmpud, Vpcmpud, KReg, Vec, Mem, Imm)                //      AVX512_F{k|b32}
+  ASMJIT_INST_4x(vpcmpuq, Vpcmpuq, KReg, Vec, Vec, Imm)                //      AVX512_F{k|b64}
+  ASMJIT_INST_4x(vpcmpuq, Vpcmpuq, KReg, Vec, Mem, Imm)                //      AVX512_F{k|b64}
+  ASMJIT_INST_4x(vpcmpuw, Vpcmpuw, KReg, Vec, Vec, Imm)                //      AVX512_BW{k}
+  ASMJIT_INST_4x(vpcmpuw, Vpcmpuw, KReg, Vec, Mem, Imm)                //      AVX512_BW{k}
+  ASMJIT_INST_4x(vpcmpw, Vpcmpw, KReg, Vec, Vec, Imm)                  //      AVX512_BW{k}
+  ASMJIT_INST_4x(vpcmpw, Vpcmpw, KReg, Vec, Mem, Imm)                  //      AVX512_BW{k}
+  ASMJIT_INST_2x(vpcompressb, Vpcompressb, Vec, Vec)                   //      AVX512_VBMI2{kz}
+  ASMJIT_INST_2x(vpcompressb, Vpcompressb, Mem, Vec)                   //      AVX512_VBMI2{kz}
+  ASMJIT_INST_2x(vpcompressd, Vpcompressd, Vec, Vec)                   //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpcompressd, Vpcompressd, Mem, Vec)                   //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpcompressq, Vpcompressq, Vec, Vec)                   //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpcompressq, Vpcompressq, Mem, Vec)                   //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpcompressw, Vpcompressw, Vec, Vec)                   //      AVX512_VBMI2{kz}
+  ASMJIT_INST_2x(vpcompressw, Vpcompressw, Mem, Vec)                   //      AVX512_VBMI2{kz}
+  ASMJIT_INST_2x(vpconflictd, Vpconflictd, Vec, Vec)                   //      AVX512_CD{kz|b32}
+  ASMJIT_INST_2x(vpconflictd, Vpconflictd, Vec, Mem)                   //      AVX512_CD{kz|b32}
+  ASMJIT_INST_2x(vpconflictq, Vpconflictq, Vec, Vec)                   //      AVX512_CD{kz|b64}
+  ASMJIT_INST_2x(vpconflictq, Vpconflictq, Vec, Mem)                   //      AVX512_CD{kz|b64}
+  ASMJIT_INST_3x(vpdpbusd, Vpdpbusd, Vec, Vec, Vec)                    // AVX_VNNI AVX512_VNNI{kz|b32}
+  ASMJIT_INST_3x(vpdpbusd, Vpdpbusd, Vec, Vec, Mem)                    // AVX_VNNI AVX512_VNNI{kz|b32}
+  ASMJIT_INST_3x(vpdpbusds, Vpdpbusds, Vec, Vec, Vec)                  // AVX_VNNI AVX512_VNNI{kz|b32}
+  ASMJIT_INST_3x(vpdpbusds, Vpdpbusds, Vec, Vec, Mem)                  // AVX_VNNI AVX512_VNNI{kz|b32}
+  ASMJIT_INST_3x(vpdpwssd, Vpdpwssd, Vec, Vec, Vec)                    // AVX_VNNI AVX512_VNNI{kz|b32}
+  ASMJIT_INST_3x(vpdpwssd, Vpdpwssd, Vec, Vec, Mem)                    // AVX_VNNI AVX512_VNNI{kz|b32}
+  ASMJIT_INST_3x(vpdpwssds, Vpdpwssds, Vec, Vec, Vec)                  // AVX_VNNI AVX512_VNNI{kz|b32}
+  ASMJIT_INST_3x(vpdpwssds, Vpdpwssds, Vec, Vec, Mem)                  // AVX_VNNI AVX512_VNNI{kz|b32}
+  ASMJIT_INST_4x(vperm2f128, Vperm2f128, Vec, Vec, Vec, Imm)           // AVX
+  ASMJIT_INST_4x(vperm2f128, Vperm2f128, Vec, Vec, Mem, Imm)           // AVX
+  ASMJIT_INST_4x(vperm2i128, Vperm2i128, Vec, Vec, Vec, Imm)           // AVX2
+  ASMJIT_INST_4x(vperm2i128, Vperm2i128, Vec, Vec, Mem, Imm)           // AVX2
+  ASMJIT_INST_3x(vpermb, Vpermb, Vec, Vec, Vec)                        //      AVX512_VBMI{kz}
+  ASMJIT_INST_3x(vpermb, Vpermb, Vec, Vec, Mem)                        //      AVX512_VBMI{kz}
+  ASMJIT_INST_3x(vpermd, Vpermd, Vec, Vec, Vec)                        // AVX2 AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpermd, Vpermd, Vec, Vec, Mem)                        // AVX2 AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpermi2b, Vpermi2b, Vec, Vec, Vec)                    //      AVX512_VBMI{kz}
+  ASMJIT_INST_3x(vpermi2b, Vpermi2b, Vec, Vec, Mem)                    //      AVX512_VBMI{kz}
+  ASMJIT_INST_3x(vpermi2d, Vpermi2d, Vec, Vec, Vec)                    //      AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpermi2d, Vpermi2d, Vec, Vec, Mem)                    //      AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpermi2pd, Vpermi2pd, Vec, Vec, Vec)                  //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpermi2pd, Vpermi2pd, Vec, Vec, Mem)                  //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpermi2ps, Vpermi2ps, Vec, Vec, Vec)                  //      AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpermi2ps, Vpermi2ps, Vec, Vec, Mem)                  //      AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpermi2q, Vpermi2q, Vec, Vec, Vec)                    //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpermi2q, Vpermi2q, Vec, Vec, Mem)                    //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpermi2w, Vpermi2w, Vec, Vec, Vec)                    //      AVX512_BW{kz}
+  ASMJIT_INST_3x(vpermi2w, Vpermi2w, Vec, Vec, Mem)                    //      AVX512_BW{kz}
+  ASMJIT_INST_3x(vpermilpd, Vpermilpd, Vec, Vec, Vec)                  // AVX  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpermilpd, Vpermilpd, Vec, Vec, Mem)                  // AVX  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpermilpd, Vpermilpd, Vec, Vec, Imm)                  // AVX  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpermilpd, Vpermilpd, Vec, Mem, Imm)                  // AVX  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpermilps, Vpermilps, Vec, Vec, Vec)                  // AVX  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpermilps, Vpermilps, Vec, Vec, Mem)                  // AVX  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpermilps, Vpermilps, Vec, Vec, Imm)                  // AVX  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpermilps, Vpermilps, Vec, Mem, Imm)                  // AVX  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpermpd, Vpermpd, Vec, Vec, Imm)                      // AVX2
+  ASMJIT_INST_3x(vpermpd, Vpermpd, Vec, Mem, Imm)                      // AVX2
+  ASMJIT_INST_3x(vpermpd, Vpermpd, Vec, Vec, Vec)                      //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpermpd, Vpermpd, Vec, Vec, Mem)                      //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpermps, Vpermps, Vec, Vec, Vec)                      // AVX2 AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpermps, Vpermps, Vec, Vec, Mem)                      // AVX2 AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpermq, Vpermq, Vec, Vec, Imm)                        // AVX2 AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpermq, Vpermq, Vec, Mem, Imm)                        // AVX2 AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpermq, Vpermq, Vec, Vec, Vec)                        //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpermq, Vpermq, Vec, Vec, Mem)                        //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpermt2b, Vpermt2b, Vec, Vec, Vec)                    //      AVX512_VBMI{kz}
+  ASMJIT_INST_3x(vpermt2b, Vpermt2b, Vec, Vec, Mem)                    //      AVX512_VBMI{kz}
+  ASMJIT_INST_3x(vpermt2d, Vpermt2d, Vec, Vec, Vec)                    //      AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpermt2d, Vpermt2d, Vec, Vec, Mem)                    //      AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpermt2pd, Vpermt2pd, Vec, Vec, Vec)                  //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpermt2pd, Vpermt2pd, Vec, Vec, Mem)                  //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpermt2ps, Vpermt2ps, Vec, Vec, Vec)                  //      AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpermt2ps, Vpermt2ps, Vec, Vec, Mem)                  //      AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpermt2q, Vpermt2q, Vec, Vec, Vec)                    //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpermt2q, Vpermt2q, Vec, Vec, Mem)                    //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpermt2w, Vpermt2w, Vec, Vec, Vec)                    //      AVX512_BW{kz}
+  ASMJIT_INST_3x(vpermt2w, Vpermt2w, Vec, Vec, Mem)                    //      AVX512_BW{kz}
+  ASMJIT_INST_3x(vpermw, Vpermw, Vec, Vec, Vec)                        //      AVX512_BW{kz}
+  ASMJIT_INST_3x(vpermw, Vpermw, Vec, Vec, Mem)                        //      AVX512_BW{kz}
+  ASMJIT_INST_2x(vpexpandb, Vpexpandb, Vec, Vec)                       //      AVX512_VBMI2{kz}
+  ASMJIT_INST_2x(vpexpandb, Vpexpandb, Vec, Mem)                       //      AVX512_VBMI2{kz}
+  ASMJIT_INST_2x(vpexpandd, Vpexpandd, Vec, Vec)                       //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpexpandd, Vpexpandd, Vec, Mem)                       //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpexpandq, Vpexpandq, Vec, Vec)                       //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpexpandq, Vpexpandq, Vec, Mem)                       //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpexpandw, Vpexpandw, Vec, Vec)                       //      AVX512_VBMI2{kz}
+  ASMJIT_INST_2x(vpexpandw, Vpexpandw, Vec, Mem)                       //      AVX512_VBMI2{kz}
+  ASMJIT_INST_3x(vpextrb, Vpextrb, Gp, Xmm, Imm)                       // AVX  AVX512_BW
+  ASMJIT_INST_3x(vpextrb, Vpextrb, Mem, Xmm, Imm)                      // AVX  AVX512_BW
+  ASMJIT_INST_3x(vpextrd, Vpextrd, Gp, Xmm, Imm)                       // AVX  AVX512_DQ
+  ASMJIT_INST_3x(vpextrd, Vpextrd, Mem, Xmm, Imm)                      // AVX  AVX512_DQ
+  ASMJIT_INST_3x(vpextrq, Vpextrq, Gp, Xmm, Imm)                       // AVX  AVX512_DQ
+  ASMJIT_INST_3x(vpextrq, Vpextrq, Mem, Xmm, Imm)                      // AVX  AVX512_DQ
+  ASMJIT_INST_3x(vpextrw, Vpextrw, Gp, Xmm, Imm)                       // AVX  AVX512_BW
+  ASMJIT_INST_3x(vpextrw, Vpextrw, Mem, Xmm, Imm)                      // AVX  AVX512_BW
+  ASMJIT_INST_2x(vpgatherdd, Vpgatherdd, Vec, Mem)                     //      AVX512_F{k}
+  ASMJIT_INST_3x(vpgatherdd, Vpgatherdd, Vec, Mem, Vec)                // AVX2
+  ASMJIT_INST_2x(vpgatherdq, Vpgatherdq, Vec, Mem)                     //      AVX512_F{k}
+  ASMJIT_INST_3x(vpgatherdq, Vpgatherdq, Vec, Mem, Vec)                // AVX2
+  ASMJIT_INST_2x(vpgatherqd, Vpgatherqd, Vec, Mem)                     //      AVX512_F{k}
+  ASMJIT_INST_3x(vpgatherqd, Vpgatherqd, Vec, Mem, Vec)                // AVX2
+  ASMJIT_INST_2x(vpgatherqq, Vpgatherqq, Vec, Mem)                     //      AVX512_F{k}
+  ASMJIT_INST_3x(vpgatherqq, Vpgatherqq, Vec, Mem, Vec)                // AVX2
+  ASMJIT_INST_3x(vphaddd, Vphaddd, Vec, Vec, Vec)                      // AVX+
+  ASMJIT_INST_3x(vphaddd, Vphaddd, Vec, Vec, Mem)                      // AVX+
+  ASMJIT_INST_3x(vphaddsw, Vphaddsw, Vec, Vec, Vec)                    // AVX+
+  ASMJIT_INST_3x(vphaddsw, Vphaddsw, Vec, Vec, Mem)                    // AVX+
+  ASMJIT_INST_3x(vphaddw, Vphaddw, Vec, Vec, Vec)                      // AVX+
+  ASMJIT_INST_3x(vphaddw, Vphaddw, Vec, Vec, Mem)                      // AVX+
+  ASMJIT_INST_2x(vphminposuw, Vphminposuw, Vec, Vec)                   // AVX
+  ASMJIT_INST_2x(vphminposuw, Vphminposuw, Vec, Mem)                   // AVX
+  ASMJIT_INST_3x(vphsubd, Vphsubd, Vec, Vec, Vec)                      // AVX+
+  ASMJIT_INST_3x(vphsubd, Vphsubd, Vec, Vec, Mem)                      // AVX+
+  ASMJIT_INST_3x(vphsubsw, Vphsubsw, Vec, Vec, Vec)                    // AVX+
+  ASMJIT_INST_3x(vphsubsw, Vphsubsw, Vec, Vec, Mem)                    // AVX+
+  ASMJIT_INST_3x(vphsubw, Vphsubw, Vec, Vec, Vec)                      // AVX+
+  ASMJIT_INST_3x(vphsubw, Vphsubw, Vec, Vec, Mem)                      // AVX+
+  ASMJIT_INST_4x(vpinsrb, Vpinsrb, Xmm, Xmm, Gp, Imm)                  // AVX  AVX512_BW
+  ASMJIT_INST_4x(vpinsrb, Vpinsrb, Xmm, Xmm, Mem, Imm)                 // AVX  AVX512_BW
+  ASMJIT_INST_4x(vpinsrd, Vpinsrd, Xmm, Xmm, Gp, Imm)                  // AVX  AVX512_DQ
+  ASMJIT_INST_4x(vpinsrd, Vpinsrd, Xmm, Xmm, Mem, Imm)                 // AVX  AVX512_DQ
+  ASMJIT_INST_4x(vpinsrq, Vpinsrq, Xmm, Xmm, Gp, Imm)                  // AVX  AVX512_DQ
+  ASMJIT_INST_4x(vpinsrq, Vpinsrq, Xmm, Xmm, Mem, Imm)                 // AVX  AVX512_DQ
+  ASMJIT_INST_4x(vpinsrw, Vpinsrw, Xmm, Xmm, Gp, Imm)                  // AVX  AVX512_BW
+  ASMJIT_INST_4x(vpinsrw, Vpinsrw, Xmm, Xmm, Mem, Imm)                 // AVX  AVX512_BW
+  ASMJIT_INST_2x(vplzcntd, Vplzcntd, Vec, Vec)                         //      AVX512_CD{kz|b32}
+  ASMJIT_INST_2x(vplzcntd, Vplzcntd, Vec, Mem)                         //      AVX512_CD{kz|b32}
+  ASMJIT_INST_2x(vplzcntq, Vplzcntq, Vec, Vec)                         //      AVX512_CD{kz|b64}
+  ASMJIT_INST_2x(vplzcntq, Vplzcntq, Vec, Mem)                         //      AVX512_CD{kz|b64}
+  ASMJIT_INST_3x(vpmadd52huq, Vpmadd52huq, Vec, Vec, Vec)              //      AVX512_IFMA{kz|b64}
+  ASMJIT_INST_3x(vpmadd52huq, Vpmadd52huq, Vec, Vec, Mem)              //      AVX512_IFMA{kz|b64}
+  ASMJIT_INST_3x(vpmadd52luq, Vpmadd52luq, Vec, Vec, Vec)              //      AVX512_IFMA{kz|b64}
+  ASMJIT_INST_3x(vpmadd52luq, Vpmadd52luq, Vec, Vec, Mem)              //      AVX512_IFMA{kz|b64}
+  ASMJIT_INST_3x(vpmaddubsw, Vpmaddubsw, Vec, Vec, Vec)                // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpmaddubsw, Vpmaddubsw, Vec, Vec, Mem)                // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpmaddwd, Vpmaddwd, Vec, Vec, Vec)                    // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpmaddwd, Vpmaddwd, Vec, Vec, Mem)                    // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpmaskmovd, Vpmaskmovd, Mem, Vec, Vec)                // AVX2
+  ASMJIT_INST_3x(vpmaskmovd, Vpmaskmovd, Vec, Vec, Mem)                // AVX2
+  ASMJIT_INST_3x(vpmaskmovq, Vpmaskmovq, Mem, Vec, Vec)                // AVX2
+  ASMJIT_INST_3x(vpmaskmovq, Vpmaskmovq, Vec, Vec, Mem)                // AVX2
+  ASMJIT_INST_3x(vpmaxsb, Vpmaxsb, Vec, Vec, Vec)                      // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpmaxsb, Vpmaxsb, Vec, Vec, Mem)                      // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpmaxsd, Vpmaxsd, Vec, Vec, Vec)                      // AVX+ AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpmaxsd, Vpmaxsd, Vec, Vec, Mem)                      // AVX+ AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpmaxsq, Vpmaxsq, Vec, Vec, Vec)                      //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpmaxsq, Vpmaxsq, Vec, Vec, Mem)                      //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpmaxsw, Vpmaxsw, Vec, Vec, Vec)                      // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpmaxsw, Vpmaxsw, Vec, Vec, Mem)                      // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpmaxub, Vpmaxub, Vec, Vec, Vec)                      // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpmaxub, Vpmaxub, Vec, Vec, Mem)                      // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpmaxud, Vpmaxud, Vec, Vec, Vec)                      // AVX+ AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpmaxud, Vpmaxud, Vec, Vec, Mem)                      // AVX+ AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpmaxuq, Vpmaxuq, Vec, Vec, Vec)                      //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpmaxuq, Vpmaxuq, Vec, Vec, Mem)                      //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpmaxuw, Vpmaxuw, Vec, Vec, Vec)                      // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpmaxuw, Vpmaxuw, Vec, Vec, Mem)                      // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpminsb, Vpminsb, Vec, Vec, Vec)                      // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpminsb, Vpminsb, Vec, Vec, Mem)                      // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpminsd, Vpminsd, Vec, Vec, Vec)                      // AVX+ AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpminsd, Vpminsd, Vec, Vec, Mem)                      // AVX+ AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpminsq, Vpminsq, Vec, Vec, Vec)                      //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpminsq, Vpminsq, Vec, Vec, Mem)                      //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpminsw, Vpminsw, Vec, Vec, Vec)                      // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpminsw, Vpminsw, Vec, Vec, Mem)                      // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpminub, Vpminub, Vec, Vec, Vec)                      // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpminub, Vpminub, Vec, Vec, Mem)                      // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpminud, Vpminud, Vec, Vec, Vec)                      // AVX+ AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpminud, Vpminud, Vec, Vec, Mem)                      // AVX+ AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpminuq, Vpminuq, Vec, Vec, Vec)                      //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpminuq, Vpminuq, Vec, Vec, Mem)                      //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpminuw, Vpminuw, Vec, Vec, Vec)                      // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpminuw, Vpminuw, Vec, Vec, Mem)                      // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_2x(vpmovb2m, Vpmovb2m, KReg, Vec)                        //      AVX512_BW
+  ASMJIT_INST_2x(vpmovd2m, Vpmovd2m, KReg, Vec)                        //      AVX512_DQ
+  ASMJIT_INST_2x(vpmovdb, Vpmovdb, Vec, Vec)                           //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovdb, Vpmovdb, Mem, Vec)                           //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovdw, Vpmovdw, Vec, Vec)                           //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovdw, Vpmovdw, Mem, Vec)                           //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovm2b, Vpmovm2b, Vec, KReg)                        //      AVX512_BW
+  ASMJIT_INST_2x(vpmovm2d, Vpmovm2d, Vec, KReg)                        //      AVX512_DQ
+  ASMJIT_INST_2x(vpmovm2q, Vpmovm2q, Vec, KReg)                        //      AVX512_DQ
+  ASMJIT_INST_2x(vpmovm2w, Vpmovm2w, Vec, KReg)                        //      AVX512_BW
+  ASMJIT_INST_2x(vpmovmskb, Vpmovmskb, Gp, Vec)                        // AVX+
+  ASMJIT_INST_2x(vpmovq2m, Vpmovq2m, KReg, Vec)                        //      AVX512_DQ
+  ASMJIT_INST_2x(vpmovqb, Vpmovqb, Vec, Vec)                           //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovqb, Vpmovqb, Mem, Vec)                           //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovqd, Vpmovqd, Vec, Vec)                           //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovqd, Vpmovqd, Mem, Vec)                           //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovqw, Vpmovqw, Vec, Vec)                           //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovqw, Vpmovqw, Mem, Vec)                           //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovsdb, Vpmovsdb, Vec, Vec)                         //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovsdb, Vpmovsdb, Mem, Vec)                         //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovsdw, Vpmovsdw, Vec, Vec)                         //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovsdw, Vpmovsdw, Mem, Vec)                         //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovsqb, Vpmovsqb, Vec, Vec)                         //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovsqb, Vpmovsqb, Mem, Vec)                         //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovsqd, Vpmovsqd, Vec, Vec)                         //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovsqd, Vpmovsqd, Mem, Vec)                         //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovsqw, Vpmovsqw, Vec, Vec)                         //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovsqw, Vpmovsqw, Mem, Vec)                         //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovswb, Vpmovswb, Vec, Vec)                         //      AVX512_BW{kz}
+  ASMJIT_INST_2x(vpmovswb, Vpmovswb, Mem, Vec)                         //      AVX512_BW{kz}
+  ASMJIT_INST_2x(vpmovsxbd, Vpmovsxbd, Vec, Vec)                       // AVX+ AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovsxbd, Vpmovsxbd, Vec, Mem)                       // AVX+ AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovsxbq, Vpmovsxbq, Vec, Vec)                       // AVX+ AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovsxbq, Vpmovsxbq, Vec, Mem)                       // AVX+ AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovsxbw, Vpmovsxbw, Vec, Vec)                       // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_2x(vpmovsxbw, Vpmovsxbw, Vec, Mem)                       // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_2x(vpmovsxdq, Vpmovsxdq, Vec, Vec)                       // AVX+ AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovsxdq, Vpmovsxdq, Vec, Mem)                       // AVX+ AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovsxwd, Vpmovsxwd, Vec, Vec)                       // AVX+ AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovsxwd, Vpmovsxwd, Vec, Mem)                       // AVX+ AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovsxwq, Vpmovsxwq, Vec, Vec)                       // AVX+ AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovsxwq, Vpmovsxwq, Vec, Mem)                       // AVX+ AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovusdb, Vpmovusdb, Vec, Vec)                       //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovusdb, Vpmovusdb, Mem, Vec)                       //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovusdw, Vpmovusdw, Vec, Vec)                       //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovusdw, Vpmovusdw, Mem, Vec)                       //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovusqb, Vpmovusqb, Vec, Vec)                       //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovusqb, Vpmovusqb, Mem, Vec)                       //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovusqd, Vpmovusqd, Vec, Vec)                       //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovusqd, Vpmovusqd, Mem, Vec)                       //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovusqw, Vpmovusqw, Vec, Vec)                       //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovusqw, Vpmovusqw, Mem, Vec)                       //      AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovuswb, Vpmovuswb, Vec, Vec)                       //      AVX512_BW{kz}
+  ASMJIT_INST_2x(vpmovuswb, Vpmovuswb, Mem, Vec)                       //      AVX512_BW{kz}
+  ASMJIT_INST_2x(vpmovw2m, Vpmovw2m, KReg, Vec)                        //      AVX512_BW
+  ASMJIT_INST_2x(vpmovwb, Vpmovwb, Vec, Vec)                           //      AVX512_BW{kz}
+  ASMJIT_INST_2x(vpmovwb, Vpmovwb, Mem, Vec)                           //      AVX512_BW{kz}
+  ASMJIT_INST_2x(vpmovzxbd, Vpmovzxbd, Vec, Vec)                       // AVX+ AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovzxbd, Vpmovzxbd, Vec, Mem)                       // AVX+ AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovzxbq, Vpmovzxbq, Vec, Vec)                       // AVX+ AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovzxbq, Vpmovzxbq, Vec, Mem)                       // AVX+ AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovzxbw, Vpmovzxbw, Vec, Vec)                       // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_2x(vpmovzxbw, Vpmovzxbw, Vec, Mem)                       // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_2x(vpmovzxdq, Vpmovzxdq, Vec, Vec)                       // AVX+ AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovzxdq, Vpmovzxdq, Vec, Mem)                       // AVX+ AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovzxwd, Vpmovzxwd, Vec, Vec)                       // AVX+ AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovzxwd, Vpmovzxwd, Vec, Mem)                       // AVX+ AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovzxwq, Vpmovzxwq, Vec, Vec)                       // AVX+ AVX512_F{kz}
+  ASMJIT_INST_2x(vpmovzxwq, Vpmovzxwq, Vec, Mem)                       // AVX+ AVX512_F{kz}
+  ASMJIT_INST_3x(vpmuldq, Vpmuldq, Vec, Vec, Vec)                      // AVX+ AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpmuldq, Vpmuldq, Vec, Vec, Mem)                      // AVX+ AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpmulhrsw, Vpmulhrsw, Vec, Vec, Vec)                  // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpmulhrsw, Vpmulhrsw, Vec, Vec, Mem)                  // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpmulhuw, Vpmulhuw, Vec, Vec, Vec)                    // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpmulhuw, Vpmulhuw, Vec, Vec, Mem)                    // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpmulhw, Vpmulhw, Vec, Vec, Vec)                      // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpmulhw, Vpmulhw, Vec, Vec, Mem)                      // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpmulld, Vpmulld, Vec, Vec, Vec)                      // AVX+ AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpmulld, Vpmulld, Vec, Vec, Mem)                      // AVX+ AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpmullq, Vpmullq, Vec, Vec, Vec)                      //      AVX512_DQ{kz|b64}
+  ASMJIT_INST_3x(vpmullq, Vpmullq, Vec, Vec, Mem)                      //      AVX512_DQ{kz|b64}
+  ASMJIT_INST_3x(vpmullw, Vpmullw, Vec, Vec, Vec)                      // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpmullw, Vpmullw, Vec, Vec, Mem)                      // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpmultishiftqb, Vpmultishiftqb, Vec, Vec, Vec)        //      AVX512_VBMI{kz|b64}
+  ASMJIT_INST_3x(vpmultishiftqb, Vpmultishiftqb, Vec, Vec, Mem)        //      AVX512_VBMI{kz|b64}
+  ASMJIT_INST_3x(vpmuludq, Vpmuludq, Vec, Vec, Vec)                    // AVX+ AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpmuludq, Vpmuludq, Vec, Vec, Mem)                    // AVX+ AVX512_F{kz|b64}
+  ASMJIT_INST_2x(vpopcntb, Vpopcntb, Vec, Vec)                         //      AVX512_BITALG{kz}
+  ASMJIT_INST_2x(vpopcntb, Vpopcntb, Vec, Mem)                         //      AVX512_BITALG{kz}
+  ASMJIT_INST_2x(vpopcntd, Vpopcntd, Vec, Vec)                         //      AVX512_VPOPCNTDQ{kz|b32}
+  ASMJIT_INST_2x(vpopcntd, Vpopcntd, Vec, Mem)                         //      AVX512_VPOPCNTDQ{kz|b32}
+  ASMJIT_INST_2x(vpopcntq, Vpopcntq, Vec, Vec)                         //      AVX512_VPOPCNTDQ{kz|b64}
+  ASMJIT_INST_2x(vpopcntq, Vpopcntq, Vec, Mem)                         //      AVX512_VPOPCNTDQ{kz|b64}
+  ASMJIT_INST_2x(vpopcntw, Vpopcntw, Vec, Vec)                         //      AVX512_BITALG{kz}
+  ASMJIT_INST_2x(vpopcntw, Vpopcntw, Vec, Mem)                         //      AVX512_BITALG{kz}
+  ASMJIT_INST_3x(vpor, Vpor, Vec, Vec, Vec)                            // AVX+
+  ASMJIT_INST_3x(vpor, Vpor, Vec, Vec, Mem)                            // AVX+
+  ASMJIT_INST_3x(vpord, Vpord, Vec, Vec, Vec)                          //      AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpord, Vpord, Vec, Vec, Mem)                          //      AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vporq, Vporq, Vec, Vec, Vec)                          //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vporq, Vporq, Vec, Vec, Mem)                          //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vprold, Vprold, Vec, Vec, Imm)                        //      AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vprold, Vprold, Vec, Mem, Imm)                        //      AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vprolq, Vprolq, Vec, Vec, Imm)                        //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vprolq, Vprolq, Vec, Mem, Imm)                        //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vprolvd, Vprolvd, Vec, Vec, Vec)                      //      AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vprolvd, Vprolvd, Vec, Vec, Mem)                      //      AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vprolvq, Vprolvq, Vec, Vec, Vec)                      //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vprolvq, Vprolvq, Vec, Vec, Mem)                      //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vprord, Vprord, Vec, Vec, Imm)                        //      AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vprord, Vprord, Vec, Mem, Imm)                        //      AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vprorq, Vprorq, Vec, Vec, Imm)                        //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vprorq, Vprorq, Vec, Mem, Imm)                        //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vprorvd, Vprorvd, Vec, Vec, Vec)                      //      AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vprorvd, Vprorvd, Vec, Vec, Mem)                      //      AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vprorvq, Vprorvq, Vec, Vec, Vec)                      //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vprorvq, Vprorvq, Vec, Vec, Mem)                      //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpsadbw, Vpsadbw, Vec, Vec, Vec)                      // AVX+ AVX512_BW
+  ASMJIT_INST_3x(vpsadbw, Vpsadbw, Vec, Vec, Mem)                      // AVX+ AVX512_BW
+  ASMJIT_INST_2x(vpscatterdd, Vpscatterdd, Mem, Vec)                   //      AVX512_F{k}
+  ASMJIT_INST_2x(vpscatterdq, Vpscatterdq, Mem, Vec)                   //      AVX512_F{k}
+  ASMJIT_INST_2x(vpscatterqd, Vpscatterqd, Mem, Vec)                   //      AVX512_F{k}
+  ASMJIT_INST_2x(vpscatterqq, Vpscatterqq, Mem, Vec)                   //      AVX512_F{k}
+  ASMJIT_INST_4x(vpshldd, Vpshldd, Vec, Vec, Vec, Imm)                 //      AVX512_VBMI2{kz}
+  ASMJIT_INST_4x(vpshldd, Vpshldd, Vec, Vec, Mem, Imm)                 //      AVX512_VBMI2{kz}
+  ASMJIT_INST_4x(vpshldq, Vpshldq, Vec, Vec, Vec, Imm)                 //      AVX512_VBMI2{kz}
+  ASMJIT_INST_4x(vpshldq, Vpshldq, Vec, Vec, Mem, Imm)                 //      AVX512_VBMI2{kz}
+  ASMJIT_INST_3x(vpshldvd, Vpshldvd, Vec, Vec, Vec)                    //      AVX512_VBMI2{kz}
+  ASMJIT_INST_3x(vpshldvd, Vpshldvd, Vec, Vec, Mem)                    //      AVX512_VBMI2{kz}
+  ASMJIT_INST_3x(vpshldvq, Vpshldvq, Vec, Vec, Vec)                    //      AVX512_VBMI2{kz}
+  ASMJIT_INST_3x(vpshldvq, Vpshldvq, Vec, Vec, Mem)                    //      AVX512_VBMI2{kz}
+  ASMJIT_INST_3x(vpshldvw, Vpshldvw, Vec, Vec, Vec)                    //      AVX512_VBMI2{kz}
+  ASMJIT_INST_3x(vpshldvw, Vpshldvw, Vec, Vec, Mem)                    //      AVX512_VBMI2{kz}
+  ASMJIT_INST_4x(vpshldw, Vpshldw, Vec, Vec, Vec, Imm)                 //      AVX512_VBMI2{kz}
+  ASMJIT_INST_4x(vpshldw, Vpshldw, Vec, Vec, Mem, Imm)                 //      AVX512_VBMI2{kz}
+  ASMJIT_INST_4x(vpshrdd, Vpshrdd, Vec, Vec, Vec, Imm)                 //      AVX512_VBMI2{kz}
+  ASMJIT_INST_4x(vpshrdd, Vpshrdd, Vec, Vec, Mem, Imm)                 //      AVX512_VBMI2{kz}
+  ASMJIT_INST_4x(vpshrdq, Vpshrdq, Vec, Vec, Vec, Imm)                 //      AVX512_VBMI2{kz}
+  ASMJIT_INST_4x(vpshrdq, Vpshrdq, Vec, Vec, Mem, Imm)                 //      AVX512_VBMI2{kz}
+  ASMJIT_INST_3x(vpshrdvd, Vpshrdvd, Vec, Vec, Vec)                    //      AVX512_VBMI2{kz}
+  ASMJIT_INST_3x(vpshrdvd, Vpshrdvd, Vec, Vec, Mem)                    //      AVX512_VBMI2{kz}
+  ASMJIT_INST_3x(vpshrdvq, Vpshrdvq, Vec, Vec, Vec)                    //      AVX512_VBMI2{kz}
+  ASMJIT_INST_3x(vpshrdvq, Vpshrdvq, Vec, Vec, Mem)                    //      AVX512_VBMI2{kz}
+  ASMJIT_INST_3x(vpshrdvw, Vpshrdvw, Vec, Vec, Vec)                    //      AVX512_VBMI2{kz}
+  ASMJIT_INST_3x(vpshrdvw, Vpshrdvw, Vec, Vec, Mem)                    //      AVX512_VBMI2{kz}
+  ASMJIT_INST_4x(vpshrdw, Vpshrdw, Vec, Vec, Vec, Imm)                 //      AVX512_VBMI2{kz}
+  ASMJIT_INST_4x(vpshrdw, Vpshrdw, Vec, Vec, Mem, Imm)                 //      AVX512_VBMI2{kz}
+  ASMJIT_INST_3x(vpshufb, Vpshufb, Vec, Vec, Vec)                      // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpshufb, Vpshufb, Vec, Vec, Mem)                      // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpshufbitqmb, Vpshufbitqmb, KReg, Vec, Vec)           //      AVX512_BITALG{k}
+  ASMJIT_INST_3x(vpshufbitqmb, Vpshufbitqmb, KReg, Vec, Mem)           //      AVX512_BITALG{k}
+  ASMJIT_INST_3x(vpshufd, Vpshufd, Vec, Vec, Imm)                      // AVX+ AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpshufd, Vpshufd, Vec, Mem, Imm)                      // AVX+ AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpshufhw, Vpshufhw, Vec, Vec, Imm)                    // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpshufhw, Vpshufhw, Vec, Mem, Imm)                    // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpshuflw, Vpshuflw, Vec, Vec, Imm)                    // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpshuflw, Vpshuflw, Vec, Mem, Imm)                    // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpsignb, Vpsignb, Vec, Vec, Vec)                      // AVX+
+  ASMJIT_INST_3x(vpsignb, Vpsignb, Vec, Vec, Mem)                      // AVX+
+  ASMJIT_INST_3x(vpsignd, Vpsignd, Vec, Vec, Vec)                      // AVX+
+  ASMJIT_INST_3x(vpsignd, Vpsignd, Vec, Vec, Mem)                      // AVX+
+  ASMJIT_INST_3x(vpsignw, Vpsignw, Vec, Vec, Vec)                      // AVX+
+  ASMJIT_INST_3x(vpsignw, Vpsignw, Vec, Vec, Mem)                      // AVX+
+  ASMJIT_INST_3x(vpslld, Vpslld, Vec, Vec, Imm)                        // AVX+ AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpslld, Vpslld, Vec, Vec, Vec)                        // AVX+ AVX512_F{kz}
+  ASMJIT_INST_3x(vpslld, Vpslld, Vec, Vec, Mem)                        // AVX+ AVX512_F{kz}
+  ASMJIT_INST_3x(vpslld, Vpslld, Vec, Mem, Imm)                        //      AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpslldq, Vpslldq, Vec, Vec, Imm)                      // AVX+ AVX512_BW
+  ASMJIT_INST_3x(vpslldq, Vpslldq, Vec, Mem, Imm)                      //      AVX512_BW
+  ASMJIT_INST_3x(vpsllq, Vpsllq, Vec, Vec, Imm)                        // AVX+ AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpsllq, Vpsllq, Vec, Vec, Vec)                        // AVX+ AVX512_F{kz}
+  ASMJIT_INST_3x(vpsllq, Vpsllq, Vec, Vec, Mem)                        // AVX+ AVX512_F{kz}
+  ASMJIT_INST_3x(vpsllq, Vpsllq, Vec, Mem, Imm)                        //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpsllvd, Vpsllvd, Vec, Vec, Vec)                      // AVX2 AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpsllvd, Vpsllvd, Vec, Vec, Mem)                      // AVX2 AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpsllvq, Vpsllvq, Vec, Vec, Vec)                      // AVX2 AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpsllvq, Vpsllvq, Vec, Vec, Mem)                      // AVX2 AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpsllvw, Vpsllvw, Vec, Vec, Vec)                      //      AVX512_BW{kz}
+  ASMJIT_INST_3x(vpsllvw, Vpsllvw, Vec, Vec, Mem)                      //      AVX512_BW{kz}
+  ASMJIT_INST_3x(vpsllw, Vpsllw, Vec, Vec, Imm)                        // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpsllw, Vpsllw, Vec, Vec, Vec)                        // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpsllw, Vpsllw, Vec, Vec, Mem)                        // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpsllw, Vpsllw, Vec, Mem, Imm)                        //      AVX512_BW{kz}
+  ASMJIT_INST_3x(vpsrad, Vpsrad, Vec, Vec, Imm)                        // AVX+ AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpsrad, Vpsrad, Vec, Vec, Vec)                        // AVX+ AVX512_F{kz}
+  ASMJIT_INST_3x(vpsrad, Vpsrad, Vec, Vec, Mem)                        // AVX+ AVX512_F{kz}
+  ASMJIT_INST_3x(vpsrad, Vpsrad, Vec, Mem, Imm)                        //      AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpsraq, Vpsraq, Vec, Vec, Vec)                        //      AVX512_F{kz}
+  ASMJIT_INST_3x(vpsraq, Vpsraq, Vec, Vec, Mem)                        //      AVX512_F{kz}
+  ASMJIT_INST_3x(vpsraq, Vpsraq, Vec, Vec, Imm)                        //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpsraq, Vpsraq, Vec, Mem, Imm)                        //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpsravd, Vpsravd, Vec, Vec, Vec)                      // AVX2 AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpsravd, Vpsravd, Vec, Vec, Mem)                      // AVX2 AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpsravq, Vpsravq, Vec, Vec, Vec)                      //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpsravq, Vpsravq, Vec, Vec, Mem)                      //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpsravw, Vpsravw, Vec, Vec, Vec)                      //      AVX512_BW{kz}
+  ASMJIT_INST_3x(vpsravw, Vpsravw, Vec, Vec, Mem)                      //      AVX512_BW{kz}
+  ASMJIT_INST_3x(vpsraw, Vpsraw, Vec, Vec, Imm)                        // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpsraw, Vpsraw, Vec, Vec, Vec)                        // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpsraw, Vpsraw, Vec, Vec, Mem)                        // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpsraw, Vpsraw, Vec, Mem, Imm)                        //      AVX512_BW{kz}
+  ASMJIT_INST_3x(vpsrld, Vpsrld, Vec, Vec, Imm)                        // AVX+ AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpsrld, Vpsrld, Vec, Vec, Vec)                        // AVX+ AVX512_F{kz}
+  ASMJIT_INST_3x(vpsrld, Vpsrld, Vec, Vec, Mem)                        // AVX+ AVX512_F{kz}
+  ASMJIT_INST_3x(vpsrld, Vpsrld, Vec, Mem, Imm)                        //      AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpsrldq, Vpsrldq, Vec, Vec, Imm)                      // AVX+ AVX512_BW
+  ASMJIT_INST_3x(vpsrldq, Vpsrldq, Vec, Mem, Imm)                      //      AVX512_BW
+  ASMJIT_INST_3x(vpsrlq, Vpsrlq, Vec, Vec, Imm)                        // AVX+ AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpsrlq, Vpsrlq, Vec, Vec, Vec)                        // AVX+ AVX512_F{kz}
+  ASMJIT_INST_3x(vpsrlq, Vpsrlq, Vec, Vec, Mem)                        // AVX+ AVX512_F{kz}
+  ASMJIT_INST_3x(vpsrlq, Vpsrlq, Vec, Mem, Imm)                        //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpsrlvd, Vpsrlvd, Vec, Vec, Vec)                      // AVX2 AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpsrlvd, Vpsrlvd, Vec, Vec, Mem)                      // AVX2 AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpsrlvq, Vpsrlvq, Vec, Vec, Vec)                      // AVX2 AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpsrlvq, Vpsrlvq, Vec, Vec, Mem)                      // AVX2 AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpsrlvw, Vpsrlvw, Vec, Vec, Vec)                      //      AVX512_BW{kz}
+  ASMJIT_INST_3x(vpsrlvw, Vpsrlvw, Vec, Vec, Mem)                      //      AVX512_BW{kz}
+  ASMJIT_INST_3x(vpsrlw, Vpsrlw, Vec, Vec, Imm)                        // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpsrlw, Vpsrlw, Vec, Vec, Vec)                        // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpsrlw, Vpsrlw, Vec, Vec, Mem)                        // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpsrlw, Vpsrlw, Vec, Mem, Imm)                        //      AVX512_BW{kz}
+  ASMJIT_INST_3x(vpsubb, Vpsubb, Vec, Vec, Vec)                        // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpsubb, Vpsubb, Vec, Vec, Mem)                        // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpsubd, Vpsubd, Vec, Vec, Vec)                        // AVX+ AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpsubd, Vpsubd, Vec, Vec, Mem)                        // AVX+ AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpsubq, Vpsubq, Vec, Vec, Vec)                        // AVX+ AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpsubq, Vpsubq, Vec, Vec, Mem)                        // AVX+ AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpsubsb, Vpsubsb, Vec, Vec, Vec)                      // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpsubsb, Vpsubsb, Vec, Vec, Mem)                      // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpsubsw, Vpsubsw, Vec, Vec, Vec)                      // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpsubsw, Vpsubsw, Vec, Vec, Mem)                      // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpsubusb, Vpsubusb, Vec, Vec, Vec)                    // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpsubusb, Vpsubusb, Vec, Vec, Mem)                    // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpsubusw, Vpsubusw, Vec, Vec, Vec)                    // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpsubusw, Vpsubusw, Vec, Vec, Mem)                    // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpsubw, Vpsubw, Vec, Vec, Vec)                        // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpsubw, Vpsubw, Vec, Vec, Mem)                        // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_4x(vpternlogd, Vpternlogd, Vec, Vec, Vec, Imm)           //      AVX512_F{kz|b32}
+  ASMJIT_INST_4x(vpternlogd, Vpternlogd, Vec, Vec, Mem, Imm)           //      AVX512_F{kz|b32}
+  ASMJIT_INST_4x(vpternlogq, Vpternlogq, Vec, Vec, Vec, Imm)           //      AVX512_F{kz|b64}
+  ASMJIT_INST_4x(vpternlogq, Vpternlogq, Vec, Vec, Mem, Imm)           //      AVX512_F{kz|b64}
+  ASMJIT_INST_2x(vptest, Vptest, Vec, Vec)                             // AVX
+  ASMJIT_INST_2x(vptest, Vptest, Vec, Mem)                             // AVX
+  ASMJIT_INST_3x(vptestmb, Vptestmb, KReg, Vec, Vec)                   //      AVX512_BW{k}
+  ASMJIT_INST_3x(vptestmb, Vptestmb, KReg, Vec, Mem)                   //      AVX512_BW{k}
+  ASMJIT_INST_3x(vptestmd, Vptestmd, KReg, Vec, Vec)                   //      AVX512_F{k|b32}
+  ASMJIT_INST_3x(vptestmd, Vptestmd, KReg, Vec, Mem)                   //      AVX512_F{k|b32}
+  ASMJIT_INST_3x(vptestmq, Vptestmq, KReg, Vec, Vec)                   //      AVX512_F{k|b64}
+  ASMJIT_INST_3x(vptestmq, Vptestmq, KReg, Vec, Mem)                   //      AVX512_F{k|b64}
+  ASMJIT_INST_3x(vptestmw, Vptestmw, KReg, Vec, Vec)                   //      AVX512_BW{k}
+  ASMJIT_INST_3x(vptestmw, Vptestmw, KReg, Vec, Mem)                   //      AVX512_BW{k}
+  ASMJIT_INST_3x(vptestnmb, Vptestnmb, KReg, Vec, Vec)                 //      AVX512_BW{k}
+  ASMJIT_INST_3x(vptestnmb, Vptestnmb, KReg, Vec, Mem)                 //      AVX512_BW{k}
+  ASMJIT_INST_3x(vptestnmd, Vptestnmd, KReg, Vec, Vec)                 //      AVX512_F{k|b32}
+  ASMJIT_INST_3x(vptestnmd, Vptestnmd, KReg, Vec, Mem)                 //      AVX512_F{k|b32}
+  ASMJIT_INST_3x(vptestnmq, Vptestnmq, KReg, Vec, Vec)                 //      AVX512_F{k|b64}
+  ASMJIT_INST_3x(vptestnmq, Vptestnmq, KReg, Vec, Mem)                 //      AVX512_F{k|b64}
+  ASMJIT_INST_3x(vptestnmw, Vptestnmw, KReg, Vec, Vec)                 //      AVX512_BW{k}
+  ASMJIT_INST_3x(vptestnmw, Vptestnmw, KReg, Vec, Mem)                 //      AVX512_BW{k}
+  ASMJIT_INST_3x(vpunpckhbw, Vpunpckhbw, Vec, Vec, Vec)                // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpunpckhbw, Vpunpckhbw, Vec, Vec, Mem)                // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpunpckhdq, Vpunpckhdq, Vec, Vec, Vec)                // AVX+ AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpunpckhdq, Vpunpckhdq, Vec, Vec, Mem)                // AVX+ AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpunpckhqdq, Vpunpckhqdq, Vec, Vec, Vec)              // AVX+ AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpunpckhqdq, Vpunpckhqdq, Vec, Vec, Mem)              // AVX+ AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpunpckhwd, Vpunpckhwd, Vec, Vec, Vec)                // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpunpckhwd, Vpunpckhwd, Vec, Vec, Mem)                // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpunpcklbw, Vpunpcklbw, Vec, Vec, Vec)                // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpunpcklbw, Vpunpcklbw, Vec, Vec, Mem)                // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpunpckldq, Vpunpckldq, Vec, Vec, Vec)                // AVX+ AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpunpckldq, Vpunpckldq, Vec, Vec, Mem)                // AVX+ AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpunpcklqdq, Vpunpcklqdq, Vec, Vec, Vec)              // AVX+ AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpunpcklqdq, Vpunpcklqdq, Vec, Vec, Mem)              // AVX+ AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpunpcklwd, Vpunpcklwd, Vec, Vec, Vec)                // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpunpcklwd, Vpunpcklwd, Vec, Vec, Mem)                // AVX+ AVX512_BW{kz}
+  ASMJIT_INST_3x(vpxor, Vpxor, Vec, Vec, Vec)                          // AVX+
+  ASMJIT_INST_3x(vpxor, Vpxor, Vec, Vec, Mem)                          // AVX+
+  ASMJIT_INST_3x(vpxord, Vpxord, Vec, Vec, Vec)                        //      AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpxord, Vpxord, Vec, Vec, Mem)                        //      AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vpxorq, Vpxorq, Vec, Vec, Vec)                        //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vpxorq, Vpxorq, Vec, Vec, Mem)                        //      AVX512_F{kz|b64}
+  ASMJIT_INST_4x(vrangepd, Vrangepd, Vec, Vec, Vec, Imm)               //      AVX512_DQ{kz|b64}
+  ASMJIT_INST_4x(vrangepd, Vrangepd, Vec, Vec, Mem, Imm)               //      AVX512_DQ{kz|b64}
+  ASMJIT_INST_4x(vrangeps, Vrangeps, Vec, Vec, Vec, Imm)               //      AVX512_DQ{kz|b32}
+  ASMJIT_INST_4x(vrangeps, Vrangeps, Vec, Vec, Mem, Imm)               //      AVX512_DQ{kz|b32}
+  ASMJIT_INST_4x(vrangesd, Vrangesd, Xmm, Xmm, Xmm, Imm)               //      AVX512_DQ{kz|sae}
+  ASMJIT_INST_4x(vrangesd, Vrangesd, Xmm, Xmm, Mem, Imm)               //      AVX512_DQ{kz|sae}
+  ASMJIT_INST_4x(vrangess, Vrangess, Xmm, Xmm, Xmm, Imm)               //      AVX512_DQ{kz|sae}
+  ASMJIT_INST_4x(vrangess, Vrangess, Xmm, Xmm, Mem, Imm)               //      AVX512_DQ{kz|sae}
+  ASMJIT_INST_2x(vrcp14pd, Vrcp14pd, Vec, Vec)                         //      AVX512_F{kz|b64}
+  ASMJIT_INST_2x(vrcp14pd, Vrcp14pd, Vec, Mem)                         //      AVX512_F{kz|b64}
+  ASMJIT_INST_2x(vrcp14ps, Vrcp14ps, Vec, Vec)                         //      AVX512_F{kz|b32}
+  ASMJIT_INST_2x(vrcp14ps, Vrcp14ps, Vec, Mem)                         //      AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vrcp14sd, Vrcp14sd, Xmm, Xmm, Xmm)                    //      AVX512_F{kz}
+  ASMJIT_INST_3x(vrcp14sd, Vrcp14sd, Xmm, Xmm, Mem)                    //      AVX512_F{kz}
+  ASMJIT_INST_3x(vrcp14ss, Vrcp14ss, Xmm, Xmm, Xmm)                    //      AVX512_F{kz}
+  ASMJIT_INST_3x(vrcp14ss, Vrcp14ss, Xmm, Xmm, Mem)                    //      AVX512_F{kz}
+  ASMJIT_INST_2x(vrcp28pd, Vrcp28pd, Vec, Vec)                         //      AVX512_ER{kz|sae|b64}
+  ASMJIT_INST_2x(vrcp28pd, Vrcp28pd, Vec, Mem)                         //      AVX512_ER{kz|sae|b64}
+  ASMJIT_INST_2x(vrcp28ps, Vrcp28ps, Vec, Vec)                         //      AVX512_ER{kz|sae|b32}
+  ASMJIT_INST_2x(vrcp28ps, Vrcp28ps, Vec, Mem)                         //      AVX512_ER{kz|sae|b32}
+  ASMJIT_INST_3x(vrcp28sd, Vrcp28sd, Xmm, Xmm, Xmm)                    //      AVX512_ER{kz|sae}
+  ASMJIT_INST_3x(vrcp28sd, Vrcp28sd, Xmm, Xmm, Mem)                    //      AVX512_ER{kz|sae}
+  ASMJIT_INST_3x(vrcp28ss, Vrcp28ss, Xmm, Xmm, Xmm)                    //      AVX512_ER{kz|sae}
+  ASMJIT_INST_3x(vrcp28ss, Vrcp28ss, Xmm, Xmm, Mem)                    //      AVX512_ER{kz|sae}
+  ASMJIT_INST_2x(vrcpps, Vrcpps, Vec, Vec)                             // AVX
+  ASMJIT_INST_2x(vrcpps, Vrcpps, Vec, Mem)                             // AVX
+  ASMJIT_INST_3x(vrcpss, Vrcpss, Xmm, Xmm, Xmm)                        // AVX
+  ASMJIT_INST_3x(vrcpss, Vrcpss, Xmm, Xmm, Mem)                        // AVX
+  ASMJIT_INST_3x(vreducepd, Vreducepd, Vec, Vec, Imm)                  //      AVX512_DQ{kz|b64}
+  ASMJIT_INST_3x(vreducepd, Vreducepd, Vec, Mem, Imm)                  //      AVX512_DQ{kz|b64}
+  ASMJIT_INST_3x(vreduceps, Vreduceps, Vec, Vec, Imm)                  //      AVX512_DQ{kz|b32}
+  ASMJIT_INST_3x(vreduceps, Vreduceps, Vec, Mem, Imm)                  //      AVX512_DQ{kz|b32}
+  ASMJIT_INST_4x(vreducesd, Vreducesd, Xmm, Xmm, Xmm, Imm)             //      AVX512_DQ{kz}
+  ASMJIT_INST_4x(vreducesd, Vreducesd, Xmm, Xmm, Mem, Imm)             //      AVX512_DQ{kz}
+  ASMJIT_INST_4x(vreducess, Vreducess, Xmm, Xmm, Xmm, Imm)             //      AVX512_DQ{kz}
+  ASMJIT_INST_4x(vreducess, Vreducess, Xmm, Xmm, Mem, Imm)             //      AVX512_DQ{kz}
+  ASMJIT_INST_3x(vrndscalepd, Vrndscalepd, Vec, Vec, Imm)              //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vrndscalepd, Vrndscalepd, Vec, Mem, Imm)              //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vrndscaleps, Vrndscaleps, Vec, Vec, Imm)              //      AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vrndscaleps, Vrndscaleps, Vec, Mem, Imm)              //      AVX512_F{kz|b32}
+  ASMJIT_INST_4x(vrndscalesd, Vrndscalesd, Xmm, Xmm, Xmm, Imm)         //      AVX512_F{kz|sae}
+  ASMJIT_INST_4x(vrndscalesd, Vrndscalesd, Xmm, Xmm, Mem, Imm)         //      AVX512_F{kz|sae}
+  ASMJIT_INST_4x(vrndscaless, Vrndscaless, Xmm, Xmm, Xmm, Imm)         //      AVX512_F{kz|sae}
+  ASMJIT_INST_4x(vrndscaless, Vrndscaless, Xmm, Xmm, Mem, Imm)         //      AVX512_F{kz|sae}
+  ASMJIT_INST_3x(vroundpd, Vroundpd, Vec, Vec, Imm)                    // AVX
+  ASMJIT_INST_3x(vroundpd, Vroundpd, Vec, Mem, Imm)                    // AVX
+  ASMJIT_INST_3x(vroundps, Vroundps, Vec, Vec, Imm)                    // AVX
+  ASMJIT_INST_3x(vroundps, Vroundps, Vec, Mem, Imm)                    // AVX
+  ASMJIT_INST_4x(vroundsd, Vroundsd, Xmm, Xmm, Xmm, Imm)               // AVX
+  ASMJIT_INST_4x(vroundsd, Vroundsd, Xmm, Xmm, Mem, Imm)               // AVX
+  ASMJIT_INST_4x(vroundss, Vroundss, Xmm, Xmm, Xmm, Imm)               // AVX
+  ASMJIT_INST_4x(vroundss, Vroundss, Xmm, Xmm, Mem, Imm)               // AVX
+  ASMJIT_INST_2x(vrsqrt14pd, Vrsqrt14pd, Vec, Vec)                     //      AVX512_F{kz|b64}
+  ASMJIT_INST_2x(vrsqrt14pd, Vrsqrt14pd, Vec, Mem)                     //      AVX512_F{kz|b64}
+  ASMJIT_INST_2x(vrsqrt14ps, Vrsqrt14ps, Vec, Vec)                     //      AVX512_F{kz|b32}
+  ASMJIT_INST_2x(vrsqrt14ps, Vrsqrt14ps, Vec, Mem)                     //      AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vrsqrt14sd, Vrsqrt14sd, Xmm, Xmm, Xmm)                //      AVX512_F{kz}
+  ASMJIT_INST_3x(vrsqrt14sd, Vrsqrt14sd, Xmm, Xmm, Mem)                //      AVX512_F{kz}
+  ASMJIT_INST_3x(vrsqrt14ss, Vrsqrt14ss, Xmm, Xmm, Xmm)                //      AVX512_F{kz}
+  ASMJIT_INST_3x(vrsqrt14ss, Vrsqrt14ss, Xmm, Xmm, Mem)                //      AVX512_F{kz}
+  ASMJIT_INST_2x(vrsqrt28pd, Vrsqrt28pd, Vec, Vec)                     //      AVX512_ER{kz|sae|b64}
+  ASMJIT_INST_2x(vrsqrt28pd, Vrsqrt28pd, Vec, Mem)                     //      AVX512_ER{kz|sae|b64}
+  ASMJIT_INST_2x(vrsqrt28ps, Vrsqrt28ps, Vec, Vec)                     //      AVX512_ER{kz|sae|b32}
+  ASMJIT_INST_2x(vrsqrt28ps, Vrsqrt28ps, Vec, Mem)                     //      AVX512_ER{kz|sae|b32}
+  ASMJIT_INST_3x(vrsqrt28sd, Vrsqrt28sd, Xmm, Xmm, Xmm)                //      AVX512_ER{kz|sae}
+  ASMJIT_INST_3x(vrsqrt28sd, Vrsqrt28sd, Xmm, Xmm, Mem)                //      AVX512_ER{kz|sae}
+  ASMJIT_INST_3x(vrsqrt28ss, Vrsqrt28ss, Xmm, Xmm, Xmm)                //      AVX512_ER{kz|sae}
+  ASMJIT_INST_3x(vrsqrt28ss, Vrsqrt28ss, Xmm, Xmm, Mem)                //      AVX512_ER{kz|sae}
+  ASMJIT_INST_2x(vrsqrtps, Vrsqrtps, Vec, Vec)                         // AVX
+  ASMJIT_INST_2x(vrsqrtps, Vrsqrtps, Vec, Mem)                         // AVX
+  ASMJIT_INST_3x(vrsqrtss, Vrsqrtss, Xmm, Xmm, Xmm)                    // AVX
+  ASMJIT_INST_3x(vrsqrtss, Vrsqrtss, Xmm, Xmm, Mem)                    // AVX
+  ASMJIT_INST_3x(vscalefpd, Vscalefpd, Vec, Vec, Vec)                  //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vscalefpd, Vscalefpd, Vec, Vec, Mem)                  //      AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vscalefps, Vscalefps, Vec, Vec, Vec)                  //      AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vscalefps, Vscalefps, Vec, Vec, Mem)                  //      AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vscalefsd, Vscalefsd, Xmm, Xmm, Xmm)                  //      AVX512_F{kz|er}
+  ASMJIT_INST_3x(vscalefsd, Vscalefsd, Xmm, Xmm, Mem)                  //      AVX512_F{kz|er}
+  ASMJIT_INST_3x(vscalefss, Vscalefss, Xmm, Xmm, Xmm)                  //      AVX512_F{kz|er}
+  ASMJIT_INST_3x(vscalefss, Vscalefss, Xmm, Xmm, Mem)                  //      AVX512_F{kz|er}
+  ASMJIT_INST_2x(vscatterdpd, Vscatterdpd, Mem, Vec)                   //      AVX512_F{k}
+  ASMJIT_INST_2x(vscatterdps, Vscatterdps, Mem, Vec)                   //      AVX512_F{k}
+  ASMJIT_INST_1x(vscatterpf0dpd, Vscatterpf0dpd, Mem)                  //      AVX512_PF{k}
+  ASMJIT_INST_1x(vscatterpf0dps, Vscatterpf0dps, Mem)                  //      AVX512_PF{k}
+  ASMJIT_INST_1x(vscatterpf0qpd, Vscatterpf0qpd, Mem)                  //      AVX512_PF{k}
+  ASMJIT_INST_1x(vscatterpf0qps, Vscatterpf0qps, Mem)                  //      AVX512_PF{k}
+  ASMJIT_INST_1x(vscatterpf1dpd, Vscatterpf1dpd, Mem)                  //      AVX512_PF{k}
+  ASMJIT_INST_1x(vscatterpf1dps, Vscatterpf1dps, Mem)                  //      AVX512_PF{k}
+  ASMJIT_INST_1x(vscatterpf1qpd, Vscatterpf1qpd, Mem)                  //      AVX512_PF{k}
+  ASMJIT_INST_1x(vscatterpf1qps, Vscatterpf1qps, Mem)                  //      AVX512_PF{k}
+  ASMJIT_INST_2x(vscatterqpd, Vscatterqpd, Mem, Vec)                   //      AVX512_F{k}
+  ASMJIT_INST_2x(vscatterqps, Vscatterqps, Mem, Vec)                   //      AVX512_F{k}
+  ASMJIT_INST_4x(vshuff32x4, Vshuff32x4, Vec, Vec, Vec, Imm)           //      AVX512_F{kz|b32}
+  ASMJIT_INST_4x(vshuff32x4, Vshuff32x4, Vec, Vec, Mem, Imm)           //      AVX512_F{kz|b32}
+  ASMJIT_INST_4x(vshuff64x2, Vshuff64x2, Vec, Vec, Vec, Imm)           //      AVX512_F{kz|b64}
+  ASMJIT_INST_4x(vshuff64x2, Vshuff64x2, Vec, Vec, Mem, Imm)           //      AVX512_F{kz|b64}
+  ASMJIT_INST_4x(vshufi32x4, Vshufi32x4, Vec, Vec, Vec, Imm)           //      AVX512_F{kz|b32}
+  ASMJIT_INST_4x(vshufi32x4, Vshufi32x4, Vec, Vec, Mem, Imm)           //      AVX512_F{kz|b32}
+  ASMJIT_INST_4x(vshufi64x2, Vshufi64x2, Vec, Vec, Vec, Imm)           //      AVX512_F{kz|b64}
+  ASMJIT_INST_4x(vshufi64x2, Vshufi64x2, Vec, Vec, Mem, Imm)           //      AVX512_F{kz|b64}
+  ASMJIT_INST_4x(vshufpd, Vshufpd, Vec, Vec, Vec, Imm)                 // AVX  AVX512_F{kz|b64}
+  ASMJIT_INST_4x(vshufpd, Vshufpd, Vec, Vec, Mem, Imm)                 // AVX  AVX512_F{kz|b64}
+  ASMJIT_INST_4x(vshufps, Vshufps, Vec, Vec, Vec, Imm)                 // AVX  AVX512_F{kz|b32}
+  ASMJIT_INST_4x(vshufps, Vshufps, Vec, Vec, Mem, Imm)                 // AVX  AVX512_F{kz|b32}
+  ASMJIT_INST_2x(vsqrtpd, Vsqrtpd, Vec, Vec)                           // AVX  AVX512_F{kz|b64}
+  ASMJIT_INST_2x(vsqrtpd, Vsqrtpd, Vec, Mem)                           // AVX  AVX512_F{kz|b64}
+  ASMJIT_INST_2x(vsqrtps, Vsqrtps, Vec, Vec)                           // AVX  AVX512_F{kz|b32}
+  ASMJIT_INST_2x(vsqrtps, Vsqrtps, Vec, Mem)                           // AVX  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vsqrtsd, Vsqrtsd, Xmm, Xmm, Xmm)                      // AVX  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vsqrtsd, Vsqrtsd, Xmm, Xmm, Mem)                      // AVX  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vsqrtss, Vsqrtss, Xmm, Xmm, Xmm)                      // AVX  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vsqrtss, Vsqrtss, Xmm, Xmm, Mem)                      // AVX  AVX512_F{kz|er}
+  ASMJIT_INST_1x(vstmxcsr, Vstmxcsr, Mem)                              // AVX
+  ASMJIT_INST_3x(vsubpd, Vsubpd, Vec, Vec, Vec)                        // AVX  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vsubpd, Vsubpd, Vec, Vec, Mem)                        // AVX  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vsubps, Vsubps, Vec, Vec, Vec)                        // AVX  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vsubps, Vsubps, Vec, Vec, Mem)                        // AVX  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vsubsd, Vsubsd, Xmm, Xmm, Xmm)                        // AVX  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vsubsd, Vsubsd, Xmm, Xmm, Mem)                        // AVX  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vsubss, Vsubss, Xmm, Xmm, Xmm)                        // AVX  AVX512_F{kz|er}
+  ASMJIT_INST_3x(vsubss, Vsubss, Xmm, Xmm, Mem)                        // AVX  AVX512_F{kz|er}
+  ASMJIT_INST_2x(vtestpd, Vtestpd, Vec, Vec)                           // AVX
+  ASMJIT_INST_2x(vtestpd, Vtestpd, Vec, Mem)                           // AVX
+  ASMJIT_INST_2x(vtestps, Vtestps, Vec, Vec)                           // AVX
+  ASMJIT_INST_2x(vtestps, Vtestps, Vec, Mem)                           // AVX
+  ASMJIT_INST_2x(vucomisd, Vucomisd, Xmm, Xmm)                         // AVX  AVX512_F{sae}
+  ASMJIT_INST_2x(vucomisd, Vucomisd, Xmm, Mem)                         // AVX  AVX512_F{sae}
+  ASMJIT_INST_2x(vucomiss, Vucomiss, Xmm, Xmm)                         // AVX  AVX512_F{sae}
+  ASMJIT_INST_2x(vucomiss, Vucomiss, Xmm, Mem)                         // AVX  AVX512_F{sae}
+  ASMJIT_INST_3x(vunpckhpd, Vunpckhpd, Vec, Vec, Vec)                  // AVX  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vunpckhpd, Vunpckhpd, Vec, Vec, Mem)                  // AVX  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vunpckhps, Vunpckhps, Vec, Vec, Vec)                  // AVX  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vunpckhps, Vunpckhps, Vec, Vec, Mem)                  // AVX  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vunpcklpd, Vunpcklpd, Vec, Vec, Vec)                  // AVX  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vunpcklpd, Vunpcklpd, Vec, Vec, Mem)                  // AVX  AVX512_F{kz|b64}
+  ASMJIT_INST_3x(vunpcklps, Vunpcklps, Vec, Vec, Vec)                  // AVX  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vunpcklps, Vunpcklps, Vec, Vec, Mem)                  // AVX  AVX512_F{kz|b32}
+  ASMJIT_INST_3x(vxorpd, Vxorpd, Vec, Vec, Vec)                        // AVX  AVX512_DQ{kz|b64}
+  ASMJIT_INST_3x(vxorpd, Vxorpd, Vec, Vec, Mem)                        // AVX  AVX512_DQ{kz|b64}
+  ASMJIT_INST_3x(vxorps, Vxorps, Vec, Vec, Vec)                        // AVX  AVX512_DQ{kz|b32}
+  ASMJIT_INST_3x(vxorps, Vxorps, Vec, Vec, Mem)                        // AVX  AVX512_DQ{kz|b32}
+  ASMJIT_INST_0x(vzeroall, Vzeroall)                                   // AVX
+  ASMJIT_INST_0x(vzeroupper, Vzeroupper)                               // AVX
+
+  //! \}
+
+  //! \name FMA4 Instructions
+  //! \{
+
+  ASMJIT_INST_4x(vfmaddpd, Vfmaddpd, Vec, Vec, Vec, Vec)               // FMA4
+  ASMJIT_INST_4x(vfmaddpd, Vfmaddpd, Vec, Vec, Mem, Vec)               // FMA4
+  ASMJIT_INST_4x(vfmaddpd, Vfmaddpd, Vec, Vec, Vec, Mem)               // FMA4
+  ASMJIT_INST_4x(vfmaddps, Vfmaddps, Vec, Vec, Vec, Vec)               // FMA4
+  ASMJIT_INST_4x(vfmaddps, Vfmaddps, Vec, Vec, Mem, Vec)               // FMA4
+  ASMJIT_INST_4x(vfmaddps, Vfmaddps, Vec, Vec, Vec, Mem)               // FMA4
+  ASMJIT_INST_4x(vfmaddsd, Vfmaddsd, Xmm, Xmm, Xmm, Xmm)               // FMA4
+  ASMJIT_INST_4x(vfmaddsd, Vfmaddsd, Xmm, Xmm, Mem, Xmm)               // FMA4
+  ASMJIT_INST_4x(vfmaddsd, Vfmaddsd, Xmm, Xmm, Xmm, Mem)               // FMA4
+  ASMJIT_INST_4x(vfmaddss, Vfmaddss, Xmm, Xmm, Xmm, Xmm)               // FMA4
+  ASMJIT_INST_4x(vfmaddss, Vfmaddss, Xmm, Xmm, Mem, Xmm)               // FMA4
+  ASMJIT_INST_4x(vfmaddss, Vfmaddss, Xmm, Xmm, Xmm, Mem)               // FMA4
+  ASMJIT_INST_4x(vfmaddsubpd, Vfmaddsubpd, Vec, Vec, Vec, Vec)         // FMA4
+  ASMJIT_INST_4x(vfmaddsubpd, Vfmaddsubpd, Vec, Vec, Mem, Vec)         // FMA4
+  ASMJIT_INST_4x(vfmaddsubpd, Vfmaddsubpd, Vec, Vec, Vec, Mem)         // FMA4
+  ASMJIT_INST_4x(vfmaddsubps, Vfmaddsubps, Vec, Vec, Vec, Vec)         // FMA4
+  ASMJIT_INST_4x(vfmaddsubps, Vfmaddsubps, Vec, Vec, Mem, Vec)         // FMA4
+  ASMJIT_INST_4x(vfmaddsubps, Vfmaddsubps, Vec, Vec, Vec, Mem)         // FMA4
+  ASMJIT_INST_4x(vfmsubaddpd, Vfmsubaddpd, Vec, Vec, Vec, Vec)         // FMA4
+  ASMJIT_INST_4x(vfmsubaddpd, Vfmsubaddpd, Vec, Vec, Mem, Vec)         // FMA4
+  ASMJIT_INST_4x(vfmsubaddpd, Vfmsubaddpd, Vec, Vec, Vec, Mem)         // FMA4
+  ASMJIT_INST_4x(vfmsubaddps, Vfmsubaddps, Vec, Vec, Vec, Vec)         // FMA4
+  ASMJIT_INST_4x(vfmsubaddps, Vfmsubaddps, Vec, Vec, Mem, Vec)         // FMA4
+  ASMJIT_INST_4x(vfmsubaddps, Vfmsubaddps, Vec, Vec, Vec, Mem)         // FMA4
+  ASMJIT_INST_4x(vfmsubpd, Vfmsubpd, Vec, Vec, Vec, Vec)               // FMA4
+  ASMJIT_INST_4x(vfmsubpd, Vfmsubpd, Vec, Vec, Mem, Vec)               // FMA4
+  ASMJIT_INST_4x(vfmsubpd, Vfmsubpd, Vec, Vec, Vec, Mem)               // FMA4
+  ASMJIT_INST_4x(vfmsubps, Vfmsubps, Vec, Vec, Vec, Vec)               // FMA4
+  ASMJIT_INST_4x(vfmsubps, Vfmsubps, Vec, Vec, Mem, Vec)               // FMA4
+  ASMJIT_INST_4x(vfmsubps, Vfmsubps, Vec, Vec, Vec, Mem)               // FMA4
+  ASMJIT_INST_4x(vfmsubsd, Vfmsubsd, Xmm, Xmm, Xmm, Xmm)               // FMA4
+  ASMJIT_INST_4x(vfmsubsd, Vfmsubsd, Xmm, Xmm, Mem, Xmm)               // FMA4
+  ASMJIT_INST_4x(vfmsubsd, Vfmsubsd, Xmm, Xmm, Xmm, Mem)               // FMA4
+  ASMJIT_INST_4x(vfmsubss, Vfmsubss, Xmm, Xmm, Xmm, Xmm)               // FMA4
+  ASMJIT_INST_4x(vfmsubss, Vfmsubss, Xmm, Xmm, Mem, Xmm)               // FMA4
+  ASMJIT_INST_4x(vfmsubss, Vfmsubss, Xmm, Xmm, Xmm, Mem)               // FMA4
+  ASMJIT_INST_4x(vfnmaddpd, Vfnmaddpd, Vec, Vec, Vec, Vec)             // FMA4
+  ASMJIT_INST_4x(vfnmaddpd, Vfnmaddpd, Vec, Vec, Mem, Vec)             // FMA4
+  ASMJIT_INST_4x(vfnmaddpd, Vfnmaddpd, Vec, Vec, Vec, Mem)             // FMA4
+  ASMJIT_INST_4x(vfnmaddps, Vfnmaddps, Vec, Vec, Vec, Vec)             // FMA4
+  ASMJIT_INST_4x(vfnmaddps, Vfnmaddps, Vec, Vec, Mem, Vec)             // FMA4
+  ASMJIT_INST_4x(vfnmaddps, Vfnmaddps, Vec, Vec, Vec, Mem)             // FMA4
+  ASMJIT_INST_4x(vfnmaddsd, Vfnmaddsd, Xmm, Xmm, Xmm, Xmm)             // FMA4
+  ASMJIT_INST_4x(vfnmaddsd, Vfnmaddsd, Xmm, Xmm, Mem, Xmm)             // FMA4
+  ASMJIT_INST_4x(vfnmaddsd, Vfnmaddsd, Xmm, Xmm, Xmm, Mem)             // FMA4
+  ASMJIT_INST_4x(vfnmaddss, Vfnmaddss, Xmm, Xmm, Xmm, Xmm)             // FMA4
+  ASMJIT_INST_4x(vfnmaddss, Vfnmaddss, Xmm, Xmm, Mem, Xmm)             // FMA4
+  ASMJIT_INST_4x(vfnmaddss, Vfnmaddss, Xmm, Xmm, Xmm, Mem)             // FMA4
+  ASMJIT_INST_4x(vfnmsubpd, Vfnmsubpd, Vec, Vec, Vec, Vec)             // FMA4
+  ASMJIT_INST_4x(vfnmsubpd, Vfnmsubpd, Vec, Vec, Mem, Vec)             // FMA4
+  ASMJIT_INST_4x(vfnmsubpd, Vfnmsubpd, Vec, Vec, Vec, Mem)             // FMA4
+  ASMJIT_INST_4x(vfnmsubps, Vfnmsubps, Vec, Vec, Vec, Vec)             // FMA4
+  ASMJIT_INST_4x(vfnmsubps, Vfnmsubps, Vec, Vec, Mem, Vec)             // FMA4
+  ASMJIT_INST_4x(vfnmsubps, Vfnmsubps, Vec, Vec, Vec, Mem)             // FMA4
+  ASMJIT_INST_4x(vfnmsubsd, Vfnmsubsd, Xmm, Xmm, Xmm, Xmm)             // FMA4
+  ASMJIT_INST_4x(vfnmsubsd, Vfnmsubsd, Xmm, Xmm, Mem, Xmm)             // FMA4
+  ASMJIT_INST_4x(vfnmsubsd, Vfnmsubsd, Xmm, Xmm, Xmm, Mem)             // FMA4
+  ASMJIT_INST_4x(vfnmsubss, Vfnmsubss, Xmm, Xmm, Xmm, Xmm)             // FMA4
+  ASMJIT_INST_4x(vfnmsubss, Vfnmsubss, Xmm, Xmm, Mem, Xmm)             // FMA4
+  ASMJIT_INST_4x(vfnmsubss, Vfnmsubss, Xmm, Xmm, Xmm, Mem)             // FMA4
+
+  //! \}
+
+  //! \name XOP Instructions (Deprecated)
+  //! \{
+
+  ASMJIT_INST_2x(vfrczpd, Vfrczpd, Vec, Vec)                           // XOP
+  ASMJIT_INST_2x(vfrczpd, Vfrczpd, Vec, Mem)                           // XOP
+  ASMJIT_INST_2x(vfrczps, Vfrczps, Vec, Vec)                           // XOP
+  ASMJIT_INST_2x(vfrczps, Vfrczps, Vec, Mem)                           // XOP
+  ASMJIT_INST_2x(vfrczsd, Vfrczsd, Xmm, Xmm)                           // XOP
+  ASMJIT_INST_2x(vfrczsd, Vfrczsd, Xmm, Mem)                           // XOP
+  ASMJIT_INST_2x(vfrczss, Vfrczss, Xmm, Xmm)                           // XOP
+  ASMJIT_INST_2x(vfrczss, Vfrczss, Xmm, Mem)                           // XOP
+  ASMJIT_INST_4x(vpcmov, Vpcmov, Vec, Vec, Vec, Vec)                   // XOP
+  ASMJIT_INST_4x(vpcmov, Vpcmov, Vec, Vec, Mem, Vec)                   // XOP
+  ASMJIT_INST_4x(vpcmov, Vpcmov, Vec, Vec, Vec, Mem)                   // XOP
+  ASMJIT_INST_4x(vpcomb, Vpcomb, Xmm, Xmm, Xmm, Imm)                   // XOP
+  ASMJIT_INST_4x(vpcomb, Vpcomb, Xmm, Xmm, Mem, Imm)                   // XOP
+  ASMJIT_INST_4x(vpcomd, Vpcomd, Xmm, Xmm, Xmm, Imm)                   // XOP
+  ASMJIT_INST_4x(vpcomd, Vpcomd, Xmm, Xmm, Mem, Imm)                   // XOP
+  ASMJIT_INST_4x(vpcomq, Vpcomq, Xmm, Xmm, Xmm, Imm)                   // XOP
+  ASMJIT_INST_4x(vpcomq, Vpcomq, Xmm, Xmm, Mem, Imm)                   // XOP
+  ASMJIT_INST_4x(vpcomw, Vpcomw, Xmm, Xmm, Xmm, Imm)                   // XOP
+  ASMJIT_INST_4x(vpcomw, Vpcomw, Xmm, Xmm, Mem, Imm)                   // XOP
+  ASMJIT_INST_4x(vpcomub, Vpcomub, Xmm, Xmm, Xmm, Imm)                 // XOP
+  ASMJIT_INST_4x(vpcomub, Vpcomub, Xmm, Xmm, Mem, Imm)                 // XOP
+  ASMJIT_INST_4x(vpcomud, Vpcomud, Xmm, Xmm, Xmm, Imm)                 // XOP
+  ASMJIT_INST_4x(vpcomud, Vpcomud, Xmm, Xmm, Mem, Imm)                 // XOP
+  ASMJIT_INST_4x(vpcomuq, Vpcomuq, Xmm, Xmm, Xmm, Imm)                 // XOP
+  ASMJIT_INST_4x(vpcomuq, Vpcomuq, Xmm, Xmm, Mem, Imm)                 // XOP
+  ASMJIT_INST_4x(vpcomuw, Vpcomuw, Xmm, Xmm, Xmm, Imm)                 // XOP
+  ASMJIT_INST_4x(vpcomuw, Vpcomuw, Xmm, Xmm, Mem, Imm)                 // XOP
+  ASMJIT_INST_5x(vpermil2pd, Vpermil2pd, Vec, Vec, Vec, Vec, Imm)      // XOP
+  ASMJIT_INST_5x(vpermil2pd, Vpermil2pd, Vec, Vec, Mem, Vec, Imm)      // XOP
+  ASMJIT_INST_5x(vpermil2pd, Vpermil2pd, Vec, Vec, Vec, Mem, Imm)      // XOP
+  ASMJIT_INST_5x(vpermil2ps, Vpermil2ps, Vec, Vec, Vec, Vec, Imm)      // XOP
+  ASMJIT_INST_5x(vpermil2ps, Vpermil2ps, Vec, Vec, Mem, Vec, Imm)      // XOP
+  ASMJIT_INST_5x(vpermil2ps, Vpermil2ps, Vec, Vec, Vec, Mem, Imm)      // XOP
+  ASMJIT_INST_2x(vphaddbd, Vphaddbd, Xmm, Xmm)                         // XOP
+  ASMJIT_INST_2x(vphaddbd, Vphaddbd, Xmm, Mem)                         // XOP
+  ASMJIT_INST_2x(vphaddbq, Vphaddbq, Xmm, Xmm)                         // XOP
+  ASMJIT_INST_2x(vphaddbq, Vphaddbq, Xmm, Mem)                         // XOP
+  ASMJIT_INST_2x(vphaddbw, Vphaddbw, Xmm, Xmm)                         // XOP
+  ASMJIT_INST_2x(vphaddbw, Vphaddbw, Xmm, Mem)                         // XOP
+  ASMJIT_INST_2x(vphadddq, Vphadddq, Xmm, Xmm)                         // XOP
+  ASMJIT_INST_2x(vphadddq, Vphadddq, Xmm, Mem)                         // XOP
+  ASMJIT_INST_2x(vphaddwd, Vphaddwd, Xmm, Xmm)                         // XOP
+  ASMJIT_INST_2x(vphaddwd, Vphaddwd, Xmm, Mem)                         // XOP
+  ASMJIT_INST_2x(vphaddwq, Vphaddwq, Xmm, Xmm)                         // XOP
+  ASMJIT_INST_2x(vphaddwq, Vphaddwq, Xmm, Mem)                         // XOP
+  ASMJIT_INST_2x(vphaddubd, Vphaddubd, Xmm, Xmm)                       // XOP
+  ASMJIT_INST_2x(vphaddubd, Vphaddubd, Xmm, Mem)                       // XOP
+  ASMJIT_INST_2x(vphaddubq, Vphaddubq, Xmm, Xmm)                       // XOP
+  ASMJIT_INST_2x(vphaddubq, Vphaddubq, Xmm, Mem)                       // XOP
+  ASMJIT_INST_2x(vphaddubw, Vphaddubw, Xmm, Xmm)                       // XOP
+  ASMJIT_INST_2x(vphaddubw, Vphaddubw, Xmm, Mem)                       // XOP
+  ASMJIT_INST_2x(vphaddudq, Vphaddudq, Xmm, Xmm)                       // XOP
+  ASMJIT_INST_2x(vphaddudq, Vphaddudq, Xmm, Mem)                       // XOP
+  ASMJIT_INST_2x(vphadduwd, Vphadduwd, Xmm, Xmm)                       // XOP
+  ASMJIT_INST_2x(vphadduwd, Vphadduwd, Xmm, Mem)                       // XOP
+  ASMJIT_INST_2x(vphadduwq, Vphadduwq, Xmm, Xmm)                       // XOP
+  ASMJIT_INST_2x(vphadduwq, Vphadduwq, Xmm, Mem)                       // XOP
+  ASMJIT_INST_2x(vphsubbw, Vphsubbw, Xmm, Xmm)                         // XOP
+  ASMJIT_INST_2x(vphsubbw, Vphsubbw, Xmm, Mem)                         // XOP
+  ASMJIT_INST_2x(vphsubdq, Vphsubdq, Xmm, Xmm)                         // XOP
+  ASMJIT_INST_2x(vphsubdq, Vphsubdq, Xmm, Mem)                         // XOP
+  ASMJIT_INST_2x(vphsubwd, Vphsubwd, Xmm, Xmm)                         // XOP
+  ASMJIT_INST_2x(vphsubwd, Vphsubwd, Xmm, Mem)                         // XOP
+  ASMJIT_INST_4x(vpmacsdd, Vpmacsdd, Xmm, Xmm, Xmm, Xmm)               // XOP
+  ASMJIT_INST_4x(vpmacsdd, Vpmacsdd, Xmm, Xmm, Mem, Xmm)               // XOP
+  ASMJIT_INST_4x(vpmacsdqh, Vpmacsdqh, Xmm, Xmm, Xmm, Xmm)             // XOP
+  ASMJIT_INST_4x(vpmacsdqh, Vpmacsdqh, Xmm, Xmm, Mem, Xmm)             // XOP
+  ASMJIT_INST_4x(vpmacsdql, Vpmacsdql, Xmm, Xmm, Xmm, Xmm)             // XOP
+  ASMJIT_INST_4x(vpmacsdql, Vpmacsdql, Xmm, Xmm, Mem, Xmm)             // XOP
+  ASMJIT_INST_4x(vpmacswd, Vpmacswd, Xmm, Xmm, Xmm, Xmm)               // XOP
+  ASMJIT_INST_4x(vpmacswd, Vpmacswd, Xmm, Xmm, Mem, Xmm)               // XOP
+  ASMJIT_INST_4x(vpmacsww, Vpmacsww, Xmm, Xmm, Xmm, Xmm)               // XOP
+  ASMJIT_INST_4x(vpmacsww, Vpmacsww, Xmm, Xmm, Mem, Xmm)               // XOP
+  ASMJIT_INST_4x(vpmacssdd, Vpmacssdd, Xmm, Xmm, Xmm, Xmm)             // XOP
+  ASMJIT_INST_4x(vpmacssdd, Vpmacssdd, Xmm, Xmm, Mem, Xmm)             // XOP
+  ASMJIT_INST_4x(vpmacssdqh, Vpmacssdqh, Xmm, Xmm, Xmm, Xmm)           // XOP
+  ASMJIT_INST_4x(vpmacssdqh, Vpmacssdqh, Xmm, Xmm, Mem, Xmm)           // XOP
+  ASMJIT_INST_4x(vpmacssdql, Vpmacssdql, Xmm, Xmm, Xmm, Xmm)           // XOP
+  ASMJIT_INST_4x(vpmacssdql, Vpmacssdql, Xmm, Xmm, Mem, Xmm)           // XOP
+  ASMJIT_INST_4x(vpmacsswd, Vpmacsswd, Xmm, Xmm, Xmm, Xmm)             // XOP
+  ASMJIT_INST_4x(vpmacsswd, Vpmacsswd, Xmm, Xmm, Mem, Xmm)             // XOP
+  ASMJIT_INST_4x(vpmacssww, Vpmacssww, Xmm, Xmm, Xmm, Xmm)             // XOP
+  ASMJIT_INST_4x(vpmacssww, Vpmacssww, Xmm, Xmm, Mem, Xmm)             // XOP
+  ASMJIT_INST_4x(vpmadcsswd, Vpmadcsswd, Xmm, Xmm, Xmm, Xmm)           // XOP
+  ASMJIT_INST_4x(vpmadcsswd, Vpmadcsswd, Xmm, Xmm, Mem, Xmm)           // XOP
+  ASMJIT_INST_4x(vpmadcswd, Vpmadcswd, Xmm, Xmm, Xmm, Xmm)             // XOP
+  ASMJIT_INST_4x(vpmadcswd, Vpmadcswd, Xmm, Xmm, Mem, Xmm)             // XOP
+  ASMJIT_INST_4x(vpperm, Vpperm, Xmm, Xmm, Xmm, Xmm)                   // XOP
+  ASMJIT_INST_4x(vpperm, Vpperm, Xmm, Xmm, Mem, Xmm)                   // XOP
+  ASMJIT_INST_4x(vpperm, Vpperm, Xmm, Xmm, Xmm, Mem)                   // XOP
+  ASMJIT_INST_3x(vprotb, Vprotb, Xmm, Xmm, Xmm)                        // XOP
+  ASMJIT_INST_3x(vprotb, Vprotb, Xmm, Mem, Xmm)                        // XOP
+  ASMJIT_INST_3x(vprotb, Vprotb, Xmm, Xmm, Mem)                        // XOP
+  ASMJIT_INST_3x(vprotb, Vprotb, Xmm, Xmm, Imm)                        // XOP
+  ASMJIT_INST_3x(vprotb, Vprotb, Xmm, Mem, Imm)                        // XOP
+  ASMJIT_INST_3x(vprotd, Vprotd, Xmm, Xmm, Xmm)                        // XOP
+  ASMJIT_INST_3x(vprotd, Vprotd, Xmm, Mem, Xmm)                        // XOP
+  ASMJIT_INST_3x(vprotd, Vprotd, Xmm, Xmm, Mem)                        // XOP
+  ASMJIT_INST_3x(vprotd, Vprotd, Xmm, Xmm, Imm)                        // XOP
+  ASMJIT_INST_3x(vprotd, Vprotd, Xmm, Mem, Imm)                        // XOP
+  ASMJIT_INST_3x(vprotq, Vprotq, Xmm, Xmm, Xmm)                        // XOP
+  ASMJIT_INST_3x(vprotq, Vprotq, Xmm, Mem, Xmm)                        // XOP
+  ASMJIT_INST_3x(vprotq, Vprotq, Xmm, Xmm, Mem)                        // XOP
+  ASMJIT_INST_3x(vprotq, Vprotq, Xmm, Xmm, Imm)                        // XOP
+  ASMJIT_INST_3x(vprotq, Vprotq, Xmm, Mem, Imm)                        // XOP
+  ASMJIT_INST_3x(vprotw, Vprotw, Xmm, Xmm, Xmm)                        // XOP
+  ASMJIT_INST_3x(vprotw, Vprotw, Xmm, Mem, Xmm)                        // XOP
+  ASMJIT_INST_3x(vprotw, Vprotw, Xmm, Xmm, Mem)                        // XOP
+  ASMJIT_INST_3x(vprotw, Vprotw, Xmm, Xmm, Imm)                        // XOP
+  ASMJIT_INST_3x(vprotw, Vprotw, Xmm, Mem, Imm)                        // XOP
+  ASMJIT_INST_3x(vpshab, Vpshab, Xmm, Xmm, Xmm)                        // XOP
+  ASMJIT_INST_3x(vpshab, Vpshab, Xmm, Mem, Xmm)                        // XOP
+  ASMJIT_INST_3x(vpshab, Vpshab, Xmm, Xmm, Mem)                        // XOP
+  ASMJIT_INST_3x(vpshad, Vpshad, Xmm, Xmm, Xmm)                        // XOP
+  ASMJIT_INST_3x(vpshad, Vpshad, Xmm, Mem, Xmm)                        // XOP
+  ASMJIT_INST_3x(vpshad, Vpshad, Xmm, Xmm, Mem)                        // XOP
+  ASMJIT_INST_3x(vpshaq, Vpshaq, Xmm, Xmm, Xmm)                        // XOP
+  ASMJIT_INST_3x(vpshaq, Vpshaq, Xmm, Mem, Xmm)                        // XOP
+  ASMJIT_INST_3x(vpshaq, Vpshaq, Xmm, Xmm, Mem)                        // XOP
+  ASMJIT_INST_3x(vpshaw, Vpshaw, Xmm, Xmm, Xmm)                        // XOP
+  ASMJIT_INST_3x(vpshaw, Vpshaw, Xmm, Mem, Xmm)                        // XOP
+  ASMJIT_INST_3x(vpshaw, Vpshaw, Xmm, Xmm, Mem)                        // XOP
+  ASMJIT_INST_3x(vpshlb, Vpshlb, Xmm, Xmm, Xmm)                        // XOP
+  ASMJIT_INST_3x(vpshlb, Vpshlb, Xmm, Mem, Xmm)                        // XOP
+  ASMJIT_INST_3x(vpshlb, Vpshlb, Xmm, Xmm, Mem)                        // XOP
+  ASMJIT_INST_3x(vpshld, Vpshld, Xmm, Xmm, Xmm)                        // XOP
+  ASMJIT_INST_3x(vpshld, Vpshld, Xmm, Mem, Xmm)                        // XOP
+  ASMJIT_INST_3x(vpshld, Vpshld, Xmm, Xmm, Mem)                        // XOP
+  ASMJIT_INST_3x(vpshlq, Vpshlq, Xmm, Xmm, Xmm)                        // XOP
+  ASMJIT_INST_3x(vpshlq, Vpshlq, Xmm, Mem, Xmm)                        // XOP
+  ASMJIT_INST_3x(vpshlq, Vpshlq, Xmm, Xmm, Mem)                        // XOP
+  ASMJIT_INST_3x(vpshlw, Vpshlw, Xmm, Xmm, Xmm)                        // XOP
+  ASMJIT_INST_3x(vpshlw, Vpshlw, Xmm, Mem, Xmm)                        // XOP
+  ASMJIT_INST_3x(vpshlw, Vpshlw, Xmm, Xmm, Mem)                        // XOP
+
+  //! \}
+
+  //! \name AVX512_FP16 Instructions
+  //! \{
+
+  ASMJIT_INST_3x(vaddph, Vaddph, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vaddph, Vaddph, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vaddsh, Vaddsh, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vaddsh, Vaddsh, Vec, Vec, Mem)
+  ASMJIT_INST_4x(vcmpph, Vcmpph, KReg, Vec, Vec, Imm)
+  ASMJIT_INST_4x(vcmpph, Vcmpph, KReg, Vec, Mem, Imm)
+  ASMJIT_INST_4x(vcmpsh, Vcmpsh, KReg, Vec, Vec, Imm)
+  ASMJIT_INST_4x(vcmpsh, Vcmpsh, KReg, Vec, Mem, Imm)
+  ASMJIT_INST_2x(vcomish, Vcomish, Vec, Vec)
+  ASMJIT_INST_2x(vcomish, Vcomish, Vec, Mem)
+  ASMJIT_INST_2x(vcvtdq2ph, Vcvtdq2ph, Vec, Vec)
+  ASMJIT_INST_2x(vcvtdq2ph, Vcvtdq2ph, Vec, Mem)
+  ASMJIT_INST_2x(vcvtpd2ph, Vcvtpd2ph, Vec, Vec)
+  ASMJIT_INST_2x(vcvtpd2ph, Vcvtpd2ph, Vec, Mem)
+  ASMJIT_INST_2x(vcvtph2dq, Vcvtph2dq, Vec, Vec)
+  ASMJIT_INST_2x(vcvtph2dq, Vcvtph2dq, Vec, Mem)
+  ASMJIT_INST_2x(vcvtph2pd, Vcvtph2pd, Vec, Vec)
+  ASMJIT_INST_2x(vcvtph2pd, Vcvtph2pd, Vec, Mem)
+  ASMJIT_INST_2x(vcvtph2psx, Vcvtph2psx, Vec, Vec)
+  ASMJIT_INST_2x(vcvtph2psx, Vcvtph2psx, Vec, Mem)
+  ASMJIT_INST_2x(vcvtph2qq, Vcvtph2qq, Vec, Vec)
+  ASMJIT_INST_2x(vcvtph2qq, Vcvtph2qq, Vec, Mem)
+  ASMJIT_INST_2x(vcvtph2udq, Vcvtph2udq, Vec, Vec)
+  ASMJIT_INST_2x(vcvtph2udq, Vcvtph2udq, Vec, Mem)
+  ASMJIT_INST_2x(vcvtph2uqq, Vcvtph2uqq, Vec, Vec)
+  ASMJIT_INST_2x(vcvtph2uqq, Vcvtph2uqq, Vec, Mem)
+  ASMJIT_INST_2x(vcvtph2uw, Vcvtph2uw, Vec, Vec)
+  ASMJIT_INST_2x(vcvtph2uw, Vcvtph2uw, Vec, Mem)
+  ASMJIT_INST_2x(vcvtph2w, Vcvtph2w, Vec, Vec)
+  ASMJIT_INST_2x(vcvtph2w, Vcvtph2w, Vec, Mem)
+  ASMJIT_INST_2x(vcvtps2phx, Vcvtps2phx, Vec, Vec)
+  ASMJIT_INST_2x(vcvtps2phx, Vcvtps2phx, Vec, Mem)
+  ASMJIT_INST_2x(vcvtqq2ph, Vcvtqq2ph, Vec, Vec)
+  ASMJIT_INST_2x(vcvtqq2ph, Vcvtqq2ph, Vec, Mem)
+  ASMJIT_INST_3x(vcvtsd2sh, Vcvtsd2sh, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vcvtsd2sh, Vcvtsd2sh, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vcvtsh2sd, Vcvtsh2sd, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vcvtsh2sd, Vcvtsh2sd, Vec, Vec, Mem)
+  ASMJIT_INST_2x(vcvtsh2si, Vcvtsh2si, Gp, Vec)
+  ASMJIT_INST_2x(vcvtsh2si, Vcvtsh2si, Gp, Mem)
+  ASMJIT_INST_3x(vcvtsh2ss, Vcvtsh2ss, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vcvtsh2ss, Vcvtsh2ss, Vec, Vec, Mem)
+  ASMJIT_INST_2x(vcvtsh2usi, Vcvtsh2usi, Gp, Vec)
+  ASMJIT_INST_2x(vcvtsh2usi, Vcvtsh2usi, Gp, Mem)
+  ASMJIT_INST_3x(vcvtsi2sh, Vcvtsi2sh, Vec, Vec, Gp)
+  ASMJIT_INST_3x(vcvtsi2sh, Vcvtsi2sh, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vcvtss2sh, Vcvtss2sh, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vcvtss2sh, Vcvtss2sh, Vec, Vec, Mem)
+  ASMJIT_INST_2x(vcvttph2dq, Vcvttph2dq, Vec, Vec)
+  ASMJIT_INST_2x(vcvttph2dq, Vcvttph2dq, Vec, Mem)
+  ASMJIT_INST_2x(vcvttph2qq, Vcvttph2qq, Vec, Vec)
+  ASMJIT_INST_2x(vcvttph2qq, Vcvttph2qq, Vec, Mem)
+  ASMJIT_INST_2x(vcvttph2udq, Vcvttph2udq, Vec, Vec)
+  ASMJIT_INST_2x(vcvttph2udq, Vcvttph2udq, Vec, Mem)
+  ASMJIT_INST_2x(vcvttph2uqq, Vcvttph2uqq, Vec, Vec)
+  ASMJIT_INST_2x(vcvttph2uqq, Vcvttph2uqq, Vec, Mem)
+  ASMJIT_INST_2x(vcvttph2uw, Vcvttph2uw, Vec, Vec)
+  ASMJIT_INST_2x(vcvttph2uw, Vcvttph2uw, Vec, Mem)
+  ASMJIT_INST_2x(vcvttph2w, Vcvttph2w, Vec, Vec)
+  ASMJIT_INST_2x(vcvttph2w, Vcvttph2w, Vec, Mem)
+  ASMJIT_INST_2x(vcvttsh2si, Vcvttsh2si, Gp, Vec)
+  ASMJIT_INST_2x(vcvttsh2si, Vcvttsh2si, Gp, Mem)
+  ASMJIT_INST_2x(vcvttsh2usi, Vcvttsh2usi, Gp, Vec)
+  ASMJIT_INST_2x(vcvttsh2usi, Vcvttsh2usi, Gp, Mem)
+  ASMJIT_INST_2x(vcvtudq2ph, Vcvtudq2ph, Vec, Vec)
+  ASMJIT_INST_2x(vcvtudq2ph, Vcvtudq2ph, Vec, Mem)
+  ASMJIT_INST_2x(vcvtuqq2ph, Vcvtuqq2ph, Vec, Vec)
+  ASMJIT_INST_2x(vcvtuqq2ph, Vcvtuqq2ph, Vec, Mem)
+  ASMJIT_INST_3x(vcvtusi2sh, Vcvtusi2sh, Vec, Vec, Gp)
+  ASMJIT_INST_3x(vcvtusi2sh, Vcvtusi2sh, Vec, Vec, Mem)
+  ASMJIT_INST_2x(vcvtuw2ph, Vcvtuw2ph, Vec, Vec)
+  ASMJIT_INST_2x(vcvtuw2ph, Vcvtuw2ph, Vec, Mem)
+  ASMJIT_INST_2x(vcvtw2ph, Vcvtw2ph, Vec, Vec)
+  ASMJIT_INST_2x(vcvtw2ph, Vcvtw2ph, Vec, Mem)
+  ASMJIT_INST_3x(vdivph, Vdivph, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vdivph, Vdivph, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vdivsh, Vdivsh, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vdivsh, Vdivsh, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vfcmaddcph, Vfcmaddcph, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vfcmaddcph, Vfcmaddcph, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vfcmaddcsh, Vfcmaddcsh, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vfcmaddcsh, Vfcmaddcsh, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vfcmulcph, Vfcmulcph, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vfcmulcph, Vfcmulcph, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vfcmulcsh, Vfcmulcsh, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vfcmulcsh, Vfcmulcsh, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vfmadd132ph, Vfmadd132ph, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vfmadd132ph, Vfmadd132ph, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vfmadd132sh, Vfmadd132sh, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vfmadd132sh, Vfmadd132sh, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vfmadd213ph, Vfmadd213ph, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vfmadd213ph, Vfmadd213ph, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vfmadd213sh, Vfmadd213sh, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vfmadd213sh, Vfmadd213sh, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vfmadd231ph, Vfmadd231ph, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vfmadd231ph, Vfmadd231ph, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vfmadd231sh, Vfmadd231sh, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vfmadd231sh, Vfmadd231sh, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vfmaddcph, Vfmaddcph, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vfmaddcph, Vfmaddcph, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vfmaddcsh, Vfmaddcsh, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vfmaddcsh, Vfmaddcsh, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vfmaddsub132ph, Vfmaddsub132ph, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vfmaddsub132ph, Vfmaddsub132ph, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vfmaddsub213ph, Vfmaddsub213ph, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vfmaddsub213ph, Vfmaddsub213ph, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vfmaddsub231ph, Vfmaddsub231ph, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vfmaddsub231ph, Vfmaddsub231ph, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vfmsub132ph, Vfmsub132ph, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vfmsub132ph, Vfmsub132ph, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vfmsub132sh, Vfmsub132sh, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vfmsub132sh, Vfmsub132sh, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vfmsub213ph, Vfmsub213ph, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vfmsub213ph, Vfmsub213ph, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vfmsub213sh, Vfmsub213sh, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vfmsub213sh, Vfmsub213sh, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vfmsub231ph, Vfmsub231ph, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vfmsub231ph, Vfmsub231ph, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vfmsub231sh, Vfmsub231sh, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vfmsub231sh, Vfmsub231sh, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vfmsubadd132ph, Vfmsubadd132ph, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vfmsubadd132ph, Vfmsubadd132ph, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vfmsubadd213ph, Vfmsubadd213ph, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vfmsubadd213ph, Vfmsubadd213ph, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vfmsubadd231ph, Vfmsubadd231ph, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vfmsubadd231ph, Vfmsubadd231ph, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vfmulcph, Vfmulcph, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vfmulcph, Vfmulcph, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vfmulcsh, Vfmulcsh, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vfmulcsh, Vfmulcsh, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vfnmadd132ph, Vfnmadd132ph, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vfnmadd132ph, Vfnmadd132ph, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vfnmadd132sh, Vfnmadd132sh, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vfnmadd132sh, Vfnmadd132sh, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vfnmadd213ph, Vfnmadd213ph, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vfnmadd213ph, Vfnmadd213ph, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vfnmadd213sh, Vfnmadd213sh, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vfnmadd213sh, Vfnmadd213sh, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vfnmadd231ph, Vfnmadd231ph, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vfnmadd231ph, Vfnmadd231ph, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vfnmadd231sh, Vfnmadd231sh, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vfnmadd231sh, Vfnmadd231sh, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vfnmsub132ph, Vfnmsub132ph, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vfnmsub132ph, Vfnmsub132ph, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vfnmsub132sh, Vfnmsub132sh, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vfnmsub132sh, Vfnmsub132sh, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vfnmsub213ph, Vfnmsub213ph, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vfnmsub213ph, Vfnmsub213ph, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vfnmsub213sh, Vfnmsub213sh, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vfnmsub213sh, Vfnmsub213sh, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vfnmsub231ph, Vfnmsub231ph, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vfnmsub231ph, Vfnmsub231ph, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vfnmsub231sh, Vfnmsub231sh, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vfnmsub231sh, Vfnmsub231sh, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vfpclassph, Vfpclassph, KReg, Vec, Imm)
+  ASMJIT_INST_3x(vfpclassph, Vfpclassph, KReg, Mem, Imm)
+  ASMJIT_INST_3x(vfpclasssh, Vfpclasssh, KReg, Vec, Imm)
+  ASMJIT_INST_3x(vfpclasssh, Vfpclasssh, KReg, Mem, Imm)
+  ASMJIT_INST_2x(vgetexpph, Vgetexpph, Vec, Vec)
+  ASMJIT_INST_2x(vgetexpph, Vgetexpph, Vec, Mem)
+  ASMJIT_INST_3x(vgetexpsh, Vgetexpsh, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vgetexpsh, Vgetexpsh, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vgetmantph, Vgetmantph, Vec, Vec, Imm)
+  ASMJIT_INST_3x(vgetmantph, Vgetmantph, Vec, Mem, Imm)
+  ASMJIT_INST_4x(vgetmantsh, Vgetmantsh, Vec, Vec, Vec, Imm)
+  ASMJIT_INST_4x(vgetmantsh, Vgetmantsh, Vec, Vec, Mem, Imm)
+  ASMJIT_INST_3x(vmaxph, Vmaxph, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vmaxph, Vmaxph, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vmaxsh, Vmaxsh, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vmaxsh, Vmaxsh, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vminph, Vminph, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vminph, Vminph, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vminsh, Vminsh, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vminsh, Vminsh, Vec, Vec, Mem)
+  ASMJIT_INST_2x(vmovsh, Vmovsh, Mem, Xmm)
+  ASMJIT_INST_2x(vmovsh, Vmovsh, Xmm, Mem)
+  ASMJIT_INST_3x(vmovsh, Vmovsh, Xmm, Xmm, Xmm)
+  ASMJIT_INST_2x(vmovw, Vmovw, Gp, Xmm)
+  ASMJIT_INST_2x(vmovw, Vmovw, Mem, Xmm)
+  ASMJIT_INST_2x(vmovw, Vmovw, Xmm, Gp)
+  ASMJIT_INST_2x(vmovw, Vmovw, Xmm, Mem)
+  ASMJIT_INST_3x(vmulph, Vmulph, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vmulph, Vmulph, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vmulsh, Vmulsh, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vmulsh, Vmulsh, Vec, Vec, Mem)
+  ASMJIT_INST_2x(vrcpph, Vrcpph, Vec, Vec)
+  ASMJIT_INST_2x(vrcpph, Vrcpph, Vec, Mem)
+  ASMJIT_INST_3x(vrcpsh, Vrcpsh, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vrcpsh, Vrcpsh, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vreduceph, Vreduceph, Vec, Vec, Imm)
+  ASMJIT_INST_3x(vreduceph, Vreduceph, Vec, Mem, Imm)
+  ASMJIT_INST_4x(vreducesh, Vreducesh, Vec, Vec, Vec, Imm)
+  ASMJIT_INST_4x(vreducesh, Vreducesh, Vec, Vec, Mem, Imm)
+  ASMJIT_INST_3x(vrndscaleph, Vrndscaleph, Vec, Vec, Imm)
+  ASMJIT_INST_3x(vrndscaleph, Vrndscaleph, Vec, Mem, Imm)
+  ASMJIT_INST_4x(vrndscalesh, Vrndscalesh, Vec, Vec, Vec, Imm)
+  ASMJIT_INST_4x(vrndscalesh, Vrndscalesh, Vec, Vec, Mem, Imm)
+  ASMJIT_INST_2x(vrsqrtph, Vrsqrtph, Vec, Vec)
+  ASMJIT_INST_2x(vrsqrtph, Vrsqrtph, Vec, Mem)
+  ASMJIT_INST_3x(vrsqrtsh, Vrsqrtsh, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vrsqrtsh, Vrsqrtsh, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vscalefph, Vscalefph, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vscalefph, Vscalefph, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vscalefsh, Vscalefsh, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vscalefsh, Vscalefsh, Vec, Vec, Mem)
+  ASMJIT_INST_2x(vsqrtph, Vsqrtph, Vec, Vec)
+  ASMJIT_INST_2x(vsqrtph, Vsqrtph, Vec, Mem)
+  ASMJIT_INST_3x(vsqrtsh, Vsqrtsh, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vsqrtsh, Vsqrtsh, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vsubph, Vsubph, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vsubph, Vsubph, Vec, Vec, Mem)
+  ASMJIT_INST_3x(vsubsh, Vsubsh, Vec, Vec, Vec)
+  ASMJIT_INST_3x(vsubsh, Vsubsh, Vec, Vec, Mem)
+  ASMJIT_INST_2x(vucomish, Vucomish, Vec, Vec)
+  ASMJIT_INST_2x(vucomish, Vucomish, Vec, Mem)
+
+  //! \}
+
+  //! \name AMX Instructions
+  //! \{
+
+  ASMJIT_INST_1x(ldtilecfg, Ldtilecfg, Mem)                            // AMX_TILE
+  ASMJIT_INST_1x(sttilecfg, Sttilecfg, Mem)                            // AMX_TILE
+  ASMJIT_INST_2x(tileloadd, Tileloadd, Tmm, Mem)                       // AMX_TILE
+  ASMJIT_INST_2x(tileloaddt1, Tileloaddt1, Tmm, Mem)                   // AMX_TILE
+  ASMJIT_INST_0x(tilerelease, Tilerelease)                             // AMX_TILE
+  ASMJIT_INST_2x(tilestored, Tilestored, Mem, Tmm)                     // AMX_TILE
+  ASMJIT_INST_1x(tilezero, Tilezero, Tmm)                              // AMX_TILE
+
+  ASMJIT_INST_3x(tdpbf16ps, Tdpbf16ps, Tmm, Tmm, Tmm)                  // AMX_BF16
+  ASMJIT_INST_3x(tdpbssd, Tdpbssd, Tmm, Tmm, Tmm)                      // AMX_INT8
+  ASMJIT_INST_3x(tdpbsud, Tdpbsud, Tmm, Tmm, Tmm)                      // AMX_INT8
+  ASMJIT_INST_3x(tdpbusd, Tdpbusd, Tmm, Tmm, Tmm)                      // AMX_INT8
+  ASMJIT_INST_3x(tdpbuud, Tdpbuud, Tmm, Tmm, Tmm)                      // AMX_INT8
+
+  //! \}
+};
+
+//! Emitter (X86 - implicit).
+template<typename This>
+struct EmitterImplicitT : public EmitterExplicitT<This> {
+  //! \cond
+  using EmitterExplicitT<This>::_emitter;
+  //! \endcond
+
+  //! \name Prefix Options
+  //! \{
+
+  //! Use REP/REPE prefix - adds `InstOptions::kX86_Rep` through `_addInstOptions()` and returns its result.
+  inline This& rep() noexcept { return EmitterExplicitT<This>::_addInstOptions(InstOptions::kX86_Rep); }
+  //! Use REP/REPE prefix (alias that forwards to `rep()`).
+  inline This& repe() noexcept { return rep(); }
+  //! Use REP/REPE prefix (alias that forwards to `rep()`).
+  inline This& repz() noexcept { return rep(); }
+
+  //! Use REPNE prefix - adds `InstOptions::kX86_Repne` through `_addInstOptions()` and returns its result.
+  inline This& repne() noexcept { return EmitterExplicitT<This>::_addInstOptions(InstOptions::kX86_Repne); }
+  //! Use REPNE prefix (alias that forwards to `repne()`).
+  inline This& repnz() noexcept { return repne(); }
+
+  //! \}
+
+  //! \name Core Instructions
+  //! \{
+
+  //! \cond
+  using EmitterExplicitT<This>::cbw;
+  using EmitterExplicitT<This>::cdq;
+  using EmitterExplicitT<This>::cdqe;
+  using EmitterExplicitT<This>::cqo;
+  using EmitterExplicitT<This>::cwd;
+  using EmitterExplicitT<This>::cwde;
+  using EmitterExplicitT<This>::cmpsd;
+  using EmitterExplicitT<This>::cmpxchg;
+  using EmitterExplicitT<This>::cmpxchg8b;
+  using EmitterExplicitT<This>::cmpxchg16b;
+  using EmitterExplicitT<This>::div;
+  using EmitterExplicitT<This>::idiv;
+  using EmitterExplicitT<This>::imul;
+  using EmitterExplicitT<This>::jecxz;
+  using EmitterExplicitT<This>::loop;
+  using EmitterExplicitT<This>::loope;
+  using EmitterExplicitT<This>::loopne;
+  using EmitterExplicitT<This>::mul;
+  //! \endcond
+
+  ASMJIT_INST_0x(cbw, Cbw)                                             // ANY       [IMPLICIT] AX      <- Sign Extend AL
+  ASMJIT_INST_0x(cdq, Cdq)                                             // ANY       [IMPLICIT] EDX:EAX <- Sign Extend EAX
+  ASMJIT_INST_0x(cdqe, Cdqe)                                           // X64       [IMPLICIT] RAX     <- Sign Extend EAX
+  ASMJIT_INST_2x(cmpxchg, Cmpxchg, Gp, Gp)                             // I486      [IMPLICIT]
+  ASMJIT_INST_2x(cmpxchg, Cmpxchg, Mem, Gp)                            // I486      [IMPLICIT]
+  ASMJIT_INST_1x(cmpxchg16b, Cmpxchg16b, Mem)                          // CMPXCHG16B[IMPLICIT] m == RDX:RAX ? m <- RCX:RBX
+  ASMJIT_INST_1x(cmpxchg8b, Cmpxchg8b, Mem)                            // CMPXCHG8B [IMPLICIT] m == EDX:EAX ? m <- ECX:EBX
+  ASMJIT_INST_0x(cqo, Cqo)                                             // X64       [IMPLICIT] RDX:RAX <- Sign Extend RAX
+  ASMJIT_INST_0x(cwd, Cwd)                                             // ANY       [IMPLICIT] DX:AX   <- Sign Extend AX
+  ASMJIT_INST_0x(cwde, Cwde)                                           // ANY       [IMPLICIT] EAX     <- Sign Extend AX
+  ASMJIT_INST_1x(div, Div, Gp)                                         // ANY       [IMPLICIT] {AH[Rem]:AL[Quot] <- AX / r8} {xDX[Rem]:xAX[Quot] <- xDX:xAX / r16|r32|r64}
+  ASMJIT_INST_1x(div, Div, Mem)                                        // ANY       [IMPLICIT] {AH[Rem]:AL[Quot] <- AX / m8} {xDX[Rem]:xAX[Quot] <- xDX:xAX / m16|m32|m64}
+  ASMJIT_INST_1x(idiv, Idiv, Gp)                                       // ANY       [IMPLICIT] {AH[Rem]:AL[Quot] <- AX / r8} {xDX[Rem]:xAX[Quot] <- xDX:xAX / r16|r32|r64}
+  ASMJIT_INST_1x(idiv, Idiv, Mem)                                      // ANY       [IMPLICIT] {AH[Rem]:AL[Quot] <- AX / m8} {xDX[Rem]:xAX[Quot] <- xDX:xAX / m16|m32|m64}
+  ASMJIT_INST_1x(imul, Imul, Gp)                                       // ANY       [IMPLICIT] {AX <- AL * r8} {xDX:xAX <- xAX * r16|r32|r64}
+  ASMJIT_INST_1x(imul, Imul, Mem)                                      // ANY       [IMPLICIT] {AX <- AL * m8} {xDX:xAX <- xAX * m16|m32|m64}
+  ASMJIT_INST_0x(iret, Iret)                                           // ANY       [IMPLICIT]
+  ASMJIT_INST_0x(iretd, Iretd)                                         // ANY       [IMPLICIT]
+  ASMJIT_INST_0x(iretq, Iretq)                                         // X64       [IMPLICIT]
+  ASMJIT_INST_1x(jecxz, Jecxz, Label)                                  // ANY       [IMPLICIT] Short jump if CX/ECX/RCX is zero.
+  ASMJIT_INST_1x(jecxz, Jecxz, Imm)                                    // ANY       [IMPLICIT] Short jump if CX/ECX/RCX is zero.
+  ASMJIT_INST_1x(loop, Loop, Label)                                    // ANY       [IMPLICIT] Decrement xCX; short jump if xCX != 0.
+  ASMJIT_INST_1x(loop, Loop, Imm)                                      // ANY       [IMPLICIT] Decrement xCX; short jump if xCX != 0.
+  ASMJIT_INST_1x(loope, Loope, Label)                                  // ANY       [IMPLICIT] Decrement xCX; short jump if xCX != 0 && ZF == 1.
+  ASMJIT_INST_1x(loope, Loope, Imm)                                    // ANY       [IMPLICIT] Decrement xCX; short jump if xCX != 0 && ZF == 1.
+  ASMJIT_INST_1x(loopne, Loopne, Label)                                // ANY       [IMPLICIT] Decrement xCX; short jump if xCX != 0 && ZF == 0.
+  ASMJIT_INST_1x(loopne, Loopne, Imm)                                  // ANY       [IMPLICIT] Decrement xCX; short jump if xCX != 0 && ZF == 0.
+  ASMJIT_INST_1x(mul, Mul, Gp)                                         // ANY       [IMPLICIT] {AX <- AL * r8} {xDX:xAX <- xAX * r16|r32|r64}
+  ASMJIT_INST_1x(mul, Mul, Mem)                                        // ANY       [IMPLICIT] {AX <- AL * m8} {xDX:xAX <- xAX * m16|m32|m64}
+  ASMJIT_INST_0x(ret, Ret)
+  ASMJIT_INST_1x(ret, Ret, Imm)
+  ASMJIT_INST_0x(retf, Retf)
+  ASMJIT_INST_1x(retf, Retf, Imm)
+  ASMJIT_INST_0x(xlatb, Xlatb)                                         // ANY       [IMPLICIT]
+
+  //! \}
+
+  //! \name String Instruction Aliases
+  //! \{
+
+  //! \cond
+  using EmitterExplicitT<This>::movsd;
+  //! \endcond
+
+  inline Error cmpsb() { return _emitter()->emit(Inst::kIdCmps, EmitterExplicitT<This>::ptr_zsi(0, 1), EmitterExplicitT<This>::ptr_zdi(0, 1)); }  // CMPS alias; second ptr argument 1 (presumably the element size in bytes - confirm against ptr_zsi/ptr_zdi).
+  inline Error cmpsd() { return _emitter()->emit(Inst::kIdCmps, EmitterExplicitT<This>::ptr_zsi(0, 4), EmitterExplicitT<This>::ptr_zdi(0, 4)); }  // CMPS alias; second ptr argument 4. Shares its name with the cmpsd imported from EmitterExplicitT by 'using'.
+  inline Error cmpsq() { return _emitter()->emit(Inst::kIdCmps, EmitterExplicitT<This>::ptr_zsi(0, 8), EmitterExplicitT<This>::ptr_zdi(0, 8)); }  // CMPS alias; second ptr argument 8.
+  inline Error cmpsw() { return _emitter()->emit(Inst::kIdCmps, EmitterExplicitT<This>::ptr_zsi(0, 2), EmitterExplicitT<This>::ptr_zdi(0, 2)); }  // CMPS alias; second ptr argument 2.
+
+  inline Error lodsb() { return _emitter()->emit(Inst::kIdLods, al , EmitterExplicitT<This>::ptr_zsi(0, 1)); }  // LODS alias; operands al and ptr_zsi(0, 1).
+  inline Error lodsd() { return _emitter()->emit(Inst::kIdLods, eax, EmitterExplicitT<This>::ptr_zsi(0, 4)); }  // LODS alias; operands eax and ptr_zsi(0, 4).
+  inline Error lodsq() { return _emitter()->emit(Inst::kIdLods, rax, EmitterExplicitT<This>::ptr_zsi(0, 8)); }  // LODS alias; operands rax and ptr_zsi(0, 8).
+  inline Error lodsw() { return _emitter()->emit(Inst::kIdLods, ax , EmitterExplicitT<This>::ptr_zsi(0, 2)); }  // LODS alias; operands ax and ptr_zsi(0, 2).
+
+  inline Error movsb() { return _emitter()->emit(Inst::kIdMovs, EmitterExplicitT<This>::ptr_zdi(0, 1), EmitterExplicitT<This>::ptr_zsi(0, 1)); }  // MOVS alias; second ptr argument 1.
+  inline Error movsd() { return _emitter()->emit(Inst::kIdMovs, EmitterExplicitT<This>::ptr_zdi(0, 4), EmitterExplicitT<This>::ptr_zsi(0, 4)); }  // MOVS alias; second ptr argument 4. Shares its name with the movsd imported from EmitterExplicitT by 'using'.
+  inline Error movsq() { return _emitter()->emit(Inst::kIdMovs, EmitterExplicitT<This>::ptr_zdi(0, 8), EmitterExplicitT<This>::ptr_zsi(0, 8)); }  // MOVS alias; second ptr argument 8.
+  inline Error movsw() { return _emitter()->emit(Inst::kIdMovs, EmitterExplicitT<This>::ptr_zdi(0, 2), EmitterExplicitT<This>::ptr_zsi(0, 2)); }  // MOVS alias; second ptr argument 2.
+
+  inline Error scasb() { return _emitter()->emit(Inst::kIdScas, al , EmitterExplicitT<This>::ptr_zdi(0, 1)); }  // SCAS alias; operands al and ptr_zdi(0, 1).
+  inline Error scasd() { return _emitter()->emit(Inst::kIdScas, eax, EmitterExplicitT<This>::ptr_zdi(0, 4)); }  // SCAS alias; operands eax and ptr_zdi(0, 4).
+  inline Error scasq() { return _emitter()->emit(Inst::kIdScas, rax, EmitterExplicitT<This>::ptr_zdi(0, 8)); }  // SCAS alias; operands rax and ptr_zdi(0, 8).
+  inline Error scasw() { return _emitter()->emit(Inst::kIdScas, ax , EmitterExplicitT<This>::ptr_zdi(0, 2)); }  // SCAS alias; operands ax and ptr_zdi(0, 2).
+
+  inline Error stosb() { return _emitter()->emit(Inst::kIdStos, EmitterExplicitT<This>::ptr_zdi(0, 1), al ); }  // STOS alias; operands ptr_zdi(0, 1) and al.
+  inline Error stosd() { return _emitter()->emit(Inst::kIdStos, EmitterExplicitT<This>::ptr_zdi(0, 4), eax); }  // STOS alias; operands ptr_zdi(0, 4) and eax.
+  inline Error stosq() { return _emitter()->emit(Inst::kIdStos, EmitterExplicitT<This>::ptr_zdi(0, 8), rax); }  // STOS alias; operands ptr_zdi(0, 8) and rax.
+  inline Error stosw() { return _emitter()->emit(Inst::kIdStos, EmitterExplicitT<This>::ptr_zdi(0, 2), ax ); }  // STOS alias; operands ptr_zdi(0, 2) and ax.
+
+  //! \}
+
+  //! \name Deprecated 32-bit Instructions
+  //! \{
+
+  //! \cond
+  using EmitterExplicitT<This>::aaa;
+  using EmitterExplicitT<This>::aad;
+  using EmitterExplicitT<This>::aam;
+  using EmitterExplicitT<This>::aas;
+  using EmitterExplicitT<This>::daa;
+  using EmitterExplicitT<This>::das;
+  //! \endcond
+
+  ASMJIT_INST_0x(aaa, Aaa)                                             // X86 [IMPLICIT]
+  ASMJIT_INST_1x(aad, Aad, Imm)                                        // X86 [IMPLICIT]
+  ASMJIT_INST_1x(aam, Aam, Imm)                                        // X86 [IMPLICIT]
+  ASMJIT_INST_0x(aas, Aas)                                             // X86 [IMPLICIT]
+  ASMJIT_INST_0x(daa, Daa)                                             // X86 [IMPLICIT]
+  ASMJIT_INST_0x(das, Das)                                             // X86 [IMPLICIT]
+
+  //! \}
+
+  //! \name LAHF/SAHF Instructions
+  //! \{
+
+  //! \cond
+  using EmitterExplicitT<This>::lahf;
+  using EmitterExplicitT<This>::sahf;
+  //! \endcond
+
+  ASMJIT_INST_0x(lahf, Lahf)                                           // LAHFSAHF  [IMPLICIT] AH <- EFL
+  ASMJIT_INST_0x(sahf, Sahf)                                           // LAHFSAHF  [IMPLICIT] EFL <- AH
+
+  //! \}
+
+  //! \name CPUID Instruction
+  //! \{
+
+  //! \cond
+  using EmitterExplicitT<This>::cpuid;
+  //! \endcond
+
+  ASMJIT_INST_0x(cpuid, Cpuid)                                         // I486      [IMPLICIT] EAX:EBX:ECX:EDX  <- CPUID[EAX:ECX]
+
+  //! \}
+
+  //! \name CacheLine Instructions
+  //! \{
+
+  //! \cond
+  using EmitterExplicitT<This>::clzero;
+  //! \endcond
+
+  ASMJIT_INST_0x(clzero, Clzero)                                       // CLZERO    [IMPLICIT]
+
+  //! \}
+
+  //! \name RDPRU/RDPKRU Instructions
+  //! \{
+
+  //! \cond
+  using EmitterExplicitT<This>::rdpru;
+  using EmitterExplicitT<This>::rdpkru;
+  //! \endcond
+
+  ASMJIT_INST_0x(rdpru, Rdpru)                                         // RDPRU     [IMPLICIT] EDX:EAX <- PRU[ECX]
+  ASMJIT_INST_0x(rdpkru, Rdpkru)                                       // RDPKRU    [IMPLICIT] EDX:EAX <- PKRU[ECX]
+
+  //! \}
+
+  //! \name RDTSC/RDTSCP Instructions
+  //! \{
+
+  //! \cond
+  using EmitterExplicitT<This>::rdtsc;
+  using EmitterExplicitT<This>::rdtscp;
+  //! \endcond
+
+  ASMJIT_INST_0x(rdtsc, Rdtsc)                                         // RDTSC     [IMPLICIT] EDX:EAX <- CNT
+  ASMJIT_INST_0x(rdtscp, Rdtscp)                                       // RDTSCP    [IMPLICIT] EDX:EAX:ECX <- CNT
+
+  //! \}
+
+  //! \name BMI2 Instructions
+  //! \{
+
+  //! \cond
+  using EmitterExplicitT<This>::mulx;
+  //! \endcond
+
+  ASMJIT_INST_3x(mulx, Mulx, Gp, Gp, Gp)                               // BMI2      [IMPLICIT]
+  ASMJIT_INST_3x(mulx, Mulx, Gp, Gp, Mem)                              // BMI2      [IMPLICIT]
+
+  //! \}
+
+  //! \name XSAVE Instructions
+  //! \{
+
+  //! \cond
+  using EmitterExplicitT<This>::xgetbv;
+  using EmitterExplicitT<This>::xrstor;
+  using EmitterExplicitT<This>::xrstor64;
+  using EmitterExplicitT<This>::xrstors;
+  using EmitterExplicitT<This>::xrstors64;
+  using EmitterExplicitT<This>::xsave;
+  using EmitterExplicitT<This>::xsave64;
+  using EmitterExplicitT<This>::xsavec;
+  using EmitterExplicitT<This>::xsavec64;
+  using EmitterExplicitT<This>::xsaveopt;
+  using EmitterExplicitT<This>::xsaveopt64;
+  using EmitterExplicitT<This>::xsaves;
+  using EmitterExplicitT<This>::xsaves64;
+  //! \endcond
+
+  ASMJIT_INST_0x(xgetbv, Xgetbv)                                       // XSAVE     [IMPLICIT] EDX:EAX <- XCR[ECX]
+  ASMJIT_INST_1x(xrstor, Xrstor, Mem)                                  // XSAVE     [IMPLICIT]
+  ASMJIT_INST_1x(xrstor64, Xrstor64, Mem)                              // XSAVE+X64 [IMPLICIT]
+  ASMJIT_INST_1x(xrstors, Xrstors, Mem)                                // XSAVE     [IMPLICIT]
+  ASMJIT_INST_1x(xrstors64, Xrstors64, Mem)                            // XSAVE+X64 [IMPLICIT]
+  ASMJIT_INST_1x(xsave, Xsave, Mem)                                    // XSAVE     [IMPLICIT]
+  ASMJIT_INST_1x(xsave64, Xsave64, Mem)                                // XSAVE+X64 [IMPLICIT]
+  ASMJIT_INST_1x(xsavec, Xsavec, Mem)                                  // XSAVE     [IMPLICIT]
+  ASMJIT_INST_1x(xsavec64, Xsavec64, Mem)                              // XSAVE+X64 [IMPLICIT]
+  ASMJIT_INST_1x(xsaveopt, Xsaveopt, Mem)                              // XSAVE     [IMPLICIT]
+  ASMJIT_INST_1x(xsaveopt64, Xsaveopt64, Mem)                          // XSAVE+X64 [IMPLICIT]
+  ASMJIT_INST_1x(xsaves, Xsaves, Mem)                                  // XSAVE     [IMPLICIT]
+  ASMJIT_INST_1x(xsaves64, Xsaves64, Mem)                              // XSAVE+X64 [IMPLICIT]
+
+  //! \}
+
+  //! \name SYSCALL/SYSENTER Instructions
+  //! \{
+
+  ASMJIT_INST_0x(syscall, Syscall)                                     // X64       [IMPLICIT]
+  ASMJIT_INST_0x(sysenter, Sysenter)                                   // X64       [IMPLICIT]
+
+  //! \}
+
+  //! \name HRESET Instructions
+  //! \{
+
+  //! \cond
+  using EmitterExplicitT<This>::hreset;
+  //! \endcond
+
+  ASMJIT_INST_1x(hreset, Hreset, Imm)                                  // HRESET    [IMPLICIT]
+
+  //! \}
+
+  //! \name Privileged Instructions
+  //! \{
+
+  //! \cond
+  using EmitterExplicitT<This>::rdmsr;
+  using EmitterExplicitT<This>::rdpmc;
+  using EmitterExplicitT<This>::wrmsr;
+  using EmitterExplicitT<This>::xsetbv;
+  //! \endcond
+
+  ASMJIT_INST_0x(pconfig, Pconfig)                                     // PCONFIG   [IMPLICIT]
+  ASMJIT_INST_0x(rdmsr, Rdmsr)                                         // ANY       [IMPLICIT]
+  ASMJIT_INST_0x(rdpmc, Rdpmc)                                         // ANY       [IMPLICIT]
+  ASMJIT_INST_0x(sysexit, Sysexit)                                     // X64       [IMPLICIT]
+  ASMJIT_INST_0x(sysexitq, Sysexitq)                                   // X64       [IMPLICIT]
+  ASMJIT_INST_0x(sysret, Sysret)                                       // X64       [IMPLICIT]
+  ASMJIT_INST_0x(sysretq, Sysretq)                                     // X64       [IMPLICIT]
+  ASMJIT_INST_0x(wrmsr, Wrmsr)                                         // ANY       [IMPLICIT]
+  ASMJIT_INST_0x(xsetbv, Xsetbv)                                       // XSAVE     [IMPLICIT] XCR[ECX] <- EDX:EAX
+
+  //! \}
+
+  //! \name Monitor & MWait Instructions
+  //! \{
+
+  //! \cond
+  using EmitterExplicitT<This>::monitor;
+  using EmitterExplicitT<This>::monitorx;
+  using EmitterExplicitT<This>::mwait;
+  using EmitterExplicitT<This>::mwaitx;
+  //! \endcond
+
+  ASMJIT_INST_0x(monitor, Monitor)
+  ASMJIT_INST_0x(monitorx, Monitorx)
+  ASMJIT_INST_0x(mwait, Mwait)
+  ASMJIT_INST_0x(mwaitx, Mwaitx)
+
+  //! \}
+
+  //! \name WAITPKG Instructions
+  //! \{
+
+  //! \cond
+  using EmitterExplicitT<This>::tpause;
+  using EmitterExplicitT<This>::umwait;
+  //! \endcond
+
+  ASMJIT_INST_1x(tpause, Tpause, Gp)
+  ASMJIT_INST_1x(umwait, Umwait, Gp)
+
+  //! \}
+
+  //! \name MMX & SSE Instructions
+  //! \{
+
+  //! \cond
+  using EmitterExplicitT<This>::blendvpd;
+  using EmitterExplicitT<This>::blendvps;
+  using EmitterExplicitT<This>::maskmovq;
+  using EmitterExplicitT<This>::maskmovdqu;
+  using EmitterExplicitT<This>::pblendvb;
+  using EmitterExplicitT<This>::pcmpestri;
+  using EmitterExplicitT<This>::pcmpestrm;
+  using EmitterExplicitT<This>::pcmpistri;
+  using EmitterExplicitT<This>::pcmpistrm;
+  //! \endcond
+
+  ASMJIT_INST_2x(blendvpd, Blendvpd, Xmm, Xmm)                         // SSE4_1 [IMPLICIT]
+  ASMJIT_INST_2x(blendvpd, Blendvpd, Xmm, Mem)                         // SSE4_1 [IMPLICIT]
+  ASMJIT_INST_2x(blendvps, Blendvps, Xmm, Xmm)                         // SSE4_1 [IMPLICIT]
+  ASMJIT_INST_2x(blendvps, Blendvps, Xmm, Mem)                         // SSE4_1 [IMPLICIT]
+  ASMJIT_INST_2x(pblendvb, Pblendvb, Xmm, Xmm)                         // SSE4_1 [IMPLICIT]
+  ASMJIT_INST_2x(pblendvb, Pblendvb, Xmm, Mem)                         // SSE4_1 [IMPLICIT]
+  ASMJIT_INST_2x(maskmovq, Maskmovq, Mm, Mm)                           // SSE    [IMPLICIT]
+  ASMJIT_INST_2x(maskmovdqu, Maskmovdqu, Xmm, Xmm)                     // SSE2   [IMPLICIT]
+  ASMJIT_INST_3x(pcmpestri, Pcmpestri, Xmm, Xmm, Imm)                  // SSE4_1 [IMPLICIT]
+  ASMJIT_INST_3x(pcmpestri, Pcmpestri, Xmm, Mem, Imm)                  // SSE4_1 [IMPLICIT]
+  ASMJIT_INST_3x(pcmpestrm, Pcmpestrm, Xmm, Xmm, Imm)                  // SSE4_1 [IMPLICIT]
+  ASMJIT_INST_3x(pcmpestrm, Pcmpestrm, Xmm, Mem, Imm)                  // SSE4_1 [IMPLICIT]
+  ASMJIT_INST_3x(pcmpistri, Pcmpistri, Xmm, Xmm, Imm)                  // SSE4_1 [IMPLICIT]
+  ASMJIT_INST_3x(pcmpistri, Pcmpistri, Xmm, Mem, Imm)                  // SSE4_1 [IMPLICIT]
+  ASMJIT_INST_3x(pcmpistrm, Pcmpistrm, Xmm, Xmm, Imm)                  // SSE4_1 [IMPLICIT]
+  ASMJIT_INST_3x(pcmpistrm, Pcmpistrm, Xmm, Mem, Imm)                  // SSE4_1 [IMPLICIT]
+
+  //! \}
+
+  //! \name SHA Instructions
+  //! \{
+
+  //! \cond
+  using EmitterExplicitT<This>::sha256rnds2;
+  //! \endcond
+
+  ASMJIT_INST_2x(sha256rnds2, Sha256rnds2, Xmm, Xmm)                   // SHA [IMPLICIT]
+  ASMJIT_INST_2x(sha256rnds2, Sha256rnds2, Xmm, Mem)                   // SHA [IMPLICIT]
+
+  //! \}
+
+  //! \name AVX, FMA, and AVX512 Instructions
+  //! \{
+
+  //! \cond
+  using EmitterExplicitT<This>::vmaskmovdqu;
+  using EmitterExplicitT<This>::vpcmpestri;
+  using EmitterExplicitT<This>::vpcmpestrm;
+  using EmitterExplicitT<This>::vpcmpistri;
+  using EmitterExplicitT<This>::vpcmpistrm;
+  //! \endcond
+
+  ASMJIT_INST_2x(vmaskmovdqu, Vmaskmovdqu, Xmm, Xmm)                   // AVX [IMPLICIT]
+  ASMJIT_INST_3x(vpcmpestri, Vpcmpestri, Xmm, Xmm, Imm)                // AVX [IMPLICIT]
+  ASMJIT_INST_3x(vpcmpestri, Vpcmpestri, Xmm, Mem, Imm)                // AVX [IMPLICIT]
+  ASMJIT_INST_3x(vpcmpestrm, Vpcmpestrm, Xmm, Xmm, Imm)                // AVX [IMPLICIT]
+  ASMJIT_INST_3x(vpcmpestrm, Vpcmpestrm, Xmm, Mem, Imm)                // AVX [IMPLICIT]
+  ASMJIT_INST_3x(vpcmpistri, Vpcmpistri, Xmm, Xmm, Imm)                // AVX [IMPLICIT]
+  ASMJIT_INST_3x(vpcmpistri, Vpcmpistri, Xmm, Mem, Imm)                // AVX [IMPLICIT]
+  ASMJIT_INST_3x(vpcmpistrm, Vpcmpistrm, Xmm, Xmm, Imm)                // AVX [IMPLICIT]
+  ASMJIT_INST_3x(vpcmpistrm, Vpcmpistrm, Xmm, Mem, Imm)                // AVX [IMPLICIT]
+
+  //! \}
+};
+
+//! Emitter (X86).
+//!
+//! \note This class cannot be instantiated; you can only cast to it and use it as an emitter that emits to either
+//! `x86::Assembler`, `x86::Builder`, or `x86::Compiler` (use with caution with `x86::Compiler` as it requires
+//! virtual registers).
+class Emitter : public BaseEmitter, public EmitterImplicitT<Emitter> {
+  ASMJIT_NONCONSTRUCTIBLE(Emitter)
+};
+
+//! \}
+
+#undef ASMJIT_INST_0x
+#undef ASMJIT_INST_1x
+#undef ASMJIT_INST_1c
+#undef ASMJIT_INST_2x
+#undef ASMJIT_INST_2c
+#undef ASMJIT_INST_3x
+#undef ASMJIT_INST_4x
+#undef ASMJIT_INST_5x
+#undef ASMJIT_INST_6x
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // ASMJIT_X86_X86EMITTER_H_INCLUDED
diff --git a/lib/lepton/asmjit/x86/x86formatter.cpp b/lib/lepton/asmjit/x86/x86formatter.cpp
new file mode 100644
index 0000000000..d62dd18b63
--- /dev/null
+++ b/lib/lepton/asmjit/x86/x86formatter.cpp
@@ -0,0 +1,944 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#ifndef ASMJIT_NO_LOGGING
+
+#include "../core/cpuinfo.h"
+#include "../core/misc_p.h"
+#include "../core/support.h"
+#include "../x86/x86formatter_p.h"
+#include "../x86/x86instapi_p.h"
+#include "../x86/x86instdb_p.h"
+#include "../x86/x86operand.h"
+
+#ifndef ASMJIT_NO_COMPILER
+  #include "../core/compiler.h"
+#endif
+
+ASMJIT_BEGIN_SUB_NAMESPACE(x86)
+
+// x86::FormatterInternal - Constants
+// ==================================
+
+struct RegFormatInfo { // String tables used by formatRegister() to print register types and register names.
+  struct TypeEntry {
+    uint8_t index;        // Offset of the type name in `typeStrings`; 0 means the type has no name.
+  };
+  // Describes how the registers of one register type are named.
+  struct NameEntry {
+    uint8_t count;        // Ids below `count` (and not below `specialCount`) use the format at `formatIndex`.
+    uint8_t formatIndex;  // Offset in `nameStrings` of a format string that receives the register id.
+    uint8_t specialIndex; // Offset in `nameStrings` of the fixed names, which are stored 4 bytes apart.
+    uint8_t specialCount; // Ids below `specialCount` are printed with a fixed name.
+  };
+  // Both entry arrays are indexed by the numeric `RegType` value.
+  TypeEntry typeEntries[uint32_t(RegType::kMaxValue) + 1];
+  char typeStrings[128 - 32];
+
+  NameEntry nameEntries[uint32_t(RegType::kMaxValue) + 1];
+  char nameStrings[280];
+};
+
+template<uint32_t X> // X is a `RegType` value converted to uint32_t.
+struct RegFormatInfo_T { // Compile-time table values for register type X; every value is 0 for a type not listed.
+  enum { // kTypeIndex: offset of the type's name in `x86RegFormatInfo.typeStrings`.
+    kTypeIndex    = X == uint32_t(RegType::kX86_GpbLo) ? 1   :
+                    X == uint32_t(RegType::kX86_GpbHi) ? 8   :
+                    X == uint32_t(RegType::kX86_Gpw  ) ? 15  :
+                    X == uint32_t(RegType::kX86_Gpd  ) ? 19  :
+                    X == uint32_t(RegType::kX86_Gpq  ) ? 23  :
+                    X == uint32_t(RegType::kX86_Xmm  ) ? 27  :
+                    X == uint32_t(RegType::kX86_Ymm  ) ? 31  :
+                    X == uint32_t(RegType::kX86_Zmm  ) ? 35  :
+                    X == uint32_t(RegType::kX86_Mm   ) ? 50  :
+                    X == uint32_t(RegType::kX86_KReg ) ? 53  :
+                    X == uint32_t(RegType::kX86_SReg ) ? 43  :
+                    X == uint32_t(RegType::kX86_CReg ) ? 59  :
+                    X == uint32_t(RegType::kX86_DReg ) ? 62  :
+                    X == uint32_t(RegType::kX86_St   ) ? 47  :
+                    X == uint32_t(RegType::kX86_Bnd  ) ? 55  :
+                    X == uint32_t(RegType::kX86_Tmm  ) ? 65  :
+                    X == uint32_t(RegType::kX86_Rip  ) ? 39  : 0,
+    // kFormatIndex: offset of the type's register-name format string in `x86RegFormatInfo.nameStrings`.
+    kFormatIndex  = X == uint32_t(RegType::kX86_GpbLo) ? 1   :
+                    X == uint32_t(RegType::kX86_GpbHi) ? 6   :
+                    X == uint32_t(RegType::kX86_Gpw  ) ? 11  :
+                    X == uint32_t(RegType::kX86_Gpd  ) ? 16  :
+                    X == uint32_t(RegType::kX86_Gpq  ) ? 21  :
+                    X == uint32_t(RegType::kX86_Xmm  ) ? 25  :
+                    X == uint32_t(RegType::kX86_Ymm  ) ? 31  :
+                    X == uint32_t(RegType::kX86_Zmm  ) ? 37  :
+                    X == uint32_t(RegType::kX86_Mm   ) ? 60  :
+                    X == uint32_t(RegType::kX86_KReg ) ? 65  :
+                    X == uint32_t(RegType::kX86_SReg ) ? 49  :
+                    X == uint32_t(RegType::kX86_CReg ) ? 75  :
+                    X == uint32_t(RegType::kX86_DReg ) ? 80  :
+                    X == uint32_t(RegType::kX86_St   ) ? 55  :
+                    X == uint32_t(RegType::kX86_Bnd  ) ? 69  :
+                    X == uint32_t(RegType::kX86_Tmm  ) ? 89  :
+                    X == uint32_t(RegType::kX86_Rip  ) ? 43  : 0,
+    // kSpecialIndex: offset in `nameStrings` of the fixed names used for the lowest register ids of the type.
+    kSpecialIndex = X == uint32_t(RegType::kX86_GpbLo) ? 96  :
+                    X == uint32_t(RegType::kX86_GpbHi) ? 128 :
+                    X == uint32_t(RegType::kX86_Gpw  ) ? 161 : // 160 + 1: reuses "eax".."edi" without the leading 'e'.
+                    X == uint32_t(RegType::kX86_Gpd  ) ? 160 :
+                    X == uint32_t(RegType::kX86_Gpq  ) ? 192 :
+                    X == uint32_t(RegType::kX86_SReg ) ? 224 :
+                    X == uint32_t(RegType::kX86_Rip  ) ? 85  : 0,
+    // kSpecialCount: number of register ids (starting at 0) that have a fixed name at kSpecialIndex.
+    kSpecialCount = X == uint32_t(RegType::kX86_GpbLo) ? 8   :
+                    X == uint32_t(RegType::kX86_GpbHi) ? 4   :
+                    X == uint32_t(RegType::kX86_Gpw  ) ? 8   :
+                    X == uint32_t(RegType::kX86_Gpd  ) ? 8   :
+                    X == uint32_t(RegType::kX86_Gpq  ) ? 8   :
+                    X == uint32_t(RegType::kX86_SReg ) ? 7   :
+                    X == uint32_t(RegType::kX86_Rip  ) ? 1   : 0
+  };
+};
+
+#define ASMJIT_REG_TYPE_ENTRY(TYPE) {   \
+  RegFormatInfo_T<TYPE>::kTypeIndex     \
+}
+// The two macros expand to one TypeEntry / NameEntry initializer for the register type TYPE.
+#define ASMJIT_REG_NAME_ENTRY(TYPE) {   \
+  RegTraits<RegType(TYPE)>::kCount,     \
+  RegFormatInfo_T<TYPE>::kFormatIndex,  \
+  RegFormatInfo_T<TYPE>::kSpecialIndex, \
+  RegFormatInfo_T<TYPE>::kSpecialCount  \
+}
+// Register formatting tables; each "#N" comment gives the byte offset of the string within its table.
+static const RegFormatInfo x86RegFormatInfo = {
+  // Register type entries and strings.
+  { ASMJIT_LOOKUP_TABLE_32(ASMJIT_REG_TYPE_ENTRY, 0) },
+
+  "\0"             // #0
+  "gpb\0\0\0\0"    // #1
+  "gpb.hi\0"       // #8
+  "gpw\0"          // #15
+  "gpd\0"          // #19
+  "gpq\0"          // #23
+  "xmm\0"          // #27
+  "ymm\0"          // #31
+  "zmm\0"          // #35
+  "rip\0"          // #39
+  "seg\0"          // #43
+  "st\0"           // #47
+  "mm\0"           // #50
+  "k\0"            // #53
+  "bnd\0"          // #55
+  "cr\0"           // #59
+  "dr\0"           // #62
+  "tmm\0"          // #65
+  ,
+
+  // Register name entries and strings.
+  { ASMJIT_LOOKUP_TABLE_32(ASMJIT_REG_NAME_ENTRY, 0) },
+
+  "\0"             // #0
+  "r%ub\0"         // #1
+  "r%uh\0"         // #6
+  "r%uw\0"         // #11
+  "r%ud\0"         // #16
+  "r%u\0"          // #21
+  "xmm%u\0"        // #25
+  "ymm%u\0"        // #31
+  "zmm%u\0"        // #37
+  "rip%u\0"        // #43
+  "seg%u\0"        // #49
+  "st%u\0"         // #55
+  "mm%u\0"         // #60
+  "k%u\0"          // #65
+  "bnd%u\0"        // #69
+  "cr%u\0"         // #75
+  "dr%u\0"         // #80
+
+  "rip\0"          // #85
+  "tmm%u\0"        // #89
+  "\0"             // #95
+  // Fixed names below take 4 bytes each, so `specialIndex + id * 4` addresses the name of register `id`.
+  "al\0\0" "cl\0\0" "dl\0\0" "bl\0\0" "spl\0"  "bpl\0"  "sil\0"  "dil\0" // #96
+  "ah\0\0" "ch\0\0" "dh\0\0" "bh\0\0" "n/a\0"  "n/a\0"  "n/a\0"  "n/a\0" // #128
+  "eax\0"  "ecx\0"  "edx\0"  "ebx\0"  "esp\0"  "ebp\0"  "esi\0"  "edi\0" // #160
+  "rax\0"  "rcx\0"  "rdx\0"  "rbx\0"  "rsp\0"  "rbp\0"  "rsi\0"  "rdi\0" // #192
+  "n/a\0"  "es\0\0" "cs\0\0" "ss\0\0" "ds\0\0" "fs\0\0" "gs\0\0" "n/a\0" // #224
+};
+#undef ASMJIT_REG_NAME_ENTRY
+#undef ASMJIT_REG_TYPE_ENTRY
+
+static const char* x86GetAddressSizeString(uint32_t size) noexcept { // Maps a memory operand size to its "<size> ptr " prefix.
+  switch (size) {
+    case 1 : return "byte ptr ";
+    case 2 : return "word ptr ";
+    case 4 : return "dword ptr ";
+    case 6 : return "fword ptr ";
+    case 8 : return "qword ptr ";
+    case 10: return "tbyte ptr ";
+    case 16: return "xmmword ptr ";
+    case 32: return "ymmword ptr ";
+    case 64: return "zmmword ptr ";
+    default: return ""; // Any other size (including 0) prints no prefix at all.
+  }
+}
+
+// x86::FormatterInternal - Format FeatureId
+// =========================================
+// Appends the name stored for the x86 CPU feature `featureId` to `sb` and returns the result of that append.
+Error FormatterInternal::formatFeature(String& sb, uint32_t featureId) noexcept {
+  // @EnumStringBegin{"enum": "CpuFeatures::X86", "output": "sFeature", "strip": "k"}@
+  static const char sFeatureString[] =
+    "None\0"
+    "MT\0"
+    "NX\0"
+    "3DNOW\0"
+    "3DNOW2\0"
+    "ADX\0"
+    "AESNI\0"
+    "ALTMOVCR8\0"
+    "AMX_BF16\0"
+    "AMX_INT8\0"
+    "AMX_TILE\0"
+    "AVX\0"
+    "AVX2\0"
+    "AVX512_4FMAPS\0"
+    "AVX512_4VNNIW\0"
+    "AVX512_BF16\0"
+    "AVX512_BITALG\0"
+    "AVX512_BW\0"
+    "AVX512_CDI\0"
+    "AVX512_DQ\0"
+    "AVX512_ERI\0"
+    "AVX512_F\0"
+    "AVX512_FP16\0"
+    "AVX512_IFMA\0"
+    "AVX512_PFI\0"
+    "AVX512_VBMI\0"
+    "AVX512_VBMI2\0"
+    "AVX512_VL\0"
+    "AVX512_VNNI\0"
+    "AVX512_VP2INTERSECT\0"
+    "AVX512_VPOPCNTDQ\0"
+    "AVX_VNNI\0"
+    "BMI\0"
+    "BMI2\0"
+    "CET_IBT\0"
+    "CET_SS\0"
+    "CLDEMOTE\0"
+    "CLFLUSH\0"
+    "CLFLUSHOPT\0"
+    "CLWB\0"
+    "CLZERO\0"
+    "CMOV\0"
+    "CMPXCHG16B\0"
+    "CMPXCHG8B\0"
+    "ENCLV\0"
+    "ENQCMD\0"
+    "ERMS\0"
+    "F16C\0"
+    "FMA\0"
+    "FMA4\0"
+    "FPU\0"
+    "FSGSBASE\0"
+    "FXSR\0"
+    "FXSROPT\0"
+    "GEODE\0"
+    "GFNI\0"
+    "HLE\0"
+    "HRESET\0"
+    "I486\0"
+    "LAHFSAHF\0"
+    "LWP\0"
+    "LZCNT\0"
+    "MCOMMIT\0"
+    "MMX\0"
+    "MMX2\0"
+    "MONITOR\0"
+    "MONITORX\0"
+    "MOVBE\0"
+    "MOVDIR64B\0"
+    "MOVDIRI\0"
+    "MPX\0"
+    "MSR\0"
+    "MSSE\0"
+    "OSXSAVE\0"
+    "OSPKE\0"
+    "PCLMULQDQ\0"
+    "PCONFIG\0"
+    "POPCNT\0"
+    "PREFETCHW\0"
+    "PREFETCHWT1\0"
+    "PTWRITE\0"
+    "RDPID\0"
+    "RDPRU\0"
+    "RDRAND\0"
+    "RDSEED\0"
+    "RDTSC\0"
+    "RDTSCP\0"
+    "RTM\0"
+    "SERIALIZE\0"
+    "SHA\0"
+    "SKINIT\0"
+    "SMAP\0"
+    "SMEP\0"
+    "SMX\0"
+    "SNP\0"
+    "SSE\0"
+    "SSE2\0"
+    "SSE3\0"
+    "SSE4_1\0"
+    "SSE4_2\0"
+    "SSE4A\0"
+    "SSSE3\0"
+    "SVM\0"
+    "TBM\0"
+    "TSX\0"
+    "TSXLDTRK\0"
+    "UINTR\0"
+    "VAES\0"
+    "VMX\0"
+    "VPCLMULQDQ\0"
+    "WAITPKG\0"
+    "WBNOINVD\0"
+    "XOP\0"
+    "XSAVE\0"
+    "XSAVEC\0"
+    "XSAVEOPT\0"
+    "XSAVES\0"
+    "<Unknown>\0";
+
+  static const uint16_t sFeatureIndex[] = {
+    0, 5, 8, 11, 17, 24, 28, 34, 44, 53, 62, 71, 75, 80, 94, 108, 120, 134, 144,
+    155, 165, 176, 185, 197, 209, 220, 232, 245, 255, 267, 287, 304, 313, 317,
+    322, 330, 337, 346, 354, 365, 370, 377, 382, 393, 403, 409, 416, 421, 426,
+    430, 435, 439, 448, 453, 461, 467, 472, 476, 483, 488, 497, 501, 507, 515,
+    519, 524, 532, 541, 547, 557, 565, 569, 573, 578, 586, 592, 602, 610, 617,
+    627, 639, 647, 653, 659, 666, 673, 679, 686, 690, 700, 704, 711, 716, 721,
+    725, 729, 733, 738, 743, 750, 757, 763, 769, 773, 777, 781, 790, 796, 801,
+    805, 816, 824, 833, 837, 843, 850, 859, 866
+  };
+  // @EnumStringEnd@
+  // sFeatureIndex[i] is the offset of name i; larger ids clamp to kMaxValue + 1 (presumably the "<Unknown>" entry).
+  return sb.append(sFeatureString + sFeatureIndex[Support::min<uint32_t>(featureId, uint32_t(CpuFeatures::X86::kMaxValue) + 1)]);
+}
+
+// x86::FormatterInternal - Format Register
+// ========================================
+// Appends the name of the register identified by `type` and `id` to `sb`; `arch` is not used.
+ASMJIT_FAVOR_SIZE Error FormatterInternal::formatRegister(String& sb, FormatFlags formatFlags, const BaseEmitter* emitter, Arch arch, RegType type, uint32_t id) noexcept {
+  DebugUtils::unused(arch);
+  const RegFormatInfo& info = x86RegFormatInfo;
+
+#ifndef ASMJIT_NO_COMPILER
+  if (Operand::isVirtId(id)) {
+    if (emitter && emitter->emitterType() == EmitterType::kCompiler) {
+      const BaseCompiler* cc = static_cast<const BaseCompiler*>(emitter);
+      if (cc->isVirtIdValid(id)) {
+        VirtReg* vReg = cc->virtRegById(id);
+        ASMJIT_ASSERT(vReg != nullptr);
+        // Prefer the virtual register's own name; an unnamed one is printed as "%<index>".
+        const char* name = vReg->name();
+        if (name && name[0] != '\0')
+          ASMJIT_PROPAGATE(sb.append(name));
+        else
+          ASMJIT_PROPAGATE(sb.appendFormat("%%%u", unsigned(Operand::virtIdToIndex(id))));
+        // With kRegCasts, a virtual register whose own type differs from `type` gets an "@<type>" suffix.
+        if (vReg->type() != type && uint32_t(type) <= uint32_t(RegType::kMaxValue) && Support::test(formatFlags, FormatFlags::kRegCasts)) {
+          const RegFormatInfo::TypeEntry& typeEntry = info.typeEntries[size_t(type)];
+          if (typeEntry.index)
+            ASMJIT_PROPAGATE(sb.appendFormat("@%s", info.typeStrings + typeEntry.index));
+        }
+
+        return kErrorOk;
+      }
+    }
+  }
+#else
+  DebugUtils::unused(emitter, formatFlags);
+#endif
+  // Reached for every id that was not resolved as a virtual register above.
+  if (uint32_t(type) <= uint32_t(RegType::kMaxValue)) {
+    const RegFormatInfo::NameEntry& nameEntry = info.nameEntries[size_t(type)];
+    // The lowest ids have fixed names stored 4 bytes apart (for example "eax" or "rsp").
+    if (id < nameEntry.specialCount)
+      return sb.append(info.nameStrings + nameEntry.specialIndex + id * 4);
+    // The remaining valid ids go through the per-type format string (for example "xmm%u").
+    if (id < nameEntry.count)
+      return sb.appendFormat(info.nameStrings + nameEntry.formatIndex, unsigned(id));
+    // Id out of range for the type: print "<type>@<id>" when the type has a name.
+    const RegFormatInfo::TypeEntry& typeEntry = info.typeEntries[size_t(type)];
+    if (typeEntry.index)
+      return sb.appendFormat("%s@%u", info.typeStrings + typeEntry.index, unsigned(id));
+  }
+  // Unknown register type, or a type that has no name; "%u" arguments are cast to `unsigned` like the calls above.
+  return sb.appendFormat("<Reg-%u>?%u", unsigned(type), unsigned(id));
+}
+
+// x86::FormatterInternal - Format Operand
+// =======================================
+// Appends the text form of `op` (register, memory, immediate or label) to `sb`; other operands print "<None>".
+ASMJIT_FAVOR_SIZE Error FormatterInternal::formatOperand(
+  String& sb,
+  FormatFlags formatFlags,
+  const BaseEmitter* emitter,
+  Arch arch,
+  const Operand_& op) noexcept {
+
+  if (op.isReg())
+    return formatRegister(sb, formatFlags, emitter, arch, op.as<BaseReg>().type(), op.as<BaseReg>().id());
+
+  if (op.isMem()) {
+    const Mem& m = op.as<Mem>();
+    ASMJIT_PROPAGATE(sb.append(x86GetAddressSizeString(m.size())));
+
+    // Segment override prefix.
+    uint32_t seg = m.segmentId();
+    if (seg != SReg::kIdNone && seg < SReg::kIdCount)
+      ASMJIT_PROPAGATE(sb.appendFormat("%s:", x86RegFormatInfo.nameStrings + 224 + size_t(seg) * 4));
+
+    ASMJIT_PROPAGATE(sb.append('['));
+    switch (m.addrType()) {
+      case Mem::AddrType::kDefault:
+        break;
+      case Mem::AddrType::kAbs:
+        ASMJIT_PROPAGATE(sb.append("abs "));
+        break;
+      case Mem::AddrType::kRel:
+        ASMJIT_PROPAGATE(sb.append("rel "));
+        break;
+    }
+    // `opSign` is the separator printed before the next address component; '\0' means nothing was printed yet.
+    char opSign = '\0';
+    if (m.hasBase()) {
+      opSign = '+';
+      if (m.hasBaseLabel()) {
+        ASMJIT_PROPAGATE(Formatter::formatLabel(sb, formatFlags, emitter, m.baseId()));
+      }
+      else {
+        FormatFlags modifiedFlags = formatFlags;
+        if (m.isRegHome()) {
+          ASMJIT_PROPAGATE(sb.append("&"));
+          modifiedFlags &= ~FormatFlags::kRegCasts;
+        }
+        ASMJIT_PROPAGATE(formatRegister(sb, modifiedFlags, emitter, arch, m.baseType(), m.baseId()));
+      }
+    }
+
+    if (m.hasIndex()) {
+      if (opSign)
+        ASMJIT_PROPAGATE(sb.append(opSign));
+
+      opSign = '+';
+      ASMJIT_PROPAGATE(formatRegister(sb, formatFlags, emitter, arch, m.indexType(), m.indexId()));
+      if (m.hasShift())
+        ASMJIT_PROPAGATE(sb.appendFormat("*%u", 1u << m.shift())); // Unsigned literal so the argument matches "%u".
+    }
+    // The displacement is printed when it is non-zero or when it is the only address component.
+    uint64_t off = uint64_t(m.offset());
+    if (off || !m.hasBaseOrIndex()) {
+      if (int64_t(off) < 0) {
+        opSign = '-';
+        off = ~off + 1; // Two's complement negation yields the magnitude.
+      }
+
+      if (opSign)
+        ASMJIT_PROPAGATE(sb.append(opSign));
+      // Values 0..9 look the same in both bases, so the "0x" prefix is only used above 9.
+      uint32_t base = 10;
+      if (Support::test(formatFlags, FormatFlags::kHexOffsets) && off > 9) {
+        ASMJIT_PROPAGATE(sb.append("0x", 2));
+        base = 16;
+      }
+
+      ASMJIT_PROPAGATE(sb.appendUInt(off, base));
+    }
+
+    return sb.append(']');
+  }
+
+  if (op.isImm()) {
+    const Imm& i = op.as<Imm>();
+    int64_t val = i.value();
+    // With kHexImms, negative values also take the hex path because they compare above 9 once cast to unsigned.
+    if (Support::test(formatFlags, FormatFlags::kHexImms) && uint64_t(val) > 9) {
+      ASMJIT_PROPAGATE(sb.append("0x", 2));
+      return sb.appendUInt(uint64_t(val), 16);
+    }
+    else {
+      return sb.appendInt(val, 10);
+    }
+  }
+
+  if (op.isLabel()) {
+    return Formatter::formatLabel(sb, formatFlags, emitter, op.id());
+  }
+
+  return sb.append("<None>");
+}
+
+// x86::FormatterInternal - Format Immediate (Extension)
+// =====================================================
+
+static constexpr char kImmCharStart = '{'; // Printed before the first field of an explained immediate.
+static constexpr char kImmCharEnd   = '}'; // Printed after the last field; the formatters skip it when it is '\0'.
+static constexpr char kImmCharOr    = '|'; // Printed between two fields.
+
+struct ImmBits { // Describes how FormatterInternal_formatImmBits() prints one bit-field of an immediate.
+  enum Mode : uint32_t {
+    kModeLookup = 0,      // `text` is passed to Support::findPackedString() with the field value as index.
+    kModeFormat = 1       // `text` is a printf-like format string that receives the field value.
+  };
+
+  uint8_t mask;           // Bits of the immediate that belong to the field.
+  uint8_t shift;          // Right shift applied to the masked bits to obtain the field value.
+  uint8_t mode;           // One of the `Mode` values.
+  char text[48 - 3];      // Sized so that the three bytes above plus `text` add up to 48 bytes.
+};
+
+ASMJIT_FAVOR_SIZE static Error FormatterInternal_formatImmShuf(String& sb, uint32_t u8, uint32_t bits, uint32_t count) noexcept {
+  uint32_t mask = (1u << bits) - 1; // Unsigned literal, matching FormatterInternal_formatImmText().
+  // Prints `count` fields of `bits` bits each, lowest field first, as "{f0|f1|...}".
+  for (uint32_t i = 0; i < count; i++, u8 >>= bits) {
+    uint32_t value = u8 & mask;
+    ASMJIT_PROPAGATE(sb.append(i == 0 ? kImmCharStart : kImmCharOr));
+    ASMJIT_PROPAGATE(sb.appendUInt(value));
+  }
+  // Close the group only if it was opened; with `count == 0` nothing is appended at all.
+  if (count && kImmCharEnd)
+    ASMJIT_PROPAGATE(sb.append(kImmCharEnd));
+
+  return kErrorOk;
+}
+
+ASMJIT_FAVOR_SIZE static Error FormatterInternal_formatImmBits(String& sb, uint32_t u8, const ImmBits* bits, uint32_t count) noexcept {
+  uint32_t n = 0; // Number of fields printed so far.
+  char buf[64];   // Scratch buffer for kModeFormat fields.
+  // Prints the fields described by the first `count` entries of `bits` as "{a|b|...}", skipping empty ones.
+  for (uint32_t i = 0; i < count; i++) {
+    const ImmBits& spec = bits[i];
+
+    uint32_t value = (u8 & uint32_t(spec.mask)) >> spec.shift;
+    const char* str = nullptr;
+
+    switch (spec.mode) {
+      case ImmBits::kModeLookup:
+        str = Support::findPackedString(spec.text, value);
+        break;
+
+      case ImmBits::kModeFormat:
+        snprintf(buf, sizeof(buf), spec.text, unsigned(value));
+        str = buf;
+        break;
+
+      default:
+        return DebugUtils::errored(kErrorInvalidState);
+    }
+    // An empty string means the field has nothing to show for this value.
+    if (!str[0])
+      continue;
+
+    ASMJIT_PROPAGATE(sb.append(++n == 1 ? kImmCharStart : kImmCharOr));
+    ASMJIT_PROPAGATE(sb.append(str));
+  }
+  // Close the group only if at least one field was printed.
+  if (n && kImmCharEnd)
+    ASMJIT_PROPAGATE(sb.append(kImmCharEnd));
+
+  return kErrorOk;
+}
+
+ASMJIT_FAVOR_SIZE static Error FormatterInternal_formatImmText(String& sb, uint32_t u8, uint32_t bits, uint32_t advance, const char* text, uint32_t count = 1) noexcept {
+  uint32_t mask = (1u << bits) - 1;
+  uint32_t pos = 0; // Index offset into `text` for the current field; grows by `advance` per field.
+  // Prints `count` fields of `bits` bits each, lowest first, as "{s0|s1|...}" using strings looked up in `text`.
+  for (uint32_t i = 0; i < count; i++, u8 >>= bits, pos += advance) {
+    uint32_t value = (u8 & mask) + pos;
+    ASMJIT_PROPAGATE(sb.append(i == 0 ? kImmCharStart : kImmCharOr));
+    ASMJIT_PROPAGATE(sb.append(Support::findPackedString(text, value)));
+  }
+  // Close the group only if it was opened; with `count == 0` nothing is appended at all.
+  if (count && kImmCharEnd)
+    ASMJIT_PROPAGATE(sb.append(kImmCharEnd));
+
+  return kErrorOk;
+}
+
+ASMJIT_FAVOR_SIZE static Error FormatterInternal_explainConst(
+  String& sb,
+  FormatFlags formatFlags,
+  InstId instId,
+  uint32_t vecSize,
+  const Imm& imm) noexcept {
+
+  DebugUtils::unused(formatFlags);
+
+  static const char vcmpx[] =
+    "EQ_OQ\0" "LT_OS\0"  "LE_OS\0"  "UNORD_Q\0"  "NEQ_UQ\0" "NLT_US\0" "NLE_US\0" "ORD_Q\0"
+    "EQ_UQ\0" "NGE_US\0" "NGT_US\0" "FALSE_OQ\0" "NEQ_OQ\0" "GE_OS\0"  "GT_OS\0"  "TRUE_UQ\0"
+    "EQ_OS\0" "LT_OQ\0"  "LE_OQ\0"  "UNORD_S\0"  "NEQ_US\0" "NLT_UQ\0" "NLE_UQ\0" "ORD_S\0"
+    "EQ_US\0" "NGE_UQ\0" "NGT_UQ\0" "FALSE_OS\0" "NEQ_OS\0" "GE_OQ\0"  "GT_OQ\0"  "TRUE_US\0";
+
+  // The `vpcmpx` and `vpcomx` predicate tables list the same names in a different order, so both are needed.
+  static const char vpcmpx[] = "EQ\0" "LT\0" "LE\0" "FALSE\0" "NEQ\0" "GE\0"  "GT\0"    "TRUE\0";
+  static const char vpcomx[] = "LT\0" "LE\0" "GT\0" "GE\0"    "EQ\0"  "NEQ\0" "FALSE\0" "TRUE\0";
+
+  static const char vshufpd[] = "A0\0A1\0B0\0B1\0A2\0A3\0B2\0B3\0A4\0A5\0B4\0B5\0A6\0A7\0B6\0B7\0";
+  static const char vshufps[] = "A0\0A1\0A2\0A3\0A0\0A1\0A2\0A3\0B0\0B1\0B2\0B3\0B0\0B1\0B2\0B3\0";
+
+  static const ImmBits vfpclassxx[] = {
+    { 0x07u, 0, ImmBits::kModeLookup, "QNAN\0" "+0\0" "-0\0" "+INF\0" "-INF\0" "DENORMAL\0" "-FINITE\0" "SNAN\0" }
+  };
+
+  static const ImmBits vfixupimmxx[] = {
+    { 0x01u, 0, ImmBits::kModeLookup, "\0" "+INF_IE\0" },
+    { 0x02u, 1, ImmBits::kModeLookup, "\0" "-VE_IE\0"  },
+    { 0x04u, 2, ImmBits::kModeLookup, "\0" "-INF_IE\0" },
+    { 0x08u, 3, ImmBits::kModeLookup, "\0" "SNAN_IE\0" },
+    { 0x10u, 4, ImmBits::kModeLookup, "\0" "ONE_IE\0"  },
+    { 0x20u, 5, ImmBits::kModeLookup, "\0" "ONE_ZE\0"  },
+    { 0x40u, 6, ImmBits::kModeLookup, "\0" "ZERO_IE\0" },
+    { 0x80u, 7, ImmBits::kModeLookup, "\0" "ZERO_ZE\0" }
+  };
+
+  static const ImmBits vgetmantxx[] = {
+    { 0x03u, 0, ImmBits::kModeLookup, "[1, 2)\0" "[.5, 2)\0" "[.5, 1)\0" "[.75, 1.5)\0" },
+    { 0x04u, 2, ImmBits::kModeLookup, "\0" "NO_SIGN\0" },
+    { 0x08u, 3, ImmBits::kModeLookup, "\0" "QNAN_IF_SIGN\0" }
+  };
+
+  static const ImmBits vmpsadbw[] = {
+    { 0x04u, 2, ImmBits::kModeLookup, "BLK1[0]\0" "BLK1[1]\0" },
+    { 0x03u, 0, ImmBits::kModeLookup, "BLK2[0]\0" "BLK2[1]\0" "BLK2[2]\0" "BLK2[3]\0" },
+    { 0x40u, 6, ImmBits::kModeLookup, "BLK1[4]\0" "BLK1[5]\0" },
+    { 0x30u, 4, ImmBits::kModeLookup, "BLK2[4]\0" "BLK2[5]\0" "BLK2[6]\0" "BLK2[7]\0" }
+  };
+
+  static const ImmBits vpclmulqdq[] = {
+    { 0x01u, 0, ImmBits::kModeLookup, "LQ\0" "HQ\0" },
+    { 0x10u, 4, ImmBits::kModeLookup, "LQ\0" "HQ\0" }
+  };
+
+  static const ImmBits vperm2x128[] = {
+    { 0x0Bu, 0, ImmBits::kModeLookup, "A0\0" "A1\0" "B0\0" "B1\0" "\0" "\0" "\0" "\0" "0\0" "0\0" "0\0" "0\0" },
+    { 0xB0u, 4, ImmBits::kModeLookup, "A0\0" "A1\0" "B0\0" "B1\0" "\0" "\0" "\0" "\0" "0\0" "0\0" "0\0" "0\0" }
+  };
+
+  static const ImmBits vrangexx[] = {
+    { 0x03u, 0, ImmBits::kModeLookup, "MIN\0" "MAX\0" "MIN_ABS\0" "MAX_ABS\0" },
+    { 0x0Cu, 2, ImmBits::kModeLookup, "SIGN_A\0" "SIGN_B\0" "SIGN_0\0" "SIGN_1\0" }
+  };
+
+  static const ImmBits vreducexx_vrndscalexx[] = {
+    { 0x07u, 0, ImmBits::kModeLookup, "\0" "\0" "\0" "\0" "ROUND\0" "FLOOR\0" "CEIL\0" "TRUNC\0" },
+    { 0x08u, 3, ImmBits::kModeLookup, "\0" "SAE\0" },
+    { 0xF0u, 4, ImmBits::kModeFormat, "LEN=%d" }
+  };
+
+  static const ImmBits vroundxx[] = {
+    { 0x07u, 0, ImmBits::kModeLookup, "ROUND\0" "FLOOR\0" "CEIL\0" "TRUNC\0" "\0" "\0" "\0" "\0" },
+    { 0x08u, 3, ImmBits::kModeLookup, "\0" "INEXACT\0" }
+  };
+
+  uint32_t u8 = imm.valueAs<uint8_t>();
+  switch (instId) {
+    case Inst::kIdVblendpd:
+    case Inst::kIdBlendpd:
+      return FormatterInternal_formatImmShuf(sb, u8, 1, vecSize / 8);
+
+    case Inst::kIdVblendps:
+    case Inst::kIdBlendps:
+      return FormatterInternal_formatImmShuf(sb, u8, 1, vecSize / 4);
+
+    case Inst::kIdVcmppd:
+    case Inst::kIdVcmpps:
+    case Inst::kIdVcmpsd:
+    case Inst::kIdVcmpss:
+      return FormatterInternal_formatImmText(sb, u8, 5, 0, vcmpx);
+
+    case Inst::kIdCmppd:
+    case Inst::kIdCmpps:
+    case Inst::kIdCmpsd:
+    case Inst::kIdCmpss:
+      return FormatterInternal_formatImmText(sb, u8, 3, 0, vcmpx);
+
+    case Inst::kIdVdbpsadbw:
+      return FormatterInternal_formatImmShuf(sb, u8, 2, 4);
+
+    case Inst::kIdVdppd:
+    case Inst::kIdVdpps:
+    case Inst::kIdDppd:
+    case Inst::kIdDpps:
+      return FormatterInternal_formatImmShuf(sb, u8, 1, 8);
+
+    case Inst::kIdVmpsadbw:
+    case Inst::kIdMpsadbw:
+      return FormatterInternal_formatImmBits(sb, u8, vmpsadbw, Support::min<uint32_t>(vecSize / 8, 4));
+
+    case Inst::kIdVpblendw:
+    case Inst::kIdPblendw:
+      return FormatterInternal_formatImmShuf(sb, u8, 1, 8);
+
+    case Inst::kIdVpblendd:
+      return FormatterInternal_formatImmShuf(sb, u8, 1, Support::min<uint32_t>(vecSize / 4, 8));
+
+    case Inst::kIdVpclmulqdq:
+    case Inst::kIdPclmulqdq:
+      return FormatterInternal_formatImmBits(sb, u8, vpclmulqdq, ASMJIT_ARRAY_SIZE(vpclmulqdq));
+
+    case Inst::kIdVroundpd:
+    case Inst::kIdVroundps:
+    case Inst::kIdVroundsd:
+    case Inst::kIdVroundss:
+    case Inst::kIdRoundpd:
+    case Inst::kIdRoundps:
+    case Inst::kIdRoundsd:
+    case Inst::kIdRoundss:
+      return FormatterInternal_formatImmBits(sb, u8, vroundxx, ASMJIT_ARRAY_SIZE(vroundxx));
+
+    case Inst::kIdVshufpd:
+    case Inst::kIdShufpd:
+      return FormatterInternal_formatImmText(sb, u8, 1, 2, vshufpd, Support::min<uint32_t>(vecSize / 8, 8));
+
+    case Inst::kIdVshufps:
+    case Inst::kIdShufps:
+      return FormatterInternal_formatImmText(sb, u8, 2, 4, vshufps, 4);
+
+    case Inst::kIdVcvtps2ph:
+      return FormatterInternal_formatImmBits(sb, u8, vroundxx, 1);
+
+    case Inst::kIdVperm2f128:
+    case Inst::kIdVperm2i128:
+      return FormatterInternal_formatImmBits(sb, u8, vperm2x128, ASMJIT_ARRAY_SIZE(vperm2x128));
+
+    case Inst::kIdVpermilpd:
+      return FormatterInternal_formatImmShuf(sb, u8, 1, vecSize / 8);
+
+    case Inst::kIdVpermilps:
+      return FormatterInternal_formatImmShuf(sb, u8, 2, 4);
+
+    case Inst::kIdVpshufd:
+    case Inst::kIdPshufd:
+      return FormatterInternal_formatImmShuf(sb, u8, 2, 4);
+
+    case Inst::kIdVpshufhw:
+    case Inst::kIdVpshuflw:
+    case Inst::kIdPshufhw:
+    case Inst::kIdPshuflw:
+    case Inst::kIdPshufw:
+      return FormatterInternal_formatImmShuf(sb, u8, 2, 4);
+
+    case Inst::kIdVfixupimmpd:
+    case Inst::kIdVfixupimmps:
+    case Inst::kIdVfixupimmsd:
+    case Inst::kIdVfixupimmss:
+      return FormatterInternal_formatImmBits(sb, u8, vfixupimmxx, ASMJIT_ARRAY_SIZE(vfixupimmxx));
+
+    case Inst::kIdVfpclasspd:
+    case Inst::kIdVfpclassps:
+    case Inst::kIdVfpclasssd:
+    case Inst::kIdVfpclassss:
+      return FormatterInternal_formatImmBits(sb, u8, vfpclassxx, ASMJIT_ARRAY_SIZE(vfpclassxx));
+
+    case Inst::kIdVgetmantpd:
+    case Inst::kIdVgetmantps:
+    case Inst::kIdVgetmantsd:
+    case Inst::kIdVgetmantss:
+      return FormatterInternal_formatImmBits(sb, u8, vgetmantxx, ASMJIT_ARRAY_SIZE(vgetmantxx));
+
+    case Inst::kIdVpcmpb:
+    case Inst::kIdVpcmpd:
+    case Inst::kIdVpcmpq:
+    case Inst::kIdVpcmpw:
+    case Inst::kIdVpcmpub:
+    case Inst::kIdVpcmpud:
+    case Inst::kIdVpcmpuq:
+    case Inst::kIdVpcmpuw:
+      return FormatterInternal_formatImmText(sb, u8, 3, 0, vpcmpx);
+
+    case Inst::kIdVpcomb:
+    case Inst::kIdVpcomd:
+    case Inst::kIdVpcomq:
+    case Inst::kIdVpcomw:
+    case Inst::kIdVpcomub:
+    case Inst::kIdVpcomud:
+    case Inst::kIdVpcomuq:
+    case Inst::kIdVpcomuw:
+      return FormatterInternal_formatImmText(sb, u8, 3, 0, vpcomx);
+
+    case Inst::kIdVpermq:
+    case Inst::kIdVpermpd:
+      return FormatterInternal_formatImmShuf(sb, u8, 2, 4);
+
+    case Inst::kIdVpternlogd:
+    case Inst::kIdVpternlogq:
+      return FormatterInternal_formatImmShuf(sb, u8, 1, 8);
+
+    case Inst::kIdVrangepd:
+    case Inst::kIdVrangeps:
+    case Inst::kIdVrangesd:
+    case Inst::kIdVrangess:
+      return FormatterInternal_formatImmBits(sb, u8, vrangexx, ASMJIT_ARRAY_SIZE(vrangexx));
+
+    case Inst::kIdVreducepd:
+    case Inst::kIdVreduceps:
+    case Inst::kIdVreducesd:
+    case Inst::kIdVreducess:
+    case Inst::kIdVrndscalepd:
+    case Inst::kIdVrndscaleps:
+    case Inst::kIdVrndscalesd:
+    case Inst::kIdVrndscaless:
+      return FormatterInternal_formatImmBits(sb, u8, vreducexx_vrndscalexx, ASMJIT_ARRAY_SIZE(vreducexx_vrndscalexx));
+
+    case Inst::kIdVshuff32x4:
+    case Inst::kIdVshuff64x2:
+    case Inst::kIdVshufi32x4:
+    case Inst::kIdVshufi64x2: {
+      uint32_t count = Support::max<uint32_t>(vecSize / 16, 2u);
+      uint32_t bits = count <= 2 ? 1u : 2u;
+      return FormatterInternal_formatImmShuf(sb, u8, bits, count);
+    }
+
+    default:
+      return kErrorOk;
+  }
+}
+
+// x86::FormatterInternal - Format Instruction
+// ===========================================
+
+ASMJIT_FAVOR_SIZE Error FormatterInternal::formatInstruction(
+  String& sb,                  // Destination; this function only appends to it.
+  FormatFlags formatFlags,     // Forwarded to operand formatting; kExplainImms enables immediate explanations.
+  const BaseEmitter* emitter,  // Forwarded unchanged to formatOperand() / formatRegister().
+  Arch arch,                   // Forwarded to the mnemonic, operand, and register formatters.
+  const BaseInst& inst, const Operand_* operands, size_t opCount) noexcept {
+  // Appends option prefixes, the mnemonic, operands, and {er}/{sae}; returns the first error, kErrorOk otherwise.
+  InstId instId = inst.id();
+  InstOptions options = inst.options();
+
+  // Format instruction options and instruction mnemonic.
+  if (instId < Inst::_kIdCount) {
+    // VEX|EVEX options.
+    if (Support::test(options, InstOptions::kX86_Vex))
+      ASMJIT_PROPAGATE(sb.append("{vex} "));
+
+    if (Support::test(options, InstOptions::kX86_Vex3))
+      ASMJIT_PROPAGATE(sb.append("{vex3} "));
+
+    if (Support::test(options, InstOptions::kX86_Evex))
+      ASMJIT_PROPAGATE(sb.append("{evex} "));
+
+    // MOD/RM and MOD/MR options
+    if (Support::test(options, InstOptions::kX86_ModRM))
+      ASMJIT_PROPAGATE(sb.append("{modrm} "));
+    else if (Support::test(options, InstOptions::kX86_ModMR))
+      ASMJIT_PROPAGATE(sb.append("{modmr} "));
+
+    // SHORT|LONG options.
+    if (Support::test(options, InstOptions::kShortForm))
+      ASMJIT_PROPAGATE(sb.append("short "));
+
+    if (Support::test(options, InstOptions::kLongForm))
+      ASMJIT_PROPAGATE(sb.append("long "));
+
+    // LOCK|XACQUIRE|XRELEASE options.
+    if (Support::test(options, InstOptions::kX86_XAcquire))
+      ASMJIT_PROPAGATE(sb.append("xacquire "));
+
+    if (Support::test(options, InstOptions::kX86_XRelease))
+      ASMJIT_PROPAGATE(sb.append("xrelease "));
+
+    if (Support::test(options, InstOptions::kX86_Lock))
+      ASMJIT_PROPAGATE(sb.append("lock "));
+
+    // REP|REPNE options (append failures are propagated like every other append in this function).
+    if (Support::test(options, InstOptions::kX86_Rep | InstOptions::kX86_Repne)) {
+      ASMJIT_PROPAGATE(sb.append(Support::test(options, InstOptions::kX86_Rep) ? "rep " : "repnz "));
+      if (inst.hasExtraReg()) {
+        ASMJIT_PROPAGATE(sb.append("{"));
+        ASMJIT_PROPAGATE(formatOperand(sb, formatFlags, emitter, arch, inst.extraReg().toReg<BaseReg>()));
+        ASMJIT_PROPAGATE(sb.append("} "));
+      }
+    }
+
+    // REX options - "rex.<rxbw> " when any of the R|X|B|W bits is set, plain "rex " otherwise.
+    if (Support::test(options, InstOptions::kX86_Rex)) {
+      const InstOptions kRXBWMask = InstOptions::kX86_OpCodeR |
+                                    InstOptions::kX86_OpCodeX |
+                                    InstOptions::kX86_OpCodeB |
+                                    InstOptions::kX86_OpCodeW ;
+      if (Support::test(options, kRXBWMask)) {
+        ASMJIT_PROPAGATE(sb.append("rex."));
+        if (Support::test(options, InstOptions::kX86_OpCodeR)) ASMJIT_PROPAGATE(sb.append('r'));
+        if (Support::test(options, InstOptions::kX86_OpCodeX)) ASMJIT_PROPAGATE(sb.append('x'));
+        if (Support::test(options, InstOptions::kX86_OpCodeB)) ASMJIT_PROPAGATE(sb.append('b'));
+        if (Support::test(options, InstOptions::kX86_OpCodeW)) ASMJIT_PROPAGATE(sb.append('w'));
+        ASMJIT_PROPAGATE(sb.append(' '));
+      }
+      else {
+        ASMJIT_PROPAGATE(sb.append("rex "));
+      }
+    }
+
+    ASMJIT_PROPAGATE(InstInternal::instIdToString(arch, instId, sb));
+  }
+  else {
+    ASMJIT_PROPAGATE(sb.appendFormat("[InstId=#%u]", unsigned(instId)));  // Id outside the known range.
+  }
+
+  for (uint32_t i = 0; i < opCount; i++) {
+    const Operand_& op = operands[i];
+    if (op.isNone()) break;  // The first none-operand terminates the list.
+
+    ASMJIT_PROPAGATE(sb.append(i == 0 ? " " : ", "));
+    ASMJIT_PROPAGATE(formatOperand(sb, formatFlags, emitter, arch, op));
+
+    if (op.isImm() && uint32_t(formatFlags & FormatFlags::kExplainImms)) {
+      uint32_t vecSize = 16;  // Lower bound; raised to the size of the largest register operand.
+      for (uint32_t j = 0; j < opCount; j++)
+        if (operands[j].isReg())
+          vecSize = Support::max<uint32_t>(vecSize, operands[j].size());
+      ASMJIT_PROPAGATE(FormatterInternal_explainConst(sb, formatFlags, instId, vecSize, op.as<Imm>()));
+    }
+
+    // Support AVX-512 masking - {k}{z}.
+    if (i == 0) {
+      if (inst.extraReg().group() == RegGroup::kX86_K) {
+        ASMJIT_PROPAGATE(sb.append(" {"));
+        ASMJIT_PROPAGATE(formatRegister(sb, formatFlags, emitter, arch, inst.extraReg().type(), inst.extraReg().id()));
+        ASMJIT_PROPAGATE(sb.append('}'));
+
+        if (Support::test(options, InstOptions::kX86_ZMask))
+          ASMJIT_PROPAGATE(sb.append("{z}"));
+      }
+      else if (Support::test(options, InstOptions::kX86_ZMask)) {
+        ASMJIT_PROPAGATE(sb.append(" {z}"));
+      }
+    }
+
+    // Support AVX-512 broadcast - {1tox}.
+    if (op.isMem() && op.as<Mem>().hasBroadcast()) {
+      ASMJIT_PROPAGATE(sb.appendFormat(" {1to%u}", Support::bitMask(uint32_t(op.as<Mem>().getBroadcast()))));
+    }
+  }
+
+  // Support AVX-512 embedded rounding and suppress-all-exceptions {sae}.
+  if (inst.hasOption(InstOptions::kX86_ER | InstOptions::kX86_SAE)) {
+    if (inst.hasOption(InstOptions::kX86_ER)) {
+      uint32_t bits = uint32_t(inst.options() & InstOptions::kX86_ERMask) >> Support::ConstCTZ<uint32_t(InstOptions::kX86_ERMask)>::value;
+
+      const char roundingModes[] = "rn\0rd\0ru\0rz";  // Entries are 3 bytes apart: two letters and a NUL.
+      ASMJIT_PROPAGATE(sb.appendFormat(", {%s-sae}", roundingModes + bits * 3));
+    }
+    else {
+      ASMJIT_PROPAGATE(sb.append(", {sae}"));
+    }
+  }
+
+  return kErrorOk;
+}
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // !ASMJIT_NO_LOGGING
diff --git a/lib/lepton/asmjit/x86/x86formatter_p.h b/lib/lepton/asmjit/x86/x86formatter_p.h
new file mode 100644
index 0000000000..f37a8f6db1
--- /dev/null
+++ b/lib/lepton/asmjit/x86/x86formatter_p.h
@@ -0,0 +1,58 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_X86_X86FORMATTER_P_H_INCLUDED
+#define ASMJIT_X86_X86FORMATTER_P_H_INCLUDED
+
+#include "../core/api-config.h"
+#ifndef ASMJIT_NO_LOGGING
+
+#include "../core/formatter.h"
+#include "../core/string.h"
+#include "../x86/x86globals.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(x86)
+
+//! \cond INTERNAL
+//! \addtogroup asmjit_x86
+//! \{
+
+namespace FormatterInternal {
+//! Formats the feature `featureId` into `sb` (presumably appends its name - definition is not in this header).
+Error ASMJIT_CDECL formatFeature(
+  String& sb,
+  uint32_t featureId) noexcept;
+//! Appends the register identified by `regType` and `regId` to `sb`.
+Error ASMJIT_CDECL formatRegister(
+  String& sb,
+  FormatFlags flags,
+  const BaseEmitter* emitter,
+  Arch arch,
+  RegType regType,
+  uint32_t regId) noexcept;
+//! Appends a textual form of the operand `op` to `sb`.
+Error ASMJIT_CDECL formatOperand(
+  String& sb,
+  FormatFlags flags,
+  const BaseEmitter* emitter,
+  Arch arch,
+  const Operand_& op) noexcept;
+//! Appends the instruction `inst` (option prefixes, mnemonic, and up to `opCount` operands) to `sb`.
+Error ASMJIT_CDECL formatInstruction(
+  String& sb,
+  FormatFlags flags,
+  const BaseEmitter* emitter,
+  Arch arch,
+  const BaseInst& inst, const Operand_* operands, size_t opCount) noexcept;
+
+} // {FormatterInternal}
+
+//! \}
+//! \endcond
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // !ASMJIT_NO_LOGGING
+#endif // ASMJIT_X86_X86FORMATTER_P_H_INCLUDED
diff --git a/lib/lepton/asmjit/x86/x86func.cpp b/lib/lepton/asmjit/x86/x86func.cpp
new file mode 100644
index 0000000000..bba9eef148
--- /dev/null
+++ b/lib/lepton/asmjit/x86/x86func.cpp
@@ -0,0 +1,503 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#if !defined(ASMJIT_NO_X86)
+
+#include "../x86/x86func_p.h"
+#include "../x86/x86emithelper_p.h"
+#include "../x86/x86operand.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(x86)
+
+namespace FuncInternal {
+
+static inline bool shouldThreatAsCDeclIn64BitMode(CallConvId ccId) noexcept {
+  // True for the conventions that initCallConv() replaces by the platform's default 64-bit convention.
+  switch (ccId) {
+    case CallConvId::kCDecl:    case CallConvId::kStdCall:  case CallConvId::kThisCall:
+    case CallConvId::kFastCall: case CallConvId::kRegParm1: case CallConvId::kRegParm2:
+    case CallConvId::kRegParm3: return true;
+    default:                    return false;
+  }
+}
+
+ASMJIT_FAVOR_SIZE Error initCallConv(CallConv& cc, CallConvId ccId, const Environment& environment) noexcept {
+  constexpr uint32_t kZax = Gp::kIdAx;
+  constexpr uint32_t kZbx = Gp::kIdBx;
+  constexpr uint32_t kZcx = Gp::kIdCx;
+  constexpr uint32_t kZdx = Gp::kIdDx;
+  constexpr uint32_t kZsp = Gp::kIdSp;
+  constexpr uint32_t kZbp = Gp::kIdBp;
+  constexpr uint32_t kZsi = Gp::kIdSi;
+  constexpr uint32_t kZdi = Gp::kIdDi;
+  // Fills `cc` for `ccId`: save/restore sizes, argument registers, preserved registers, and flags.
+  bool winABI = environment.isPlatformWindows() || environment.isMSVC();
+
+  cc.setArch(environment.arch());
+  cc.setSaveRestoreRegSize(RegGroup::kVec, 16);
+  cc.setSaveRestoreRegSize(RegGroup::kX86_MM, 8);
+  cc.setSaveRestoreRegSize(RegGroup::kX86_K, 8);
+  cc.setSaveRestoreAlignment(RegGroup::kVec, 16);
+  cc.setSaveRestoreAlignment(RegGroup::kX86_MM, 8);
+  cc.setSaveRestoreAlignment(RegGroup::kX86_K, 8);
+
+  if (environment.is32Bit()) {
+    bool isStandardCallConv = true;  // Cleared only by the LightCall conventions below.
+
+    cc.setSaveRestoreRegSize(RegGroup::kGp, 4);
+    cc.setSaveRestoreAlignment(RegGroup::kGp, 4);
+
+    cc.setPreservedRegs(RegGroup::kGp, Support::bitMask(Gp::kIdBx, Gp::kIdSp, Gp::kIdBp, Gp::kIdSi, Gp::kIdDi));
+    cc.setNaturalStackAlignment(4);
+
+    switch (ccId) {
+      case CallConvId::kCDecl:
+        break;
+
+      case CallConvId::kStdCall:
+        cc.setFlags(CallConvFlags::kCalleePopsStack);
+        break;
+
+      case CallConvId::kFastCall:
+        cc.setFlags(CallConvFlags::kCalleePopsStack);
+        cc.setPassedOrder(RegGroup::kGp, kZcx, kZdx);
+        break;
+
+      case CallConvId::kVectorCall:
+        cc.setFlags(CallConvFlags::kCalleePopsStack);
+        cc.setPassedOrder(RegGroup::kGp, kZcx, kZdx);
+        cc.setPassedOrder(RegGroup::kVec, 0, 1, 2, 3, 4, 5);
+        break;
+
+      case CallConvId::kThisCall:
+        // NOTE: Even MINGW (starting with GCC 4.7.0) now uses __thiscall on MS Windows, so we won't bail to any
+        // other calling convention if __thiscall was specified.
+        if (winABI) {
+          cc.setFlags(CallConvFlags::kCalleePopsStack);
+          cc.setPassedOrder(RegGroup::kGp, kZcx);
+        }
+        else {
+          ccId = CallConvId::kCDecl;  // The id stored by setId() at the end becomes kCDecl.
+        }
+        break;
+
+      case CallConvId::kRegParm1:
+        cc.setPassedOrder(RegGroup::kGp, kZax);
+        break;
+
+      case CallConvId::kRegParm2:
+        cc.setPassedOrder(RegGroup::kGp, kZax, kZdx);
+        break;
+
+      case CallConvId::kRegParm3:
+        cc.setPassedOrder(RegGroup::kGp, kZax, kZdx, kZcx);
+        break;
+
+      case CallConvId::kLightCall2:
+      case CallConvId::kLightCall3:
+      case CallConvId::kLightCall4: {
+        uint32_t n = uint32_t(ccId) - uint32_t(CallConvId::kLightCall2) + 2;  // 2..4 if the kLightCallN ids are consecutive.
+
+        cc.setFlags(CallConvFlags::kPassFloatsByVec);
+        cc.setPassedOrder(RegGroup::kGp, kZax, kZdx, kZcx, kZsi, kZdi);
+        cc.setPassedOrder(RegGroup::kVec, 0, 1, 2, 3, 4, 5, 6, 7);
+        cc.setPassedOrder(RegGroup::kX86_K, 0, 1, 2, 3, 4, 5, 6, 7);
+        cc.setPassedOrder(RegGroup::kX86_MM, 0, 1, 2, 3, 4, 5, 6, 7);
+        cc.setPreservedRegs(RegGroup::kGp, Support::lsbMask<uint32_t>(8));
+        cc.setPreservedRegs(RegGroup::kVec, Support::lsbMask<uint32_t>(8) & ~Support::lsbMask<uint32_t>(n));  // All of 0..7 except the first `n`.
+
+        cc.setNaturalStackAlignment(16);
+        isStandardCallConv = false;
+        break;
+      }
+
+      default:
+        return DebugUtils::errored(kErrorInvalidArgument);  // `ccId` is not handled for 32-bit targets.
+    }
+
+    if (isStandardCallConv) {
+      // MMX arguments are something where compiler vendors disagree. For example GCC and MSVC would pass first three
+      // via registers and the rest via stack, however Clang passes all via stack. Returning MMX registers is even
+      // more fun, where GCC uses MM0, but Clang uses EAX:EDX pair. I'm not sure it's something we should be worried
+      // about as MMX is deprecated anyway.
+      cc.setPassedOrder(RegGroup::kX86_MM, 0, 1, 2);
+
+      // Vector arguments (XMM|YMM|ZMM) are passed via registers. However, if the function is variadic then they have
+      // to be passed via stack.
+      cc.setPassedOrder(RegGroup::kVec, 0, 1, 2);
+
+      // Functions with variable arguments always use stack for MM and vector arguments.
+      cc.addFlags(CallConvFlags::kPassVecByStackIfVA);
+    }
+
+    if (ccId == CallConvId::kCDecl) {
+      cc.addFlags(CallConvFlags::kVarArgCompatible);
+    }
+  }
+  else {
+    cc.setSaveRestoreRegSize(RegGroup::kGp, 8);
+    cc.setSaveRestoreAlignment(RegGroup::kGp, 8);
+
+    // Preprocess the calling convention into a common id as many conventions are normally ignored even by C/C++
+    // compilers and treated as `__cdecl`.
+    if (shouldThreatAsCDeclIn64BitMode(ccId))
+      ccId = winABI ? CallConvId::kX64Windows : CallConvId::kX64SystemV;
+
+    switch (ccId) {
+      case CallConvId::kX64SystemV: {
+        cc.setFlags(CallConvFlags::kPassFloatsByVec |
+                    CallConvFlags::kPassMmxByXmm    |
+                    CallConvFlags::kVarArgCompatible);
+        cc.setNaturalStackAlignment(16);
+        cc.setRedZoneSize(128);
+        cc.setPassedOrder(RegGroup::kGp, kZdi, kZsi, kZdx, kZcx, 8, 9);
+        cc.setPassedOrder(RegGroup::kVec, 0, 1, 2, 3, 4, 5, 6, 7);
+        cc.setPreservedRegs(RegGroup::kGp, Support::bitMask(kZbx, kZsp, kZbp, 12, 13, 14, 15));
+        break;
+      }
+
+      case CallConvId::kX64Windows: {
+        cc.setStrategy(CallConvStrategy::kX64Windows);
+        cc.setFlags(CallConvFlags::kPassFloatsByVec |
+                    CallConvFlags::kIndirectVecArgs |
+                    CallConvFlags::kPassMmxByGp     |
+                    CallConvFlags::kVarArgCompatible);
+        cc.setNaturalStackAlignment(16);
+        // Maximum 4 arguments in registers, each adds 8 bytes to the spill zone.
+        cc.setSpillZoneSize(4 * 8);
+        cc.setPassedOrder(RegGroup::kGp, kZcx, kZdx, 8, 9);
+        cc.setPassedOrder(RegGroup::kVec, 0, 1, 2, 3);
+        cc.setPreservedRegs(RegGroup::kGp, Support::bitMask(kZbx, kZsp, kZbp, kZsi, kZdi, 12, 13, 14, 15));
+        cc.setPreservedRegs(RegGroup::kVec, Support::bitMask(6, 7, 8, 9, 10, 11, 12, 13, 14, 15));
+        break;
+      }
+
+      case CallConvId::kVectorCall: {
+        cc.setStrategy(CallConvStrategy::kX64VectorCall);
+        cc.setFlags(CallConvFlags::kPassFloatsByVec |
+                    CallConvFlags::kPassMmxByGp     );
+        cc.setNaturalStackAlignment(16);
+        // Maximum 6 arguments in registers, each adds 8 bytes to the spill zone.
+        cc.setSpillZoneSize(6 * 8);
+        cc.setPassedOrder(RegGroup::kGp, kZcx, kZdx, 8, 9);
+        cc.setPassedOrder(RegGroup::kVec, 0, 1, 2, 3, 4, 5);
+        cc.setPreservedRegs(RegGroup::kGp, Support::bitMask(kZbx, kZsp, kZbp, kZsi, kZdi, 12, 13, 14, 15));
+        cc.setPreservedRegs(RegGroup::kVec, Support::bitMask(6, 7, 8, 9, 10, 11, 12, 13, 14, 15));
+        break;
+      }
+
+      case CallConvId::kLightCall2:
+      case CallConvId::kLightCall3:
+      case CallConvId::kLightCall4: {
+        uint32_t n = uint32_t(ccId) - uint32_t(CallConvId::kLightCall2) + 2;  // 2..4 if the kLightCallN ids are consecutive.
+
+        cc.setFlags(CallConvFlags::kPassFloatsByVec);
+        cc.setNaturalStackAlignment(16);
+        cc.setPassedOrder(RegGroup::kGp, kZax, kZdx, kZcx, kZsi, kZdi);
+        cc.setPassedOrder(RegGroup::kVec, 0, 1, 2, 3, 4, 5, 6, 7);
+        cc.setPassedOrder(RegGroup::kX86_K, 0, 1, 2, 3, 4, 5, 6, 7);
+        cc.setPassedOrder(RegGroup::kX86_MM, 0, 1, 2, 3, 4, 5, 6, 7);
+
+        cc.setPreservedRegs(RegGroup::kGp, Support::lsbMask<uint32_t>(16));
+        cc.setPreservedRegs(RegGroup::kVec, ~Support::lsbMask<uint32_t>(n));  // Every bit except the lowest `n`.
+        break;
+      }
+
+      default:
+        return DebugUtils::errored(kErrorInvalidArgument);  // `ccId` is not handled for 64-bit targets.
+    }
+  }
+
+  cc.setId(ccId);  // May differ from the requested id because of the remapping above.
+  return kErrorOk;
+}
+
+ASMJIT_FAVOR_SIZE void unpackValues(FuncDetail& func, FuncValuePack& pack) noexcept {
+  // Splits a 64-bit integer in the first slot of `pack` into two 32-bit slots when `func` targets a 32-bit
+  // architecture; every other pack is left untouched.
+  const TypeId firstType = pack[0].typeId();
+
+  // Only 64-bit integers are candidates for unpacking.
+  if (firstType != TypeId::kInt64 && firstType != TypeId::kUInt64)
+    return;
+
+  // Nothing to split unless the calling convention's architecture is 32-bit.
+  if (!Environment::is32Bit(func.callConv().arch()))
+    return;
+
+  // The first slot always becomes an unsigned 32-bit value.
+  pack[0].initTypeId(TypeId::kUInt32);
+
+  // The second slot gets the id two below the original (presumably the same-signedness 32-bit type).
+  pack[1].initTypeId(TypeId(uint32_t(firstType) - 2));
+}
+
+ASMJIT_FAVOR_SIZE Error initFuncDetail(FuncDetail& func, const FuncSignature& signature, uint32_t registerSize) noexcept {
+  const CallConv& cc = func.callConv();
+  Arch arch = cc.arch();
+  uint32_t stackOffset = cc._spillZoneSize;  // The first stack argument is placed after the spill zone.
+  uint32_t argCount = func.argCount();
+  // Assigns a register or a stack offset to every return value and argument of `func`, then stores the stack size.
+  // Up to two return values can be returned in GP registers.
+  static const uint8_t gpReturnIndexes[4] = {
+    uint8_t(Gp::kIdAx),
+    uint8_t(Gp::kIdDx),
+    uint8_t(BaseReg::kIdBad),
+    uint8_t(BaseReg::kIdBad)
+  };
+
+  if (func.hasRet()) {
+    unpackValues(func, func._rets);
+    for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
+      TypeId typeId = func._rets[valueIndex].typeId();
+
+      // Terminate at the first void type (end of the pack).
+      if (typeId == TypeId::kVoid)
+        break;
+
+      switch (typeId) {
+        case TypeId::kInt64:
+        case TypeId::kUInt64: {
+          if (gpReturnIndexes[valueIndex] != BaseReg::kIdBad)
+            func._rets[valueIndex].initReg(RegType::kX86_Gpq, gpReturnIndexes[valueIndex], typeId);
+          else
+            return DebugUtils::errored(kErrorInvalidState);  // No GP return register exists for this slot.
+          break;
+        }
+
+        case TypeId::kInt8:
+        case TypeId::kInt16:
+        case TypeId::kInt32: {
+          if (gpReturnIndexes[valueIndex] != BaseReg::kIdBad)
+            func._rets[valueIndex].initReg(RegType::kX86_Gpd, gpReturnIndexes[valueIndex], TypeId::kInt32);
+          else
+            return DebugUtils::errored(kErrorInvalidState);
+          break;
+        }
+
+        case TypeId::kUInt8:
+        case TypeId::kUInt16:
+        case TypeId::kUInt32: {
+          if (gpReturnIndexes[valueIndex] != BaseReg::kIdBad)
+            func._rets[valueIndex].initReg(RegType::kX86_Gpd, gpReturnIndexes[valueIndex], TypeId::kUInt32);
+          else
+            return DebugUtils::errored(kErrorInvalidState);
+          break;
+        }
+
+        case TypeId::kFloat32:
+        case TypeId::kFloat64: {
+          RegType regType = Environment::is32Bit(arch) ? RegType::kX86_St : RegType::kX86_Xmm;
+          func._rets[valueIndex].initReg(regType, valueIndex, typeId);
+          break;
+        }
+
+        case TypeId::kFloat80: {
+          // 80-bit floats are always returned by FP0.
+          func._rets[valueIndex].initReg(RegType::kX86_St, valueIndex, typeId);
+          break;
+        }
+
+        case TypeId::kMmx32:
+        case TypeId::kMmx64: {
+          // MM registers are returned through XMM (SystemV) or GPQ (Win64).
+          RegType regType = RegType::kX86_Mm;
+          uint32_t regIndex = valueIndex;
+          if (Environment::is64Bit(arch)) {
+            regType = cc.strategy() == CallConvStrategy::kDefault ? RegType::kX86_Xmm : RegType::kX86_Gpq;
+            regIndex = cc.strategy() == CallConvStrategy::kDefault ? valueIndex : gpReturnIndexes[valueIndex];
+
+            if (regIndex == BaseReg::kIdBad)
+              return DebugUtils::errored(kErrorInvalidState);
+          }
+
+          func._rets[valueIndex].initReg(regType, regIndex, typeId);
+          break;
+        }
+
+        default: {
+          func._rets[valueIndex].initReg(vecTypeIdToRegType(typeId), valueIndex, typeId);
+          break;
+        }
+      }
+    }
+  }
+
+  switch (cc.strategy()) {
+    case CallConvStrategy::kDefault: {
+      uint32_t gpzPos = 0;  // Next unused slot of the GP passing order.
+      uint32_t vecPos = 0;  // Next unused slot of the vector passing order.
+
+      for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) {
+        unpackValues(func, func._args[argIndex]);
+
+        for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
+          FuncValue& arg = func._args[argIndex][valueIndex];
+
+          // Terminate if there are no more arguments in the pack.
+          if (!arg)
+            break;
+
+          TypeId typeId = arg.typeId();
+
+          if (TypeUtils::isInt(typeId)) {
+            uint32_t regId = BaseReg::kIdBad;
+
+            if (gpzPos < CallConv::kMaxRegArgsPerGroup)
+              regId = cc._passedOrder[RegGroup::kGp].id[gpzPos];
+
+            if (regId != BaseReg::kIdBad) {
+              RegType regType = typeId <= TypeId::kUInt32 ? RegType::kX86_Gpd : RegType::kX86_Gpq;
+              arg.assignRegData(regType, regId);
+              func.addUsedRegs(RegGroup::kGp, Support::bitMask(regId));
+              gpzPos++;
+            }
+            else {
+              uint32_t size = Support::max<uint32_t>(TypeUtils::sizeOf(typeId), registerSize);  // At least one register-sized slot.
+              arg.assignStackOffset(int32_t(stackOffset));
+              stackOffset += size;
+            }
+            continue;
+          }
+
+          if (TypeUtils::isFloat(typeId) || TypeUtils::isVec(typeId)) {
+            uint32_t regId = BaseReg::kIdBad;
+
+            if (vecPos < CallConv::kMaxRegArgsPerGroup)
+              regId = cc._passedOrder[RegGroup::kVec].id[vecPos];
+
+            if (TypeUtils::isFloat(typeId)) {
+              // If this is a float, but `kFlagPassFloatsByVec` is false, we have to use stack instead. This should
+              // be only used by 32-bit calling conventions.
+              if (!cc.hasFlag(CallConvFlags::kPassFloatsByVec))
+                regId = BaseReg::kIdBad;
+            }
+            else {
+              // Pass vector registers via stack if this is a variable arguments function. This should be only used
+              // by 32-bit calling conventions.
+              if (signature.hasVarArgs() && cc.hasFlag(CallConvFlags::kPassVecByStackIfVA))
+                regId = BaseReg::kIdBad;
+            }
+
+            if (regId != BaseReg::kIdBad) {
+              arg.initTypeId(typeId);
+              arg.assignRegData(vecTypeIdToRegType(typeId), regId);
+              func.addUsedRegs(RegGroup::kVec, Support::bitMask(regId));
+              vecPos++;
+            }
+            else {
+              uint32_t size = TypeUtils::sizeOf(typeId);
+              arg.assignStackOffset(int32_t(stackOffset));
+              stackOffset += size;
+            }
+            continue;
+          }
+        }
+      }
+      break;
+    }
+
+    case CallConvStrategy::kX64Windows:
+    case CallConvStrategy::kX64VectorCall: {
+      // Both X64 and VectorCall behave similarly - arguments are indexed from left to right. The position of the
+      // argument determines in which register the argument is allocated, so it's either GP or one of XMM/YMM/ZMM
+      // registers.
+      //
+      //       [       X64       ] [VecCall]
+      // Index: #0   #1   #2   #3   #4   #5
+      //
+      // GP   : RCX  RDX  R8   R9
+      // VEC  : XMM0 XMM1 XMM2 XMM3 XMM4 XMM5
+      //
+      // For example function `f(int a, double b, int c, double d)` will be:
+      //
+      //        (a)  (b)  (c)  (d)
+      //        RCX  XMM1 R8   XMM3
+      //
+      // Unused vector registers are used by HVA.
+      bool isVectorCall = (cc.strategy() == CallConvStrategy::kX64VectorCall);
+
+      for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) {
+        unpackValues(func, func._args[argIndex]);
+
+        for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
+          FuncValue& arg = func._args[argIndex][valueIndex];
+
+          // Terminate if there are no more arguments in the pack.
+          if (!arg)
+            break;
+
+          TypeId typeId = arg.typeId();
+          uint32_t size = TypeUtils::sizeOf(typeId);
+
+          if (TypeUtils::isInt(typeId) || TypeUtils::isMmx(typeId)) {
+            uint32_t regId = BaseReg::kIdBad;
+
+            if (argIndex < CallConv::kMaxRegArgsPerGroup)
+              regId = cc._passedOrder[RegGroup::kGp].id[argIndex];
+
+            if (regId != BaseReg::kIdBad) {
+              RegType regType = size <= 4 && !TypeUtils::isMmx(typeId) ? RegType::kX86_Gpd : RegType::kX86_Gpq;
+              arg.assignRegData(regType, regId);
+              func.addUsedRegs(RegGroup::kGp, Support::bitMask(regId));
+            }
+            else {
+              arg.assignStackOffset(int32_t(stackOffset));
+              stackOffset += 8;
+            }
+            continue;
+          }
+
+          if (TypeUtils::isFloat(typeId) || TypeUtils::isVec(typeId)) {
+            uint32_t regId = BaseReg::kIdBad;
+
+            if (argIndex < CallConv::kMaxRegArgsPerGroup)
+              regId = cc._passedOrder[RegGroup::kVec].id[argIndex];
+
+            if (regId != BaseReg::kIdBad) {
+              // X64-ABI doesn't allow vector types (XMM|YMM|ZMM) to be passed via registers, however, VectorCall
+              // was designed for that purpose.
+              if (TypeUtils::isFloat(typeId) || isVectorCall) {
+                RegType regType = vecTypeIdToRegType(typeId);
+                arg.assignRegData(regType, regId);
+                func.addUsedRegs(RegGroup::kVec, Support::bitMask(regId));
+                continue;
+              }
+            }
+
+            // Passed via stack if the argument is float/double or indirectly. The trap is - if the argument is
+            // passed indirectly, the address can be passed via register, if the argument's index has GP one.
+            if (TypeUtils::isFloat(typeId)) {
+              arg.assignStackOffset(int32_t(stackOffset));
+            }
+            else {  // The GP lookup is bounds-checked exactly like the other `_passedOrder` reads in this function.
+              uint32_t gpRegId = argIndex < CallConv::kMaxRegArgsPerGroup ? cc._passedOrder[RegGroup::kGp].id[argIndex] : uint32_t(BaseReg::kIdBad);
+              if (gpRegId != BaseReg::kIdBad)
+                arg.assignRegData(RegType::kX86_Gpq, gpRegId);
+              else
+                arg.assignStackOffset(int32_t(stackOffset));
+              arg.addFlags(FuncValue::kFlagIsIndirect);
+            }
+
+            // Always 8 bytes (float/double/pointer).
+            stackOffset += 8;
+            continue;
+          }
+        }
+      }
+      break;
+    }
+  }
+
+  func._argStackSize = stackOffset;
+  return kErrorOk;
+}
+
+} // {FuncInternal}
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // !ASMJIT_NO_X86
diff --git a/lib/lepton/asmjit/x86/x86func_p.h b/lib/lepton/asmjit/x86/x86func_p.h
new file mode 100644
index 0000000000..0fe1da14d9
--- /dev/null
+++ b/lib/lepton/asmjit/x86/x86func_p.h
@@ -0,0 +1,33 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_X86_X86FUNC_P_H_INCLUDED
+#define ASMJIT_X86_X86FUNC_P_H_INCLUDED
+
+#include "../core/func.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(x86)
+
+//! \cond INTERNAL
+//! \addtogroup asmjit_x86
+//! \{
+
+//! X86-specific function API (calling conventions and other utilities).
+namespace FuncInternal {
+
+//! Initializes `cc` for the calling convention `ccId` and the target `environment` (X86 specific).
+Error initCallConv(CallConv& cc, CallConvId ccId, const Environment& environment) noexcept;
+
+//! Initializes `func` (X86 specific) - assigns registers or stack offsets to its return values and arguments.
+Error initFuncDetail(FuncDetail& func, const FuncSignature& signature, uint32_t registerSize) noexcept;
+
+} // {FuncInternal}
+
+//! \}
+//! \endcond
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // ASMJIT_X86_X86FUNC_P_H_INCLUDED
diff --git a/lib/lepton/asmjit/x86/x86globals.h b/lib/lepton/asmjit/x86/x86globals.h
new file mode 100644
index 0000000000..803c813ac5
--- /dev/null
+++ b/lib/lepton/asmjit/x86/x86globals.h
@@ -0,0 +1,2169 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_X86_X86GLOBALS_H_INCLUDED
+#define ASMJIT_X86_X86GLOBALS_H_INCLUDED
+
+#include "../core/archtraits.h"
+#include "../core/inst.h"
+
+//! \namespace asmjit::x86
+//! \ingroup asmjit_x86
+//!
+//! X86/X64 API.
+
+ASMJIT_BEGIN_SUB_NAMESPACE(x86)
+
+//! \addtogroup asmjit_x86
+//! \{
+
+//! Condition code (values 0x00..0x0F; several names are aliases of the same value).
+enum class CondCode : uint8_t {
+  kO             = 0x00u,       //!<         OF==1
+  kNO            = 0x01u,       //!<         OF==0
+  kC             = 0x02u,       //!< CF==1
+  kB             = 0x02u,       //!< CF==1          (unsigned < )
+  kNAE           = 0x02u,       //!< CF==1          (unsigned < )
+  kNC            = 0x03u,       //!< CF==0
+  kAE            = 0x03u,       //!< CF==0          (unsigned >=)
+  kNB            = 0x03u,       //!< CF==0          (unsigned >=)
+  kE             = 0x04u,       //!<         ZF==1  (any_sign ==)
+  kZ             = 0x04u,       //!<         ZF==1  (any_sign ==)
+  kNE            = 0x05u,       //!<         ZF==0  (any_sign !=)
+  kNZ            = 0x05u,       //!<         ZF==0  (any_sign !=)
+  kBE            = 0x06u,       //!< CF==1 | ZF==1  (unsigned <=)
+  kNA            = 0x06u,       //!< CF==1 | ZF==1  (unsigned <=)
+  kA             = 0x07u,       //!< CF==0 & ZF==0  (unsigned > )
+  kNBE           = 0x07u,       //!< CF==0 & ZF==0  (unsigned > )
+  kS             = 0x08u,       //!<         SF==1  (is negative)
+  kNS            = 0x09u,       //!<         SF==0  (is positive or zero)
+  kP             = 0x0Au,       //!< PF==1          (parity even)
+  kPE            = 0x0Au,       //!< PF==1          (parity even)
+  kPO            = 0x0Bu,       //!< PF==0          (parity odd)
+  kNP            = 0x0Bu,       //!< PF==0          (parity odd)
+  kL             = 0x0Cu,       //!<         SF!=OF (signed < )
+  kNGE           = 0x0Cu,       //!<         SF!=OF (signed < )
+  kGE            = 0x0Du,       //!<         SF==OF (signed >=)
+  kNL            = 0x0Du,       //!<         SF==OF (signed >=)
+  kLE            = 0x0Eu,       //!< ZF==1 | SF!=OF (signed <=)
+  kNG            = 0x0Eu,       //!< ZF==1 | SF!=OF (signed <=)
+  kG             = 0x0Fu,       //!< ZF==0 & SF==OF (signed > )
+  kNLE           = 0x0Fu,       //!< ZF==0 & SF==OF (signed > )
+
+  kZero          = kZ,          //!< Zero flag set (ZF==1).
+  kNotZero       = kNZ,         //!< Zero flag clear (ZF==0).
+
+  kSign          = kS,          //!< Sign flag set (SF==1).
+  kNotSign       = kNS,         //!< Sign flag clear (SF==0).
+
+  kNegative      = kS,          //!< Negative (SF==1), alias of `kSign`.
+  kPositive      = kNS,         //!< Positive or zero (SF==0), alias of `kNotSign`.
+
+  kOverflow      = kO,          //!< Overflow (signed, OF==1).
+  kNotOverflow   = kNO,         //!< Not overflow (signed, OF==0).
+
+  kEqual         = kE,          //!< `a == b` (equal).
+  kNotEqual      = kNE,         //!< `a != b` (not equal).
+
+  kSignedLT      = kL,          //!< `a <  b` (signed).
+  kSignedLE      = kLE,         //!< `a <= b` (signed).
+  kSignedGT      = kG,          //!< `a >  b` (signed).
+  kSignedGE      = kGE,         //!< `a >= b` (signed).
+
+  kUnsignedLT    = kB,          //!< `a <  b` (unsigned).
+  kUnsignedLE    = kBE,         //!< `a <= b` (unsigned).
+  kUnsignedGT    = kA,          //!< `a >  b` (unsigned).
+  kUnsignedGE    = kAE,         //!< `a >= b` (unsigned).
+
+  kParityEven    = kP,          //!< Even parity (PF==1).
+  kParityOdd     = kPO,         //!< Odd parity (PF==0).
+
+  kMaxValue      = 0x0Fu        //!< Highest condition code value.
+};
+
+//! \cond
+static constexpr CondCode _reverseCondTable[] = { // Indexed by `uint8_t(CondCode)`; each entry is that condition with the comparison operands swapped.
+  CondCode::kO,  // O  <- O
+  CondCode::kNO, // NO <- NO
+  CondCode::kA , // A  <- B
+  CondCode::kBE, // BE <- AE
+  CondCode::kE,  // E  <- E
+  CondCode::kNE, // NE <- NE
+  CondCode::kAE, // AE <- BE
+  CondCode::kB , // B  <- A
+  CondCode::kS,  // S  <- S
+  CondCode::kNS, // NS <- NS
+  CondCode::kPE, // PE <- PE
+  CondCode::kPO, // PO <- PO
+  CondCode::kG,  // G  <- L
+  CondCode::kLE, // LE <- GE
+  CondCode::kGE, // GE <- LE
+  CondCode::kL   // L  <- G
+};
+//! \endcond
+
+//! Returns the condition code equivalent to `cond` once the operands of the comparison are swapped (table lookup).
+static inline constexpr CondCode reverseCond(CondCode cond) noexcept { return _reverseCondTable[static_cast<uint8_t>(cond)]; }
+//! Returns the logical negation of `cond` by flipping the lowest bit of its value (e.g. `kE` <-> `kNE`).
+static inline constexpr CondCode negateCond(CondCode cond) noexcept { return static_cast<CondCode>(static_cast<uint8_t>(cond) ^ 1u); }
+
+//! Instruction.
+//!
+//! \note Only used to hold x86-specific instruction identifiers and some additional helper functions.
+namespace Inst {
+  //! Instruction id.
+  enum Id : uint32_t {
+    // ${InstId:Begin}
+    kIdNone = 0,                         //!< Invalid instruction id.
+    kIdAaa,                              //!< Instruction 'aaa' (X86).
+    kIdAad,                              //!< Instruction 'aad' (X86).
+    kIdAam,                              //!< Instruction 'aam' (X86).
+    kIdAas,                              //!< Instruction 'aas' (X86).
+    kIdAdc,                              //!< Instruction 'adc'.
+    kIdAdcx,                             //!< Instruction 'adcx' {ADX}.
+    kIdAdd,                              //!< Instruction 'add'.
+    kIdAddpd,                            //!< Instruction 'addpd' {SSE2}.
+    kIdAddps,                            //!< Instruction 'addps' {SSE}.
+    kIdAddsd,                            //!< Instruction 'addsd' {SSE2}.
+    kIdAddss,                            //!< Instruction 'addss' {SSE}.
+    kIdAddsubpd,                         //!< Instruction 'addsubpd' {SSE3}.
+    kIdAddsubps,                         //!< Instruction 'addsubps' {SSE3}.
+    kIdAdox,                             //!< Instruction 'adox' {ADX}.
+    kIdAesdec,                           //!< Instruction 'aesdec' {AESNI}.
+    kIdAesdeclast,                       //!< Instruction 'aesdeclast' {AESNI}.
+    kIdAesenc,                           //!< Instruction 'aesenc' {AESNI}.
+    kIdAesenclast,                       //!< Instruction 'aesenclast' {AESNI}.
+    kIdAesimc,                           //!< Instruction 'aesimc' {AESNI}.
+    kIdAeskeygenassist,                  //!< Instruction 'aeskeygenassist' {AESNI}.
+    kIdAnd,                              //!< Instruction 'and'.
+    kIdAndn,                             //!< Instruction 'andn' {BMI}.
+    kIdAndnpd,                           //!< Instruction 'andnpd' {SSE2}.
+    kIdAndnps,                           //!< Instruction 'andnps' {SSE}.
+    kIdAndpd,                            //!< Instruction 'andpd' {SSE2}.
+    kIdAndps,                            //!< Instruction 'andps' {SSE}.
+    kIdArpl,                             //!< Instruction 'arpl' (X86).
+    kIdBextr,                            //!< Instruction 'bextr' {BMI}.
+    kIdBlcfill,                          //!< Instruction 'blcfill' {TBM}.
+    kIdBlci,                             //!< Instruction 'blci' {TBM}.
+    kIdBlcic,                            //!< Instruction 'blcic' {TBM}.
+    kIdBlcmsk,                           //!< Instruction 'blcmsk' {TBM}.
+    kIdBlcs,                             //!< Instruction 'blcs' {TBM}.
+    kIdBlendpd,                          //!< Instruction 'blendpd' {SSE4_1}.
+    kIdBlendps,                          //!< Instruction 'blendps' {SSE4_1}.
+    kIdBlendvpd,                         //!< Instruction 'blendvpd' {SSE4_1}.
+    kIdBlendvps,                         //!< Instruction 'blendvps' {SSE4_1}.
+    kIdBlsfill,                          //!< Instruction 'blsfill' {TBM}.
+    kIdBlsi,                             //!< Instruction 'blsi' {BMI}.
+    kIdBlsic,                            //!< Instruction 'blsic' {TBM}.
+    kIdBlsmsk,                           //!< Instruction 'blsmsk' {BMI}.
+    kIdBlsr,                             //!< Instruction 'blsr' {BMI}.
+    kIdBndcl,                            //!< Instruction 'bndcl' {MPX}.
+    kIdBndcn,                            //!< Instruction 'bndcn' {MPX}.
+    kIdBndcu,                            //!< Instruction 'bndcu' {MPX}.
+    kIdBndldx,                           //!< Instruction 'bndldx' {MPX}.
+    kIdBndmk,                            //!< Instruction 'bndmk' {MPX}.
+    kIdBndmov,                           //!< Instruction 'bndmov' {MPX}.
+    kIdBndstx,                           //!< Instruction 'bndstx' {MPX}.
+    kIdBound,                            //!< Instruction 'bound' (X86).
+    kIdBsf,                              //!< Instruction 'bsf'.
+    kIdBsr,                              //!< Instruction 'bsr'.
+    kIdBswap,                            //!< Instruction 'bswap'.
+    kIdBt,                               //!< Instruction 'bt'.
+    kIdBtc,                              //!< Instruction 'btc'.
+    kIdBtr,                              //!< Instruction 'btr'.
+    kIdBts,                              //!< Instruction 'bts'.
+    kIdBzhi,                             //!< Instruction 'bzhi' {BMI2}.
+    kIdCall,                             //!< Instruction 'call'.
+    kIdCbw,                              //!< Instruction 'cbw'.
+    kIdCdq,                              //!< Instruction 'cdq'.
+    kIdCdqe,                             //!< Instruction 'cdqe' (X64).
+    kIdClac,                             //!< Instruction 'clac' {SMAP}.
+    kIdClc,                              //!< Instruction 'clc'.
+    kIdCld,                              //!< Instruction 'cld'.
+    kIdCldemote,                         //!< Instruction 'cldemote' {CLDEMOTE}.
+    kIdClflush,                          //!< Instruction 'clflush' {CLFLUSH}.
+    kIdClflushopt,                       //!< Instruction 'clflushopt' {CLFLUSHOPT}.
+    kIdClgi,                             //!< Instruction 'clgi' {SVM}.
+    kIdCli,                              //!< Instruction 'cli'.
+    kIdClrssbsy,                         //!< Instruction 'clrssbsy' {CET_SS}.
+    kIdClts,                             //!< Instruction 'clts'.
+    kIdClui,                             //!< Instruction 'clui' {UINTR} (X64).
+    kIdClwb,                             //!< Instruction 'clwb' {CLWB}.
+    kIdClzero,                           //!< Instruction 'clzero' {CLZERO}.
+    kIdCmc,                              //!< Instruction 'cmc'.
+    kIdCmova,                            //!< Instruction 'cmova' {CMOV}.
+    kIdCmovae,                           //!< Instruction 'cmovae' {CMOV}.
+    kIdCmovb,                            //!< Instruction 'cmovb' {CMOV}.
+    kIdCmovbe,                           //!< Instruction 'cmovbe' {CMOV}.
+    kIdCmovc,                            //!< Instruction 'cmovc' {CMOV}.
+    kIdCmove,                            //!< Instruction 'cmove' {CMOV}.
+    kIdCmovg,                            //!< Instruction 'cmovg' {CMOV}.
+    kIdCmovge,                           //!< Instruction 'cmovge' {CMOV}.
+    kIdCmovl,                            //!< Instruction 'cmovl' {CMOV}.
+    kIdCmovle,                           //!< Instruction 'cmovle' {CMOV}.
+    kIdCmovna,                           //!< Instruction 'cmovna' {CMOV}.
+    kIdCmovnae,                          //!< Instruction 'cmovnae' {CMOV}.
+    kIdCmovnb,                           //!< Instruction 'cmovnb' {CMOV}.
+    kIdCmovnbe,                          //!< Instruction 'cmovnbe' {CMOV}.
+    kIdCmovnc,                           //!< Instruction 'cmovnc' {CMOV}.
+    kIdCmovne,                           //!< Instruction 'cmovne' {CMOV}.
+    kIdCmovng,                           //!< Instruction 'cmovng' {CMOV}.
+    kIdCmovnge,                          //!< Instruction 'cmovnge' {CMOV}.
+    kIdCmovnl,                           //!< Instruction 'cmovnl' {CMOV}.
+    kIdCmovnle,                          //!< Instruction 'cmovnle' {CMOV}.
+    kIdCmovno,                           //!< Instruction 'cmovno' {CMOV}.
+    kIdCmovnp,                           //!< Instruction 'cmovnp' {CMOV}.
+    kIdCmovns,                           //!< Instruction 'cmovns' {CMOV}.
+    kIdCmovnz,                           //!< Instruction 'cmovnz' {CMOV}.
+    kIdCmovo,                            //!< Instruction 'cmovo' {CMOV}.
+    kIdCmovp,                            //!< Instruction 'cmovp' {CMOV}.
+    kIdCmovpe,                           //!< Instruction 'cmovpe' {CMOV}.
+    kIdCmovpo,                           //!< Instruction 'cmovpo' {CMOV}.
+    kIdCmovs,                            //!< Instruction 'cmovs' {CMOV}.
+    kIdCmovz,                            //!< Instruction 'cmovz' {CMOV}.
+    kIdCmp,                              //!< Instruction 'cmp'.
+    kIdCmppd,                            //!< Instruction 'cmppd' {SSE2}.
+    kIdCmpps,                            //!< Instruction 'cmpps' {SSE}.
+    kIdCmps,                             //!< Instruction 'cmps'.
+    kIdCmpsd,                            //!< Instruction 'cmpsd' {SSE2}.
+    kIdCmpss,                            //!< Instruction 'cmpss' {SSE}.
+    kIdCmpxchg,                          //!< Instruction 'cmpxchg' {I486}.
+    kIdCmpxchg16b,                       //!< Instruction 'cmpxchg16b' {CMPXCHG16B} (X64).
+    kIdCmpxchg8b,                        //!< Instruction 'cmpxchg8b' {CMPXCHG8B}.
+    kIdComisd,                           //!< Instruction 'comisd' {SSE2}.
+    kIdComiss,                           //!< Instruction 'comiss' {SSE}.
+    kIdCpuid,                            //!< Instruction 'cpuid' {I486}.
+    kIdCqo,                              //!< Instruction 'cqo' (X64).
+    kIdCrc32,                            //!< Instruction 'crc32' {SSE4_2}.
+    kIdCvtdq2pd,                         //!< Instruction 'cvtdq2pd' {SSE2}.
+    kIdCvtdq2ps,                         //!< Instruction 'cvtdq2ps' {SSE2}.
+    kIdCvtpd2dq,                         //!< Instruction 'cvtpd2dq' {SSE2}.
+    kIdCvtpd2pi,                         //!< Instruction 'cvtpd2pi' {SSE2}.
+    kIdCvtpd2ps,                         //!< Instruction 'cvtpd2ps' {SSE2}.
+    kIdCvtpi2pd,                         //!< Instruction 'cvtpi2pd' {SSE2}.
+    kIdCvtpi2ps,                         //!< Instruction 'cvtpi2ps' {SSE}.
+    kIdCvtps2dq,                         //!< Instruction 'cvtps2dq' {SSE2}.
+    kIdCvtps2pd,                         //!< Instruction 'cvtps2pd' {SSE2}.
+    kIdCvtps2pi,                         //!< Instruction 'cvtps2pi' {SSE}.
+    kIdCvtsd2si,                         //!< Instruction 'cvtsd2si' {SSE2}.
+    kIdCvtsd2ss,                         //!< Instruction 'cvtsd2ss' {SSE2}.
+    kIdCvtsi2sd,                         //!< Instruction 'cvtsi2sd' {SSE2}.
+    kIdCvtsi2ss,                         //!< Instruction 'cvtsi2ss' {SSE}.
+    kIdCvtss2sd,                         //!< Instruction 'cvtss2sd' {SSE2}.
+    kIdCvtss2si,                         //!< Instruction 'cvtss2si' {SSE}.
+    kIdCvttpd2dq,                        //!< Instruction 'cvttpd2dq' {SSE2}.
+    kIdCvttpd2pi,                        //!< Instruction 'cvttpd2pi' {SSE2}.
+    kIdCvttps2dq,                        //!< Instruction 'cvttps2dq' {SSE2}.
+    kIdCvttps2pi,                        //!< Instruction 'cvttps2pi' {SSE}.
+    kIdCvttsd2si,                        //!< Instruction 'cvttsd2si' {SSE2}.
+    kIdCvttss2si,                        //!< Instruction 'cvttss2si' {SSE}.
+    kIdCwd,                              //!< Instruction 'cwd'.
+    kIdCwde,                             //!< Instruction 'cwde'.
+    kIdDaa,                              //!< Instruction 'daa' (X86).
+    kIdDas,                              //!< Instruction 'das' (X86).
+    kIdDec,                              //!< Instruction 'dec'.
+    kIdDiv,                              //!< Instruction 'div'.
+    kIdDivpd,                            //!< Instruction 'divpd' {SSE2}.
+    kIdDivps,                            //!< Instruction 'divps' {SSE}.
+    kIdDivsd,                            //!< Instruction 'divsd' {SSE2}.
+    kIdDivss,                            //!< Instruction 'divss' {SSE}.
+    kIdDppd,                             //!< Instruction 'dppd' {SSE4_1}.
+    kIdDpps,                             //!< Instruction 'dpps' {SSE4_1}.
+    kIdEmms,                             //!< Instruction 'emms' {MMX}.
+    kIdEndbr32,                          //!< Instruction 'endbr32' {CET_IBT}.
+    kIdEndbr64,                          //!< Instruction 'endbr64' {CET_IBT}.
+    kIdEnqcmd,                           //!< Instruction 'enqcmd' {ENQCMD}.
+    kIdEnqcmds,                          //!< Instruction 'enqcmds' {ENQCMD}.
+    kIdEnter,                            //!< Instruction 'enter'.
+    kIdExtractps,                        //!< Instruction 'extractps' {SSE4_1}.
+    kIdExtrq,                            //!< Instruction 'extrq' {SSE4A}.
+    kIdF2xm1,                            //!< Instruction 'f2xm1'.
+    kIdFabs,                             //!< Instruction 'fabs'.
+    kIdFadd,                             //!< Instruction 'fadd'.
+    kIdFaddp,                            //!< Instruction 'faddp'.
+    kIdFbld,                             //!< Instruction 'fbld'.
+    kIdFbstp,                            //!< Instruction 'fbstp'.
+    kIdFchs,                             //!< Instruction 'fchs'.
+    kIdFclex,                            //!< Instruction 'fclex'.
+    kIdFcmovb,                           //!< Instruction 'fcmovb' {CMOV}.
+    kIdFcmovbe,                          //!< Instruction 'fcmovbe' {CMOV}.
+    kIdFcmove,                           //!< Instruction 'fcmove' {CMOV}.
+    kIdFcmovnb,                          //!< Instruction 'fcmovnb' {CMOV}.
+    kIdFcmovnbe,                         //!< Instruction 'fcmovnbe' {CMOV}.
+    kIdFcmovne,                          //!< Instruction 'fcmovne' {CMOV}.
+    kIdFcmovnu,                          //!< Instruction 'fcmovnu' {CMOV}.
+    kIdFcmovu,                           //!< Instruction 'fcmovu' {CMOV}.
+    kIdFcom,                             //!< Instruction 'fcom'.
+    kIdFcomi,                            //!< Instruction 'fcomi'.
+    kIdFcomip,                           //!< Instruction 'fcomip'.
+    kIdFcomp,                            //!< Instruction 'fcomp'.
+    kIdFcompp,                           //!< Instruction 'fcompp'.
+    kIdFcos,                             //!< Instruction 'fcos'.
+    kIdFdecstp,                          //!< Instruction 'fdecstp'.
+    kIdFdiv,                             //!< Instruction 'fdiv'.
+    kIdFdivp,                            //!< Instruction 'fdivp'.
+    kIdFdivr,                            //!< Instruction 'fdivr'.
+    kIdFdivrp,                           //!< Instruction 'fdivrp'.
+    kIdFemms,                            //!< Instruction 'femms' {3DNOW}.
+    kIdFfree,                            //!< Instruction 'ffree'.
+    kIdFiadd,                            //!< Instruction 'fiadd'.
+    kIdFicom,                            //!< Instruction 'ficom'.
+    kIdFicomp,                           //!< Instruction 'ficomp'.
+    kIdFidiv,                            //!< Instruction 'fidiv'.
+    kIdFidivr,                           //!< Instruction 'fidivr'.
+    kIdFild,                             //!< Instruction 'fild'.
+    kIdFimul,                            //!< Instruction 'fimul'.
+    kIdFincstp,                          //!< Instruction 'fincstp'.
+    kIdFinit,                            //!< Instruction 'finit'.
+    kIdFist,                             //!< Instruction 'fist'.
+    kIdFistp,                            //!< Instruction 'fistp'.
+    kIdFisttp,                           //!< Instruction 'fisttp' {SSE3}.
+    kIdFisub,                            //!< Instruction 'fisub'.
+    kIdFisubr,                           //!< Instruction 'fisubr'.
+    kIdFld,                              //!< Instruction 'fld'.
+    kIdFld1,                             //!< Instruction 'fld1'.
+    kIdFldcw,                            //!< Instruction 'fldcw'.
+    kIdFldenv,                           //!< Instruction 'fldenv'.
+    kIdFldl2e,                           //!< Instruction 'fldl2e'.
+    kIdFldl2t,                           //!< Instruction 'fldl2t'.
+    kIdFldlg2,                           //!< Instruction 'fldlg2'.
+    kIdFldln2,                           //!< Instruction 'fldln2'.
+    kIdFldpi,                            //!< Instruction 'fldpi'.
+    kIdFldz,                             //!< Instruction 'fldz'.
+    kIdFmul,                             //!< Instruction 'fmul'.
+    kIdFmulp,                            //!< Instruction 'fmulp'.
+    kIdFnclex,                           //!< Instruction 'fnclex'.
+    kIdFninit,                           //!< Instruction 'fninit'.
+    kIdFnop,                             //!< Instruction 'fnop'.
+    kIdFnsave,                           //!< Instruction 'fnsave'.
+    kIdFnstcw,                           //!< Instruction 'fnstcw'.
+    kIdFnstenv,                          //!< Instruction 'fnstenv'.
+    kIdFnstsw,                           //!< Instruction 'fnstsw'.
+    kIdFpatan,                           //!< Instruction 'fpatan'.
+    kIdFprem,                            //!< Instruction 'fprem'.
+    kIdFprem1,                           //!< Instruction 'fprem1'.
+    kIdFptan,                            //!< Instruction 'fptan'.
+    kIdFrndint,                          //!< Instruction 'frndint'.
+    kIdFrstor,                           //!< Instruction 'frstor'.
+    kIdFsave,                            //!< Instruction 'fsave'.
+    kIdFscale,                           //!< Instruction 'fscale'.
+    kIdFsin,                             //!< Instruction 'fsin'.
+    kIdFsincos,                          //!< Instruction 'fsincos'.
+    kIdFsqrt,                            //!< Instruction 'fsqrt'.
+    kIdFst,                              //!< Instruction 'fst'.
+    kIdFstcw,                            //!< Instruction 'fstcw'.
+    kIdFstenv,                           //!< Instruction 'fstenv'.
+    kIdFstp,                             //!< Instruction 'fstp'.
+    kIdFstsw,                            //!< Instruction 'fstsw'.
+    kIdFsub,                             //!< Instruction 'fsub'.
+    kIdFsubp,                            //!< Instruction 'fsubp'.
+    kIdFsubr,                            //!< Instruction 'fsubr'.
+    kIdFsubrp,                           //!< Instruction 'fsubrp'.
+    kIdFtst,                             //!< Instruction 'ftst'.
+    kIdFucom,                            //!< Instruction 'fucom'.
+    kIdFucomi,                           //!< Instruction 'fucomi'.
+    kIdFucomip,                          //!< Instruction 'fucomip'.
+    kIdFucomp,                           //!< Instruction 'fucomp'.
+    kIdFucompp,                          //!< Instruction 'fucompp'.
+    kIdFwait,                            //!< Instruction 'fwait'.
+    kIdFxam,                             //!< Instruction 'fxam'.
+    kIdFxch,                             //!< Instruction 'fxch'.
+    kIdFxrstor,                          //!< Instruction 'fxrstor' {FXSR}.
+    kIdFxrstor64,                        //!< Instruction 'fxrstor64' {FXSR} (X64).
+    kIdFxsave,                           //!< Instruction 'fxsave' {FXSR}.
+    kIdFxsave64,                         //!< Instruction 'fxsave64' {FXSR} (X64).
+    kIdFxtract,                          //!< Instruction 'fxtract'.
+    kIdFyl2x,                            //!< Instruction 'fyl2x'.
+    kIdFyl2xp1,                          //!< Instruction 'fyl2xp1'.
+    kIdGetsec,                           //!< Instruction 'getsec' {SMX}.
+    kIdGf2p8affineinvqb,                 //!< Instruction 'gf2p8affineinvqb' {GFNI}.
+    kIdGf2p8affineqb,                    //!< Instruction 'gf2p8affineqb' {GFNI}.
+    kIdGf2p8mulb,                        //!< Instruction 'gf2p8mulb' {GFNI}.
+    kIdHaddpd,                           //!< Instruction 'haddpd' {SSE3}.
+    kIdHaddps,                           //!< Instruction 'haddps' {SSE3}.
+    kIdHlt,                              //!< Instruction 'hlt'.
+    kIdHreset,                           //!< Instruction 'hreset' {HRESET}.
+    kIdHsubpd,                           //!< Instruction 'hsubpd' {SSE3}.
+    kIdHsubps,                           //!< Instruction 'hsubps' {SSE3}.
+    kIdIdiv,                             //!< Instruction 'idiv'.
+    kIdImul,                             //!< Instruction 'imul'.
+    kIdIn,                               //!< Instruction 'in'.
+    kIdInc,                              //!< Instruction 'inc'.
+    kIdIncsspd,                          //!< Instruction 'incsspd' {CET_SS}.
+    kIdIncsspq,                          //!< Instruction 'incsspq' {CET_SS} (X64).
+    kIdIns,                              //!< Instruction 'ins'.
+    kIdInsertps,                         //!< Instruction 'insertps' {SSE4_1}.
+    kIdInsertq,                          //!< Instruction 'insertq' {SSE4A}.
+    kIdInt,                              //!< Instruction 'int'.
+    kIdInt3,                             //!< Instruction 'int3'.
+    kIdInto,                             //!< Instruction 'into' (X86).
+    kIdInvd,                             //!< Instruction 'invd' {I486}.
+    kIdInvept,                           //!< Instruction 'invept' {VMX}.
+    kIdInvlpg,                           //!< Instruction 'invlpg' {I486}.
+    kIdInvlpga,                          //!< Instruction 'invlpga' {SVM}.
+    kIdInvpcid,                          //!< Instruction 'invpcid' {I486}.
+    kIdInvvpid,                          //!< Instruction 'invvpid' {VMX}.
+    kIdIret,                             //!< Instruction 'iret'.
+    kIdIretd,                            //!< Instruction 'iretd'.
+    kIdIretq,                            //!< Instruction 'iretq' (X64).
+    kIdJa,                               //!< Instruction 'ja'.
+    kIdJae,                              //!< Instruction 'jae'.
+    kIdJb,                               //!< Instruction 'jb'.
+    kIdJbe,                              //!< Instruction 'jbe'.
+    kIdJc,                               //!< Instruction 'jc'.
+    kIdJe,                               //!< Instruction 'je'.
+    kIdJecxz,                            //!< Instruction 'jecxz'.
+    kIdJg,                               //!< Instruction 'jg'.
+    kIdJge,                              //!< Instruction 'jge'.
+    kIdJl,                               //!< Instruction 'jl'.
+    kIdJle,                              //!< Instruction 'jle'.
+    kIdJmp,                              //!< Instruction 'jmp'.
+    kIdJna,                              //!< Instruction 'jna'.
+    kIdJnae,                             //!< Instruction 'jnae'.
+    kIdJnb,                              //!< Instruction 'jnb'.
+    kIdJnbe,                             //!< Instruction 'jnbe'.
+    kIdJnc,                              //!< Instruction 'jnc'.
+    kIdJne,                              //!< Instruction 'jne'.
+    kIdJng,                              //!< Instruction 'jng'.
+    kIdJnge,                             //!< Instruction 'jnge'.
+    kIdJnl,                              //!< Instruction 'jnl'.
+    kIdJnle,                             //!< Instruction 'jnle'.
+    kIdJno,                              //!< Instruction 'jno'.
+    kIdJnp,                              //!< Instruction 'jnp'.
+    kIdJns,                              //!< Instruction 'jns'.
+    kIdJnz,                              //!< Instruction 'jnz'.
+    kIdJo,                               //!< Instruction 'jo'.
+    kIdJp,                               //!< Instruction 'jp'.
+    kIdJpe,                              //!< Instruction 'jpe'.
+    kIdJpo,                              //!< Instruction 'jpo'.
+    kIdJs,                               //!< Instruction 'js'.
+    kIdJz,                               //!< Instruction 'jz'.
+    kIdKaddb,                            //!< Instruction 'kaddb' {AVX512_DQ}.
+    kIdKaddd,                            //!< Instruction 'kaddd' {AVX512_BW}.
+    kIdKaddq,                            //!< Instruction 'kaddq' {AVX512_BW}.
+    kIdKaddw,                            //!< Instruction 'kaddw' {AVX512_DQ}.
+    kIdKandb,                            //!< Instruction 'kandb' {AVX512_DQ}.
+    kIdKandd,                            //!< Instruction 'kandd' {AVX512_BW}.
+    kIdKandnb,                           //!< Instruction 'kandnb' {AVX512_DQ}.
+    kIdKandnd,                           //!< Instruction 'kandnd' {AVX512_BW}.
+    kIdKandnq,                           //!< Instruction 'kandnq' {AVX512_BW}.
+    kIdKandnw,                           //!< Instruction 'kandnw' {AVX512_F}.
+    kIdKandq,                            //!< Instruction 'kandq' {AVX512_BW}.
+    kIdKandw,                            //!< Instruction 'kandw' {AVX512_F}.
+    kIdKmovb,                            //!< Instruction 'kmovb' {AVX512_DQ}.
+    kIdKmovd,                            //!< Instruction 'kmovd' {AVX512_BW}.
+    kIdKmovq,                            //!< Instruction 'kmovq' {AVX512_BW}.
+    kIdKmovw,                            //!< Instruction 'kmovw' {AVX512_F}.
+    kIdKnotb,                            //!< Instruction 'knotb' {AVX512_DQ}.
+    kIdKnotd,                            //!< Instruction 'knotd' {AVX512_BW}.
+    kIdKnotq,                            //!< Instruction 'knotq' {AVX512_BW}.
+    kIdKnotw,                            //!< Instruction 'knotw' {AVX512_F}.
+    kIdKorb,                             //!< Instruction 'korb' {AVX512_DQ}.
+    kIdKord,                             //!< Instruction 'kord' {AVX512_BW}.
+    kIdKorq,                             //!< Instruction 'korq' {AVX512_BW}.
+    kIdKortestb,                         //!< Instruction 'kortestb' {AVX512_DQ}.
+    kIdKortestd,                         //!< Instruction 'kortestd' {AVX512_BW}.
+    kIdKortestq,                         //!< Instruction 'kortestq' {AVX512_BW}.
+    kIdKortestw,                         //!< Instruction 'kortestw' {AVX512_F}.
+    kIdKorw,                             //!< Instruction 'korw' {AVX512_F}.
+    kIdKshiftlb,                         //!< Instruction 'kshiftlb' {AVX512_DQ}.
+    kIdKshiftld,                         //!< Instruction 'kshiftld' {AVX512_BW}.
+    kIdKshiftlq,                         //!< Instruction 'kshiftlq' {AVX512_BW}.
+    kIdKshiftlw,                         //!< Instruction 'kshiftlw' {AVX512_F}.
+    kIdKshiftrb,                         //!< Instruction 'kshiftrb' {AVX512_DQ}.
+    kIdKshiftrd,                         //!< Instruction 'kshiftrd' {AVX512_BW}.
+    kIdKshiftrq,                         //!< Instruction 'kshiftrq' {AVX512_BW}.
+    kIdKshiftrw,                         //!< Instruction 'kshiftrw' {AVX512_F}.
+    kIdKtestb,                           //!< Instruction 'ktestb' {AVX512_DQ}.
+    kIdKtestd,                           //!< Instruction 'ktestd' {AVX512_BW}.
+    kIdKtestq,                           //!< Instruction 'ktestq' {AVX512_BW}.
+    kIdKtestw,                           //!< Instruction 'ktestw' {AVX512_DQ}.
+    kIdKunpckbw,                         //!< Instruction 'kunpckbw' {AVX512_F}.
+    kIdKunpckdq,                         //!< Instruction 'kunpckdq' {AVX512_BW}.
+    kIdKunpckwd,                         //!< Instruction 'kunpckwd' {AVX512_BW}.
+    kIdKxnorb,                           //!< Instruction 'kxnorb' {AVX512_DQ}.
+    kIdKxnord,                           //!< Instruction 'kxnord' {AVX512_BW}.
+    kIdKxnorq,                           //!< Instruction 'kxnorq' {AVX512_BW}.
+    kIdKxnorw,                           //!< Instruction 'kxnorw' {AVX512_F}.
+    kIdKxorb,                            //!< Instruction 'kxorb' {AVX512_DQ}.
+    kIdKxord,                            //!< Instruction 'kxord' {AVX512_BW}.
+    kIdKxorq,                            //!< Instruction 'kxorq' {AVX512_BW}.
+    kIdKxorw,                            //!< Instruction 'kxorw' {AVX512_F}.
+    kIdLahf,                             //!< Instruction 'lahf' {LAHFSAHF}.
+    kIdLar,                              //!< Instruction 'lar'.
+    kIdLcall,                            //!< Instruction 'lcall'.
+    kIdLddqu,                            //!< Instruction 'lddqu' {SSE3}.
+    kIdLdmxcsr,                          //!< Instruction 'ldmxcsr' {SSE}.
+    kIdLds,                              //!< Instruction 'lds' (X86).
+    kIdLdtilecfg,                        //!< Instruction 'ldtilecfg' {AMX_TILE} (X64).
+    kIdLea,                              //!< Instruction 'lea'.
+    kIdLeave,                            //!< Instruction 'leave'.
+    kIdLes,                              //!< Instruction 'les' (X86).
+    kIdLfence,                           //!< Instruction 'lfence' {SSE2}.
+    kIdLfs,                              //!< Instruction 'lfs'.
+    kIdLgdt,                             //!< Instruction 'lgdt'.
+    kIdLgs,                              //!< Instruction 'lgs'.
+    kIdLidt,                             //!< Instruction 'lidt'.
+    kIdLjmp,                             //!< Instruction 'ljmp'.
+    kIdLldt,                             //!< Instruction 'lldt'.
+    kIdLlwpcb,                           //!< Instruction 'llwpcb' {LWP}.
+    kIdLmsw,                             //!< Instruction 'lmsw'.
+    kIdLods,                             //!< Instruction 'lods'.
+    kIdLoop,                             //!< Instruction 'loop'.
+    kIdLoope,                            //!< Instruction 'loope'.
+    kIdLoopne,                           //!< Instruction 'loopne'.
+    kIdLsl,                              //!< Instruction 'lsl'.
+    kIdLss,                              //!< Instruction 'lss'.
+    kIdLtr,                              //!< Instruction 'ltr'.
+    kIdLwpins,                           //!< Instruction 'lwpins' {LWP}.
+    kIdLwpval,                           //!< Instruction 'lwpval' {LWP}.
+    kIdLzcnt,                            //!< Instruction 'lzcnt' {LZCNT}.
+    kIdMaskmovdqu,                       //!< Instruction 'maskmovdqu' {SSE2}.
+    kIdMaskmovq,                         //!< Instruction 'maskmovq' {MMX2}.
+    kIdMaxpd,                            //!< Instruction 'maxpd' {SSE2}.
+    kIdMaxps,                            //!< Instruction 'maxps' {SSE}.
+    kIdMaxsd,                            //!< Instruction 'maxsd' {SSE2}.
+    kIdMaxss,                            //!< Instruction 'maxss' {SSE}.
+    kIdMcommit,                          //!< Instruction 'mcommit' {MCOMMIT}.
+    kIdMfence,                           //!< Instruction 'mfence' {SSE2}.
+    kIdMinpd,                            //!< Instruction 'minpd' {SSE2}.
+    kIdMinps,                            //!< Instruction 'minps' {SSE}.
+    kIdMinsd,                            //!< Instruction 'minsd' {SSE2}.
+    kIdMinss,                            //!< Instruction 'minss' {SSE}.
+    kIdMonitor,                          //!< Instruction 'monitor' {MONITOR}.
+    kIdMonitorx,                         //!< Instruction 'monitorx' {MONITORX}.
+    kIdMov,                              //!< Instruction 'mov'.
+    kIdMovabs,                           //!< Instruction 'movabs' (X64).
+    kIdMovapd,                           //!< Instruction 'movapd' {SSE2}.
+    kIdMovaps,                           //!< Instruction 'movaps' {SSE}.
+    kIdMovbe,                            //!< Instruction 'movbe' {MOVBE}.
+    kIdMovd,                             //!< Instruction 'movd' {MMX|SSE2}.
+    kIdMovddup,                          //!< Instruction 'movddup' {SSE3}.
+    kIdMovdir64b,                        //!< Instruction 'movdir64b' {MOVDIR64B}.
+    kIdMovdiri,                          //!< Instruction 'movdiri' {MOVDIRI}.
+    kIdMovdq2q,                          //!< Instruction 'movdq2q' {SSE2}.
+    kIdMovdqa,                           //!< Instruction 'movdqa' {SSE2}.
+    kIdMovdqu,                           //!< Instruction 'movdqu' {SSE2}.
+    kIdMovhlps,                          //!< Instruction 'movhlps' {SSE}.
+    kIdMovhpd,                           //!< Instruction 'movhpd' {SSE2}.
+    kIdMovhps,                           //!< Instruction 'movhps' {SSE}.
+    kIdMovlhps,                          //!< Instruction 'movlhps' {SSE}.
+    kIdMovlpd,                           //!< Instruction 'movlpd' {SSE2}.
+    kIdMovlps,                           //!< Instruction 'movlps' {SSE}.
+    kIdMovmskpd,                         //!< Instruction 'movmskpd' {SSE2}.
+    kIdMovmskps,                         //!< Instruction 'movmskps' {SSE}.
+    kIdMovntdq,                          //!< Instruction 'movntdq' {SSE2}.
+    kIdMovntdqa,                         //!< Instruction 'movntdqa' {SSE4_1}.
+    kIdMovnti,                           //!< Instruction 'movnti' {SSE2}.
+    kIdMovntpd,                          //!< Instruction 'movntpd' {SSE2}.
+    kIdMovntps,                          //!< Instruction 'movntps' {SSE}.
+    kIdMovntq,                           //!< Instruction 'movntq' {MMX2}.
+    kIdMovntsd,                          //!< Instruction 'movntsd' {SSE4A}.
+    kIdMovntss,                          //!< Instruction 'movntss' {SSE4A}.
+    kIdMovq,                             //!< Instruction 'movq' {MMX|SSE2}.
+    kIdMovq2dq,                          //!< Instruction 'movq2dq' {SSE2}.
+    kIdMovs,                             //!< Instruction 'movs'.
+    kIdMovsd,                            //!< Instruction 'movsd' {SSE2}.
+    kIdMovshdup,                         //!< Instruction 'movshdup' {SSE3}.
+    kIdMovsldup,                         //!< Instruction 'movsldup' {SSE3}.
+    kIdMovss,                            //!< Instruction 'movss' {SSE}.
+    kIdMovsx,                            //!< Instruction 'movsx'.
+    kIdMovsxd,                           //!< Instruction 'movsxd' (X64).
+    kIdMovupd,                           //!< Instruction 'movupd' {SSE2}.
+    kIdMovups,                           //!< Instruction 'movups' {SSE}.
+    kIdMovzx,                            //!< Instruction 'movzx'.
+    kIdMpsadbw,                          //!< Instruction 'mpsadbw' {SSE4_1}.
+    kIdMul,                              //!< Instruction 'mul'.
+    kIdMulpd,                            //!< Instruction 'mulpd' {SSE2}.
+    kIdMulps,                            //!< Instruction 'mulps' {SSE}.
+    kIdMulsd,                            //!< Instruction 'mulsd' {SSE2}.
+    kIdMulss,                            //!< Instruction 'mulss' {SSE}.
+    kIdMulx,                             //!< Instruction 'mulx' {BMI2}.
+    kIdMwait,                            //!< Instruction 'mwait' {MONITOR}.
+    kIdMwaitx,                           //!< Instruction 'mwaitx' {MONITORX}.
+    kIdNeg,                              //!< Instruction 'neg'.
+    kIdNop,                              //!< Instruction 'nop'.
+    kIdNot,                              //!< Instruction 'not'.
+    kIdOr,                               //!< Instruction 'or'.
+    kIdOrpd,                             //!< Instruction 'orpd' {SSE2}.
+    kIdOrps,                             //!< Instruction 'orps' {SSE}.
+    kIdOut,                              //!< Instruction 'out'.
+    kIdOuts,                             //!< Instruction 'outs'.
+    kIdPabsb,                            //!< Instruction 'pabsb' {SSSE3}.
+    kIdPabsd,                            //!< Instruction 'pabsd' {SSSE3}.
+    kIdPabsw,                            //!< Instruction 'pabsw' {SSSE3}.
+    kIdPackssdw,                         //!< Instruction 'packssdw' {MMX|SSE2}.
+    kIdPacksswb,                         //!< Instruction 'packsswb' {MMX|SSE2}.
+    kIdPackusdw,                         //!< Instruction 'packusdw' {SSE4_1}.
+    kIdPackuswb,                         //!< Instruction 'packuswb' {MMX|SSE2}.
+    kIdPaddb,                            //!< Instruction 'paddb' {MMX|SSE2}.
+    kIdPaddd,                            //!< Instruction 'paddd' {MMX|SSE2}.
+    kIdPaddq,                            //!< Instruction 'paddq' {SSE2}.
+    kIdPaddsb,                           //!< Instruction 'paddsb' {MMX|SSE2}.
+    kIdPaddsw,                           //!< Instruction 'paddsw' {MMX|SSE2}.
+    kIdPaddusb,                          //!< Instruction 'paddusb' {MMX|SSE2}.
+    kIdPaddusw,                          //!< Instruction 'paddusw' {MMX|SSE2}.
+    kIdPaddw,                            //!< Instruction 'paddw' {MMX|SSE2}.
+    kIdPalignr,                          //!< Instruction 'palignr' {SSSE3}.
+    kIdPand,                             //!< Instruction 'pand' {MMX|SSE2}.
+    kIdPandn,                            //!< Instruction 'pandn' {MMX|SSE2}.
+    kIdPause,                            //!< Instruction 'pause'.
+    kIdPavgb,                            //!< Instruction 'pavgb' {MMX2|SSE2}.
+    kIdPavgusb,                          //!< Instruction 'pavgusb' {3DNOW}.
+    kIdPavgw,                            //!< Instruction 'pavgw' {MMX2|SSE2}.
+    kIdPblendvb,                         //!< Instruction 'pblendvb' {SSE4_1}.
+    kIdPblendw,                          //!< Instruction 'pblendw' {SSE4_1}.
+    kIdPclmulqdq,                        //!< Instruction 'pclmulqdq' {PCLMULQDQ}.
+    kIdPcmpeqb,                          //!< Instruction 'pcmpeqb' {MMX|SSE2}.
+    kIdPcmpeqd,                          //!< Instruction 'pcmpeqd' {MMX|SSE2}.
+    kIdPcmpeqq,                          //!< Instruction 'pcmpeqq' {SSE4_1}.
+    kIdPcmpeqw,                          //!< Instruction 'pcmpeqw' {MMX|SSE2}.
+    kIdPcmpestri,                        //!< Instruction 'pcmpestri' {SSE4_2}.
+    kIdPcmpestrm,                        //!< Instruction 'pcmpestrm' {SSE4_2}.
+    kIdPcmpgtb,                          //!< Instruction 'pcmpgtb' {MMX|SSE2}.
+    kIdPcmpgtd,                          //!< Instruction 'pcmpgtd' {MMX|SSE2}.
+    kIdPcmpgtq,                          //!< Instruction 'pcmpgtq' {SSE4_2}.
+    kIdPcmpgtw,                          //!< Instruction 'pcmpgtw' {MMX|SSE2}.
+    kIdPcmpistri,                        //!< Instruction 'pcmpistri' {SSE4_2}.
+    kIdPcmpistrm,                        //!< Instruction 'pcmpistrm' {SSE4_2}.
+    kIdPconfig,                          //!< Instruction 'pconfig' {PCONFIG}.
+    kIdPdep,                             //!< Instruction 'pdep' {BMI2}.
+    kIdPext,                             //!< Instruction 'pext' {BMI2}.
+    kIdPextrb,                           //!< Instruction 'pextrb' {SSE4_1}.
+    kIdPextrd,                           //!< Instruction 'pextrd' {SSE4_1}.
+    kIdPextrq,                           //!< Instruction 'pextrq' {SSE4_1} (X64).
+    kIdPextrw,                           //!< Instruction 'pextrw' {MMX2|SSE2|SSE4_1}.
+    kIdPf2id,                            //!< Instruction 'pf2id' {3DNOW}.
+    kIdPf2iw,                            //!< Instruction 'pf2iw' {3DNOW2}.
+    kIdPfacc,                            //!< Instruction 'pfacc' {3DNOW}.
+    kIdPfadd,                            //!< Instruction 'pfadd' {3DNOW}.
+    kIdPfcmpeq,                          //!< Instruction 'pfcmpeq' {3DNOW}.
+    kIdPfcmpge,                          //!< Instruction 'pfcmpge' {3DNOW}.
+    kIdPfcmpgt,                          //!< Instruction 'pfcmpgt' {3DNOW}.
+    kIdPfmax,                            //!< Instruction 'pfmax' {3DNOW}.
+    kIdPfmin,                            //!< Instruction 'pfmin' {3DNOW}.
+    kIdPfmul,                            //!< Instruction 'pfmul' {3DNOW}.
+    kIdPfnacc,                           //!< Instruction 'pfnacc' {3DNOW2}.
+    kIdPfpnacc,                          //!< Instruction 'pfpnacc' {3DNOW2}.
+    kIdPfrcp,                            //!< Instruction 'pfrcp' {3DNOW}.
+    kIdPfrcpit1,                         //!< Instruction 'pfrcpit1' {3DNOW}.
+    kIdPfrcpit2,                         //!< Instruction 'pfrcpit2' {3DNOW}.
+    kIdPfrcpv,                           //!< Instruction 'pfrcpv' {GEODE}.
+    kIdPfrsqit1,                         //!< Instruction 'pfrsqit1' {3DNOW}.
+    kIdPfrsqrt,                          //!< Instruction 'pfrsqrt' {3DNOW}.
+    kIdPfrsqrtv,                         //!< Instruction 'pfrsqrtv' {GEODE}.
+    kIdPfsub,                            //!< Instruction 'pfsub' {3DNOW}.
+    kIdPfsubr,                           //!< Instruction 'pfsubr' {3DNOW}.
+    kIdPhaddd,                           //!< Instruction 'phaddd' {SSSE3}.
+    kIdPhaddsw,                          //!< Instruction 'phaddsw' {SSSE3}.
+    kIdPhaddw,                           //!< Instruction 'phaddw' {SSSE3}.
+    kIdPhminposuw,                       //!< Instruction 'phminposuw' {SSE4_1}.
+    kIdPhsubd,                           //!< Instruction 'phsubd' {SSSE3}.
+    kIdPhsubsw,                          //!< Instruction 'phsubsw' {SSSE3}.
+    kIdPhsubw,                           //!< Instruction 'phsubw' {SSSE3}.
+    kIdPi2fd,                            //!< Instruction 'pi2fd' {3DNOW}.
+    kIdPi2fw,                            //!< Instruction 'pi2fw' {3DNOW2}.
+    kIdPinsrb,                           //!< Instruction 'pinsrb' {SSE4_1}.
+    kIdPinsrd,                           //!< Instruction 'pinsrd' {SSE4_1}.
+    kIdPinsrq,                           //!< Instruction 'pinsrq' {SSE4_1} (X64).
+    kIdPinsrw,                           //!< Instruction 'pinsrw' {MMX2|SSE2}.
+    kIdPmaddubsw,                        //!< Instruction 'pmaddubsw' {SSSE3}.
+    kIdPmaddwd,                          //!< Instruction 'pmaddwd' {MMX|SSE2}.
+    kIdPmaxsb,                           //!< Instruction 'pmaxsb' {SSE4_1}.
+    kIdPmaxsd,                           //!< Instruction 'pmaxsd' {SSE4_1}.
+    kIdPmaxsw,                           //!< Instruction 'pmaxsw' {MMX2|SSE2}.
+    kIdPmaxub,                           //!< Instruction 'pmaxub' {MMX2|SSE2}.
+    kIdPmaxud,                           //!< Instruction 'pmaxud' {SSE4_1}.
+    kIdPmaxuw,                           //!< Instruction 'pmaxuw' {SSE4_1}.
+    kIdPminsb,                           //!< Instruction 'pminsb' {SSE4_1}.
+    kIdPminsd,                           //!< Instruction 'pminsd' {SSE4_1}.
+    kIdPminsw,                           //!< Instruction 'pminsw' {MMX2|SSE2}.
+    kIdPminub,                           //!< Instruction 'pminub' {MMX2|SSE2}.
+    kIdPminud,                           //!< Instruction 'pminud' {SSE4_1}.
+    kIdPminuw,                           //!< Instruction 'pminuw' {SSE4_1}.
+    kIdPmovmskb,                         //!< Instruction 'pmovmskb' {MMX2|SSE2}.
+    kIdPmovsxbd,                         //!< Instruction 'pmovsxbd' {SSE4_1}.
+    kIdPmovsxbq,                         //!< Instruction 'pmovsxbq' {SSE4_1}.
+    kIdPmovsxbw,                         //!< Instruction 'pmovsxbw' {SSE4_1}.
+    kIdPmovsxdq,                         //!< Instruction 'pmovsxdq' {SSE4_1}.
+    kIdPmovsxwd,                         //!< Instruction 'pmovsxwd' {SSE4_1}.
+    kIdPmovsxwq,                         //!< Instruction 'pmovsxwq' {SSE4_1}.
+    kIdPmovzxbd,                         //!< Instruction 'pmovzxbd' {SSE4_1}.
+    kIdPmovzxbq,                         //!< Instruction 'pmovzxbq' {SSE4_1}.
+    kIdPmovzxbw,                         //!< Instruction 'pmovzxbw' {SSE4_1}.
+    kIdPmovzxdq,                         //!< Instruction 'pmovzxdq' {SSE4_1}.
+    kIdPmovzxwd,                         //!< Instruction 'pmovzxwd' {SSE4_1}.
+    kIdPmovzxwq,                         //!< Instruction 'pmovzxwq' {SSE4_1}.
+    kIdPmuldq,                           //!< Instruction 'pmuldq' {SSE4_1}.
+    kIdPmulhrsw,                         //!< Instruction 'pmulhrsw' {SSSE3}.
+    kIdPmulhrw,                          //!< Instruction 'pmulhrw' {3DNOW}.
+    kIdPmulhuw,                          //!< Instruction 'pmulhuw' {MMX2|SSE2}.
+    kIdPmulhw,                           //!< Instruction 'pmulhw' {MMX|SSE2}.
+    kIdPmulld,                           //!< Instruction 'pmulld' {SSE4_1}.
+    kIdPmullw,                           //!< Instruction 'pmullw' {MMX|SSE2}.
+    kIdPmuludq,                          //!< Instruction 'pmuludq' {SSE2}.
+    kIdPop,                              //!< Instruction 'pop'.
+    kIdPopa,                             //!< Instruction 'popa' (X86).
+    kIdPopad,                            //!< Instruction 'popad' (X86).
+    kIdPopcnt,                           //!< Instruction 'popcnt' {POPCNT}.
+    kIdPopf,                             //!< Instruction 'popf'.
+    kIdPopfd,                            //!< Instruction 'popfd' (X86).
+    kIdPopfq,                            //!< Instruction 'popfq' (X64).
+    kIdPor,                              //!< Instruction 'por' {MMX|SSE2}.
+    kIdPrefetch,                         //!< Instruction 'prefetch' {3DNOW}.
+    kIdPrefetchnta,                      //!< Instruction 'prefetchnta' {MMX2}.
+    kIdPrefetcht0,                       //!< Instruction 'prefetcht0' {MMX2}.
+    kIdPrefetcht1,                       //!< Instruction 'prefetcht1' {MMX2}.
+    kIdPrefetcht2,                       //!< Instruction 'prefetcht2' {MMX2}.
+    kIdPrefetchw,                        //!< Instruction 'prefetchw' {PREFETCHW}.
+    kIdPrefetchwt1,                      //!< Instruction 'prefetchwt1' {PREFETCHWT1}.
+    kIdPsadbw,                           //!< Instruction 'psadbw' {MMX2|SSE2}.
+    kIdPshufb,                           //!< Instruction 'pshufb' {SSSE3}.
+    kIdPshufd,                           //!< Instruction 'pshufd' {SSE2}.
+    kIdPshufhw,                          //!< Instruction 'pshufhw' {SSE2}.
+    kIdPshuflw,                          //!< Instruction 'pshuflw' {SSE2}.
+    kIdPshufw,                           //!< Instruction 'pshufw' {MMX2}.
+    kIdPsignb,                           //!< Instruction 'psignb' {SSSE3}.
+    kIdPsignd,                           //!< Instruction 'psignd' {SSSE3}.
+    kIdPsignw,                           //!< Instruction 'psignw' {SSSE3}.
+    kIdPslld,                            //!< Instruction 'pslld' {MMX|SSE2}.
+    kIdPslldq,                           //!< Instruction 'pslldq' {SSE2}.
+    kIdPsllq,                            //!< Instruction 'psllq' {MMX|SSE2}.
+    kIdPsllw,                            //!< Instruction 'psllw' {MMX|SSE2}.
+    kIdPsmash,                           //!< Instruction 'psmash' {SNP} (X64).
+    kIdPsrad,                            //!< Instruction 'psrad' {MMX|SSE2}.
+    kIdPsraw,                            //!< Instruction 'psraw' {MMX|SSE2}.
+    kIdPsrld,                            //!< Instruction 'psrld' {MMX|SSE2}.
+    kIdPsrldq,                           //!< Instruction 'psrldq' {SSE2}.
+    kIdPsrlq,                            //!< Instruction 'psrlq' {MMX|SSE2}.
+    kIdPsrlw,                            //!< Instruction 'psrlw' {MMX|SSE2}.
+    kIdPsubb,                            //!< Instruction 'psubb' {MMX|SSE2}.
+    kIdPsubd,                            //!< Instruction 'psubd' {MMX|SSE2}.
+    kIdPsubq,                            //!< Instruction 'psubq' {SSE2}.
+    kIdPsubsb,                           //!< Instruction 'psubsb' {MMX|SSE2}.
+    kIdPsubsw,                           //!< Instruction 'psubsw' {MMX|SSE2}.
+    kIdPsubusb,                          //!< Instruction 'psubusb' {MMX|SSE2}.
+    kIdPsubusw,                          //!< Instruction 'psubusw' {MMX|SSE2}.
+    kIdPsubw,                            //!< Instruction 'psubw' {MMX|SSE2}.
+    kIdPswapd,                           //!< Instruction 'pswapd' {3DNOW2}.
+    kIdPtest,                            //!< Instruction 'ptest' {SSE4_1}.
+    kIdPtwrite,                          //!< Instruction 'ptwrite' {PTWRITE}.
+    kIdPunpckhbw,                        //!< Instruction 'punpckhbw' {MMX|SSE2}.
+    kIdPunpckhdq,                        //!< Instruction 'punpckhdq' {MMX|SSE2}.
+    kIdPunpckhqdq,                       //!< Instruction 'punpckhqdq' {SSE2}.
+    kIdPunpckhwd,                        //!< Instruction 'punpckhwd' {MMX|SSE2}.
+    kIdPunpcklbw,                        //!< Instruction 'punpcklbw' {MMX|SSE2}.
+    kIdPunpckldq,                        //!< Instruction 'punpckldq' {MMX|SSE2}.
+    kIdPunpcklqdq,                       //!< Instruction 'punpcklqdq' {SSE2}.
+    kIdPunpcklwd,                        //!< Instruction 'punpcklwd' {MMX|SSE2}.
+    kIdPush,                             //!< Instruction 'push'.
+    kIdPusha,                            //!< Instruction 'pusha' (X86).
+    kIdPushad,                           //!< Instruction 'pushad' (X86).
+    kIdPushf,                            //!< Instruction 'pushf'.
+    kIdPushfd,                           //!< Instruction 'pushfd' (X86).
+    kIdPushfq,                           //!< Instruction 'pushfq' (X64).
+    kIdPvalidate,                        //!< Instruction 'pvalidate' {SNP}.
+    kIdPxor,                             //!< Instruction 'pxor' {MMX|SSE2}.
+    kIdRcl,                              //!< Instruction 'rcl'.
+    kIdRcpps,                            //!< Instruction 'rcpps' {SSE}.
+    kIdRcpss,                            //!< Instruction 'rcpss' {SSE}.
+    kIdRcr,                              //!< Instruction 'rcr'.
+    kIdRdfsbase,                         //!< Instruction 'rdfsbase' {FSGSBASE} (X64).
+    kIdRdgsbase,                         //!< Instruction 'rdgsbase' {FSGSBASE} (X64).
+    kIdRdmsr,                            //!< Instruction 'rdmsr' {MSR}.
+    kIdRdpid,                            //!< Instruction 'rdpid' {RDPID}.
+    kIdRdpkru,                           //!< Instruction 'rdpkru' {OSPKE}.
+    kIdRdpmc,                            //!< Instruction 'rdpmc'.
+    kIdRdpru,                            //!< Instruction 'rdpru' {RDPRU}.
+    kIdRdrand,                           //!< Instruction 'rdrand' {RDRAND}.
+    kIdRdseed,                           //!< Instruction 'rdseed' {RDSEED}.
+    kIdRdsspd,                           //!< Instruction 'rdsspd' {CET_SS}.
+    kIdRdsspq,                           //!< Instruction 'rdsspq' {CET_SS} (X64).
+    kIdRdtsc,                            //!< Instruction 'rdtsc' {RDTSC}.
+    kIdRdtscp,                           //!< Instruction 'rdtscp' {RDTSCP}.
+    kIdRet,                              //!< Instruction 'ret'.
+    kIdRetf,                             //!< Instruction 'retf'.
+    kIdRmpadjust,                        //!< Instruction 'rmpadjust' {SNP} (X64).
+    kIdRmpupdate,                        //!< Instruction 'rmpupdate' {SNP} (X64).
+    kIdRol,                              //!< Instruction 'rol'.
+    kIdRor,                              //!< Instruction 'ror'.
+    kIdRorx,                             //!< Instruction 'rorx' {BMI2}.
+    kIdRoundpd,                          //!< Instruction 'roundpd' {SSE4_1}.
+    kIdRoundps,                          //!< Instruction 'roundps' {SSE4_1}.
+    kIdRoundsd,                          //!< Instruction 'roundsd' {SSE4_1}.
+    kIdRoundss,                          //!< Instruction 'roundss' {SSE4_1}.
+    kIdRsm,                              //!< Instruction 'rsm' (X86).
+    kIdRsqrtps,                          //!< Instruction 'rsqrtps' {SSE}.
+    kIdRsqrtss,                          //!< Instruction 'rsqrtss' {SSE}.
+    kIdRstorssp,                         //!< Instruction 'rstorssp' {CET_SS}.
+    kIdSahf,                             //!< Instruction 'sahf' {LAHFSAHF}.
+    kIdSal,                              //!< Instruction 'sal'.
+    kIdSar,                              //!< Instruction 'sar'.
+    kIdSarx,                             //!< Instruction 'sarx' {BMI2}.
+    kIdSaveprevssp,                      //!< Instruction 'saveprevssp' {CET_SS}.
+    kIdSbb,                              //!< Instruction 'sbb'.
+    kIdScas,                             //!< Instruction 'scas'.
+    kIdSenduipi,                         //!< Instruction 'senduipi' {UINTR} (X64).
+    kIdSerialize,                        //!< Instruction 'serialize' {SERIALIZE}.
+    kIdSeta,                             //!< Instruction 'seta'.
+    kIdSetae,                            //!< Instruction 'setae'.
+    kIdSetb,                             //!< Instruction 'setb'.
+    kIdSetbe,                            //!< Instruction 'setbe'.
+    kIdSetc,                             //!< Instruction 'setc'.
+    kIdSete,                             //!< Instruction 'sete'.
+    kIdSetg,                             //!< Instruction 'setg'.
+    kIdSetge,                            //!< Instruction 'setge'.
+    kIdSetl,                             //!< Instruction 'setl'.
+    kIdSetle,                            //!< Instruction 'setle'.
+    kIdSetna,                            //!< Instruction 'setna'.
+    kIdSetnae,                           //!< Instruction 'setnae'.
+    kIdSetnb,                            //!< Instruction 'setnb'.
+    kIdSetnbe,                           //!< Instruction 'setnbe'.
+    kIdSetnc,                            //!< Instruction 'setnc'.
+    kIdSetne,                            //!< Instruction 'setne'.
+    kIdSetng,                            //!< Instruction 'setng'.
+    kIdSetnge,                           //!< Instruction 'setnge'.
+    kIdSetnl,                            //!< Instruction 'setnl'.
+    kIdSetnle,                           //!< Instruction 'setnle'.
+    kIdSetno,                            //!< Instruction 'setno'.
+    kIdSetnp,                            //!< Instruction 'setnp'.
+    kIdSetns,                            //!< Instruction 'setns'.
+    kIdSetnz,                            //!< Instruction 'setnz'.
+    kIdSeto,                             //!< Instruction 'seto'.
+    kIdSetp,                             //!< Instruction 'setp'.
+    kIdSetpe,                            //!< Instruction 'setpe'.
+    kIdSetpo,                            //!< Instruction 'setpo'.
+    kIdSets,                             //!< Instruction 'sets'.
+    kIdSetssbsy,                         //!< Instruction 'setssbsy' {CET_SS}.
+    kIdSetz,                             //!< Instruction 'setz'.
+    kIdSfence,                           //!< Instruction 'sfence' {MMX2}.
+    kIdSgdt,                             //!< Instruction 'sgdt'.
+    kIdSha1msg1,                         //!< Instruction 'sha1msg1' {SHA}.
+    kIdSha1msg2,                         //!< Instruction 'sha1msg2' {SHA}.
+    kIdSha1nexte,                        //!< Instruction 'sha1nexte' {SHA}.
+    kIdSha1rnds4,                        //!< Instruction 'sha1rnds4' {SHA}.
+    kIdSha256msg1,                       //!< Instruction 'sha256msg1' {SHA}.
+    kIdSha256msg2,                       //!< Instruction 'sha256msg2' {SHA}.
+    kIdSha256rnds2,                      //!< Instruction 'sha256rnds2' {SHA}.
+    kIdShl,                              //!< Instruction 'shl'.
+    kIdShld,                             //!< Instruction 'shld'.
+    kIdShlx,                             //!< Instruction 'shlx' {BMI2}.
+    kIdShr,                              //!< Instruction 'shr'.
+    kIdShrd,                             //!< Instruction 'shrd'.
+    kIdShrx,                             //!< Instruction 'shrx' {BMI2}.
+    kIdShufpd,                           //!< Instruction 'shufpd' {SSE2}.
+    kIdShufps,                           //!< Instruction 'shufps' {SSE}.
+    kIdSidt,                             //!< Instruction 'sidt'.
+    kIdSkinit,                           //!< Instruction 'skinit' {SKINIT}.
+    kIdSldt,                             //!< Instruction 'sldt'.
+    kIdSlwpcb,                           //!< Instruction 'slwpcb' {LWP}.
+    kIdSmsw,                             //!< Instruction 'smsw'.
+    kIdSqrtpd,                           //!< Instruction 'sqrtpd' {SSE2}.
+    kIdSqrtps,                           //!< Instruction 'sqrtps' {SSE}.
+    kIdSqrtsd,                           //!< Instruction 'sqrtsd' {SSE2}.
+    kIdSqrtss,                           //!< Instruction 'sqrtss' {SSE}.
+    kIdStac,                             //!< Instruction 'stac' {SMAP}.
+    kIdStc,                              //!< Instruction 'stc'.
+    kIdStd,                              //!< Instruction 'std'.
+    kIdStgi,                             //!< Instruction 'stgi' {SKINIT}.
+    kIdSti,                              //!< Instruction 'sti'.
+    kIdStmxcsr,                          //!< Instruction 'stmxcsr' {SSE}.
+    kIdStos,                             //!< Instruction 'stos'.
+    kIdStr,                              //!< Instruction 'str'.
+    kIdSttilecfg,                        //!< Instruction 'sttilecfg' {AMX_TILE} (X64).
+    kIdStui,                             //!< Instruction 'stui' {UINTR} (X64).
+    kIdSub,                              //!< Instruction 'sub'.
+    kIdSubpd,                            //!< Instruction 'subpd' {SSE2}.
+    kIdSubps,                            //!< Instruction 'subps' {SSE}.
+    kIdSubsd,                            //!< Instruction 'subsd' {SSE2}.
+    kIdSubss,                            //!< Instruction 'subss' {SSE}.
+    kIdSwapgs,                           //!< Instruction 'swapgs' (X64).
+    kIdSyscall,                          //!< Instruction 'syscall' (X64).
+    kIdSysenter,                         //!< Instruction 'sysenter'.
+    kIdSysexit,                          //!< Instruction 'sysexit'.
+    kIdSysexitq,                         //!< Instruction 'sysexitq'.
+    kIdSysret,                           //!< Instruction 'sysret' (X64).
+    kIdSysretq,                          //!< Instruction 'sysretq' (X64).
+    kIdT1mskc,                           //!< Instruction 't1mskc' {TBM}.
+    kIdTdpbf16ps,                        //!< Instruction 'tdpbf16ps' {AMX_BF16} (X64).
+    kIdTdpbssd,                          //!< Instruction 'tdpbssd' {AMX_INT8} (X64).
+    kIdTdpbsud,                          //!< Instruction 'tdpbsud' {AMX_INT8} (X64).
+    kIdTdpbusd,                          //!< Instruction 'tdpbusd' {AMX_INT8} (X64).
+    kIdTdpbuud,                          //!< Instruction 'tdpbuud' {AMX_INT8} (X64).
+    kIdTest,                             //!< Instruction 'test'.
+    kIdTestui,                           //!< Instruction 'testui' {UINTR} (X64).
+    kIdTileloadd,                        //!< Instruction 'tileloadd' {AMX_TILE} (X64).
+    kIdTileloaddt1,                      //!< Instruction 'tileloaddt1' {AMX_TILE} (X64).
+    kIdTilerelease,                      //!< Instruction 'tilerelease' {AMX_TILE} (X64).
+    kIdTilestored,                       //!< Instruction 'tilestored' {AMX_TILE} (X64).
+    kIdTilezero,                         //!< Instruction 'tilezero' {AMX_TILE} (X64).
+    kIdTpause,                           //!< Instruction 'tpause' {WAITPKG}.
+    kIdTzcnt,                            //!< Instruction 'tzcnt' {BMI}.
+    kIdTzmsk,                            //!< Instruction 'tzmsk' {TBM}.
+    kIdUcomisd,                          //!< Instruction 'ucomisd' {SSE2}.
+    kIdUcomiss,                          //!< Instruction 'ucomiss' {SSE}.
+    kIdUd0,                              //!< Instruction 'ud0'.
+    kIdUd1,                              //!< Instruction 'ud1'.
+    kIdUd2,                              //!< Instruction 'ud2'.
+    kIdUiret,                            //!< Instruction 'uiret' {UINTR} (X64).
+    kIdUmonitor,                         //!< Instruction 'umonitor' {WAITPKG}.
+    kIdUmwait,                           //!< Instruction 'umwait' {WAITPKG}.
+    kIdUnpckhpd,                         //!< Instruction 'unpckhpd' {SSE2}.
+    kIdUnpckhps,                         //!< Instruction 'unpckhps' {SSE}.
+    kIdUnpcklpd,                         //!< Instruction 'unpcklpd' {SSE2}.
+    kIdUnpcklps,                         //!< Instruction 'unpcklps' {SSE}.
+    kIdV4fmaddps,                        //!< Instruction 'v4fmaddps' {AVX512_4FMAPS}.
+    kIdV4fmaddss,                        //!< Instruction 'v4fmaddss' {AVX512_4FMAPS}.
+    kIdV4fnmaddps,                       //!< Instruction 'v4fnmaddps' {AVX512_4FMAPS}.
+    kIdV4fnmaddss,                       //!< Instruction 'v4fnmaddss' {AVX512_4FMAPS}.
+    kIdVaddpd,                           //!< Instruction 'vaddpd' {AVX|AVX512_F+VL}.
+    kIdVaddph,                           //!< Instruction 'vaddph' {AVX512_FP16+VL}.
+    kIdVaddps,                           //!< Instruction 'vaddps' {AVX|AVX512_F+VL}.
+    kIdVaddsd,                           //!< Instruction 'vaddsd' {AVX|AVX512_F}.
+    kIdVaddsh,                           //!< Instruction 'vaddsh' {AVX512_FP16}.
+    kIdVaddss,                           //!< Instruction 'vaddss' {AVX|AVX512_F}.
+    kIdVaddsubpd,                        //!< Instruction 'vaddsubpd' {AVX}.
+    kIdVaddsubps,                        //!< Instruction 'vaddsubps' {AVX}.
+    kIdVaesdec,                          //!< Instruction 'vaesdec' {AVX|AVX512_F+VL & AESNI|VAES}.
+    kIdVaesdeclast,                      //!< Instruction 'vaesdeclast' {AVX|AVX512_F+VL & AESNI|VAES}.
+    kIdVaesenc,                          //!< Instruction 'vaesenc' {AVX|AVX512_F+VL & AESNI|VAES}.
+    kIdVaesenclast,                      //!< Instruction 'vaesenclast' {AVX|AVX512_F+VL & AESNI|VAES}.
+    kIdVaesimc,                          //!< Instruction 'vaesimc' {AVX & AESNI}.
+    kIdVaeskeygenassist,                 //!< Instruction 'vaeskeygenassist' {AVX & AESNI}.
+    kIdValignd,                          //!< Instruction 'valignd' {AVX512_F+VL}.
+    kIdValignq,                          //!< Instruction 'valignq' {AVX512_F+VL}.
+    kIdVandnpd,                          //!< Instruction 'vandnpd' {AVX|AVX512_DQ+VL}.
+    kIdVandnps,                          //!< Instruction 'vandnps' {AVX|AVX512_DQ+VL}.
+    kIdVandpd,                           //!< Instruction 'vandpd' {AVX|AVX512_DQ+VL}.
+    kIdVandps,                           //!< Instruction 'vandps' {AVX|AVX512_DQ+VL}.
+    kIdVblendmpd,                        //!< Instruction 'vblendmpd' {AVX512_F+VL}.
+    kIdVblendmps,                        //!< Instruction 'vblendmps' {AVX512_F+VL}.
+    kIdVblendpd,                         //!< Instruction 'vblendpd' {AVX}.
+    kIdVblendps,                         //!< Instruction 'vblendps' {AVX}.
+    kIdVblendvpd,                        //!< Instruction 'vblendvpd' {AVX}.
+    kIdVblendvps,                        //!< Instruction 'vblendvps' {AVX}.
+    kIdVbroadcastf128,                   //!< Instruction 'vbroadcastf128' {AVX}.
+    kIdVbroadcastf32x2,                  //!< Instruction 'vbroadcastf32x2' {AVX512_DQ+VL}.
+    kIdVbroadcastf32x4,                  //!< Instruction 'vbroadcastf32x4' {AVX512_F}.
+    kIdVbroadcastf32x8,                  //!< Instruction 'vbroadcastf32x8' {AVX512_DQ}.
+    kIdVbroadcastf64x2,                  //!< Instruction 'vbroadcastf64x2' {AVX512_DQ+VL}.
+    kIdVbroadcastf64x4,                  //!< Instruction 'vbroadcastf64x4' {AVX512_F}.
+    kIdVbroadcasti128,                   //!< Instruction 'vbroadcasti128' {AVX2}.
+    kIdVbroadcasti32x2,                  //!< Instruction 'vbroadcasti32x2' {AVX512_DQ+VL}.
+    kIdVbroadcasti32x4,                  //!< Instruction 'vbroadcasti32x4' {AVX512_F+VL}.
+    kIdVbroadcasti32x8,                  //!< Instruction 'vbroadcasti32x8' {AVX512_DQ}.
+    kIdVbroadcasti64x2,                  //!< Instruction 'vbroadcasti64x2' {AVX512_DQ+VL}.
+    kIdVbroadcasti64x4,                  //!< Instruction 'vbroadcasti64x4' {AVX512_F}.
+    kIdVbroadcastsd,                     //!< Instruction 'vbroadcastsd' {AVX|AVX2|AVX512_F+VL}.
+    kIdVbroadcastss,                     //!< Instruction 'vbroadcastss' {AVX|AVX2|AVX512_F+VL}.
+    kIdVcmppd,                           //!< Instruction 'vcmppd' {AVX|AVX512_F+VL}.
+    kIdVcmpph,                           //!< Instruction 'vcmpph' {AVX512_FP16+VL}.
+    kIdVcmpps,                           //!< Instruction 'vcmpps' {AVX|AVX512_F+VL}.
+    kIdVcmpsd,                           //!< Instruction 'vcmpsd' {AVX|AVX512_F}.
+    kIdVcmpsh,                           //!< Instruction 'vcmpsh' {AVX512_FP16}.
+    kIdVcmpss,                           //!< Instruction 'vcmpss' {AVX|AVX512_F}.
+    kIdVcomisd,                          //!< Instruction 'vcomisd' {AVX|AVX512_F}.
+    kIdVcomish,                          //!< Instruction 'vcomish' {AVX512_FP16}.
+    kIdVcomiss,                          //!< Instruction 'vcomiss' {AVX|AVX512_F}.
+    kIdVcompresspd,                      //!< Instruction 'vcompresspd' {AVX512_F+VL}.
+    kIdVcompressps,                      //!< Instruction 'vcompressps' {AVX512_F+VL}.
+    kIdVcvtdq2pd,                        //!< Instruction 'vcvtdq2pd' {AVX|AVX512_F+VL}.
+    kIdVcvtdq2ph,                        //!< Instruction 'vcvtdq2ph' {AVX512_FP16+VL}.
+    kIdVcvtdq2ps,                        //!< Instruction 'vcvtdq2ps' {AVX|AVX512_F+VL}.
+    kIdVcvtne2ps2bf16,                   //!< Instruction 'vcvtne2ps2bf16' {AVX512_BF16+VL}.
+    kIdVcvtneps2bf16,                    //!< Instruction 'vcvtneps2bf16' {AVX512_BF16+VL}.
+    kIdVcvtpd2dq,                        //!< Instruction 'vcvtpd2dq' {AVX|AVX512_F+VL}.
+    kIdVcvtpd2ph,                        //!< Instruction 'vcvtpd2ph' {AVX512_FP16+VL}.
+    kIdVcvtpd2ps,                        //!< Instruction 'vcvtpd2ps' {AVX|AVX512_F+VL}.
+    kIdVcvtpd2qq,                        //!< Instruction 'vcvtpd2qq' {AVX512_DQ+VL}.
+    kIdVcvtpd2udq,                       //!< Instruction 'vcvtpd2udq' {AVX512_F+VL}.
+    kIdVcvtpd2uqq,                       //!< Instruction 'vcvtpd2uqq' {AVX512_DQ+VL}.
+    kIdVcvtph2dq,                        //!< Instruction 'vcvtph2dq' {AVX512_FP16+VL}.
+    kIdVcvtph2pd,                        //!< Instruction 'vcvtph2pd' {AVX512_FP16+VL}.
+    kIdVcvtph2ps,                        //!< Instruction 'vcvtph2ps' {AVX512_F+VL & F16C}.
+    kIdVcvtph2psx,                       //!< Instruction 'vcvtph2psx' {AVX512_FP16+VL}.
+    kIdVcvtph2qq,                        //!< Instruction 'vcvtph2qq' {AVX512_FP16+VL}.
+    kIdVcvtph2udq,                       //!< Instruction 'vcvtph2udq' {AVX512_FP16+VL}.
+    kIdVcvtph2uqq,                       //!< Instruction 'vcvtph2uqq' {AVX512_FP16+VL}.
+    kIdVcvtph2uw,                        //!< Instruction 'vcvtph2uw' {AVX512_FP16+VL}.
+    kIdVcvtph2w,                         //!< Instruction 'vcvtph2w' {AVX512_FP16+VL}.
+    kIdVcvtps2dq,                        //!< Instruction 'vcvtps2dq' {AVX|AVX512_F+VL}.
+    kIdVcvtps2pd,                        //!< Instruction 'vcvtps2pd' {AVX|AVX512_F+VL}.
+    kIdVcvtps2ph,                        //!< Instruction 'vcvtps2ph' {AVX512_F+VL & F16C}.
+    kIdVcvtps2phx,                       //!< Instruction 'vcvtps2phx' {AVX512_FP16+VL}.
+    kIdVcvtps2qq,                        //!< Instruction 'vcvtps2qq' {AVX512_DQ+VL}.
+    kIdVcvtps2udq,                       //!< Instruction 'vcvtps2udq' {AVX512_F+VL}.
+    kIdVcvtps2uqq,                       //!< Instruction 'vcvtps2uqq' {AVX512_DQ+VL}.
+    kIdVcvtqq2pd,                        //!< Instruction 'vcvtqq2pd' {AVX512_DQ+VL}.
+    kIdVcvtqq2ph,                        //!< Instruction 'vcvtqq2ph' {AVX512_FP16+VL}.
+    kIdVcvtqq2ps,                        //!< Instruction 'vcvtqq2ps' {AVX512_DQ+VL}.
+    kIdVcvtsd2sh,                        //!< Instruction 'vcvtsd2sh' {AVX512_FP16}.
+    kIdVcvtsd2si,                        //!< Instruction 'vcvtsd2si' {AVX|AVX512_F}.
+    kIdVcvtsd2ss,                        //!< Instruction 'vcvtsd2ss' {AVX|AVX512_F}.
+    kIdVcvtsd2usi,                       //!< Instruction 'vcvtsd2usi' {AVX512_F}.
+    kIdVcvtsh2sd,                        //!< Instruction 'vcvtsh2sd' {AVX512_FP16}.
+    kIdVcvtsh2si,                        //!< Instruction 'vcvtsh2si' {AVX512_FP16}.
+    kIdVcvtsh2ss,                        //!< Instruction 'vcvtsh2ss' {AVX512_FP16}.
+    kIdVcvtsh2usi,                       //!< Instruction 'vcvtsh2usi' {AVX512_FP16}.
+    kIdVcvtsi2sd,                        //!< Instruction 'vcvtsi2sd' {AVX|AVX512_F}.
+    kIdVcvtsi2sh,                        //!< Instruction 'vcvtsi2sh' {AVX512_FP16}.
+    kIdVcvtsi2ss,                        //!< Instruction 'vcvtsi2ss' {AVX|AVX512_F}.
+    kIdVcvtss2sd,                        //!< Instruction 'vcvtss2sd' {AVX|AVX512_F}.
+    kIdVcvtss2sh,                        //!< Instruction 'vcvtss2sh' {AVX512_FP16}.
+    kIdVcvtss2si,                        //!< Instruction 'vcvtss2si' {AVX|AVX512_F}.
+    kIdVcvtss2usi,                       //!< Instruction 'vcvtss2usi' {AVX512_F}.
+    kIdVcvttpd2dq,                       //!< Instruction 'vcvttpd2dq' {AVX|AVX512_F+VL}.
+    kIdVcvttpd2qq,                       //!< Instruction 'vcvttpd2qq' {AVX512_F+VL}.
+    kIdVcvttpd2udq,                      //!< Instruction 'vcvttpd2udq' {AVX512_F+VL}.
+    kIdVcvttpd2uqq,                      //!< Instruction 'vcvttpd2uqq' {AVX512_DQ+VL}.
+    kIdVcvttph2dq,                       //!< Instruction 'vcvttph2dq' {AVX512_FP16+VL}.
+    kIdVcvttph2qq,                       //!< Instruction 'vcvttph2qq' {AVX512_FP16+VL}.
+    kIdVcvttph2udq,                      //!< Instruction 'vcvttph2udq' {AVX512_FP16+VL}.
+    kIdVcvttph2uqq,                      //!< Instruction 'vcvttph2uqq' {AVX512_FP16+VL}.
+    kIdVcvttph2uw,                       //!< Instruction 'vcvttph2uw' {AVX512_FP16+VL}.
+    kIdVcvttph2w,                        //!< Instruction 'vcvttph2w' {AVX512_FP16+VL}.
+    kIdVcvttps2dq,                       //!< Instruction 'vcvttps2dq' {AVX|AVX512_F+VL}.
+    kIdVcvttps2qq,                       //!< Instruction 'vcvttps2qq' {AVX512_DQ+VL}.
+    kIdVcvttps2udq,                      //!< Instruction 'vcvttps2udq' {AVX512_F+VL}.
+    kIdVcvttps2uqq,                      //!< Instruction 'vcvttps2uqq' {AVX512_DQ+VL}.
+    kIdVcvttsd2si,                       //!< Instruction 'vcvttsd2si' {AVX|AVX512_F}.
+    kIdVcvttsd2usi,                      //!< Instruction 'vcvttsd2usi' {AVX512_F}.
+    kIdVcvttsh2si,                       //!< Instruction 'vcvttsh2si' {AVX512_FP16}.
+    kIdVcvttsh2usi,                      //!< Instruction 'vcvttsh2usi' {AVX512_FP16}.
+    kIdVcvttss2si,                       //!< Instruction 'vcvttss2si' {AVX|AVX512_F}.
+    kIdVcvttss2usi,                      //!< Instruction 'vcvttss2usi' {AVX512_F}.
+    kIdVcvtudq2pd,                       //!< Instruction 'vcvtudq2pd' {AVX512_F+VL}.
+    kIdVcvtudq2ph,                       //!< Instruction 'vcvtudq2ph' {AVX512_FP16+VL}.
+    kIdVcvtudq2ps,                       //!< Instruction 'vcvtudq2ps' {AVX512_F+VL}.
+    kIdVcvtuqq2pd,                       //!< Instruction 'vcvtuqq2pd' {AVX512_DQ+VL}.
+    kIdVcvtuqq2ph,                       //!< Instruction 'vcvtuqq2ph' {AVX512_FP16+VL}.
+    kIdVcvtuqq2ps,                       //!< Instruction 'vcvtuqq2ps' {AVX512_DQ+VL}.
+    kIdVcvtusi2sd,                       //!< Instruction 'vcvtusi2sd' {AVX512_F}.
+    kIdVcvtusi2sh,                       //!< Instruction 'vcvtusi2sh' {AVX512_FP16}.
+    kIdVcvtusi2ss,                       //!< Instruction 'vcvtusi2ss' {AVX512_F}.
+    kIdVcvtuw2ph,                        //!< Instruction 'vcvtuw2ph' {AVX512_FP16+VL}.
+    kIdVcvtw2ph,                         //!< Instruction 'vcvtw2ph' {AVX512_FP16+VL}.
+    kIdVdbpsadbw,                        //!< Instruction 'vdbpsadbw' {AVX512_BW+VL}.
+    kIdVdivpd,                           //!< Instruction 'vdivpd' {AVX|AVX512_F+VL}.
+    kIdVdivph,                           //!< Instruction 'vdivph' {AVX512_FP16+VL}.
+    kIdVdivps,                           //!< Instruction 'vdivps' {AVX|AVX512_F+VL}.
+    kIdVdivsd,                           //!< Instruction 'vdivsd' {AVX|AVX512_F}.
+    kIdVdivsh,                           //!< Instruction 'vdivsh' {AVX512_FP16}.
+    kIdVdivss,                           //!< Instruction 'vdivss' {AVX|AVX512_F}.
+    kIdVdpbf16ps,                        //!< Instruction 'vdpbf16ps' {AVX512_BF16+VL}.
+    kIdVdppd,                            //!< Instruction 'vdppd' {AVX}.
+    kIdVdpps,                            //!< Instruction 'vdpps' {AVX}.
+    kIdVerr,                             //!< Instruction 'verr'.
+    kIdVerw,                             //!< Instruction 'verw'.
+    kIdVexp2pd,                          //!< Instruction 'vexp2pd' {AVX512_ERI}.
+    kIdVexp2ps,                          //!< Instruction 'vexp2ps' {AVX512_ERI}.
+    kIdVexpandpd,                        //!< Instruction 'vexpandpd' {AVX512_F+VL}.
+    kIdVexpandps,                        //!< Instruction 'vexpandps' {AVX512_F+VL}.
+    kIdVextractf128,                     //!< Instruction 'vextractf128' {AVX}.
+    kIdVextractf32x4,                    //!< Instruction 'vextractf32x4' {AVX512_F+VL}.
+    kIdVextractf32x8,                    //!< Instruction 'vextractf32x8' {AVX512_DQ}.
+    kIdVextractf64x2,                    //!< Instruction 'vextractf64x2' {AVX512_DQ+VL}.
+    kIdVextractf64x4,                    //!< Instruction 'vextractf64x4' {AVX512_F}.
+    kIdVextracti128,                     //!< Instruction 'vextracti128' {AVX2}.
+    kIdVextracti32x4,                    //!< Instruction 'vextracti32x4' {AVX512_F+VL}.
+    kIdVextracti32x8,                    //!< Instruction 'vextracti32x8' {AVX512_DQ}.
+    kIdVextracti64x2,                    //!< Instruction 'vextracti64x2' {AVX512_DQ+VL}.
+    kIdVextracti64x4,                    //!< Instruction 'vextracti64x4' {AVX512_F}.
+    kIdVextractps,                       //!< Instruction 'vextractps' {AVX|AVX512_F}.
+    kIdVfcmaddcph,                       //!< Instruction 'vfcmaddcph' {AVX512_FP16+VL}.
+    kIdVfcmaddcsh,                       //!< Instruction 'vfcmaddcsh' {AVX512_FP16+VL}.
+    kIdVfcmulcph,                        //!< Instruction 'vfcmulcph' {AVX512_FP16+VL}.
+    kIdVfcmulcsh,                        //!< Instruction 'vfcmulcsh' {AVX512_FP16+VL}.
+    kIdVfixupimmpd,                      //!< Instruction 'vfixupimmpd' {AVX512_F+VL}.
+    kIdVfixupimmps,                      //!< Instruction 'vfixupimmps' {AVX512_F+VL}.
+    kIdVfixupimmsd,                      //!< Instruction 'vfixupimmsd' {AVX512_F}.
+    kIdVfixupimmss,                      //!< Instruction 'vfixupimmss' {AVX512_F}.
+    kIdVfmadd132pd,                      //!< Instruction 'vfmadd132pd' {FMA|AVX512_F+VL}.
+    kIdVfmadd132ph,                      //!< Instruction 'vfmadd132ph' {AVX512_FP16+VL}.
+    kIdVfmadd132ps,                      //!< Instruction 'vfmadd132ps' {FMA|AVX512_F+VL}.
+    kIdVfmadd132sd,                      //!< Instruction 'vfmadd132sd' {FMA|AVX512_F}.
+    kIdVfmadd132sh,                      //!< Instruction 'vfmadd132sh' {AVX512_FP16}.
+    kIdVfmadd132ss,                      //!< Instruction 'vfmadd132ss' {FMA|AVX512_F}.
+    kIdVfmadd213pd,                      //!< Instruction 'vfmadd213pd' {FMA|AVX512_F+VL}.
+    kIdVfmadd213ph,                      //!< Instruction 'vfmadd213ph' {AVX512_FP16+VL}.
+    kIdVfmadd213ps,                      //!< Instruction 'vfmadd213ps' {FMA|AVX512_F+VL}.
+    kIdVfmadd213sd,                      //!< Instruction 'vfmadd213sd' {FMA|AVX512_F}.
+    kIdVfmadd213sh,                      //!< Instruction 'vfmadd213sh' {AVX512_FP16}.
+    kIdVfmadd213ss,                      //!< Instruction 'vfmadd213ss' {FMA|AVX512_F}.
+    kIdVfmadd231pd,                      //!< Instruction 'vfmadd231pd' {FMA|AVX512_F+VL}.
+    kIdVfmadd231ph,                      //!< Instruction 'vfmadd231ph' {AVX512_FP16+VL}.
+    kIdVfmadd231ps,                      //!< Instruction 'vfmadd231ps' {FMA|AVX512_F+VL}.
+    kIdVfmadd231sd,                      //!< Instruction 'vfmadd231sd' {FMA|AVX512_F}.
+    kIdVfmadd231sh,                      //!< Instruction 'vfmadd231sh' {AVX512_FP16}.
+    kIdVfmadd231ss,                      //!< Instruction 'vfmadd231ss' {FMA|AVX512_F}.
+    kIdVfmaddcph,                        //!< Instruction 'vfmaddcph' {AVX512_FP16+VL}.
+    kIdVfmaddcsh,                        //!< Instruction 'vfmaddcsh' {AVX512_FP16+VL}.
+    kIdVfmaddpd,                         //!< Instruction 'vfmaddpd' {FMA4}.
+    kIdVfmaddps,                         //!< Instruction 'vfmaddps' {FMA4}.
+    kIdVfmaddsd,                         //!< Instruction 'vfmaddsd' {FMA4}.
+    kIdVfmaddss,                         //!< Instruction 'vfmaddss' {FMA4}.
+    kIdVfmaddsub132pd,                   //!< Instruction 'vfmaddsub132pd' {FMA|AVX512_F+VL}.
+    kIdVfmaddsub132ph,                   //!< Instruction 'vfmaddsub132ph' {AVX512_FP16+VL}.
+    kIdVfmaddsub132ps,                   //!< Instruction 'vfmaddsub132ps' {FMA|AVX512_F+VL}.
+    kIdVfmaddsub213pd,                   //!< Instruction 'vfmaddsub213pd' {FMA|AVX512_F+VL}.
+    kIdVfmaddsub213ph,                   //!< Instruction 'vfmaddsub213ph' {AVX512_FP16+VL}.
+    kIdVfmaddsub213ps,                   //!< Instruction 'vfmaddsub213ps' {FMA|AVX512_F+VL}.
+    kIdVfmaddsub231pd,                   //!< Instruction 'vfmaddsub231pd' {FMA|AVX512_F+VL}.
+    kIdVfmaddsub231ph,                   //!< Instruction 'vfmaddsub231ph' {AVX512_FP16+VL}.
+    kIdVfmaddsub231ps,                   //!< Instruction 'vfmaddsub231ps' {FMA|AVX512_F+VL}.
+    kIdVfmaddsubpd,                      //!< Instruction 'vfmaddsubpd' {FMA4}.
+    kIdVfmaddsubps,                      //!< Instruction 'vfmaddsubps' {FMA4}.
+    kIdVfmsub132pd,                      //!< Instruction 'vfmsub132pd' {FMA|AVX512_F+VL}.
+    kIdVfmsub132ph,                      //!< Instruction 'vfmsub132ph' {AVX512_FP16+VL}.
+    kIdVfmsub132ps,                      //!< Instruction 'vfmsub132ps' {FMA|AVX512_F+VL}.
+    kIdVfmsub132sd,                      //!< Instruction 'vfmsub132sd' {FMA|AVX512_F}.
+    kIdVfmsub132sh,                      //!< Instruction 'vfmsub132sh' {AVX512_FP16}.
+    kIdVfmsub132ss,                      //!< Instruction 'vfmsub132ss' {FMA|AVX512_F}.
+    kIdVfmsub213pd,                      //!< Instruction 'vfmsub213pd' {FMA|AVX512_F+VL}.
+    kIdVfmsub213ph,                      //!< Instruction 'vfmsub213ph' {AVX512_FP16+VL}.
+    kIdVfmsub213ps,                      //!< Instruction 'vfmsub213ps' {FMA|AVX512_F+VL}.
+    kIdVfmsub213sd,                      //!< Instruction 'vfmsub213sd' {FMA|AVX512_F}.
+    kIdVfmsub213sh,                      //!< Instruction 'vfmsub213sh' {AVX512_FP16}.
+    kIdVfmsub213ss,                      //!< Instruction 'vfmsub213ss' {FMA|AVX512_F}.
+    kIdVfmsub231pd,                      //!< Instruction 'vfmsub231pd' {FMA|AVX512_F+VL}.
+    kIdVfmsub231ph,                      //!< Instruction 'vfmsub231ph' {AVX512_FP16+VL}.
+    kIdVfmsub231ps,                      //!< Instruction 'vfmsub231ps' {FMA|AVX512_F+VL}.
+    kIdVfmsub231sd,                      //!< Instruction 'vfmsub231sd' {FMA|AVX512_F}.
+    kIdVfmsub231sh,                      //!< Instruction 'vfmsub231sh' {AVX512_FP16}.
+    kIdVfmsub231ss,                      //!< Instruction 'vfmsub231ss' {FMA|AVX512_F}.
+    kIdVfmsubadd132pd,                   //!< Instruction 'vfmsubadd132pd' {FMA|AVX512_F+VL}.
+    kIdVfmsubadd132ph,                   //!< Instruction 'vfmsubadd132ph' {AVX512_FP16+VL}.
+    kIdVfmsubadd132ps,                   //!< Instruction 'vfmsubadd132ps' {FMA|AVX512_F+VL}.
+    kIdVfmsubadd213pd,                   //!< Instruction 'vfmsubadd213pd' {FMA|AVX512_F+VL}.
+    kIdVfmsubadd213ph,                   //!< Instruction 'vfmsubadd213ph' {AVX512_FP16+VL}.
+    kIdVfmsubadd213ps,                   //!< Instruction 'vfmsubadd213ps' {FMA|AVX512_F+VL}.
+    kIdVfmsubadd231pd,                   //!< Instruction 'vfmsubadd231pd' {FMA|AVX512_F+VL}.
+    kIdVfmsubadd231ph,                   //!< Instruction 'vfmsubadd231ph' {AVX512_FP16+VL}.
+    kIdVfmsubadd231ps,                   //!< Instruction 'vfmsubadd231ps' {FMA|AVX512_F+VL}.
+    kIdVfmsubaddpd,                      //!< Instruction 'vfmsubaddpd' {FMA4}.
+    kIdVfmsubaddps,                      //!< Instruction 'vfmsubaddps' {FMA4}.
+    kIdVfmsubpd,                         //!< Instruction 'vfmsubpd' {FMA4}.
+    kIdVfmsubps,                         //!< Instruction 'vfmsubps' {FMA4}.
+    kIdVfmsubsd,                         //!< Instruction 'vfmsubsd' {FMA4}.
+    kIdVfmsubss,                         //!< Instruction 'vfmsubss' {FMA4}.
+    kIdVfmulcph,                         //!< Instruction 'vfmulcph' {AVX512_FP16+VL}.
+    kIdVfmulcsh,                         //!< Instruction 'vfmulcsh' {AVX512_FP16+VL}.
+    kIdVfnmadd132pd,                     //!< Instruction 'vfnmadd132pd' {FMA|AVX512_F+VL}.
+    kIdVfnmadd132ph,                     //!< Instruction 'vfnmadd132ph' {AVX512_FP16+VL}.
+    kIdVfnmadd132ps,                     //!< Instruction 'vfnmadd132ps' {FMA|AVX512_F+VL}.
+    kIdVfnmadd132sd,                     //!< Instruction 'vfnmadd132sd' {FMA|AVX512_F}.
+    kIdVfnmadd132sh,                     //!< Instruction 'vfnmadd132sh' {AVX512_FP16}.
+    kIdVfnmadd132ss,                     //!< Instruction 'vfnmadd132ss' {FMA|AVX512_F}.
+    kIdVfnmadd213pd,                     //!< Instruction 'vfnmadd213pd' {FMA|AVX512_F+VL}.
+    kIdVfnmadd213ph,                     //!< Instruction 'vfnmadd213ph' {AVX512_FP16+VL}.
+    kIdVfnmadd213ps,                     //!< Instruction 'vfnmadd213ps' {FMA|AVX512_F+VL}.
+    kIdVfnmadd213sd,                     //!< Instruction 'vfnmadd213sd' {FMA|AVX512_F}.
+    kIdVfnmadd213sh,                     //!< Instruction 'vfnmadd213sh' {AVX512_FP16}.
+    kIdVfnmadd213ss,                     //!< Instruction 'vfnmadd213ss' {FMA|AVX512_F}.
+    kIdVfnmadd231pd,                     //!< Instruction 'vfnmadd231pd' {FMA|AVX512_F+VL}.
+    kIdVfnmadd231ph,                     //!< Instruction 'vfnmadd231ph' {AVX512_FP16+VL}.
+    kIdVfnmadd231ps,                     //!< Instruction 'vfnmadd231ps' {FMA|AVX512_F+VL}.
+    kIdVfnmadd231sd,                     //!< Instruction 'vfnmadd231sd' {FMA|AVX512_F}.
+    kIdVfnmadd231sh,                     //!< Instruction 'vfnmadd231sh' {AVX512_FP16}.
+    kIdVfnmadd231ss,                     //!< Instruction 'vfnmadd231ss' {FMA|AVX512_F}.
+    kIdVfnmaddpd,                        //!< Instruction 'vfnmaddpd' {FMA4}.
+    kIdVfnmaddps,                        //!< Instruction 'vfnmaddps' {FMA4}.
+    kIdVfnmaddsd,                        //!< Instruction 'vfnmaddsd' {FMA4}.
+    kIdVfnmaddss,                        //!< Instruction 'vfnmaddss' {FMA4}.
+    kIdVfnmsub132pd,                     //!< Instruction 'vfnmsub132pd' {FMA|AVX512_F+VL}.
+    kIdVfnmsub132ph,                     //!< Instruction 'vfnmsub132ph' {AVX512_FP16+VL}.
+    kIdVfnmsub132ps,                     //!< Instruction 'vfnmsub132ps' {FMA|AVX512_F+VL}.
+    kIdVfnmsub132sd,                     //!< Instruction 'vfnmsub132sd' {FMA|AVX512_F}.
+    kIdVfnmsub132sh,                     //!< Instruction 'vfnmsub132sh' {AVX512_FP16}.
+    kIdVfnmsub132ss,                     //!< Instruction 'vfnmsub132ss' {FMA|AVX512_F}.
+    kIdVfnmsub213pd,                     //!< Instruction 'vfnmsub213pd' {FMA|AVX512_F+VL}.
+    kIdVfnmsub213ph,                     //!< Instruction 'vfnmsub213ph' {AVX512_FP16+VL}.
+    kIdVfnmsub213ps,                     //!< Instruction 'vfnmsub213ps' {FMA|AVX512_F+VL}.
+    kIdVfnmsub213sd,                     //!< Instruction 'vfnmsub213sd' {FMA|AVX512_F}.
+    kIdVfnmsub213sh,                     //!< Instruction 'vfnmsub213sh' {AVX512_FP16}.
+    kIdVfnmsub213ss,                     //!< Instruction 'vfnmsub213ss' {FMA|AVX512_F}.
+    kIdVfnmsub231pd,                     //!< Instruction 'vfnmsub231pd' {FMA|AVX512_F+VL}.
+    kIdVfnmsub231ph,                     //!< Instruction 'vfnmsub231ph' {AVX512_FP16+VL}.
+    kIdVfnmsub231ps,                     //!< Instruction 'vfnmsub231ps' {FMA|AVX512_F+VL}.
+    kIdVfnmsub231sd,                     //!< Instruction 'vfnmsub231sd' {FMA|AVX512_F}.
+    kIdVfnmsub231sh,                     //!< Instruction 'vfnmsub231sh' {AVX512_FP16}.
+    kIdVfnmsub231ss,                     //!< Instruction 'vfnmsub231ss' {FMA|AVX512_F}.
+    kIdVfnmsubpd,                        //!< Instruction 'vfnmsubpd' {FMA4}.
+    kIdVfnmsubps,                        //!< Instruction 'vfnmsubps' {FMA4}.
+    kIdVfnmsubsd,                        //!< Instruction 'vfnmsubsd' {FMA4}.
+    kIdVfnmsubss,                        //!< Instruction 'vfnmsubss' {FMA4}.
+    kIdVfpclasspd,                       //!< Instruction 'vfpclasspd' {AVX512_DQ+VL}.
+    kIdVfpclassph,                       //!< Instruction 'vfpclassph' {AVX512_FP16+VL}.
+    kIdVfpclassps,                       //!< Instruction 'vfpclassps' {AVX512_DQ+VL}.
+    kIdVfpclasssd,                       //!< Instruction 'vfpclasssd' {AVX512_DQ}.
+    kIdVfpclasssh,                       //!< Instruction 'vfpclasssh' {AVX512_FP16}.
+    kIdVfpclassss,                       //!< Instruction 'vfpclassss' {AVX512_DQ}.
+    kIdVfrczpd,                          //!< Instruction 'vfrczpd' {XOP}.
+    kIdVfrczps,                          //!< Instruction 'vfrczps' {XOP}.
+    kIdVfrczsd,                          //!< Instruction 'vfrczsd' {XOP}.
+    kIdVfrczss,                          //!< Instruction 'vfrczss' {XOP}.
+    kIdVgatherdpd,                       //!< Instruction 'vgatherdpd' {AVX2|AVX512_F+VL}.
+    kIdVgatherdps,                       //!< Instruction 'vgatherdps' {AVX2|AVX512_F+VL}.
+    kIdVgatherpf0dpd,                    //!< Instruction 'vgatherpf0dpd' {AVX512_PFI}.
+    kIdVgatherpf0dps,                    //!< Instruction 'vgatherpf0dps' {AVX512_PFI}.
+    kIdVgatherpf0qpd,                    //!< Instruction 'vgatherpf0qpd' {AVX512_PFI}.
+    kIdVgatherpf0qps,                    //!< Instruction 'vgatherpf0qps' {AVX512_PFI}.
+    kIdVgatherpf1dpd,                    //!< Instruction 'vgatherpf1dpd' {AVX512_PFI}.
+    kIdVgatherpf1dps,                    //!< Instruction 'vgatherpf1dps' {AVX512_PFI}.
+    kIdVgatherpf1qpd,                    //!< Instruction 'vgatherpf1qpd' {AVX512_PFI}.
+    kIdVgatherpf1qps,                    //!< Instruction 'vgatherpf1qps' {AVX512_PFI}.
+    kIdVgatherqpd,                       //!< Instruction 'vgatherqpd' {AVX2|AVX512_F+VL}.
+    kIdVgatherqps,                       //!< Instruction 'vgatherqps' {AVX2|AVX512_F+VL}.
+    kIdVgetexppd,                        //!< Instruction 'vgetexppd' {AVX512_F+VL}.
+    kIdVgetexpph,                        //!< Instruction 'vgetexpph' {AVX512_FP16+VL}.
+    kIdVgetexpps,                        //!< Instruction 'vgetexpps' {AVX512_F+VL}.
+    kIdVgetexpsd,                        //!< Instruction 'vgetexpsd' {AVX512_F}.
+    kIdVgetexpsh,                        //!< Instruction 'vgetexpsh' {AVX512_FP16}.
+    kIdVgetexpss,                        //!< Instruction 'vgetexpss' {AVX512_F}.
+    kIdVgetmantpd,                       //!< Instruction 'vgetmantpd' {AVX512_F+VL}.
+    kIdVgetmantph,                       //!< Instruction 'vgetmantph' {AVX512_FP16+VL}.
+    kIdVgetmantps,                       //!< Instruction 'vgetmantps' {AVX512_F+VL}.
+    kIdVgetmantsd,                       //!< Instruction 'vgetmantsd' {AVX512_F}.
+    kIdVgetmantsh,                       //!< Instruction 'vgetmantsh' {AVX512_FP16}.
+    kIdVgetmantss,                       //!< Instruction 'vgetmantss' {AVX512_F}.
+    kIdVgf2p8affineinvqb,                //!< Instruction 'vgf2p8affineinvqb' {AVX|AVX512_F+VL & GFNI}.
+    kIdVgf2p8affineqb,                   //!< Instruction 'vgf2p8affineqb' {AVX|AVX512_F+VL & GFNI}.
+    kIdVgf2p8mulb,                       //!< Instruction 'vgf2p8mulb' {AVX|AVX512_F+VL & GFNI}.
+    kIdVhaddpd,                          //!< Instruction 'vhaddpd' {AVX}.
+    kIdVhaddps,                          //!< Instruction 'vhaddps' {AVX}.
+    kIdVhsubpd,                          //!< Instruction 'vhsubpd' {AVX}.
+    kIdVhsubps,                          //!< Instruction 'vhsubps' {AVX}.
+    kIdVinsertf128,                      //!< Instruction 'vinsertf128' {AVX}.
+    kIdVinsertf32x4,                     //!< Instruction 'vinsertf32x4' {AVX512_F+VL}.
+    kIdVinsertf32x8,                     //!< Instruction 'vinsertf32x8' {AVX512_DQ}.
+    kIdVinsertf64x2,                     //!< Instruction 'vinsertf64x2' {AVX512_DQ+VL}.
+    kIdVinsertf64x4,                     //!< Instruction 'vinsertf64x4' {AVX512_F}.
+    kIdVinserti128,                      //!< Instruction 'vinserti128' {AVX2}.
+    kIdVinserti32x4,                     //!< Instruction 'vinserti32x4' {AVX512_F+VL}.
+    kIdVinserti32x8,                     //!< Instruction 'vinserti32x8' {AVX512_DQ}.
+    kIdVinserti64x2,                     //!< Instruction 'vinserti64x2' {AVX512_DQ+VL}.
+    kIdVinserti64x4,                     //!< Instruction 'vinserti64x4' {AVX512_F}.
+    kIdVinsertps,                        //!< Instruction 'vinsertps' {AVX|AVX512_F}.
+    kIdVlddqu,                           //!< Instruction 'vlddqu' {AVX}.
+    kIdVldmxcsr,                         //!< Instruction 'vldmxcsr' {AVX}.
+    kIdVmaskmovdqu,                      //!< Instruction 'vmaskmovdqu' {AVX}.
+    kIdVmaskmovpd,                       //!< Instruction 'vmaskmovpd' {AVX}.
+    kIdVmaskmovps,                       //!< Instruction 'vmaskmovps' {AVX}.
+    kIdVmaxpd,                           //!< Instruction 'vmaxpd' {AVX|AVX512_F+VL}.
+    kIdVmaxph,                           //!< Instruction 'vmaxph' {AVX512_FP16+VL}.
+    kIdVmaxps,                           //!< Instruction 'vmaxps' {AVX|AVX512_F+VL}.
+    kIdVmaxsd,                           //!< Instruction 'vmaxsd' {AVX|AVX512_F+VL}.
+    kIdVmaxsh,                           //!< Instruction 'vmaxsh' {AVX512_FP16}.
+    kIdVmaxss,                           //!< Instruction 'vmaxss' {AVX|AVX512_F+VL}.
+    kIdVmcall,                           //!< Instruction 'vmcall' {VMX}.
+    kIdVmclear,                          //!< Instruction 'vmclear' {VMX}.
+    kIdVmfunc,                           //!< Instruction 'vmfunc' {VMX}.
+    kIdVminpd,                           //!< Instruction 'vminpd' {AVX|AVX512_F+VL}.
+    kIdVminph,                           //!< Instruction 'vminph' {AVX512_FP16+VL}.
+    kIdVminps,                           //!< Instruction 'vminps' {AVX|AVX512_F+VL}.
+    kIdVminsd,                           //!< Instruction 'vminsd' {AVX|AVX512_F+VL}.
+    kIdVminsh,                           //!< Instruction 'vminsh' {AVX512_FP16}.
+    kIdVminss,                           //!< Instruction 'vminss' {AVX|AVX512_F+VL}.
+    kIdVmlaunch,                         //!< Instruction 'vmlaunch' {VMX}.
+    kIdVmload,                           //!< Instruction 'vmload' {SVM}.
+    kIdVmmcall,                          //!< Instruction 'vmmcall' {SVM}.
+    kIdVmovapd,                          //!< Instruction 'vmovapd' {AVX|AVX512_F+VL}.
+    kIdVmovaps,                          //!< Instruction 'vmovaps' {AVX|AVX512_F+VL}.
+    kIdVmovd,                            //!< Instruction 'vmovd' {AVX|AVX512_F}.
+    kIdVmovddup,                         //!< Instruction 'vmovddup' {AVX|AVX512_F+VL}.
+    kIdVmovdqa,                          //!< Instruction 'vmovdqa' {AVX}.
+    kIdVmovdqa32,                        //!< Instruction 'vmovdqa32' {AVX512_F+VL}.
+    kIdVmovdqa64,                        //!< Instruction 'vmovdqa64' {AVX512_F+VL}.
+    kIdVmovdqu,                          //!< Instruction 'vmovdqu' {AVX}.
+    kIdVmovdqu16,                        //!< Instruction 'vmovdqu16' {AVX512_BW+VL}.
+    kIdVmovdqu32,                        //!< Instruction 'vmovdqu32' {AVX512_F+VL}.
+    kIdVmovdqu64,                        //!< Instruction 'vmovdqu64' {AVX512_F+VL}.
+    kIdVmovdqu8,                         //!< Instruction 'vmovdqu8' {AVX512_BW+VL}.
+    kIdVmovhlps,                         //!< Instruction 'vmovhlps' {AVX|AVX512_F}.
+    kIdVmovhpd,                          //!< Instruction 'vmovhpd' {AVX|AVX512_F}.
+    kIdVmovhps,                          //!< Instruction 'vmovhps' {AVX|AVX512_F}.
+    kIdVmovlhps,                         //!< Instruction 'vmovlhps' {AVX|AVX512_F}.
+    kIdVmovlpd,                          //!< Instruction 'vmovlpd' {AVX|AVX512_F}.
+    kIdVmovlps,                          //!< Instruction 'vmovlps' {AVX|AVX512_F}.
+    kIdVmovmskpd,                        //!< Instruction 'vmovmskpd' {AVX}.
+    kIdVmovmskps,                        //!< Instruction 'vmovmskps' {AVX}.
+    kIdVmovntdq,                         //!< Instruction 'vmovntdq' {AVX|AVX512_F+VL}.
+    kIdVmovntdqa,                        //!< Instruction 'vmovntdqa' {AVX|AVX2|AVX512_F+VL}.
+    kIdVmovntpd,                         //!< Instruction 'vmovntpd' {AVX|AVX512_F+VL}.
+    kIdVmovntps,                         //!< Instruction 'vmovntps' {AVX|AVX512_F+VL}.
+    kIdVmovq,                            //!< Instruction 'vmovq' {AVX|AVX512_F}.
+    kIdVmovsd,                           //!< Instruction 'vmovsd' {AVX|AVX512_F}.
+    kIdVmovsh,                           //!< Instruction 'vmovsh' {AVX512_FP16}.
+    kIdVmovshdup,                        //!< Instruction 'vmovshdup' {AVX|AVX512_F+VL}.
+    kIdVmovsldup,                        //!< Instruction 'vmovsldup' {AVX|AVX512_F+VL}.
+    kIdVmovss,                           //!< Instruction 'vmovss' {AVX|AVX512_F}.
+    kIdVmovupd,                          //!< Instruction 'vmovupd' {AVX|AVX512_F+VL}.
+    kIdVmovups,                          //!< Instruction 'vmovups' {AVX|AVX512_F+VL}.
+    kIdVmovw,                            //!< Instruction 'vmovw' {AVX512_FP16}.
+    kIdVmpsadbw,                         //!< Instruction 'vmpsadbw' {AVX|AVX2}.
+    kIdVmptrld,                          //!< Instruction 'vmptrld' {VMX}.
+    kIdVmptrst,                          //!< Instruction 'vmptrst' {VMX}.
+    kIdVmread,                           //!< Instruction 'vmread' {VMX}.
+    kIdVmresume,                         //!< Instruction 'vmresume' {VMX}.
+    kIdVmrun,                            //!< Instruction 'vmrun' {SVM}.
+    kIdVmsave,                           //!< Instruction 'vmsave' {SVM}.
+    kIdVmulpd,                           //!< Instruction 'vmulpd' {AVX|AVX512_F+VL}.
+    kIdVmulph,                           //!< Instruction 'vmulph' {AVX512_FP16+VL}.
+    kIdVmulps,                           //!< Instruction 'vmulps' {AVX|AVX512_F+VL}.
+    kIdVmulsd,                           //!< Instruction 'vmulsd' {AVX|AVX512_F}.
+    kIdVmulsh,                           //!< Instruction 'vmulsh' {AVX512_FP16}.
+    kIdVmulss,                           //!< Instruction 'vmulss' {AVX|AVX512_F}.
+    kIdVmwrite,                          //!< Instruction 'vmwrite' {VMX}.
+    kIdVmxon,                            //!< Instruction 'vmxon' {VMX}.
+    kIdVorpd,                            //!< Instruction 'vorpd' {AVX|AVX512_DQ+VL}.
+    kIdVorps,                            //!< Instruction 'vorps' {AVX|AVX512_DQ+VL}.
+    kIdVp2intersectd,                    //!< Instruction 'vp2intersectd' {AVX512_VP2INTERSECT}.
+    kIdVp2intersectq,                    //!< Instruction 'vp2intersectq' {AVX512_VP2INTERSECT}.
+    kIdVp4dpwssd,                        //!< Instruction 'vp4dpwssd' {AVX512_4VNNIW}.
+    kIdVp4dpwssds,                       //!< Instruction 'vp4dpwssds' {AVX512_4VNNIW}.
+    kIdVpabsb,                           //!< Instruction 'vpabsb' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpabsd,                           //!< Instruction 'vpabsd' {AVX|AVX2|AVX512_F+VL}.
+    kIdVpabsq,                           //!< Instruction 'vpabsq' {AVX512_F+VL}.
+    kIdVpabsw,                           //!< Instruction 'vpabsw' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpackssdw,                        //!< Instruction 'vpackssdw' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpacksswb,                        //!< Instruction 'vpacksswb' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpackusdw,                        //!< Instruction 'vpackusdw' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpackuswb,                        //!< Instruction 'vpackuswb' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpaddb,                           //!< Instruction 'vpaddb' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpaddd,                           //!< Instruction 'vpaddd' {AVX|AVX2|AVX512_F+VL}.
+    kIdVpaddq,                           //!< Instruction 'vpaddq' {AVX|AVX2|AVX512_F+VL}.
+    kIdVpaddsb,                          //!< Instruction 'vpaddsb' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpaddsw,                          //!< Instruction 'vpaddsw' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpaddusb,                         //!< Instruction 'vpaddusb' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpaddusw,                         //!< Instruction 'vpaddusw' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpaddw,                           //!< Instruction 'vpaddw' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpalignr,                         //!< Instruction 'vpalignr' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpand,                            //!< Instruction 'vpand' {AVX|AVX2}.
+    kIdVpandd,                           //!< Instruction 'vpandd' {AVX512_F+VL}.
+    kIdVpandn,                           //!< Instruction 'vpandn' {AVX|AVX2}.
+    kIdVpandnd,                          //!< Instruction 'vpandnd' {AVX512_F+VL}.
+    kIdVpandnq,                          //!< Instruction 'vpandnq' {AVX512_F+VL}.
+    kIdVpandq,                           //!< Instruction 'vpandq' {AVX512_F+VL}.
+    kIdVpavgb,                           //!< Instruction 'vpavgb' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpavgw,                           //!< Instruction 'vpavgw' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpblendd,                         //!< Instruction 'vpblendd' {AVX2}.
+    kIdVpblendmb,                        //!< Instruction 'vpblendmb' {AVX512_BW+VL}.
+    kIdVpblendmd,                        //!< Instruction 'vpblendmd' {AVX512_F+VL}.
+    kIdVpblendmq,                        //!< Instruction 'vpblendmq' {AVX512_F+VL}.
+    kIdVpblendmw,                        //!< Instruction 'vpblendmw' {AVX512_BW+VL}.
+    kIdVpblendvb,                        //!< Instruction 'vpblendvb' {AVX|AVX2}.
+    kIdVpblendw,                         //!< Instruction 'vpblendw' {AVX|AVX2}.
+    kIdVpbroadcastb,                     //!< Instruction 'vpbroadcastb' {AVX2|AVX512_BW+VL}.
+    kIdVpbroadcastd,                     //!< Instruction 'vpbroadcastd' {AVX2|AVX512_F+VL}.
+    kIdVpbroadcastmb2q,                  //!< Instruction 'vpbroadcastmb2q' {AVX512_CDI+VL}.
+    kIdVpbroadcastmw2d,                  //!< Instruction 'vpbroadcastmw2d' {AVX512_CDI+VL}.
+    kIdVpbroadcastq,                     //!< Instruction 'vpbroadcastq' {AVX2|AVX512_F+VL}.
+    kIdVpbroadcastw,                     //!< Instruction 'vpbroadcastw' {AVX2|AVX512_BW+VL}.
+    kIdVpclmulqdq,                       //!< Instruction 'vpclmulqdq' {AVX|AVX512_F+VL & PCLMULQDQ|VPCLMULQDQ}.
+    kIdVpcmov,                           //!< Instruction 'vpcmov' {XOP}.
+    kIdVpcmpb,                           //!< Instruction 'vpcmpb' {AVX512_BW+VL}.
+    kIdVpcmpd,                           //!< Instruction 'vpcmpd' {AVX512_F+VL}.
+    kIdVpcmpeqb,                         //!< Instruction 'vpcmpeqb' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpcmpeqd,                         //!< Instruction 'vpcmpeqd' {AVX|AVX2|AVX512_F+VL}.
+    kIdVpcmpeqq,                         //!< Instruction 'vpcmpeqq' {AVX|AVX2|AVX512_F+VL}.
+    kIdVpcmpeqw,                         //!< Instruction 'vpcmpeqw' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpcmpestri,                       //!< Instruction 'vpcmpestri' {AVX}.
+    kIdVpcmpestrm,                       //!< Instruction 'vpcmpestrm' {AVX}.
+    kIdVpcmpgtb,                         //!< Instruction 'vpcmpgtb' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpcmpgtd,                         //!< Instruction 'vpcmpgtd' {AVX|AVX2|AVX512_F+VL}.
+    kIdVpcmpgtq,                         //!< Instruction 'vpcmpgtq' {AVX|AVX2|AVX512_F+VL}.
+    kIdVpcmpgtw,                         //!< Instruction 'vpcmpgtw' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpcmpistri,                       //!< Instruction 'vpcmpistri' {AVX}.
+    kIdVpcmpistrm,                       //!< Instruction 'vpcmpistrm' {AVX}.
+    kIdVpcmpq,                           //!< Instruction 'vpcmpq' {AVX512_F+VL}.
+    kIdVpcmpub,                          //!< Instruction 'vpcmpub' {AVX512_BW+VL}.
+    kIdVpcmpud,                          //!< Instruction 'vpcmpud' {AVX512_F+VL}.
+    kIdVpcmpuq,                          //!< Instruction 'vpcmpuq' {AVX512_F+VL}.
+    kIdVpcmpuw,                          //!< Instruction 'vpcmpuw' {AVX512_BW+VL}.
+    kIdVpcmpw,                           //!< Instruction 'vpcmpw' {AVX512_BW+VL}.
+    kIdVpcomb,                           //!< Instruction 'vpcomb' {XOP}.
+    kIdVpcomd,                           //!< Instruction 'vpcomd' {XOP}.
+    kIdVpcompressb,                      //!< Instruction 'vpcompressb' {AVX512_VBMI2+VL}.
+    kIdVpcompressd,                      //!< Instruction 'vpcompressd' {AVX512_F+VL}.
+    kIdVpcompressq,                      //!< Instruction 'vpcompressq' {AVX512_F+VL}.
+    kIdVpcompressw,                      //!< Instruction 'vpcompressw' {AVX512_VBMI2+VL}.
+    kIdVpcomq,                           //!< Instruction 'vpcomq' {XOP}.
+    kIdVpcomub,                          //!< Instruction 'vpcomub' {XOP}.
+    kIdVpcomud,                          //!< Instruction 'vpcomud' {XOP}.
+    kIdVpcomuq,                          //!< Instruction 'vpcomuq' {XOP}.
+    kIdVpcomuw,                          //!< Instruction 'vpcomuw' {XOP}.
+    kIdVpcomw,                           //!< Instruction 'vpcomw' {XOP}.
+    kIdVpconflictd,                      //!< Instruction 'vpconflictd' {AVX512_CDI+VL}.
+    kIdVpconflictq,                      //!< Instruction 'vpconflictq' {AVX512_CDI+VL}.
+    kIdVpdpbusd,                         //!< Instruction 'vpdpbusd' {AVX_VNNI|AVX512_VNNI+VL}.
+    kIdVpdpbusds,                        //!< Instruction 'vpdpbusds' {AVX_VNNI|AVX512_VNNI+VL}.
+    kIdVpdpwssd,                         //!< Instruction 'vpdpwssd' {AVX_VNNI|AVX512_VNNI+VL}.
+    kIdVpdpwssds,                        //!< Instruction 'vpdpwssds' {AVX_VNNI|AVX512_VNNI+VL}.
+    kIdVperm2f128,                       //!< Instruction 'vperm2f128' {AVX}.
+    kIdVperm2i128,                       //!< Instruction 'vperm2i128' {AVX2}.
+    kIdVpermb,                           //!< Instruction 'vpermb' {AVX512_VBMI+VL}.
+    kIdVpermd,                           //!< Instruction 'vpermd' {AVX2|AVX512_F+VL}.
+    kIdVpermi2b,                         //!< Instruction 'vpermi2b' {AVX512_VBMI+VL}.
+    kIdVpermi2d,                         //!< Instruction 'vpermi2d' {AVX512_F+VL}.
+    kIdVpermi2pd,                        //!< Instruction 'vpermi2pd' {AVX512_F+VL}.
+    kIdVpermi2ps,                        //!< Instruction 'vpermi2ps' {AVX512_F+VL}.
+    kIdVpermi2q,                         //!< Instruction 'vpermi2q' {AVX512_F+VL}.
+    kIdVpermi2w,                         //!< Instruction 'vpermi2w' {AVX512_BW+VL}.
+    kIdVpermil2pd,                       //!< Instruction 'vpermil2pd' {XOP}.
+    kIdVpermil2ps,                       //!< Instruction 'vpermil2ps' {XOP}.
+    kIdVpermilpd,                        //!< Instruction 'vpermilpd' {AVX|AVX512_F+VL}.
+    kIdVpermilps,                        //!< Instruction 'vpermilps' {AVX|AVX512_F+VL}.
+    kIdVpermpd,                          //!< Instruction 'vpermpd' {AVX2|AVX512_F+VL}.
+    kIdVpermps,                          //!< Instruction 'vpermps' {AVX2|AVX512_F+VL}.
+    kIdVpermq,                           //!< Instruction 'vpermq' {AVX2|AVX512_F+VL}.
+    kIdVpermt2b,                         //!< Instruction 'vpermt2b' {AVX512_VBMI+VL}.
+    kIdVpermt2d,                         //!< Instruction 'vpermt2d' {AVX512_F+VL}.
+    kIdVpermt2pd,                        //!< Instruction 'vpermt2pd' {AVX512_F+VL}.
+    kIdVpermt2ps,                        //!< Instruction 'vpermt2ps' {AVX512_F+VL}.
+    kIdVpermt2q,                         //!< Instruction 'vpermt2q' {AVX512_F+VL}.
+    kIdVpermt2w,                         //!< Instruction 'vpermt2w' {AVX512_BW+VL}.
+    kIdVpermw,                           //!< Instruction 'vpermw' {AVX512_BW+VL}.
+    kIdVpexpandb,                        //!< Instruction 'vpexpandb' {AVX512_VBMI2+VL}.
+    kIdVpexpandd,                        //!< Instruction 'vpexpandd' {AVX512_F+VL}.
+    kIdVpexpandq,                        //!< Instruction 'vpexpandq' {AVX512_F+VL}.
+    kIdVpexpandw,                        //!< Instruction 'vpexpandw' {AVX512_VBMI2+VL}.
+    kIdVpextrb,                          //!< Instruction 'vpextrb' {AVX|AVX512_BW}.
+    kIdVpextrd,                          //!< Instruction 'vpextrd' {AVX|AVX512_DQ}.
+    kIdVpextrq,                          //!< Instruction 'vpextrq' {AVX|AVX512_DQ} (X64).
+    kIdVpextrw,                          //!< Instruction 'vpextrw' {AVX|AVX512_BW}.
+    kIdVpgatherdd,                       //!< Instruction 'vpgatherdd' {AVX2|AVX512_F+VL}.
+    kIdVpgatherdq,                       //!< Instruction 'vpgatherdq' {AVX2|AVX512_F+VL}.
+    kIdVpgatherqd,                       //!< Instruction 'vpgatherqd' {AVX2|AVX512_F+VL}.
+    kIdVpgatherqq,                       //!< Instruction 'vpgatherqq' {AVX2|AVX512_F+VL}.
+    kIdVphaddbd,                         //!< Instruction 'vphaddbd' {XOP}.
+    kIdVphaddbq,                         //!< Instruction 'vphaddbq' {XOP}.
+    kIdVphaddbw,                         //!< Instruction 'vphaddbw' {XOP}.
+    kIdVphaddd,                          //!< Instruction 'vphaddd' {AVX|AVX2}.
+    kIdVphadddq,                         //!< Instruction 'vphadddq' {XOP}.
+    kIdVphaddsw,                         //!< Instruction 'vphaddsw' {AVX|AVX2}.
+    kIdVphaddubd,                        //!< Instruction 'vphaddubd' {XOP}.
+    kIdVphaddubq,                        //!< Instruction 'vphaddubq' {XOP}.
+    kIdVphaddubw,                        //!< Instruction 'vphaddubw' {XOP}.
+    kIdVphaddudq,                        //!< Instruction 'vphaddudq' {XOP}.
+    kIdVphadduwd,                        //!< Instruction 'vphadduwd' {XOP}.
+    kIdVphadduwq,                        //!< Instruction 'vphadduwq' {XOP}.
+    kIdVphaddw,                          //!< Instruction 'vphaddw' {AVX|AVX2}.
+    kIdVphaddwd,                         //!< Instruction 'vphaddwd' {XOP}.
+    kIdVphaddwq,                         //!< Instruction 'vphaddwq' {XOP}.
+    kIdVphminposuw,                      //!< Instruction 'vphminposuw' {AVX}.
+    kIdVphsubbw,                         //!< Instruction 'vphsubbw' {XOP}.
+    kIdVphsubd,                          //!< Instruction 'vphsubd' {AVX|AVX2}.
+    kIdVphsubdq,                         //!< Instruction 'vphsubdq' {XOP}.
+    kIdVphsubsw,                         //!< Instruction 'vphsubsw' {AVX|AVX2}.
+    kIdVphsubw,                          //!< Instruction 'vphsubw' {AVX|AVX2}.
+    kIdVphsubwd,                         //!< Instruction 'vphsubwd' {XOP}.
+    kIdVpinsrb,                          //!< Instruction 'vpinsrb' {AVX|AVX512_BW}.
+    kIdVpinsrd,                          //!< Instruction 'vpinsrd' {AVX|AVX512_DQ}.
+    kIdVpinsrq,                          //!< Instruction 'vpinsrq' {AVX|AVX512_DQ} (X64).
+    kIdVpinsrw,                          //!< Instruction 'vpinsrw' {AVX|AVX512_BW}.
+    kIdVplzcntd,                         //!< Instruction 'vplzcntd' {AVX512_CDI+VL}.
+    kIdVplzcntq,                         //!< Instruction 'vplzcntq' {AVX512_CDI+VL}.
+    kIdVpmacsdd,                         //!< Instruction 'vpmacsdd' {XOP}.
+    kIdVpmacsdqh,                        //!< Instruction 'vpmacsdqh' {XOP}.
+    kIdVpmacsdql,                        //!< Instruction 'vpmacsdql' {XOP}.
+    kIdVpmacssdd,                        //!< Instruction 'vpmacssdd' {XOP}.
+    kIdVpmacssdqh,                       //!< Instruction 'vpmacssdqh' {XOP}.
+    kIdVpmacssdql,                       //!< Instruction 'vpmacssdql' {XOP}.
+    kIdVpmacsswd,                        //!< Instruction 'vpmacsswd' {XOP}.
+    kIdVpmacssww,                        //!< Instruction 'vpmacssww' {XOP}.
+    kIdVpmacswd,                         //!< Instruction 'vpmacswd' {XOP}.
+    kIdVpmacsww,                         //!< Instruction 'vpmacsww' {XOP}.
+    kIdVpmadcsswd,                       //!< Instruction 'vpmadcsswd' {XOP}.
+    kIdVpmadcswd,                        //!< Instruction 'vpmadcswd' {XOP}.
+    kIdVpmadd52huq,                      //!< Instruction 'vpmadd52huq' {AVX512_IFMA+VL}.
+    kIdVpmadd52luq,                      //!< Instruction 'vpmadd52luq' {AVX512_IFMA+VL}.
+    kIdVpmaddubsw,                       //!< Instruction 'vpmaddubsw' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpmaddwd,                         //!< Instruction 'vpmaddwd' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpmaskmovd,                       //!< Instruction 'vpmaskmovd' {AVX2}.
+    kIdVpmaskmovq,                       //!< Instruction 'vpmaskmovq' {AVX2}.
+    kIdVpmaxsb,                          //!< Instruction 'vpmaxsb' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpmaxsd,                          //!< Instruction 'vpmaxsd' {AVX|AVX2|AVX512_F+VL}.
+    kIdVpmaxsq,                          //!< Instruction 'vpmaxsq' {AVX512_F+VL}.
+    kIdVpmaxsw,                          //!< Instruction 'vpmaxsw' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpmaxub,                          //!< Instruction 'vpmaxub' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpmaxud,                          //!< Instruction 'vpmaxud' {AVX|AVX2|AVX512_F+VL}.
+    kIdVpmaxuq,                          //!< Instruction 'vpmaxuq' {AVX512_F+VL}.
+    kIdVpmaxuw,                          //!< Instruction 'vpmaxuw' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpminsb,                          //!< Instruction 'vpminsb' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpminsd,                          //!< Instruction 'vpminsd' {AVX|AVX2|AVX512_F+VL}.
+    kIdVpminsq,                          //!< Instruction 'vpminsq' {AVX512_F+VL}.
+    kIdVpminsw,                          //!< Instruction 'vpminsw' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpminub,                          //!< Instruction 'vpminub' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpminud,                          //!< Instruction 'vpminud' {AVX|AVX2|AVX512_F+VL}.
+    kIdVpminuq,                          //!< Instruction 'vpminuq' {AVX512_F+VL}.
+    kIdVpminuw,                          //!< Instruction 'vpminuw' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpmovb2m,                         //!< Instruction 'vpmovb2m' {AVX512_BW+VL}.
+    kIdVpmovd2m,                         //!< Instruction 'vpmovd2m' {AVX512_DQ+VL}.
+    kIdVpmovdb,                          //!< Instruction 'vpmovdb' {AVX512_F+VL}.
+    kIdVpmovdw,                          //!< Instruction 'vpmovdw' {AVX512_F+VL}.
+    kIdVpmovm2b,                         //!< Instruction 'vpmovm2b' {AVX512_BW+VL}.
+    kIdVpmovm2d,                         //!< Instruction 'vpmovm2d' {AVX512_DQ+VL}.
+    kIdVpmovm2q,                         //!< Instruction 'vpmovm2q' {AVX512_DQ+VL}.
+    kIdVpmovm2w,                         //!< Instruction 'vpmovm2w' {AVX512_BW+VL}.
+    kIdVpmovmskb,                        //!< Instruction 'vpmovmskb' {AVX|AVX2}.
+    kIdVpmovq2m,                         //!< Instruction 'vpmovq2m' {AVX512_DQ+VL}.
+    kIdVpmovqb,                          //!< Instruction 'vpmovqb' {AVX512_F+VL}.
+    kIdVpmovqd,                          //!< Instruction 'vpmovqd' {AVX512_F+VL}.
+    kIdVpmovqw,                          //!< Instruction 'vpmovqw' {AVX512_F+VL}.
+    kIdVpmovsdb,                         //!< Instruction 'vpmovsdb' {AVX512_F+VL}.
+    kIdVpmovsdw,                         //!< Instruction 'vpmovsdw' {AVX512_F+VL}.
+    kIdVpmovsqb,                         //!< Instruction 'vpmovsqb' {AVX512_F+VL}.
+    kIdVpmovsqd,                         //!< Instruction 'vpmovsqd' {AVX512_F+VL}.
+    kIdVpmovsqw,                         //!< Instruction 'vpmovsqw' {AVX512_F+VL}.
+    kIdVpmovswb,                         //!< Instruction 'vpmovswb' {AVX512_BW+VL}.
+    kIdVpmovsxbd,                        //!< Instruction 'vpmovsxbd' {AVX|AVX2|AVX512_F+VL}.
+    kIdVpmovsxbq,                        //!< Instruction 'vpmovsxbq' {AVX|AVX2|AVX512_F+VL}.
+    kIdVpmovsxbw,                        //!< Instruction 'vpmovsxbw' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpmovsxdq,                        //!< Instruction 'vpmovsxdq' {AVX|AVX2|AVX512_F+VL}.
+    kIdVpmovsxwd,                        //!< Instruction 'vpmovsxwd' {AVX|AVX2|AVX512_F+VL}.
+    kIdVpmovsxwq,                        //!< Instruction 'vpmovsxwq' {AVX|AVX2|AVX512_F+VL}.
+    kIdVpmovusdb,                        //!< Instruction 'vpmovusdb' {AVX512_F+VL}.
+    kIdVpmovusdw,                        //!< Instruction 'vpmovusdw' {AVX512_F+VL}.
+    kIdVpmovusqb,                        //!< Instruction 'vpmovusqb' {AVX512_F+VL}.
+    kIdVpmovusqd,                        //!< Instruction 'vpmovusqd' {AVX512_F+VL}.
+    kIdVpmovusqw,                        //!< Instruction 'vpmovusqw' {AVX512_F+VL}.
+    kIdVpmovuswb,                        //!< Instruction 'vpmovuswb' {AVX512_BW+VL}.
+    kIdVpmovw2m,                         //!< Instruction 'vpmovw2m' {AVX512_BW+VL}.
+    kIdVpmovwb,                          //!< Instruction 'vpmovwb' {AVX512_BW+VL}.
+    kIdVpmovzxbd,                        //!< Instruction 'vpmovzxbd' {AVX|AVX2|AVX512_F+VL}.
+    kIdVpmovzxbq,                        //!< Instruction 'vpmovzxbq' {AVX|AVX2|AVX512_F+VL}.
+    kIdVpmovzxbw,                        //!< Instruction 'vpmovzxbw' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpmovzxdq,                        //!< Instruction 'vpmovzxdq' {AVX|AVX2|AVX512_F+VL}.
+    kIdVpmovzxwd,                        //!< Instruction 'vpmovzxwd' {AVX|AVX2|AVX512_F+VL}.
+    kIdVpmovzxwq,                        //!< Instruction 'vpmovzxwq' {AVX|AVX2|AVX512_F+VL}.
+    kIdVpmuldq,                          //!< Instruction 'vpmuldq' {AVX|AVX2|AVX512_F+VL}.
+    kIdVpmulhrsw,                        //!< Instruction 'vpmulhrsw' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpmulhuw,                         //!< Instruction 'vpmulhuw' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpmulhw,                          //!< Instruction 'vpmulhw' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpmulld,                          //!< Instruction 'vpmulld' {AVX|AVX2|AVX512_F+VL}.
+    kIdVpmullq,                          //!< Instruction 'vpmullq' {AVX512_DQ+VL}.
+    kIdVpmullw,                          //!< Instruction 'vpmullw' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpmultishiftqb,                   //!< Instruction 'vpmultishiftqb' {AVX512_VBMI+VL}.
+    kIdVpmuludq,                         //!< Instruction 'vpmuludq' {AVX|AVX2|AVX512_F+VL}.
+    kIdVpopcntb,                         //!< Instruction 'vpopcntb' {AVX512_BITALG+VL}.
+    kIdVpopcntd,                         //!< Instruction 'vpopcntd' {AVX512_VPOPCNTDQ+VL}.
+    kIdVpopcntq,                         //!< Instruction 'vpopcntq' {AVX512_VPOPCNTDQ+VL}.
+    kIdVpopcntw,                         //!< Instruction 'vpopcntw' {AVX512_BITALG+VL}.
+    kIdVpor,                             //!< Instruction 'vpor' {AVX|AVX2}.
+    kIdVpord,                            //!< Instruction 'vpord' {AVX512_F+VL}.
+    kIdVporq,                            //!< Instruction 'vporq' {AVX512_F+VL}.
+    kIdVpperm,                           //!< Instruction 'vpperm' {XOP}.
+    kIdVprold,                           //!< Instruction 'vprold' {AVX512_F+VL}.
+    kIdVprolq,                           //!< Instruction 'vprolq' {AVX512_F+VL}.
+    kIdVprolvd,                          //!< Instruction 'vprolvd' {AVX512_F+VL}.
+    kIdVprolvq,                          //!< Instruction 'vprolvq' {AVX512_F+VL}.
+    kIdVprord,                           //!< Instruction 'vprord' {AVX512_F+VL}.
+    kIdVprorq,                           //!< Instruction 'vprorq' {AVX512_F+VL}.
+    kIdVprorvd,                          //!< Instruction 'vprorvd' {AVX512_F+VL}.
+    kIdVprorvq,                          //!< Instruction 'vprorvq' {AVX512_F+VL}.
+    kIdVprotb,                           //!< Instruction 'vprotb' {XOP}.
+    kIdVprotd,                           //!< Instruction 'vprotd' {XOP}.
+    kIdVprotq,                           //!< Instruction 'vprotq' {XOP}.
+    kIdVprotw,                           //!< Instruction 'vprotw' {XOP}.
+    kIdVpsadbw,                          //!< Instruction 'vpsadbw' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpscatterdd,                      //!< Instruction 'vpscatterdd' {AVX512_F+VL}.
+    kIdVpscatterdq,                      //!< Instruction 'vpscatterdq' {AVX512_F+VL}.
+    kIdVpscatterqd,                      //!< Instruction 'vpscatterqd' {AVX512_F+VL}.
+    kIdVpscatterqq,                      //!< Instruction 'vpscatterqq' {AVX512_F+VL}.
+    kIdVpshab,                           //!< Instruction 'vpshab' {XOP}.
+    kIdVpshad,                           //!< Instruction 'vpshad' {XOP}.
+    kIdVpshaq,                           //!< Instruction 'vpshaq' {XOP}.
+    kIdVpshaw,                           //!< Instruction 'vpshaw' {XOP}.
+    kIdVpshlb,                           //!< Instruction 'vpshlb' {XOP}.
+    kIdVpshld,                           //!< Instruction 'vpshld' {XOP}.
+    kIdVpshldd,                          //!< Instruction 'vpshldd' {AVX512_VBMI2+VL}.
+    kIdVpshldq,                          //!< Instruction 'vpshldq' {AVX512_VBMI2+VL}.
+    kIdVpshldvd,                         //!< Instruction 'vpshldvd' {AVX512_VBMI2+VL}.
+    kIdVpshldvq,                         //!< Instruction 'vpshldvq' {AVX512_VBMI2+VL}.
+    kIdVpshldvw,                         //!< Instruction 'vpshldvw' {AVX512_VBMI2+VL}.
+    kIdVpshldw,                          //!< Instruction 'vpshldw' {AVX512_VBMI2+VL}.
+    kIdVpshlq,                           //!< Instruction 'vpshlq' {XOP}.
+    kIdVpshlw,                           //!< Instruction 'vpshlw' {XOP}.
+    kIdVpshrdd,                          //!< Instruction 'vpshrdd' {AVX512_VBMI2+VL}.
+    kIdVpshrdq,                          //!< Instruction 'vpshrdq' {AVX512_VBMI2+VL}.
+    kIdVpshrdvd,                         //!< Instruction 'vpshrdvd' {AVX512_VBMI2+VL}.
+    kIdVpshrdvq,                         //!< Instruction 'vpshrdvq' {AVX512_VBMI2+VL}.
+    kIdVpshrdvw,                         //!< Instruction 'vpshrdvw' {AVX512_VBMI2+VL}.
+    kIdVpshrdw,                          //!< Instruction 'vpshrdw' {AVX512_VBMI2+VL}.
+    kIdVpshufb,                          //!< Instruction 'vpshufb' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpshufbitqmb,                     //!< Instruction 'vpshufbitqmb' {AVX512_BITALG+VL}.
+    kIdVpshufd,                          //!< Instruction 'vpshufd' {AVX|AVX2|AVX512_F+VL}.
+    kIdVpshufhw,                         //!< Instruction 'vpshufhw' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpshuflw,                         //!< Instruction 'vpshuflw' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpsignb,                          //!< Instruction 'vpsignb' {AVX|AVX2}.
+    kIdVpsignd,                          //!< Instruction 'vpsignd' {AVX|AVX2}.
+    kIdVpsignw,                          //!< Instruction 'vpsignw' {AVX|AVX2}.
+    kIdVpslld,                           //!< Instruction 'vpslld' {AVX|AVX2|AVX512_F+VL}.
+    kIdVpslldq,                          //!< Instruction 'vpslldq' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpsllq,                           //!< Instruction 'vpsllq' {AVX|AVX2|AVX512_F+VL}.
+    kIdVpsllvd,                          //!< Instruction 'vpsllvd' {AVX2|AVX512_F+VL}.
+    kIdVpsllvq,                          //!< Instruction 'vpsllvq' {AVX2|AVX512_F+VL}.
+    kIdVpsllvw,                          //!< Instruction 'vpsllvw' {AVX512_BW+VL}.
+    kIdVpsllw,                           //!< Instruction 'vpsllw' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpsrad,                           //!< Instruction 'vpsrad' {AVX|AVX2|AVX512_F+VL}.
+    kIdVpsraq,                           //!< Instruction 'vpsraq' {AVX512_F+VL}.
+    kIdVpsravd,                          //!< Instruction 'vpsravd' {AVX2|AVX512_F+VL}.
+    kIdVpsravq,                          //!< Instruction 'vpsravq' {AVX512_F+VL}.
+    kIdVpsravw,                          //!< Instruction 'vpsravw' {AVX512_BW+VL}.
+    kIdVpsraw,                           //!< Instruction 'vpsraw' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpsrld,                           //!< Instruction 'vpsrld' {AVX|AVX2|AVX512_F+VL}.
+    kIdVpsrldq,                          //!< Instruction 'vpsrldq' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpsrlq,                           //!< Instruction 'vpsrlq' {AVX|AVX2|AVX512_F+VL}.
+    kIdVpsrlvd,                          //!< Instruction 'vpsrlvd' {AVX2|AVX512_F+VL}.
+    kIdVpsrlvq,                          //!< Instruction 'vpsrlvq' {AVX2|AVX512_F+VL}.
+    kIdVpsrlvw,                          //!< Instruction 'vpsrlvw' {AVX512_BW+VL}.
+    kIdVpsrlw,                           //!< Instruction 'vpsrlw' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpsubb,                           //!< Instruction 'vpsubb' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpsubd,                           //!< Instruction 'vpsubd' {AVX|AVX2|AVX512_F+VL}.
+    kIdVpsubq,                           //!< Instruction 'vpsubq' {AVX|AVX2|AVX512_F+VL}.
+    kIdVpsubsb,                          //!< Instruction 'vpsubsb' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpsubsw,                          //!< Instruction 'vpsubsw' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpsubusb,                         //!< Instruction 'vpsubusb' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpsubusw,                         //!< Instruction 'vpsubusw' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpsubw,                           //!< Instruction 'vpsubw' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpternlogd,                       //!< Instruction 'vpternlogd' {AVX512_F+VL}.
+    kIdVpternlogq,                       //!< Instruction 'vpternlogq' {AVX512_F+VL}.
+    kIdVptest,                           //!< Instruction 'vptest' {AVX}.
+    kIdVptestmb,                         //!< Instruction 'vptestmb' {AVX512_BW+VL}.
+    kIdVptestmd,                         //!< Instruction 'vptestmd' {AVX512_F+VL}.
+    kIdVptestmq,                         //!< Instruction 'vptestmq' {AVX512_F+VL}.
+    kIdVptestmw,                         //!< Instruction 'vptestmw' {AVX512_BW+VL}.
+    kIdVptestnmb,                        //!< Instruction 'vptestnmb' {AVX512_BW+VL}.
+    kIdVptestnmd,                        //!< Instruction 'vptestnmd' {AVX512_F+VL}.
+    kIdVptestnmq,                        //!< Instruction 'vptestnmq' {AVX512_F+VL}.
+    kIdVptestnmw,                        //!< Instruction 'vptestnmw' {AVX512_BW+VL}.
+    kIdVpunpckhbw,                       //!< Instruction 'vpunpckhbw' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpunpckhdq,                       //!< Instruction 'vpunpckhdq' {AVX|AVX2|AVX512_F+VL}.
+    kIdVpunpckhqdq,                      //!< Instruction 'vpunpckhqdq' {AVX|AVX2|AVX512_F+VL}.
+    kIdVpunpckhwd,                       //!< Instruction 'vpunpckhwd' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpunpcklbw,                       //!< Instruction 'vpunpcklbw' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpunpckldq,                       //!< Instruction 'vpunpckldq' {AVX|AVX2|AVX512_F+VL}.
+    kIdVpunpcklqdq,                      //!< Instruction 'vpunpcklqdq' {AVX|AVX2|AVX512_F+VL}.
+    kIdVpunpcklwd,                       //!< Instruction 'vpunpcklwd' {AVX|AVX2|AVX512_BW+VL}.
+    kIdVpxor,                            //!< Instruction 'vpxor' {AVX|AVX2}.
+    kIdVpxord,                           //!< Instruction 'vpxord' {AVX512_F+VL}.
+    kIdVpxorq,                           //!< Instruction 'vpxorq' {AVX512_F+VL}.
+    kIdVrangepd,                         //!< Instruction 'vrangepd' {AVX512_DQ+VL}.
+    kIdVrangeps,                         //!< Instruction 'vrangeps' {AVX512_DQ+VL}.
+    kIdVrangesd,                         //!< Instruction 'vrangesd' {AVX512_DQ}.
+    kIdVrangess,                         //!< Instruction 'vrangess' {AVX512_DQ}.
+    kIdVrcp14pd,                         //!< Instruction 'vrcp14pd' {AVX512_F+VL}.
+    kIdVrcp14ps,                         //!< Instruction 'vrcp14ps' {AVX512_F+VL}.
+    kIdVrcp14sd,                         //!< Instruction 'vrcp14sd' {AVX512_F}.
+    kIdVrcp14ss,                         //!< Instruction 'vrcp14ss' {AVX512_F}.
+    kIdVrcp28pd,                         //!< Instruction 'vrcp28pd' {AVX512_ERI}.
+    kIdVrcp28ps,                         //!< Instruction 'vrcp28ps' {AVX512_ERI}.
+    kIdVrcp28sd,                         //!< Instruction 'vrcp28sd' {AVX512_ERI}.
+    kIdVrcp28ss,                         //!< Instruction 'vrcp28ss' {AVX512_ERI}.
+    kIdVrcpph,                           //!< Instruction 'vrcpph' {AVX512_FP16}.
+    kIdVrcpps,                           //!< Instruction 'vrcpps' {AVX}.
+    kIdVrcpsh,                           //!< Instruction 'vrcpsh' {AVX512_FP16}.
+    kIdVrcpss,                           //!< Instruction 'vrcpss' {AVX}.
+    kIdVreducepd,                        //!< Instruction 'vreducepd' {AVX512_DQ+VL}.
+    kIdVreduceph,                        //!< Instruction 'vreduceph' {AVX512_FP16+VL}.
+    kIdVreduceps,                        //!< Instruction 'vreduceps' {AVX512_DQ+VL}.
+    kIdVreducesd,                        //!< Instruction 'vreducesd' {AVX512_DQ}.
+    kIdVreducesh,                        //!< Instruction 'vreducesh' {AVX512_FP16}.
+    kIdVreducess,                        //!< Instruction 'vreducess' {AVX512_DQ}.
+    kIdVrndscalepd,                      //!< Instruction 'vrndscalepd' {AVX512_F+VL}.
+    kIdVrndscaleph,                      //!< Instruction 'vrndscaleph' {AVX512_FP16+VL}.
+    kIdVrndscaleps,                      //!< Instruction 'vrndscaleps' {AVX512_F+VL}.
+    kIdVrndscalesd,                      //!< Instruction 'vrndscalesd' {AVX512_F}.
+    kIdVrndscalesh,                      //!< Instruction 'vrndscalesh' {AVX512_FP16}.
+    kIdVrndscaless,                      //!< Instruction 'vrndscaless' {AVX512_F}.
+    kIdVroundpd,                         //!< Instruction 'vroundpd' {AVX}.
+    kIdVroundps,                         //!< Instruction 'vroundps' {AVX}.
+    kIdVroundsd,                         //!< Instruction 'vroundsd' {AVX}.
+    kIdVroundss,                         //!< Instruction 'vroundss' {AVX}.
+    kIdVrsqrt14pd,                       //!< Instruction 'vrsqrt14pd' {AVX512_F+VL}.
+    kIdVrsqrt14ps,                       //!< Instruction 'vrsqrt14ps' {AVX512_F+VL}.
+    kIdVrsqrt14sd,                       //!< Instruction 'vrsqrt14sd' {AVX512_F}.
+    kIdVrsqrt14ss,                       //!< Instruction 'vrsqrt14ss' {AVX512_F}.
+    kIdVrsqrt28pd,                       //!< Instruction 'vrsqrt28pd' {AVX512_ERI}.
+    kIdVrsqrt28ps,                       //!< Instruction 'vrsqrt28ps' {AVX512_ERI}.
+    kIdVrsqrt28sd,                       //!< Instruction 'vrsqrt28sd' {AVX512_ERI}.
+    kIdVrsqrt28ss,                       //!< Instruction 'vrsqrt28ss' {AVX512_ERI}.
+    kIdVrsqrtph,                         //!< Instruction 'vrsqrtph' {AVX512_FP16+VL}.
+    kIdVrsqrtps,                         //!< Instruction 'vrsqrtps' {AVX}.
+    kIdVrsqrtsh,                         //!< Instruction 'vrsqrtsh' {AVX512_FP16}.
+    kIdVrsqrtss,                         //!< Instruction 'vrsqrtss' {AVX}.
+    kIdVscalefpd,                        //!< Instruction 'vscalefpd' {AVX512_F+VL}.
+    kIdVscalefph,                        //!< Instruction 'vscalefph' {AVX512_FP16+VL}.
+    kIdVscalefps,                        //!< Instruction 'vscalefps' {AVX512_F+VL}.
+    kIdVscalefsd,                        //!< Instruction 'vscalefsd' {AVX512_F}.
+    kIdVscalefsh,                        //!< Instruction 'vscalefsh' {AVX512_FP16}.
+    kIdVscalefss,                        //!< Instruction 'vscalefss' {AVX512_F}.
+    kIdVscatterdpd,                      //!< Instruction 'vscatterdpd' {AVX512_F+VL}.
+    kIdVscatterdps,                      //!< Instruction 'vscatterdps' {AVX512_F+VL}.
+    kIdVscatterpf0dpd,                   //!< Instruction 'vscatterpf0dpd' {AVX512_PFI}.
+    kIdVscatterpf0dps,                   //!< Instruction 'vscatterpf0dps' {AVX512_PFI}.
+    kIdVscatterpf0qpd,                   //!< Instruction 'vscatterpf0qpd' {AVX512_PFI}.
+    kIdVscatterpf0qps,                   //!< Instruction 'vscatterpf0qps' {AVX512_PFI}.
+    kIdVscatterpf1dpd,                   //!< Instruction 'vscatterpf1dpd' {AVX512_PFI}.
+    kIdVscatterpf1dps,                   //!< Instruction 'vscatterpf1dps' {AVX512_PFI}.
+    kIdVscatterpf1qpd,                   //!< Instruction 'vscatterpf1qpd' {AVX512_PFI}.
+    kIdVscatterpf1qps,                   //!< Instruction 'vscatterpf1qps' {AVX512_PFI}.
+    kIdVscatterqpd,                      //!< Instruction 'vscatterqpd' {AVX512_F+VL}.
+    kIdVscatterqps,                      //!< Instruction 'vscatterqps' {AVX512_F+VL}.
+    kIdVshuff32x4,                       //!< Instruction 'vshuff32x4' {AVX512_F+VL}.
+    kIdVshuff64x2,                       //!< Instruction 'vshuff64x2' {AVX512_F+VL}.
+    kIdVshufi32x4,                       //!< Instruction 'vshufi32x4' {AVX512_F+VL}.
+    kIdVshufi64x2,                       //!< Instruction 'vshufi64x2' {AVX512_F+VL}.
+    kIdVshufpd,                          //!< Instruction 'vshufpd' {AVX|AVX512_F+VL}.
+    kIdVshufps,                          //!< Instruction 'vshufps' {AVX|AVX512_F+VL}.
+    kIdVsqrtpd,                          //!< Instruction 'vsqrtpd' {AVX|AVX512_F+VL}.
+    kIdVsqrtph,                          //!< Instruction 'vsqrtph' {AVX512_FP16+VL}.
+    kIdVsqrtps,                          //!< Instruction 'vsqrtps' {AVX|AVX512_F+VL}.
+    kIdVsqrtsd,                          //!< Instruction 'vsqrtsd' {AVX|AVX512_F}.
+    kIdVsqrtsh,                          //!< Instruction 'vsqrtsh' {AVX512_FP16}.
+    kIdVsqrtss,                          //!< Instruction 'vsqrtss' {AVX|AVX512_F}.
+    kIdVstmxcsr,                         //!< Instruction 'vstmxcsr' {AVX}.
+    kIdVsubpd,                           //!< Instruction 'vsubpd' {AVX|AVX512_F+VL}.
+    kIdVsubph,                           //!< Instruction 'vsubph' {AVX512_FP16+VL}.
+    kIdVsubps,                           //!< Instruction 'vsubps' {AVX|AVX512_F+VL}.
+    kIdVsubsd,                           //!< Instruction 'vsubsd' {AVX|AVX512_F}.
+    kIdVsubsh,                           //!< Instruction 'vsubsh' {AVX512_FP16}.
+    kIdVsubss,                           //!< Instruction 'vsubss' {AVX|AVX512_F}.
+    kIdVtestpd,                          //!< Instruction 'vtestpd' {AVX}.
+    kIdVtestps,                          //!< Instruction 'vtestps' {AVX}.
+    kIdVucomisd,                         //!< Instruction 'vucomisd' {AVX|AVX512_F}.
+    kIdVucomish,                         //!< Instruction 'vucomish' {AVX512_FP16}.
+    kIdVucomiss,                         //!< Instruction 'vucomiss' {AVX|AVX512_F}.
+    kIdVunpckhpd,                        //!< Instruction 'vunpckhpd' {AVX|AVX512_F+VL}.
+    kIdVunpckhps,                        //!< Instruction 'vunpckhps' {AVX|AVX512_F+VL}.
+    kIdVunpcklpd,                        //!< Instruction 'vunpcklpd' {AVX|AVX512_F+VL}.
+    kIdVunpcklps,                        //!< Instruction 'vunpcklps' {AVX|AVX512_F+VL}.
+    kIdVxorpd,                           //!< Instruction 'vxorpd' {AVX|AVX512_DQ+VL}.
+    kIdVxorps,                           //!< Instruction 'vxorps' {AVX|AVX512_DQ+VL}.
+    kIdVzeroall,                         //!< Instruction 'vzeroall' {AVX}.
+    kIdVzeroupper,                       //!< Instruction 'vzeroupper' {AVX}.
+    kIdWbinvd,                           //!< Instruction 'wbinvd'.
+    kIdWbnoinvd,                         //!< Instruction 'wbnoinvd' {WBNOINVD}.
+    kIdWrfsbase,                         //!< Instruction 'wrfsbase' {FSGSBASE} (X64).
+    kIdWrgsbase,                         //!< Instruction 'wrgsbase' {FSGSBASE} (X64).
+    kIdWrmsr,                            //!< Instruction 'wrmsr' {MSR}.
+    kIdWrssd,                            //!< Instruction 'wrssd' {CET_SS}.
+    kIdWrssq,                            //!< Instruction 'wrssq' {CET_SS} (X64).
+    kIdWrussd,                           //!< Instruction 'wrussd' {CET_SS}.
+    kIdWrussq,                           //!< Instruction 'wrussq' {CET_SS} (X64).
+    kIdXabort,                           //!< Instruction 'xabort' {RTM}.
+    kIdXadd,                             //!< Instruction 'xadd' {I486}.
+    kIdXbegin,                           //!< Instruction 'xbegin' {RTM}.
+    kIdXchg,                             //!< Instruction 'xchg'.
+    kIdXend,                             //!< Instruction 'xend' {RTM}.
+    kIdXgetbv,                           //!< Instruction 'xgetbv' {XSAVE}.
+    kIdXlatb,                            //!< Instruction 'xlatb'.
+    kIdXor,                              //!< Instruction 'xor'.
+    kIdXorpd,                            //!< Instruction 'xorpd' {SSE2}.
+    kIdXorps,                            //!< Instruction 'xorps' {SSE}.
+    kIdXresldtrk,                        //!< Instruction 'xresldtrk' {TSXLDTRK}.
+    kIdXrstor,                           //!< Instruction 'xrstor' {XSAVE}.
+    kIdXrstor64,                         //!< Instruction 'xrstor64' {XSAVE} (X64).
+    kIdXrstors,                          //!< Instruction 'xrstors' {XSAVES}.
+    kIdXrstors64,                        //!< Instruction 'xrstors64' {XSAVES} (X64).
+    kIdXsave,                            //!< Instruction 'xsave' {XSAVE}.
+    kIdXsave64,                          //!< Instruction 'xsave64' {XSAVE} (X64).
+    kIdXsavec,                           //!< Instruction 'xsavec' {XSAVEC}.
+    kIdXsavec64,                         //!< Instruction 'xsavec64' {XSAVEC} (X64).
+    kIdXsaveopt,                         //!< Instruction 'xsaveopt' {XSAVEOPT}.
+    kIdXsaveopt64,                       //!< Instruction 'xsaveopt64' {XSAVEOPT} (X64).
+    kIdXsaves,                           //!< Instruction 'xsaves' {XSAVES}.
+    kIdXsaves64,                         //!< Instruction 'xsaves64' {XSAVES} (X64).
+    kIdXsetbv,                           //!< Instruction 'xsetbv' {XSAVE}.
+    kIdXsusldtrk,                        //!< Instruction 'xsusldtrk' {TSXLDTRK}.
+    kIdXtest,                            //!< Instruction 'xtest' {TSX}.
+    _kIdCount
+    // ${InstId:End}
+  };
+
+  //! Tests whether `instId` is a defined instruction id, i.e. whether it is lower than `_kIdCount`.
+  static inline constexpr bool isDefinedId(InstId instId) noexcept { return instId < _kIdCount; }
+
+  //! \cond
+  #define ASMJIT_INST_FROM_COND(ID) \
+    ID##o, ID##no, ID##b , ID##ae,  \
+    ID##e, ID##ne, ID##be, ID##a ,  \
+    ID##s, ID##ns, ID##pe, ID##po,  \
+    ID##l, ID##ge, ID##le, ID##g
+
+    static constexpr uint16_t _jccTable[] = { ASMJIT_INST_FROM_COND(Inst::kIdJ) }; // 16 ids: kIdJo, kIdJno, ..., kIdJg.
+    static constexpr uint16_t _setccTable[] = { ASMJIT_INST_FROM_COND(Inst::kIdSet) }; // 16 ids: kIdSeto, kIdSetno, ..., kIdSetg.
+    static constexpr uint16_t _cmovccTable[] = { ASMJIT_INST_FROM_COND(Inst::kIdCmov) }; // 16 ids: kIdCmovo, kIdCmovno, ..., kIdCmovg.
+
+  #undef ASMJIT_INST_FROM_COND
+  //! \endcond
+
+  //! Translates a condition code `cond` to a `jcc` instruction id by indexing `_jccTable` (`cond` is not range-checked).
+  static constexpr InstId jccFromCond(CondCode cond) noexcept { return _jccTable[uint8_t(cond)]; }
+  //! Translates a condition code `cond` to a `setcc` instruction id by indexing `_setccTable` (`cond` is not range-checked).
+  static constexpr InstId setccFromCond(CondCode cond) noexcept { return _setccTable[uint8_t(cond)]; }
+  //! Translates a condition code `cond` to a `cmovcc` instruction id by indexing `_cmovccTable` (`cond` is not range-checked).
+  static constexpr InstId cmovccFromCond(CondCode cond) noexcept { return _cmovccTable[uint8_t(cond)]; }
+} // {Inst}
+
+//! FPU status word bits.
+enum class FpuStatusWord : uint16_t {
+  kNone          = 0x0000u,     //!< No bits set.
+
+  kInvalid       = 0x0001u,     //!< Invalid operation.
+  kDenormalized  = 0x0002u,     //!< Denormalized operand.
+  kDivByZero     = 0x0004u,     //!< Division by zero.
+  kOverflow      = 0x0008u,     //!< Overflown.
+  kUnderflow     = 0x0010u,     //!< Underflown.
+  kPrecision     = 0x0020u,     //!< Precision lost.
+  kStackFault    = 0x0040u,     //!< Stack fault.
+  kInterrupt     = 0x0080u,     //!< Interrupt.
+  kC0            = 0x0100u,     //!< C0 flag.
+  kC1            = 0x0200u,     //!< C1 flag.
+  kC2            = 0x0400u,     //!< C2 flag.
+  kTopMask       = 0x3800u,     //!< Top of the stack (mask, bits 11-13).
+  kC3            = 0x4000u,     //!< C3 flag.
+  kBusy          = 0x8000u      //!< FPU is busy.
+};
+ASMJIT_DEFINE_ENUM_FLAGS(FpuStatusWord)
+
+//! FPU control word bits.
+enum class FpuControlWord : uint16_t {
+  kNone          = 0x0000u,     //!< No bits set.
+
+  // Bits 0-5
+  // --------
+
+  kEM_Mask       = 0x003Fu,     //!< Exception mask (0x3F).
+  kEM_Invalid    = 0x0001u,     //!< Invalid operation exception.
+  kEM_Denormal   = 0x0002u,     //!< Denormalized operand exception.
+  kEM_DivByZero  = 0x0004u,     //!< Division by zero exception.
+  kEM_Overflow   = 0x0008u,     //!< Overflow exception.
+  kEM_Underflow  = 0x0010u,     //!< Underflow exception.
+  kEM_Inexact    = 0x0020u,     //!< Inexact operation exception.
+
+  // Bits 8-9
+  // --------
+
+  kPC_Mask       = 0x0300u,     //!< Precision control mask.
+  kPC_Float      = 0x0000u,     //!< Single precision (24 bits).
+  kPC_Reserved   = 0x0100u,     //!< Reserved.
+  kPC_Double     = 0x0200u,     //!< Double precision (53 bits).
+  kPC_Extended   = 0x0300u,     //!< Extended precision (64 bits).
+
+  // Bits 10-11
+  // ----------
+
+  kRC_Mask       = 0x0C00u,     //!< Rounding control mask.
+  kRC_Nearest    = 0x0000u,     //!< Round to nearest even.
+  kRC_Down       = 0x0400u,     //!< Round down (floor).
+  kRC_Up         = 0x0800u,     //!< Round up (ceil).
+  kRC_Truncate   = 0x0C00u,     //!< Round towards zero (truncate).
+
+  // Bit 12
+  // ------
+
+  kIC_Mask       = 0x1000u,     //!< Infinity control mask.
+  kIC_Projective = 0x0000u,     //!< Projective (not supported on X64).
+  kIC_Affine     = 0x1000u      //!< Affine (default).
+};
+ASMJIT_DEFINE_ENUM_FLAGS(FpuControlWord)
+
+//! An immediate value that can be used with CMP[PD|PS|SD|SS] instructions.
+enum class CmpImm : uint8_t {
+  kEQ            = 0x00u,       //!< Equal (Quiet), same as \ref VCmpImm::kEQ_OQ.
+  kLT            = 0x01u,       //!< Less (Signaling), same as \ref VCmpImm::kLT_OS.
+  kLE            = 0x02u,       //!< Less/Equal (Signaling), same as \ref VCmpImm::kLE_OS.
+  kUNORD         = 0x03u,       //!< Unordered (Quiet), same as \ref VCmpImm::kUNORD_Q.
+  kNEQ           = 0x04u,       //!< Not Equal (Quiet), same as \ref VCmpImm::kNEQ_UQ.
+  kNLT           = 0x05u,       //!< Not Less (Signaling), same as \ref VCmpImm::kNLT_US.
+  kNLE           = 0x06u,       //!< Not Less/Equal (Signaling), same as \ref VCmpImm::kNLE_US.
+  kORD           = 0x07u        //!< Ordered (Quiet), same as \ref VCmpImm::kORD_Q.
+};
+
+//! An immediate value that can be used with [V]PCMP[I|E]STR[I|M] instructions.
+enum class PCmpStrImm : uint8_t {
+  // Source Data Format
+  // ------------------
+
+  kUB            = 0x00u << 0,  //!< The source data format is unsigned bytes.
+  kUW            = 0x01u << 0,  //!< The source data format is unsigned words.
+  kSB            = 0x02u << 0,  //!< The source data format is signed bytes.
+  kSW            = 0x03u << 0,  //!< The source data format is signed words.
+
+  // Aggregation Operation
+  // ---------------------
+
+  kEqualAny      = 0x00u << 2,  //!< The arithmetic comparison is "equal".
+  kRanges        = 0x01u << 2,  //!< The arithmetic comparison is "greater than or equal" between even indexed
+                                //!< elements and "less than or equal" between odd indexed elements.
+  kEqualEach     = 0x02u << 2,  //!< The arithmetic comparison is "equal".
+  kEqualOrdered  = 0x03u << 2,  //!< The arithmetic comparison is "equal".
+
+  // Polarity
+  // --------
+
+  kPosPolarity   = 0x00u << 4,  //!< IntRes2 = IntRes1.
+  kNegPolarity   = 0x01u << 4,  //!< IntRes2 = -1 XOR IntRes1.
+  kPosMasked     = 0x02u << 4,  //!< IntRes2 = IntRes1.
+  kNegMasked     = 0x03u << 4,  //!< IntRes2[i] = second[i] == invalid ? IntRes1[i] : ~IntRes1[i].
+
+  // Output Selection (pcmpstri)
+  // ---------------------------
+
+  kOutputLSI     = 0x00u << 6,  //!< The index returned to ECX is of the least significant set bit in IntRes2.
+  kOutputMSI     = 0x01u << 6,  //!< The index returned to ECX is of the most significant set bit in IntRes2.
+
+  // Output Selection (pcmpstrm)
+  // ---------------------------
+
+  kBitMask       = 0x00u << 6,  //!< IntRes2 is returned as the mask to the least significant bits of XMM0.
+  kIndexMask     = 0x01u << 6   //!< IntRes2 is expanded into a byte/word mask and placed in XMM0.
+};
+ASMJIT_DEFINE_ENUM_FLAGS(PCmpStrImm)
+
+//! An immediate value that can be used with ROUND[PD|PS|SD|SS] instructions.
+//!
+//! \note `kSuppress` is a mask that can be used with any other value.
+enum class RoundImm : uint8_t {
+  kNearest       = 0x00u,       //!< Round to nearest (even).
+  kDown          = 0x01u,       //!< Round down toward -INF (floor).
+  kUp            = 0x02u,       //!< Round up toward +INF (ceil).
+  kTrunc         = 0x03u,       //!< Round toward zero (truncate).
+  kCurrent       = 0x04u,       //!< Round to the current rounding mode set (ignores other RC bits).
+  kSuppress      = 0x08u        //!< Suppress exceptions (avoids inexact exception, if set).
+};
+ASMJIT_DEFINE_ENUM_FLAGS(RoundImm)
+
+//! An immediate value that can be used with VCMP[PD|PS|SD|SS] instructions (AVX).
+//!
+//! The first 8 values are compatible with \ref CmpImm.
+enum class VCmpImm : uint8_t {
+  kEQ_OQ         = 0x00u,       //!< Equal             (Quiet    , Ordered)  , same as \ref CmpImm::kEQ.
+  kLT_OS         = 0x01u,       //!< Less              (Signaling, Ordered)  , same as \ref CmpImm::kLT.
+  kLE_OS         = 0x02u,       //!< Less/Equal        (Signaling, Ordered)  , same as \ref CmpImm::kLE.
+  kUNORD_Q       = 0x03u,       //!< Unordered         (Quiet)               , same as \ref CmpImm::kUNORD.
+  kNEQ_UQ        = 0x04u,       //!< Not Equal         (Quiet    , Unordered), same as \ref CmpImm::kNEQ.
+  kNLT_US        = 0x05u,       //!< Not Less          (Signaling, Unordered), same as \ref CmpImm::kNLT.
+  kNLE_US        = 0x06u,       //!< Not Less/Equal    (Signaling, Unordered), same as \ref CmpImm::kNLE.
+  kORD_Q         = 0x07u,       //!< Ordered           (Quiet)               , same as \ref CmpImm::kORD.
+  kEQ_UQ         = 0x08u,       //!< Equal             (Quiet    , Unordered).
+  kNGE_US        = 0x09u,       //!< Not Greater/Equal (Signaling, Unordered).
+  kNGT_US        = 0x0Au,       //!< Not Greater       (Signaling, Unordered).
+  kFALSE_OQ      = 0x0Bu,       //!< False             (Quiet    , Ordered).
+  kNEQ_OQ        = 0x0Cu,       //!< Not Equal         (Quiet    , Ordered).
+  kGE_OS         = 0x0Du,       //!< Greater/Equal     (Signaling, Ordered).
+  kGT_OS         = 0x0Eu,       //!< Greater           (Signaling, Ordered).
+  kTRUE_UQ       = 0x0Fu,       //!< True              (Quiet    , Unordered).
+  kEQ_OS         = 0x10u,       //!< Equal             (Signaling, Ordered).
+  kLT_OQ         = 0x11u,       //!< Less              (Quiet    , Ordered).
+  kLE_OQ         = 0x12u,       //!< Less/Equal        (Quiet    , Ordered).
+  kUNORD_S       = 0x13u,       //!< Unordered         (Signaling).
+  kNEQ_US        = 0x14u,       //!< Not Equal         (Signaling, Unordered).
+  kNLT_UQ        = 0x15u,       //!< Not Less          (Quiet    , Unordered).
+  kNLE_UQ        = 0x16u,       //!< Not Less/Equal    (Quiet    , Unordered).
+  kORD_S         = 0x17u,       //!< Ordered           (Signaling).
+  kEQ_US         = 0x18u,       //!< Equal             (Signaling, Unordered).
+  kNGE_UQ        = 0x19u,       //!< Not Greater/Equal (Quiet    , Unordered).
+  kNGT_UQ        = 0x1Au,       //!< Not Greater       (Quiet    , Unordered).
+  kFALSE_OS      = 0x1Bu,       //!< False             (Signaling, Ordered).
+  kNEQ_OS        = 0x1Cu,       //!< Not Equal         (Signaling, Ordered).
+  kGE_OQ         = 0x1Du,       //!< Greater/Equal     (Quiet    , Ordered).
+  kGT_OQ         = 0x1Eu,       //!< Greater           (Quiet    , Ordered).
+  kTRUE_US       = 0x1Fu        //!< True              (Signaling, Unordered).
+};
+
+//! An immediate value that can be used with VFIXUPIMM[PD|PS|SD|SS] instructions (AVX-512).
+//!
+//! The final immediate is a combination of all possible control bits.
+enum class VFixupImm : uint8_t {
+  kNone          = 0x00u,       //!< No bits set.
+  kZEOnZero      = 0x01u,
+  kIEOnZero      = 0x02u,
+  kZEOnOne       = 0x04u,
+  kIEOnOne       = 0x08u,
+  kIEOnSNaN      = 0x10u,
+  kIEOnNInf      = 0x20u,
+  kIEOnNegative  = 0x40u,
+  kIEOnPInf      = 0x80u
+};
+ASMJIT_DEFINE_ENUM_FLAGS(VFixupImm)
+
+//! An immediate value that can be used with VFPCLASS[PD|PS|SD|SS] instructions (AVX-512).
+//!
+//! The values can be combined together to form the final 8-bit mask.
+enum class VFPClassImm : uint8_t {
+  kNone          = 0x00u,       //!< No bits set.
+  kQNaN          = 0x01u,       //!< Checks for QNaN.
+  kPZero         = 0x02u,       //!< Checks for +0.
+  kNZero         = 0x04u,       //!< Checks for -0.
+  kPInf          = 0x08u,       //!< Checks for +Inf.
+  kNInf          = 0x10u,       //!< Checks for -Inf.
+  kDenormal      = 0x20u,       //!< Checks for denormal.
+  kNegative      = 0x40u,       //!< Checks for negative finite value.
+  kSNaN          = 0x80u        //!< Checks for SNaN.
+};
+ASMJIT_DEFINE_ENUM_FLAGS(VFPClassImm)
+
+//! An immediate value that can be used with VGETMANT[PD|PS|SD|SS] instructions (AVX-512).
+//!
+//! The value is a combination of a normalization interval and a sign control.
+enum class VGetMantImm : uint8_t {
+  // Normalization Interval
+  // ----------------------
+
+  k1To2          = 0x00u,       //!< Normalization interval is [1, 2).
+  k1Div2To2      = 0x01u,       //!< Normalization interval is [0.5, 2).
+  k1Div2To1      = 0x02u,       //!< Normalization interval is [0.5, 1).
+  k3Div4To3Div2  = 0x03u,       //!< Normalization interval is [3/4, 3/2).
+
+  // Sign Control
+  // ------------
+
+  kSrcSign       = 0x00u,       //!< Source sign.
+  kNoSign        = 0x04u,       //!< Zero sign.
+  kQNaNIfSign    = 0x08u        //!< QNAN_Indefinite if sign(src) != 0, regardless of `kSrcSign` or `kNoSign`.
+};
+ASMJIT_DEFINE_ENUM_FLAGS(VGetMantImm)
+
+//! A predicate used by VPCMP[U][B|W|D|Q] instructions (AVX-512).
+enum class VPCmpImm : uint8_t {
+  kEQ            = 0x00u,       //!< Equal.
+  kLT            = 0x01u,       //!< Less.
+  kLE            = 0x02u,       //!< Less/Equal.
+  kFALSE         = 0x03u,       //!< False.
+  kNE            = 0x04u,       //!< Not Equal.
+  kGE            = 0x05u,       //!< Greater/Equal.
+  kGT            = 0x06u,       //!< Greater.
+  kTRUE          = 0x07u        //!< True.
+};
+
+//! A predicate used by VPCOM[U][B|W|D|Q] instructions (XOP).
+enum class VPComImm : uint8_t {
+  kLT            = 0x00u,       //!< Less.
+  kLE            = 0x01u,       //!< Less/Equal.
+  kGT            = 0x02u,       //!< Greater.
+  kGE            = 0x03u,       //!< Greater/Equal.
+  kEQ            = 0x04u,       //!< Equal.
+  kNE            = 0x05u,       //!< Not Equal.
+  kFALSE         = 0x06u,       //!< False.
+  kTRUE          = 0x07u        //!< True.
+};
+
+//! A predicate used by VRANGE[PD|PS|SD|SS] instructions (AVX-512), combining a selector (bits 0-1) with a sign control (bits 2-3).
+enum class VRangeImm : uint8_t {
+  // Selector
+  // --------
+
+  kSelectMin     = 0x00u,       //!< Select minimum value.
+  kSelectMax     = 0x01u,       //!< Select maximum value.
+  kSelectAbsMin  = 0x02u,       //!< Select minimum absolute value.
+  kSelectAbsMax  = 0x03u,       //!< Select maximum absolute value.
+
+  // Sign
+  // ----
+
+  kSignSrc1      = 0x00u,       //!< Select sign of SRC1.
+  kSignSrc2      = 0x04u,       //!< Select sign of SRC2.
+  kSign0         = 0x08u,       //!< Set sign to 0.
+  kSign1         = 0x0Cu        //!< Set sign to 1.
+};
+ASMJIT_DEFINE_ENUM_FLAGS(VRangeImm)
+
+//! A predicate used by VREDUCE[PD|PS|SD|SS] instructions (AVX-512).
+enum class VReduceImm : uint8_t {
+  kRoundEven     = 0x00u,       //!< Round to nearest even.
+  kRoundDown     = 0x01u,       //!< Round down.
+  kRoundUp       = 0x02u,       //!< Round up.
+  kRoundTrunc    = 0x03u,       //!< Truncate.
+  kRoundCurrent  = 0x04u,       //!< Round to the current mode set.
+  kSuppress      = 0x08u,       //!< Suppress exceptions.
+  kFixedImmMask  = 0xF0u        //!< Fixed length value mask (bits 4-7).
+};
+ASMJIT_DEFINE_ENUM_FLAGS(VReduceImm)
+
+//! Creates a \ref VReduceImm from a combination of `flags` and `fixedPointLength` (shifted left by 4 bits, not range-checked).
+static inline constexpr VReduceImm vReduceImm(VReduceImm flags, uint32_t fixedPointLength) noexcept {
+  return flags | VReduceImm(fixedPointLength << 4);
+}
+
+//! A predicate that can be used as an immediate value with VPTERNLOG[D|Q] instruction.
+//!
+//! There are 3 inputs to the instruction (\ref kA, \ref kB, \ref kC). Ternary logic can define any combination
+//! that would be performed on these 3 inputs to get the desired output - any combination of AND, OR, XOR, NOT
+//! is possible.
+//!
+//! \sa \ref tLogFromBits and \ref fLogIfElse
+enum class TLogImm : uint8_t {
+  k0             = 0x00u,       //!< 0 value.
+  k1             = 0xFFu,       //!< 1 value.
+  kA             = 0xF0u,       //!< A value.
+  kB             = 0xCCu,       //!< B value.
+  kC             = 0xAAu,       //!< C value.
+
+  kNotA          = kA ^ k1,     //!< `!A` expression.
+  kNotB          = kB ^ k1,     //!< `!B` expression.
+  kNotC          = kC ^ k1,     //!< `!C` expression.
+
+  kAB            = kA & kB,     //!< `A & B` expression.
+  kAC            = kA & kC,     //!< `A & C` expression.
+  kBC            = kB & kC,     //!< `B & C` expression.
+  kNotAB         = kAB ^ k1,    //!< `!(A & B)` expression.
+  kNotAC         = kAC ^ k1,    //!< `!(A & C)` expression.
+  kNotBC         = kBC ^ k1,    //!< `!(B & C)` expression.
+
+  kABC           = kAB & kC,    //!< `A & B & C` expression.
+  kNotABC        = kABC ^ k1    //!< `!(A & B & C)` expression.
+};
+ASMJIT_DEFINE_ENUM_FLAGS(TLogImm)
+
+//! Creates an immediate that can be used by VPTERNLOG[D|Q] instructions - each `bXYZ` argument is shifted to bit index `XYZ` (binary) of the result.
+static inline constexpr TLogImm tLogFromBits(uint8_t b000, uint8_t b001, uint8_t b010, uint8_t b011, uint8_t b100, uint8_t b101, uint8_t b110, uint8_t b111) noexcept {
+  return TLogImm(uint8_t(b000 << 0) |
+                 uint8_t(b001 << 1) |
+                 uint8_t(b010 << 2) |
+                 uint8_t(b011 << 3) |
+                 uint8_t(b100 << 4) |
+                 uint8_t(b101 << 5) |
+                 uint8_t(b110 << 6) |
+                 uint8_t(b111 << 7));
+}
+
+//! Creates an if/else logic that can be used by VPTERNLOG[D|Q] instructions - evaluates to `(condition & a) | (~condition & b)`.
+static inline constexpr TLogImm fLogIfElse(TLogImm condition, TLogImm a, TLogImm b) noexcept { return (condition & a) | (~condition & b); }
+
+//! Creates a shuffle immediate value that can be used with SSE/AVX/AVX-512 instructions to shuffle 2 elements in a vector.
+//!
+//! \param a Position of the first  component [0, 1] - stored in bit 1 of the result.
+//! \param b Position of the second component [0, 1] - stored in bit 0 of the result.
+//!
+//! Shuffle constants can be used to encode an immediate for these instructions:
+//!   - `shufpd|vshufpd`
+static inline constexpr uint32_t shuffleImm(uint32_t a, uint32_t b) noexcept {
+  return (a << 1) | b;
+}
+
+//! Creates a shuffle immediate value that can be used with SSE/AVX/AVX-512 instructions to shuffle 4 elements in a vector.
+//!
+//! \param a Position of the first  component [0, 3].
+//! \param b Position of the second component [0, 3].
+//! \param c Position of the third  component [0, 3].
+//! \param d Position of the fourth component [0, 3].
+//!
+//! Shuffle constants can be used to encode an immediate for these instructions:
+//!   - `pshufw`
+//!   - `pshuflw|vpshuflw`
+//!   - `pshufhw|vpshufhw`
+//!   - `pshufd|vpshufd`
+//!   - `shufps|vshufps`
+static inline constexpr uint32_t shuffleImm(uint32_t a, uint32_t b, uint32_t c, uint32_t d) noexcept {
+  return (a << 6) | (b << 4) | (c << 2) | d;
+}
+
+//! \}
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // ASMJIT_X86_X86GLOBALS_H_INCLUDED
diff --git a/lib/lepton/asmjit/x86/x86instapi.cpp b/lib/lepton/asmjit/x86/x86instapi.cpp
new file mode 100644
index 0000000000..3857cc1212
--- /dev/null
+++ b/lib/lepton/asmjit/x86/x86instapi.cpp
@@ -0,0 +1,1732 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+// ----------------------------------------------------------------------------
+// IMPORTANT: AsmJit now uses an external instruction database to populate
+// static tables within this file. Perform the following steps to regenerate
+// all tables enclosed by ${...}:
+//
+//   1. Install node.js environment <https://nodejs.org>
+//   2. Go to asmjit/tools directory
+//   3. Get the latest asmdb from <https://github.com/asmjit/asmdb> and
+//      copy/link the `asmdb` directory to `asmjit/tools/asmdb`.
+//   4. Execute `node tablegen-x86.js`
+//
+// Instruction encoding and opcodes were added to the `x86inst.cpp` database
+// manually in the past and they are not updated by the script as it became
+// tricky. However, everything else is updated including instruction operands
+// and tables required to validate them, instruction read/write information
+// (including registers and flags), and all indexes to all tables.
+// ----------------------------------------------------------------------------
+
+#include "../core/api-build_p.h"
+#if !defined(ASMJIT_NO_X86)
+
+#include "../core/cpuinfo.h"
+#include "../core/misc_p.h"
+#include "../core/support.h"
+#include "../x86/x86instapi_p.h"
+#include "../x86/x86instdb_p.h"
+#include "../x86/x86opcode_p.h"
+#include "../x86/x86operand.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(x86)
+
+// x86::InstInternal - Text
+// ========================
+
+#ifndef ASMJIT_NO_TEXT
+Error InstInternal::instIdToString(Arch arch, InstId instId, String& output) noexcept { // Appends the name of `instId` to `output`.
+  DebugUtils::unused(arch); // `arch` is not consulted by this lookup.
+
+  if (ASMJIT_UNLIKELY(!Inst::isDefinedId(instId))) // Ids rejected by Inst::isDefinedId() yield kErrorInvalidInstruction.
+    return DebugUtils::errored(kErrorInvalidInstruction);
+
+  const InstDB::InstInfo& info = InstDB::infoById(instId);
+  return output.append(InstDB::_nameData + info._nameDataIndex); // The name starts at offset `_nameDataIndex` of `_nameData`; append()'s result is returned.
+}
+
+InstId InstInternal::stringToInstId(Arch arch, const char* s, size_t len) noexcept { // Looks up an instruction id by name; Inst::kIdNone when not found.
+  DebugUtils::unused(arch); // `arch` is not consulted by this lookup.
+
+  if (ASMJIT_UNLIKELY(!s))
+    return Inst::kIdNone;
+
+  if (len == SIZE_MAX) // SIZE_MAX is the "length not given" sentinel; measure `s` with strlen() instead.
+    len = strlen(s);
+
+  if (ASMJIT_UNLIKELY(len == 0 || len > InstDB::kMaxNameSize))
+    return Inst::kIdNone;
+
+  uint32_t prefix = uint32_t(s[0]) - 'a'; // Unsigned subtraction: a first character below 'a' wraps to a large value.
+  if (ASMJIT_UNLIKELY(prefix > 'z' - 'a')) // Only names starting with 'a'..'z' can be looked up.
+    return Inst::kIdNone;
+
+  uint32_t index = InstDB::instNameIndex[prefix].start;
+  if (ASMJIT_UNLIKELY(!index)) // A zero start index is treated as "no instruction begins with this letter".
+    return Inst::kIdNone;
+
+  const char* nameData = InstDB::_nameData;
+  const InstDB::InstInfo* table = InstDB::_instInfoTable;
+
+  const InstDB::InstInfo* base = table + index;
+  const InstDB::InstInfo* end  = table + InstDB::instNameIndex[prefix].end;
+
+  for (size_t lim = (size_t)(end - base); lim != 0; lim >>= 1) { // Binary search over the `lim` entries starting at `base`.
+    const InstDB::InstInfo* cur = base + (lim >> 1);
+    int result = Support::cmpInstName(nameData + cur[0]._nameDataIndex, s, len);
+
+    if (result < 0) {
+      base = cur + 1; // Continue in the entries after `cur`.
+      lim--; // `cur` itself is dropped, so the loop's `lim >>= 1` yields the count of entries after it.
+      continue;
+    }
+
+    if (result > 0) // Keep `base`; the loop's `lim >>= 1` restricts the search to the entries before `cur`.
+      continue;
+
+    return InstId((size_t)(cur - table)); // Exact match: the id is the entry's index within `_instInfoTable`.
+  }
+
+  return Inst::kIdNone;
+}
+#endif // !ASMJIT_NO_TEXT
+
+// x86::InstInternal - Validate
+// ============================
+
+#ifndef ASMJIT_NO_VALIDATION
+struct X86ValidationData { // Per-architecture limits consulted by InstInternal::validate().
+  //! Allowed registers by \ref RegType (each entry is a bit-mask of accepted physical register ids).
+  RegMask allowedRegMask[uint32_t(RegType::kMaxValue) + 1];
+  uint32_t allowedMemBaseRegs; // Bit-mask, one bit per RegType, of register types accepted as a memory base.
+  uint32_t allowedMemIndexRegs; // Bit-mask, one bit per RegType, of register types accepted as a memory index.
+};
+
+#define VALUE(x) \
+  (x == uint32_t(RegType::kX86_GpbLo)) ? InstDB::OpFlags::kRegGpbLo : \
+  (x == uint32_t(RegType::kX86_GpbHi)) ? InstDB::OpFlags::kRegGpbHi : \
+  (x == uint32_t(RegType::kX86_Gpw  )) ? InstDB::OpFlags::kRegGpw   : \
+  (x == uint32_t(RegType::kX86_Gpd  )) ? InstDB::OpFlags::kRegGpd   : \
+  (x == uint32_t(RegType::kX86_Gpq  )) ? InstDB::OpFlags::kRegGpq   : \
+  (x == uint32_t(RegType::kX86_Xmm  )) ? InstDB::OpFlags::kRegXmm   : \
+  (x == uint32_t(RegType::kX86_Ymm  )) ? InstDB::OpFlags::kRegYmm   : \
+  (x == uint32_t(RegType::kX86_Zmm  )) ? InstDB::OpFlags::kRegZmm   : \
+  (x == uint32_t(RegType::kX86_Mm   )) ? InstDB::OpFlags::kRegMm    : \
+  (x == uint32_t(RegType::kX86_KReg )) ? InstDB::OpFlags::kRegKReg  : \
+  (x == uint32_t(RegType::kX86_SReg )) ? InstDB::OpFlags::kRegSReg  : \
+  (x == uint32_t(RegType::kX86_CReg )) ? InstDB::OpFlags::kRegCReg  : \
+  (x == uint32_t(RegType::kX86_DReg )) ? InstDB::OpFlags::kRegDReg  : \
+  (x == uint32_t(RegType::kX86_St   )) ? InstDB::OpFlags::kRegSt    : \
+  (x == uint32_t(RegType::kX86_Bnd  )) ? InstDB::OpFlags::kRegBnd   : \
+  (x == uint32_t(RegType::kX86_Tmm  )) ? InstDB::OpFlags::kRegTmm   : \
+  (x == uint32_t(RegType::kX86_Rip  )) ? InstDB::OpFlags::kNone     : InstDB::OpFlags::kNone
+static const InstDB::OpFlags _x86OpFlagFromRegType[uint32_t(RegType::kMaxValue) + 1] = { ASMJIT_LOOKUP_TABLE_32(VALUE, 0) }; // Indexed by RegType; kNone entries make validate() fail with kErrorInvalidRegType.
+#undef VALUE
+
+#define REG_MASK_FROM_REG_TYPE_X86(x) \
+  (x == uint32_t(RegType::kX86_GpbLo)) ? 0x0000000Fu : \
+  (x == uint32_t(RegType::kX86_GpbHi)) ? 0x0000000Fu : \
+  (x == uint32_t(RegType::kX86_Gpw  )) ? 0x000000FFu : \
+  (x == uint32_t(RegType::kX86_Gpd  )) ? 0x000000FFu : \
+  (x == uint32_t(RegType::kX86_Gpq  )) ? 0x000000FFu : \
+  (x == uint32_t(RegType::kX86_Xmm  )) ? 0x000000FFu : \
+  (x == uint32_t(RegType::kX86_Ymm  )) ? 0x000000FFu : \
+  (x == uint32_t(RegType::kX86_Zmm  )) ? 0x000000FFu : \
+  (x == uint32_t(RegType::kX86_Mm   )) ? 0x000000FFu : \
+  (x == uint32_t(RegType::kX86_KReg )) ? 0x000000FFu : \
+  (x == uint32_t(RegType::kX86_SReg )) ? 0x0000007Eu : \
+  (x == uint32_t(RegType::kX86_CReg )) ? 0x0000FFFFu : \
+  (x == uint32_t(RegType::kX86_DReg )) ? 0x000000FFu : \
+  (x == uint32_t(RegType::kX86_St   )) ? 0x000000FFu : \
+  (x == uint32_t(RegType::kX86_Bnd  )) ? 0x0000000Fu : \
+  (x == uint32_t(RegType::kX86_Tmm  )) ? 0x000000FFu : \
+  (x == uint32_t(RegType::kX86_Rip  )) ? 0x00000001u : 0u
+
+#define REG_MASK_FROM_REG_TYPE_X64(x) \
+  (x == uint32_t(RegType::kX86_GpbLo)) ? 0x0000FFFFu : \
+  (x == uint32_t(RegType::kX86_GpbHi)) ? 0x0000000Fu : \
+  (x == uint32_t(RegType::kX86_Gpw  )) ? 0x0000FFFFu : \
+  (x == uint32_t(RegType::kX86_Gpd  )) ? 0x0000FFFFu : \
+  (x == uint32_t(RegType::kX86_Gpq  )) ? 0x0000FFFFu : \
+  (x == uint32_t(RegType::kX86_Xmm  )) ? 0xFFFFFFFFu : \
+  (x == uint32_t(RegType::kX86_Ymm  )) ? 0xFFFFFFFFu : \
+  (x == uint32_t(RegType::kX86_Zmm  )) ? 0xFFFFFFFFu : \
+  (x == uint32_t(RegType::kX86_Mm   )) ? 0x000000FFu : \
+  (x == uint32_t(RegType::kX86_KReg )) ? 0x000000FFu : \
+  (x == uint32_t(RegType::kX86_SReg )) ? 0x0000007Eu : \
+  (x == uint32_t(RegType::kX86_CReg )) ? 0x0000FFFFu : \
+  (x == uint32_t(RegType::kX86_DReg )) ? 0x0000FFFFu : \
+  (x == uint32_t(RegType::kX86_St   )) ? 0x000000FFu : \
+  (x == uint32_t(RegType::kX86_Bnd  )) ? 0x0000000Fu : \
+  (x == uint32_t(RegType::kX86_Tmm  )) ? 0x000000FFu : \
+  (x == uint32_t(RegType::kX86_Rip  )) ? 0x00000001u : 0u
+
+#define B(RegType) (uint32_t(1) << uint32_t(RegType)) // Single-bit mask for one RegType value.
+
+static const X86ValidationData _x86ValidationData = { // Limits that validate() selects when `arch == Arch::kX86`.
+  { ASMJIT_LOOKUP_TABLE_32(REG_MASK_FROM_REG_TYPE_X86, 0) }, // allowedRegMask: accepted physical ids per RegType.
+  B(RegType::kX86_Gpw) | B(RegType::kX86_Gpd) | B(RegType::kX86_Rip) | B(RegType::kLabelTag), // allowedMemBaseRegs.
+  B(RegType::kX86_Gpw) | B(RegType::kX86_Gpd) | B(RegType::kX86_Xmm) | B(RegType::kX86_Ymm) | B(RegType::kX86_Zmm) // allowedMemIndexRegs.
+};
+
+static const X86ValidationData _x64ValidationData = { // Limits that validate() selects for any other architecture it is called with.
+  { ASMJIT_LOOKUP_TABLE_32(REG_MASK_FROM_REG_TYPE_X64, 0) }, // allowedRegMask: accepted physical ids per RegType.
+  B(RegType::kX86_Gpd) | B(RegType::kX86_Gpq) | B(RegType::kX86_Rip) | B(RegType::kLabelTag), // allowedMemBaseRegs.
+  B(RegType::kX86_Gpd) | B(RegType::kX86_Gpq) | B(RegType::kX86_Xmm) | B(RegType::kX86_Ymm) | B(RegType::kX86_Zmm) // allowedMemIndexRegs.
+};
+
+#undef B
+
+#undef REG_MASK_FROM_REG_TYPE_X64
+#undef REG_MASK_FROM_REG_TYPE_X86
+
+static ASMJIT_FORCE_INLINE bool x86IsZmmOrM512(const Operand_& op) noexcept { // True for a ZMM register or a memory operand of size 64.
+  return Reg::isZmm(op) || (op.isMem() && op.size() == 64); // A memory size of 64 is what validate() maps to OpFlags::kMem512.
+}
+
+static ASMJIT_FORCE_INLINE bool x86CheckOSig(const InstDB::OpSignature& op, const InstDB::OpSignature& ref, bool& immOutOfRange) noexcept { // Checks operand signature `op` against reference `ref`.
+  // Fail if operand types are incompatible.
+  InstDB::OpFlags commonFlags = op.flags() & ref.flags();
+
+  if (!Support::test(commonFlags, InstDB::OpFlags::kOpMask)) {
+    // Mark temporarily `immOutOfRange` so we can return a more descriptive error later.
+    if (op.hasImm() && ref.hasImm()) {
+      immOutOfRange = true;
+      return true; // Reported as a match; the caller inspects `immOutOfRange` to tell this apart from a real match.
+    }
+
+    return false;
+  }
+
+  // Fail if some memory specific flags do not match.
+  if (Support::test(commonFlags, InstDB::OpFlags::kMemMask)) {
+    if (ref.hasFlag(InstDB::OpFlags::kFlagMemBase) && !op.hasFlag(InstDB::OpFlags::kFlagMemBase)) // Reference demands kFlagMemBase but the operand lacks it.
+      return false;
+  }
+
+  // Fail if register indexes do not match.
+  if (Support::test(commonFlags, InstDB::OpFlags::kRegMask)) {
+    if (ref.regMask() && !Support::test(op.regMask(), ref.regMask())) // A zero reference mask places no restriction on the register index.
+      return false;
+  }
+
+  return true;
+}
+
+ASMJIT_FAVOR_SIZE Error InstInternal::validate(Arch arch, const BaseInst& inst, const Operand_* operands, size_t opCount, ValidationFlags validationFlags) noexcept { // Validates id, prefixes/options, operands and the extra register; kErrorOk on success.
+  // Only called when `arch` matches X86 family.
+  ASMJIT_ASSERT(Environment::isFamilyX86(arch));
+
+  const X86ValidationData* vd;
+  if (arch == Arch::kX86)
+    vd = &_x86ValidationData;
+  else
+    vd = &_x64ValidationData;
+
+  uint32_t i;
+  InstDB::Mode mode = InstDB::modeFromArch(arch);
+
+  // Get the instruction data.
+  InstId instId = inst.id();
+  InstOptions options = inst.options();
+
+  if (ASMJIT_UNLIKELY(!Inst::isDefinedId(instId)))
+    return DebugUtils::errored(kErrorInvalidInstruction);
+
+  const InstDB::InstInfo& instInfo = InstDB::infoById(instId);
+  const InstDB::CommonInfo& commonInfo = instInfo.commonInfo();
+
+  InstDB::InstFlags iFlags = instInfo.flags();
+
+  constexpr InstOptions kRepAny = InstOptions::kX86_Rep | InstOptions::kX86_Repne;
+  constexpr InstOptions kXAcqXRel = InstOptions::kX86_XAcquire | InstOptions::kX86_XRelease;
+  constexpr InstOptions kAvx512Options = InstOptions::kX86_ZMask | InstOptions::kX86_ER | InstOptions::kX86_SAE;
+
+  // Validate LOCK|XACQUIRE|XRELEASE Prefixes
+  // ----------------------------------------
+
+  if (Support::test(options, InstOptions::kX86_Lock | kXAcqXRel)) {
+    if (Support::test(options, InstOptions::kX86_Lock)) {
+      if (ASMJIT_UNLIKELY(!Support::test(iFlags, InstDB::InstFlags::kLock) && !Support::test(options, kXAcqXRel)))
+        return DebugUtils::errored(kErrorInvalidLockPrefix);
+
+      if (ASMJIT_UNLIKELY(opCount < 1 || !operands[0].isMem())) // LOCK is only accepted when the first operand is memory.
+        return DebugUtils::errored(kErrorInvalidLockPrefix);
+    }
+
+    if (Support::test(options, kXAcqXRel)) {
+      if (ASMJIT_UNLIKELY(!Support::test(options, InstOptions::kX86_Lock) || (options & kXAcqXRel) == kXAcqXRel)) // Requires LOCK and exactly one of XACQUIRE/XRELEASE.
+        return DebugUtils::errored(kErrorInvalidPrefixCombination);
+
+      if (ASMJIT_UNLIKELY(Support::test(options, InstOptions::kX86_XAcquire) && !Support::test(iFlags, InstDB::InstFlags::kXAcquire)))
+        return DebugUtils::errored(kErrorInvalidXAcquirePrefix);
+
+      if (ASMJIT_UNLIKELY(Support::test(options, InstOptions::kX86_XRelease) && !Support::test(iFlags, InstDB::InstFlags::kXRelease)))
+        return DebugUtils::errored(kErrorInvalidXReleasePrefix);
+    }
+  }
+
+  // Validate REP and REPNE Prefixes
+  // -------------------------------
+
+  if (Support::test(options, kRepAny)) {
+    if (ASMJIT_UNLIKELY((options & kRepAny) == kRepAny)) // REP and REPNE together are rejected.
+      return DebugUtils::errored(kErrorInvalidPrefixCombination);
+
+    if (ASMJIT_UNLIKELY(!Support::test(iFlags, InstDB::InstFlags::kRep)))
+      return DebugUtils::errored(kErrorInvalidRepPrefix);
+  }
+
+  // Translate Each Operand to the Corresponding OpSignature
+  // -------------------------------------------------------
+
+  InstDB::OpSignature oSigTranslated[Globals::kMaxOpCount]; // NOTE(review): written for every i < opCount below; assumes opCount <= Globals::kMaxOpCount - confirm callers guarantee it.
+  InstDB::OpFlags combinedOpFlags = InstDB::OpFlags::kNone;
+  uint32_t combinedRegMask = 0;
+  const Mem* memOp = nullptr;
+
+  for (i = 0; i < opCount; i++) {
+    const Operand_& op = operands[i];
+    if (op.opType() == OperandType::kNone) // The first none operand ends translation; the remaining ones are checked for gaps after the loop.
+      break;
+
+    InstDB::OpFlags opFlags = InstDB::OpFlags::kNone;
+    RegMask regMask = 0;
+
+    switch (op.opType()) {
+      case OperandType::kReg: {
+        RegType regType = op.as<BaseReg>().type();
+        opFlags = _x86OpFlagFromRegType[size_t(regType)];
+
+        if (ASMJIT_UNLIKELY(opFlags == InstDB::OpFlags::kNone))
+          return DebugUtils::errored(kErrorInvalidRegType);
+
+        // If `regId` is equal or greater than Operand::kVirtIdMin it means that the register is virtual and its
+        // index will be assigned later by the register allocator. We must pass unless asked to disallow virtual
+        // registers.
+        uint32_t regId = op.id();
+        if (regId < Operand::kVirtIdMin) {
+          if (ASMJIT_UNLIKELY(regId >= 32))
+            return DebugUtils::errored(kErrorInvalidPhysId);
+
+          if (ASMJIT_UNLIKELY(Support::bitTest(vd->allowedRegMask[size_t(regType)], regId) == 0))
+            return DebugUtils::errored(kErrorInvalidPhysId);
+
+          regMask = Support::bitMask(regId);
+          combinedRegMask |= regMask;
+        }
+        else {
+          if (uint32_t(validationFlags & ValidationFlags::kEnableVirtRegs) == 0)
+            return DebugUtils::errored(kErrorIllegalVirtReg);
+          regMask = 0xFFFFFFFFu; // Physical id is not known yet, so every bit is set and any reference register mask will match.
+        }
+        break;
+      }
+
+      // TODO: Validate base and index and combine these with `combinedRegMask`.
+      case OperandType::kMem: {
+        const Mem& m = op.as<Mem>();
+        memOp = &m; // Remembered for the {er}/{sae} and REP extra-register checks below; the last memory operand wins.
+
+        uint32_t memSize = m.size();
+        RegType baseType = m.baseType();
+        RegType indexType = m.indexType();
+
+        if (m.segmentId() > 6)
+          return DebugUtils::errored(kErrorInvalidSegment);
+
+        // Validate AVX-512 broadcast {1tox}.
+        if (m.hasBroadcast()) {
+          if (memSize != 0) {
+            // If the size is specified it has to match the broadcast size.
+            if (ASMJIT_UNLIKELY(commonInfo.hasAvx512B32() && memSize != 4))
+              return DebugUtils::errored(kErrorInvalidBroadcast);
+
+            if (ASMJIT_UNLIKELY(commonInfo.hasAvx512B64() && memSize != 8))
+              return DebugUtils::errored(kErrorInvalidBroadcast);
+          }
+          else {
+            // If there is no size we implicitly calculate it so we can validate N in {1toN} properly.
+            memSize = commonInfo.hasAvx512B64() ? 8 :
+                      commonInfo.hasAvx512B32() ? 4 : 2;
+          }
+
+          memSize <<= uint32_t(m.getBroadcast()); // Element size multiplied by 2^getBroadcast() gives the size that is classified below.
+        }
+
+        if (baseType != RegType::kNone && baseType > RegType::kLabelTag) {
+          uint32_t baseId = m.baseId();
+
+          if (m.isRegHome()) {
+            // Home address of a virtual register. In such case we don't want to validate the type of the
+            // base register as it will always be patched to ESP|RSP.
+          }
+          else {
+            if (ASMJIT_UNLIKELY(!Support::bitTest(vd->allowedMemBaseRegs, baseType)))
+              return DebugUtils::errored(kErrorInvalidAddress);
+          }
+
+          // Create information that will be validated only if this is an implicit memory operand. Basically
+          // only usable for string instructions and other instructions where memory operand is implicit and
+          // has 'seg:[reg]' form.
+          if (baseId < Operand::kVirtIdMin) {
+            if (ASMJIT_UNLIKELY(baseId >= 32))
+              return DebugUtils::errored(kErrorInvalidPhysId);
+
+            // Physical base id.
+            regMask = Support::bitMask(baseId);
+            combinedRegMask |= regMask;
+          }
+          else {
+            // Virtual base id - fill the whole mask for implicit mem validation. The register is not assigned
+            // yet, so we cannot predict the phys id.
+            if (uint32_t(validationFlags & ValidationFlags::kEnableVirtRegs) == 0)
+              return DebugUtils::errored(kErrorIllegalVirtReg);
+            regMask = 0xFFFFFFFFu;
+          }
+
+          if (indexType == RegType::kNone && !m.offsetLo32()) // Plain [base] form: no index register and a zero low 32-bit offset.
+            opFlags |= InstDB::OpFlags::kFlagMemBase;
+        }
+        else if (baseType == RegType::kLabelTag) {
+          // [Label] - there is no need to validate the base as it's label.
+        }
+        else {
+          // Base is a 64-bit address.
+          int64_t offset = m.offset();
+          if (!Support::isInt32(offset)) {
+            if (mode == InstDB::Mode::kX86) {
+              // 32-bit mode: Make sure that the address is either `int32_t` or `uint32_t`.
+              if (!Support::isUInt32(offset))
+                return DebugUtils::errored(kErrorInvalidAddress64Bit);
+            }
+            else {
+              // 64-bit mode: Zero extension is allowed if the address has 32-bit index register or the address
+              // has no index register (it's still encodable).
+              if (indexType != RegType::kNone) {
+                if (!Support::isUInt32(offset))
+                  return DebugUtils::errored(kErrorInvalidAddress64Bit);
+
+                if (indexType != RegType::kX86_Gpd)
+                  return DebugUtils::errored(kErrorInvalidAddress64BitZeroExtension);
+              }
+              else {
+                // We don't validate absolute 64-bit addresses without an index register as this also depends
+                // on the target's base address. We don't have the information to do it at this moment.
+              }
+            }
+          }
+        }
+
+        if (indexType != RegType::kNone) {
+          if (ASMJIT_UNLIKELY(!Support::bitTest(vd->allowedMemIndexRegs, indexType)))
+            return DebugUtils::errored(kErrorInvalidAddress);
+
+          if (indexType == RegType::kX86_Xmm) {
+            opFlags |= InstDB::OpFlags::kVm32x | InstDB::OpFlags::kVm64x;
+          }
+          else if (indexType == RegType::kX86_Ymm) {
+            opFlags |= InstDB::OpFlags::kVm32y | InstDB::OpFlags::kVm64y;
+          }
+          else if (indexType == RegType::kX86_Zmm) {
+            opFlags |= InstDB::OpFlags::kVm32z | InstDB::OpFlags::kVm64z;
+          }
+          else {
+            if (baseType != RegType::kNone)
+              opFlags |= InstDB::OpFlags::kFlagMib;
+          }
+
+          // [RIP + {XMM|YMM|ZMM}] is not allowed.
+          if (baseType == RegType::kX86_Rip && Support::test(opFlags, InstDB::OpFlags::kVmMask))
+            return DebugUtils::errored(kErrorInvalidAddress);
+
+          uint32_t indexId = m.indexId();
+          if (indexId < Operand::kVirtIdMin) {
+            if (ASMJIT_UNLIKELY(indexId >= 32))
+              return DebugUtils::errored(kErrorInvalidPhysId);
+
+            combinedRegMask |= Support::bitMask(indexId);
+          }
+          else {
+            if (uint32_t(validationFlags & ValidationFlags::kEnableVirtRegs) == 0)
+              return DebugUtils::errored(kErrorIllegalVirtReg);
+          }
+
+          // Only used for implicit memory operands having 'seg:[reg]' form, so clear it.
+          regMask = 0;
+        }
+
+        switch (memSize) { // Classify the (possibly broadcast-scaled) size; sizes without a flag are rejected.
+          case  0: opFlags |= InstDB::OpFlags::kMemUnspecified; break;
+          case  1: opFlags |= InstDB::OpFlags::kMem8; break;
+          case  2: opFlags |= InstDB::OpFlags::kMem16; break;
+          case  4: opFlags |= InstDB::OpFlags::kMem32; break;
+          case  6: opFlags |= InstDB::OpFlags::kMem48; break;
+          case  8: opFlags |= InstDB::OpFlags::kMem64; break;
+          case 10: opFlags |= InstDB::OpFlags::kMem80; break;
+          case 16: opFlags |= InstDB::OpFlags::kMem128; break;
+          case 32: opFlags |= InstDB::OpFlags::kMem256; break;
+          case 64: opFlags |= InstDB::OpFlags::kMem512; break;
+
+          default:
+            return DebugUtils::errored(kErrorInvalidOperandSize);
+        }
+
+        break;
+      }
+
+      case OperandType::kImm: {
+        uint64_t immValue = op.as<Imm>().valueAs<uint64_t>();
+
+        if (int64_t(immValue) >= 0) { // Non-negative when read as int64: flag every signed/unsigned width that can hold the value.
+          if (immValue <= 0x7u)
+            opFlags = InstDB::OpFlags::kImmI64 | InstDB::OpFlags::kImmU64 | InstDB::OpFlags::kImmI32 | InstDB::OpFlags::kImmU32 |
+                      InstDB::OpFlags::kImmI16 | InstDB::OpFlags::kImmU16 | InstDB::OpFlags::kImmI8  | InstDB::OpFlags::kImmU8  |
+                      InstDB::OpFlags::kImmI4  | InstDB::OpFlags::kImmU4  ;
+          else if (immValue <= 0xFu)
+            opFlags = InstDB::OpFlags::kImmI64 | InstDB::OpFlags::kImmU64 | InstDB::OpFlags::kImmI32 | InstDB::OpFlags::kImmU32 |
+                      InstDB::OpFlags::kImmI16 | InstDB::OpFlags::kImmU16 | InstDB::OpFlags::kImmI8  | InstDB::OpFlags::kImmU8  |
+                      InstDB::OpFlags::kImmU4  ;
+          else if (immValue <= 0x7Fu)
+            opFlags = InstDB::OpFlags::kImmI64 | InstDB::OpFlags::kImmU64 | InstDB::OpFlags::kImmI32 | InstDB::OpFlags::kImmU32 |
+                      InstDB::OpFlags::kImmI16 | InstDB::OpFlags::kImmU16 | InstDB::OpFlags::kImmI8  | InstDB::OpFlags::kImmU8  ;
+          else if (immValue <= 0xFFu)
+            opFlags = InstDB::OpFlags::kImmI64 | InstDB::OpFlags::kImmU64 | InstDB::OpFlags::kImmI32 | InstDB::OpFlags::kImmU32 |
+                      InstDB::OpFlags::kImmI16 | InstDB::OpFlags::kImmU16 | InstDB::OpFlags::kImmU8  ;
+          else if (immValue <= 0x7FFFu)
+            opFlags = InstDB::OpFlags::kImmI64 | InstDB::OpFlags::kImmU64 | InstDB::OpFlags::kImmI32 | InstDB::OpFlags::kImmU32 |
+                      InstDB::OpFlags::kImmI16 | InstDB::OpFlags::kImmU16 ;
+          else if (immValue <= 0xFFFFu)
+            opFlags = InstDB::OpFlags::kImmI64 | InstDB::OpFlags::kImmU64 | InstDB::OpFlags::kImmI32 | InstDB::OpFlags::kImmU32 |
+                      InstDB::OpFlags::kImmU16 ;
+          else if (immValue <= 0x7FFFFFFFu)
+            opFlags = InstDB::OpFlags::kImmI64 | InstDB::OpFlags::kImmU64 | InstDB::OpFlags::kImmI32 | InstDB::OpFlags::kImmU32;
+          else if (immValue <= 0xFFFFFFFFu)
+            opFlags = InstDB::OpFlags::kImmI64 | InstDB::OpFlags::kImmU64 | InstDB::OpFlags::kImmU32;
+          else if (immValue <= 0x7FFFFFFFFFFFFFFFu)
+            opFlags = InstDB::OpFlags::kImmI64 | InstDB::OpFlags::kImmU64;
+          else
+            opFlags = InstDB::OpFlags::kImmU64;
+        }
+        else {
+          immValue = Support::neg(immValue); // Work with the magnitude; only signed widths are flagged for negative values.
+          if (immValue <= 0x8u)
+            opFlags = InstDB::OpFlags::kImmI64 | InstDB::OpFlags::kImmI32 | InstDB::OpFlags::kImmI16 | InstDB::OpFlags::kImmI8 | InstDB::OpFlags::kImmI4;
+          else if (immValue <= 0x80u)
+            opFlags = InstDB::OpFlags::kImmI64 | InstDB::OpFlags::kImmI32 | InstDB::OpFlags::kImmI16 | InstDB::OpFlags::kImmI8;
+          else if (immValue <= 0x8000u)
+            opFlags = InstDB::OpFlags::kImmI64 | InstDB::OpFlags::kImmI32 | InstDB::OpFlags::kImmI16;
+          else if (immValue <= 0x80000000u)
+            opFlags = InstDB::OpFlags::kImmI64 | InstDB::OpFlags::kImmI32;
+          else
+            opFlags = InstDB::OpFlags::kImmI64;
+        }
+        break;
+      }
+
+      case OperandType::kLabel: {
+        opFlags |= InstDB::OpFlags::kRel8 | InstDB::OpFlags::kRel32;
+        break;
+      }
+
+      default:
+        return DebugUtils::errored(kErrorInvalidState);
+    }
+
+    InstDB::OpSignature& oSigDst = oSigTranslated[i];
+    oSigDst._flags = uint64_t(opFlags) & 0x00FFFFFFFFFFFFFFu; // Only the low 56 bits of the flags are stored.
+    oSigDst._regMask = uint8_t(regMask & 0xFFu); // Only the low 8 bits of the register mask are stored.
+    combinedOpFlags |= opFlags;
+  }
+
+  // Decrease the number of operands of those that are none. This is important as Assembler and Compiler may just pass
+  // more operands padded with none (which means that no operand is given at that index). However, validate that there
+  // are no gaps (like [reg, none, reg] or [none, reg]).
+  if (i < opCount) {
+    while (--opCount > i) // On normal exit opCount == i, the number of leading non-none operands.
+      if (ASMJIT_UNLIKELY(!operands[opCount].isNone()))
+        return DebugUtils::errored(kErrorInvalidInstruction);
+  }
+
+  // Validate X86 and X64 specific cases.
+  if (mode == InstDB::Mode::kX86) {
+    // Illegal use of 64-bit register in 32-bit mode.
+    if (ASMJIT_UNLIKELY(Support::test(combinedOpFlags, InstDB::OpFlags::kRegGpq)))
+      return DebugUtils::errored(kErrorInvalidUseOfGpq);
+  }
+  else {
+    // Illegal use of a high 8-bit register with REX prefix.
+    bool hasREX = inst.hasOption(InstOptions::kX86_Rex) || (combinedRegMask & 0xFFFFFF00u) != 0; // Any physical register id >= 8 among the operands counts as REX.
+    if (ASMJIT_UNLIKELY(hasREX && Support::test(combinedOpFlags, InstDB::OpFlags::kRegGpbHi)))
+      return DebugUtils::errored(kErrorInvalidUseOfGpbHi);
+  }
+
+  // Validate Instruction Signature by Comparing Against All `iSig` Rows
+  // -------------------------------------------------------------------
+
+  const InstDB::InstSignature* iSig = InstDB::_instSignatureTable + commonInfo._iSignatureIndex;
+  const InstDB::InstSignature* iEnd = iSig + commonInfo._iSignatureCount;
+
+  if (iSig != iEnd) { // Instructions that have no signature rows skip this check entirely.
+    const InstDB::OpSignature* opSignatureTable = InstDB::_opSignatureTable;
+
+    // If set it means that we matched a signature where only immediate value
+    // was out of bounds. We can return a more descriptive error if we know this.
+    bool globalImmOutOfRange = false;
+
+    do {
+      // Check if the architecture is compatible.
+      if (!iSig->supportsMode(mode))
+        continue; // In a do/while, `continue` jumps to the `++iSig != iEnd` condition, i.e. moves to the next row.
+
+      // Compare the operands table with reference operands.
+      uint32_t j = 0;
+      uint32_t iSigCount = iSig->opCount();
+      bool localImmOutOfRange = false;
+
+      if (iSigCount == opCount) {
+        for (j = 0; j < opCount; j++)
+          if (!x86CheckOSig(oSigTranslated[j], iSig->opSignature(j), localImmOutOfRange))
+            break;
+      }
+      else if (iSigCount - iSig->implicitOpCount() == opCount) { // The operand count matches once the row's implicit operands are left out.
+        uint32_t r = 0;
+        for (j = 0; j < opCount && r < iSigCount; j++, r++) {
+          const InstDB::OpSignature* oChk = oSigTranslated + j;
+          const InstDB::OpSignature* oRef;
+Next:
+          oRef = opSignatureTable + iSig->opSignatureIndex(r);
+          // Skip implicit operands.
+          if (oRef->isImplicit()) {
+            if (++r >= iSigCount)
+              break;
+            else
+              goto Next;
+          }
+
+          if (!x86CheckOSig(*oChk, *oRef, localImmOutOfRange))
+            break;
+        }
+      }
+
+      if (j == opCount) { // NOTE(review): also true when opCount == 0 and neither branch above ran (j stays 0) - confirm a zero-operand call is meant to match any row.
+        if (!localImmOutOfRange) {
+          // Match, must clear possible `globalImmOutOfRange`.
+          globalImmOutOfRange = false;
+          break;
+        }
+        globalImmOutOfRange = localImmOutOfRange;
+      }
+    } while (++iSig != iEnd);
+
+    if (iSig == iEnd) { // Every row was tried without a clean match.
+      if (globalImmOutOfRange)
+        return DebugUtils::errored(kErrorInvalidImmediate);
+      else
+        return DebugUtils::errored(kErrorInvalidInstruction);
+    }
+  }
+
+  // Validate AVX512 Options
+  // -----------------------
+
+  const RegOnly& extraReg = inst.extraReg();
+
+  if (Support::test(options, kAvx512Options)) {
+    if (commonInfo.hasFlag(InstDB::InstFlags::kEvex)) {
+      // Validate AVX-512 {z}.
+      if (Support::test(options, InstOptions::kX86_ZMask)) {
+        if (ASMJIT_UNLIKELY(Support::test(options, InstOptions::kX86_ZMask) && !commonInfo.hasAvx512Z())) // NOTE(review): the ZMask test repeats the enclosing condition.
+          return DebugUtils::errored(kErrorInvalidKZeroUse);
+      }
+
+      // Validate AVX-512 {sae} and {er}.
+      if (Support::test(options, InstOptions::kX86_SAE | InstOptions::kX86_ER)) {
+        // Rounding control is impossible if the instruction is not reg-to-reg.
+        if (ASMJIT_UNLIKELY(memOp))
+          return DebugUtils::errored(kErrorInvalidEROrSAE);
+
+        // Check if {sae} or {er} is supported by the instruction.
+        if (Support::test(options, InstOptions::kX86_ER)) {
+          // NOTE: if both {sae} and {er} are set, we don't care, as {sae} is implied.
+          if (ASMJIT_UNLIKELY(!commonInfo.hasAvx512ER()))
+            return DebugUtils::errored(kErrorInvalidEROrSAE);
+        }
+        else {
+          if (ASMJIT_UNLIKELY(!commonInfo.hasAvx512SAE()))
+            return DebugUtils::errored(kErrorInvalidEROrSAE);
+        }
+
+        // {sae} and {er} are defined for either scalar ops or vector ops that require LL to be 10 (512-bit vector
+        // operations). We don't need any more bits in the instruction database to be able to validate this, as
+        // each AVX512 instruction that has broadcast is vector instruction (in this case we require zmm registers),
+        // otherwise it's a scalar instruction, which is valid.
+        if (commonInfo.hasAvx512B()) {
+          // Supports broadcast, thus we require LL to be '10', which means there have to be ZMM registers used. We
+          // don't calculate LL here, but we know that it would be '10' if there is at least one ZMM register used.
+
+          // There is no {er}/{sae}-enabled instruction with less than two operands.
+          ASMJIT_ASSERT(opCount >= 2);
+          if (ASMJIT_UNLIKELY(!x86IsZmmOrM512(operands[0]) && !x86IsZmmOrM512(operands[1])))
+            return DebugUtils::errored(kErrorInvalidEROrSAE);
+        }
+      }
+    }
+    else {
+      // Not an AVX512 instruction - maybe OpExtra is xCX register used by REP/REPNE prefix.
+      if (Support::test(options, kAvx512Options) || !Support::test(options, kRepAny)) // NOTE(review): the first test is always true in this branch, so this always returns the error.
+        return DebugUtils::errored(kErrorInvalidInstruction);
+    }
+  }
+
+  // Validate {Extra} Register
+  // -------------------------
+
+  if (extraReg.isReg()) {
+    if (Support::test(options, kRepAny)) {
+      // Validate REP|REPNE {cx|ecx|rcx}.
+      if (ASMJIT_UNLIKELY(Support::test(iFlags, InstDB::InstFlags::kRepIgnored)))
+        return DebugUtils::errored(kErrorInvalidExtraReg);
+
+      if (extraReg.isPhysReg()) { // Only physical registers can be checked against Gp::kIdCx here.
+        if (ASMJIT_UNLIKELY(extraReg.id() != Gp::kIdCx))
+          return DebugUtils::errored(kErrorInvalidExtraReg);
+      }
+
+      // The type of the {...} register must match the type of the base register
+      // of memory operand. So if the memory operand uses 32-bit register the
+      // count register must also be 32-bit, etc...
+      if (ASMJIT_UNLIKELY(!memOp || extraReg.type() != memOp->baseType()))
+        return DebugUtils::errored(kErrorInvalidExtraReg);
+    }
+    else if (commonInfo.hasFlag(InstDB::InstFlags::kEvex)) {
+      // Validate AVX-512 {k}.
+      if (ASMJIT_UNLIKELY(extraReg.type() != RegType::kX86_KReg))
+        return DebugUtils::errored(kErrorInvalidExtraReg);
+
+      if (ASMJIT_UNLIKELY(extraReg.id() == 0 || !commonInfo.hasAvx512K())) // Mask register id 0 is rejected, as is any instruction without {k} support.
+        return DebugUtils::errored(kErrorInvalidKMaskUse);
+    }
+    else {
+      return DebugUtils::errored(kErrorInvalidExtraReg);
+    }
+  }
+
+  return kErrorOk;
+}
+#endif // !ASMJIT_NO_VALIDATION
+
+// x86::InstInternal - QueryRWInfo
+// ===============================
+
+#ifndef ASMJIT_NO_INTROSPECTION
+static const Support::Array<uint64_t, uint32_t(RegGroup::kMaxValue) + 1> rwRegGroupByteMask = {{ // Per-RegGroup byte masks (one bit per byte) used by rwZeroExtendNonVec().
+  0x00000000000000FFu, // GP.
+  0xFFFFFFFFFFFFFFFFu, // XMM|YMM|ZMM.
+  0x00000000000000FFu, // MM.
+  0x00000000000000FFu, // KReg.
+  0x0000000000000003u, // SReg.
+  0x00000000000000FFu, // CReg.
+  0x00000000000000FFu, // DReg.
+  0x00000000000003FFu, // St().
+  0x000000000000FFFFu, // BND.
+  0x00000000000000FFu  // RIP.
+}};
+
+static ASMJIT_FORCE_INLINE void rwZeroExtendGp(OpRWInfo& opRwInfo, const Gp& reg, uint32_t nativeGpSize) noexcept { // Flags zero extension of a GP register write.
+  ASMJIT_ASSERT(BaseReg::isGp(reg.as<Operand>()));
+  if (reg.size() + 4 == nativeGpSize) { // Applies only when the register is exactly 4 bytes narrower than the native GP size.
+    opRwInfo.addOpFlags(OpRWFlags::kZExt);
+    opRwInfo.setExtendByteMask(~opRwInfo.writeByteMask() & 0xFFu); // Of the low 8 bytes, every byte that is not written is marked as extended.
+  }
+}
+
+static ASMJIT_FORCE_INLINE void rwZeroExtendAvxVec(OpRWInfo& opRwInfo, const Vec& reg) noexcept { // Flags zero extension of a vector register write.
+  DebugUtils::unused(reg); // The register itself is not inspected; only the write mask matters.
+
+  uint64_t msk = ~Support::fillTrailingBits(opRwInfo.writeByteMask()); // presumably the bytes above the highest written byte - confirm fillTrailingBits() in support.h.
+  if (msk) { // Nothing is flagged when the complemented mask is empty.
+    opRwInfo.addOpFlags(OpRWFlags::kZExt);
+    opRwInfo.setExtendByteMask(msk);
+  }
+}
+
+static ASMJIT_FORCE_INLINE void rwZeroExtendNonVec(OpRWInfo& opRwInfo, const Reg& reg) noexcept {
+  const uint64_t groupMask = rwRegGroupByteMask[reg.group()]; // Byte mask of the register's group, limits the extension.
+  const uint64_t extendMask = ~Support::fillTrailingBits(opRwInfo.writeByteMask()) & groupMask;
+  if (!extendMask) return;
+  opRwInfo.addOpFlags(OpRWFlags::kZExt);
+  opRwInfo.setExtendByteMask(extendMask);
+}
+
+static ASMJIT_FORCE_INLINE Error rwHandleAVX512(const BaseInst& inst, const InstDB::CommonInfo& commonInfo, InstRWInfo* out) noexcept {
+  if (!inst.hasExtraReg() || inst.extraReg().type() != RegType::kX86_KReg || !(out->opCount() > 0))
+    return kErrorOk; // No {k} register attached (or no operands) - nothing to add.
+  out->_extraReg.addOpFlags(OpRWFlags::kRead);
+  out->_extraReg.setReadByteMask(0xFF);
+  // Unless zeroing is requested (ZMask option) or implied (kImplicitZ), the destination is also read where it's written.
+  OpRWInfo& dst = out->_operands[0];
+  if (!(inst.hasOption(InstOptions::kX86_ZMask) || commonInfo.hasAvx512Flag(InstDB::Avx512Flags::kImplicitZ))) {
+    dst.addOpFlags(OpRWFlags::kRead);
+    dst._readByteMask |= dst._writeByteMask;
+  }
+  return kErrorOk;
+}
+
+static ASMJIT_FORCE_INLINE bool hasSameRegType(const BaseReg* regs, size_t opCount) noexcept {
+  ASMJIT_ASSERT(opCount > 0);
+  const RegType firstType = regs[0].type(); // Every following register is compared against the first one.
+  size_t idx = 1;
+  while (idx < opCount && regs[idx].type() == firstType)
+    idx++;
+  return idx >= opCount; // The scan reached the end only if no register type differed.
+}
+
+Error InstInternal::queryRWInfo(Arch arch, const BaseInst& inst, const Operand_* operands, size_t opCount, InstRWInfo* out) noexcept {
+  // Fills `out` with the read/write info of `inst` and its `operands`. Only called when `arch` matches X86 family.
+  ASMJIT_ASSERT(Environment::isFamilyX86(arch));
+
+  // Get the instruction data.
+  InstId instId = inst.id();
+  if (ASMJIT_UNLIKELY(!Inst::isDefinedId(instId)))
+    return DebugUtils::errored(kErrorInvalidInstruction);
+
+  // Read/Write flags.
+  const InstDB::InstInfo& instInfo = InstDB::_instInfoTable[instId];
+  const InstDB::CommonInfo& commonInfo = InstDB::_commonInfoTable[instInfo._commonInfoIndex];
+  const InstDB::AdditionalInfo& additionalInfo = InstDB::_additionalInfoTable[instInfo._additionalInfoIndex];
+  const InstDB::RWFlagsInfoTable& rwFlags = InstDB::_rwFlagsInfoTable[additionalInfo._rwFlagsIndex];
+
+  // There are two data tables, one for `opCount == 2` and the second for
+  // `opCount != 2`. There are two reasons for that:
+  //   - There are instructions that share the same name that have both 2 or 3 operands, which have different
+  //     RW information / semantics.
+  //   - There must be 2 tables otherwise the lookup index won't fit into 8 bits (there are more than 256 records
+  //     of combined rwInfo A and B).
+  const InstDB::RWInfo& instRwInfo = opCount == 2 ? InstDB::rwInfoA[InstDB::rwInfoIndexA[instId]]
+                                                  : InstDB::rwInfoB[InstDB::rwInfoIndexB[instId]];
+  const InstDB::RWInfoRm& instRmInfo = InstDB::rwInfoRm[instRwInfo.rmInfo];
+
+  out->_instFlags = InstDB::_instFlagsTable[additionalInfo._instFlagsIndex];
+  out->_opCount = uint8_t(opCount);
+  out->_rmFeature = instRmInfo.rmFeature;
+  out->_extraReg.reset();
+  out->_readFlags = CpuRWFlags(rwFlags.readFlags);
+  out->_writeFlags = CpuRWFlags(rwFlags.writeFlags);
+
+  uint32_t opTypeMask = 0u; // Bit-mask of all operand types seen by the generic path below.
+  uint32_t nativeGpSize = Environment::registerSizeFromArch(arch);
+
+  constexpr OpRWFlags R = OpRWFlags::kRead;
+  constexpr OpRWFlags W = OpRWFlags::kWrite;
+  constexpr OpRWFlags X = OpRWFlags::kRW;
+  constexpr OpRWFlags RegM = OpRWFlags::kRegMem;
+  constexpr OpRWFlags RegPhys = OpRWFlags::kRegPhysId;
+  constexpr OpRWFlags MibRead = OpRWFlags::kMemBaseRead | OpRWFlags::kMemIndexRead;
+
+  if (instRwInfo.category == InstDB::RWInfo::kCategoryGeneric) {
+    uint32_t i;
+    uint32_t rmOpsMask = 0;
+    uint32_t rmMaxSize = 0;
+
+    for (i = 0; i < opCount; i++) {
+      OpRWInfo& op = out->_operands[i];
+      const Operand_& srcOp = operands[i];
+      const InstDB::RWInfoOp& rwOpData = InstDB::rwInfoOp[instRwInfo.opInfoIndex[i]];
+
+      opTypeMask |= Support::bitMask(srcOp.opType());
+
+      if (!srcOp.isRegOrMem()) {
+        op.reset();
+        continue;
+      }
+
+      op._opFlags = rwOpData.flags & ~OpRWFlags::kZExt; // ZExt is decided per operand below.
+      op._physId = rwOpData.physId;
+      op._rmSize = 0;
+      op._resetReserved();
+
+      uint64_t rByteMask = rwOpData.rByteMask;
+      uint64_t wByteMask = rwOpData.wByteMask;
+      // A byte mask that the table leaves empty is derived from the operand size.
+      if (op.isRead()  && !rByteMask) rByteMask = Support::lsbMask<uint64_t>(srcOp.size());
+      if (op.isWrite() && !wByteMask) wByteMask = Support::lsbMask<uint64_t>(srcOp.size());
+
+      op._readByteMask = rByteMask;
+      op._writeByteMask = wByteMask;
+      op._extendByteMask = 0;
+      op._consecutiveLeadCount = rwOpData.consecutiveLeadCount;
+
+      if (srcOp.isReg()) {
+        // Zero extension.
+        if (op.isWrite()) {
+          if (srcOp.as<Reg>().isGp()) {
+            // GP registers on X64 are special:
+            //   - 8-bit and 16-bit writes aren't zero extended.
+            //   - 32-bit writes ARE zero extended.
+            rwZeroExtendGp(op, srcOp.as<Gp>(), nativeGpSize);
+          }
+          else if (Support::test(rwOpData.flags, OpRWFlags::kZExt)) {
+            // Otherwise follow ZExt (the register is not GP here, so it's passed as a generic `Reg`).
+            rwZeroExtendNonVec(op, srcOp.as<Reg>());
+          }
+        }
+
+        // Aggregate values required to calculate valid Reg/M info.
+        rmMaxSize  = Support::max(rmMaxSize, srcOp.size());
+        rmOpsMask |= Support::bitMask<uint32_t>(i);
+      }
+      else {
+        const x86::Mem& memOp = srcOp.as<x86::Mem>();
+        // The RW flags of BASE+INDEX are either provided by the data, which means
+        // that the instruction is border-case, or they are deduced from the operand.
+        if (memOp.hasBaseReg() && !op.hasOpFlag(OpRWFlags::kMemBaseRW))
+          op.addOpFlags(OpRWFlags::kMemBaseRead);
+        if (memOp.hasIndexReg() && !op.hasOpFlag(OpRWFlags::kMemIndexRW))
+          op.addOpFlags(OpRWFlags::kMemIndexRead);
+      }
+    }
+
+    // Only keep kMovOp if the instruction is actually register to register move of the same kind.
+    if (out->hasInstFlag(InstRWFlags::kMovOp)) {
+      if (!(opCount >= 2 && opTypeMask == Support::bitMask(OperandType::kReg) && hasSameRegType(reinterpret_cast<const BaseReg*>(operands), opCount)))
+        out->_instFlags &= ~InstRWFlags::kMovOp;
+    }
+
+    // Special cases require more logic.
+    if (instRmInfo.flags & (InstDB::RWInfoRm::kFlagMovssMovsd | InstDB::RWInfoRm::kFlagPextrw | InstDB::RWInfoRm::kFlagFeatureIfRMI)) {
+      if (instRmInfo.flags & InstDB::RWInfoRm::kFlagMovssMovsd) {
+        if (opCount == 2) {
+          if (operands[0].isReg() && operands[1].isReg()) {
+            // Doesn't zero extend the destination.
+            out->_operands[0]._extendByteMask = 0;
+          }
+        }
+      }
+      else if (instRmInfo.flags & InstDB::RWInfoRm::kFlagPextrw) {
+        if (opCount == 3 && Reg::isMm(operands[1])) {
+          out->_rmFeature = 0;
+          rmOpsMask = 0;
+        }
+      }
+      else if (instRmInfo.flags & InstDB::RWInfoRm::kFlagFeatureIfRMI) {
+        if (opCount != 3 || !operands[2].isImm()) {
+          out->_rmFeature = 0;
+        }
+      }
+    }
+
+    rmOpsMask &= instRmInfo.rmOpsMask;
+    if (rmOpsMask) {
+      Support::BitWordIterator<uint32_t> it(rmOpsMask);
+      do {
+        i = it.next();
+
+        OpRWInfo& op = out->_operands[i];
+        op.addOpFlags(RegM);
+
+        switch (instRmInfo.category) {
+          case InstDB::RWInfoRm::kCategoryFixed:
+            op.setRmSize(instRmInfo.fixedSize);
+            break;
+          case InstDB::RWInfoRm::kCategoryConsistent:
+            op.setRmSize(operands[i].size());
+            break;
+          case InstDB::RWInfoRm::kCategoryHalf:
+            op.setRmSize(rmMaxSize / 2u);
+            break;
+          case InstDB::RWInfoRm::kCategoryQuarter:
+            op.setRmSize(rmMaxSize / 4u);
+            break;
+          case InstDB::RWInfoRm::kCategoryEighth:
+            op.setRmSize(rmMaxSize / 8u);
+            break;
+        }
+      } while (it.hasNext());
+    }
+
+    return rwHandleAVX512(inst, commonInfo, out);
+  }
+
+  switch (instRwInfo.category) {
+    case InstDB::RWInfo::kCategoryMov: {
+      // Special case for 'mov' instruction. Here there are some variants that we have to handle as 'mov' can be
+      // used to move between GP, segment, control and debug registers. Moving between GP registers also allows to
+      // use memory operand.
+
+      // We will again set the flag if it's actually a move from GP to GP register, otherwise this flag cannot be set.
+      out->_instFlags &= ~InstRWFlags::kMovOp;
+
+      if (opCount == 2) {
+        if (operands[0].isReg() && operands[1].isReg()) {
+          const Reg& o0 = operands[0].as<Reg>();
+          const Reg& o1 = operands[1].as<Reg>();
+
+          if (o0.isGp() && o1.isGp()) {
+            out->_operands[0].reset(W | RegM, operands[0].size());
+            out->_operands[1].reset(R | RegM, operands[1].size());
+
+            rwZeroExtendGp(out->_operands[0], operands[0].as<Gp>(), nativeGpSize);
+            out->_instFlags |= InstRWFlags::kMovOp;
+            return kErrorOk;
+          }
+
+          if (o0.isGp() && o1.isSReg()) {
+            out->_operands[0].reset(W | RegM, nativeGpSize);
+            out->_operands[0].setRmSize(2);
+            out->_operands[1].reset(R, 2);
+            return kErrorOk;
+          }
+
+          if (o0.isSReg() && o1.isGp()) {
+            out->_operands[0].reset(W, 2);
+            out->_operands[1].reset(R | RegM, 2);
+            out->_operands[1].setRmSize(2);
+            return kErrorOk;
+          }
+
+          if (o0.isGp() && (o1.isCReg() || o1.isDReg())) {
+            out->_operands[0].reset(W, nativeGpSize);
+            out->_operands[1].reset(R, nativeGpSize);
+            out->_writeFlags = CpuRWFlags::kX86_OF |
+                               CpuRWFlags::kX86_SF |
+                               CpuRWFlags::kX86_ZF |
+                               CpuRWFlags::kX86_AF |
+                               CpuRWFlags::kX86_PF |
+                               CpuRWFlags::kX86_CF;
+            return kErrorOk;
+          }
+
+          if ((o0.isCReg() || o0.isDReg()) && o1.isGp()) {
+            out->_operands[0].reset(W, nativeGpSize);
+            out->_operands[1].reset(R, nativeGpSize);
+            out->_writeFlags = CpuRWFlags::kX86_OF |
+                               CpuRWFlags::kX86_SF |
+                               CpuRWFlags::kX86_ZF |
+                               CpuRWFlags::kX86_AF |
+                               CpuRWFlags::kX86_PF |
+                               CpuRWFlags::kX86_CF;
+            return kErrorOk;
+          }
+        }
+
+        if (operands[0].isReg() && operands[1].isMem()) {
+          const Reg& o0 = operands[0].as<Reg>();
+          const Mem& o1 = operands[1].as<Mem>();
+
+          if (o0.isGp()) {
+            if (!o1.isOffset64Bit())
+              out->_operands[0].reset(W, o0.size());
+            else
+              out->_operands[0].reset(W | RegPhys, o0.size(), Gp::kIdAx);
+
+            out->_operands[1].reset(R | MibRead, o0.size());
+            rwZeroExtendGp(out->_operands[0], operands[0].as<Gp>(), nativeGpSize);
+            return kErrorOk;
+          }
+
+          if (o0.isSReg()) {
+            out->_operands[0].reset(W, 2);
+            out->_operands[1].reset(R, 2);
+            return kErrorOk;
+          }
+        }
+
+        if (operands[0].isMem() && operands[1].isReg()) {
+          const Mem& o0 = operands[0].as<Mem>();
+          const Reg& o1 = operands[1].as<Reg>();
+
+          if (o1.isGp()) {
+            out->_operands[0].reset(W | MibRead, o1.size());
+            if (!o0.isOffset64Bit())
+              out->_operands[1].reset(R, o1.size());
+            else
+              out->_operands[1].reset(R | RegPhys, o1.size(), Gp::kIdAx);
+            return kErrorOk;
+          }
+
+          if (o1.isSReg()) {
+            out->_operands[0].reset(W | MibRead, 2);
+            out->_operands[1].reset(R, 2);
+            return kErrorOk;
+          }
+        }
+
+        if (Reg::isGp(operands[0]) && operands[1].isImm()) {
+          const Reg& o0 = operands[0].as<Reg>();
+          out->_operands[0].reset(W | RegM, o0.size());
+          out->_operands[1].reset();
+
+          rwZeroExtendGp(out->_operands[0], operands[0].as<Gp>(), nativeGpSize);
+          return kErrorOk;
+        }
+
+        if (operands[0].isMem() && operands[1].isImm()) {
+          // Operand 0 is memory here, so its size is taken from the operand itself (no cast to a register).
+          out->_operands[0].reset(W | MibRead, operands[0].size());
+          out->_operands[1].reset();
+          return kErrorOk;
+        }
+      }
+      break;
+    }
+
+    case InstDB::RWInfo::kCategoryMovabs: {
+      if (opCount == 2) {
+        if (Reg::isGp(operands[0]) && operands[1].isMem()) {
+          const Reg& o0 = operands[0].as<Reg>();
+          out->_operands[0].reset(W | RegPhys, o0.size(), Gp::kIdAx);
+          out->_operands[1].reset(R | MibRead, o0.size());
+          rwZeroExtendGp(out->_operands[0], operands[0].as<Gp>(), nativeGpSize);
+          return kErrorOk;
+        }
+
+        if (operands[0].isMem() && Reg::isGp(operands[1])) {
+          const Reg& o1 = operands[1].as<Reg>();
+          out->_operands[0].reset(W | MibRead, o1.size());
+          out->_operands[1].reset(R | RegPhys, o1.size(), Gp::kIdAx);
+          return kErrorOk;
+        }
+
+        if (Reg::isGp(operands[0]) && operands[1].isImm()) {
+          const Reg& o0 = operands[0].as<Reg>();
+          out->_operands[0].reset(W, o0.size());
+          out->_operands[1].reset();
+
+          rwZeroExtendGp(out->_operands[0], operands[0].as<Gp>(), nativeGpSize);
+          return kErrorOk;
+        }
+      }
+      break;
+    }
+
+    case InstDB::RWInfo::kCategoryImul: {
+      // Special case for 'imul' instruction.
+      //
+      // There are 3 variants in general:
+      //
+      //   1. Standard multiplication: 'A = A * B'.
+      //   2. Multiplication with imm: 'A = B * C'.
+      //   3. Extended multiplication: 'A:B = B * C'.
+
+      if (opCount == 2) {
+        if (operands[0].isReg() && operands[1].isImm()) {
+          out->_operands[0].reset(X, operands[0].size());
+          out->_operands[1].reset();
+
+          rwZeroExtendGp(out->_operands[0], operands[0].as<Gp>(), nativeGpSize);
+          return kErrorOk;
+        }
+
+        if (Reg::isGpw(operands[0]) && operands[1].size() == 1) {
+          // imul ax, r8/m8 <- AX = AL * r8/m8
+          out->_operands[0].reset(X | RegPhys, 2, Gp::kIdAx);
+          out->_operands[0].setReadByteMask(Support::lsbMask<uint64_t>(1));
+          out->_operands[1].reset(R | RegM, 1);
+        }
+        else {
+          // imul r?, r?/m?
+          out->_operands[0].reset(X, operands[0].size());
+          out->_operands[1].reset(R | RegM, operands[0].size());
+          rwZeroExtendGp(out->_operands[0], operands[0].as<Gp>(), nativeGpSize);
+        }
+
+        if (operands[1].isMem())
+          out->_operands[1].addOpFlags(MibRead);
+        return kErrorOk;
+      }
+
+      if (opCount == 3) {
+        if (operands[2].isImm()) {
+          out->_operands[0].reset(W, operands[0].size());
+          out->_operands[1].reset(R | RegM, operands[1].size());
+          out->_operands[2].reset();
+
+          rwZeroExtendGp(out->_operands[0], operands[0].as<Gp>(), nativeGpSize);
+          if (operands[1].isMem())
+            out->_operands[1].addOpFlags(MibRead);
+          return kErrorOk;
+        }
+        else {
+          out->_operands[0].reset(W | RegPhys, operands[0].size(), Gp::kIdDx);
+          out->_operands[1].reset(X | RegPhys, operands[1].size(), Gp::kIdAx);
+          out->_operands[2].reset(R | RegM, operands[2].size());
+
+          rwZeroExtendGp(out->_operands[0], operands[0].as<Gp>(), nativeGpSize);
+          rwZeroExtendGp(out->_operands[1], operands[1].as<Gp>(), nativeGpSize);
+          if (operands[2].isMem())
+            out->_operands[2].addOpFlags(MibRead);
+          return kErrorOk;
+        }
+      }
+      break;
+    }
+
+    case InstDB::RWInfo::kCategoryMovh64: {
+      // Special case for 'movhpd|movhps' instructions. Note that this is only required for legacy (non-AVX)
+      // variants as AVX instructions use either 2 or 3 operands that are in `kCategoryGeneric` category.
+      if (opCount == 2) {
+        if (BaseReg::isVec(operands[0]) && operands[1].isMem()) {
+          out->_operands[0].reset(W, 8);
+          out->_operands[0].setWriteByteMask(Support::lsbMask<uint64_t>(8) << 8);
+          out->_operands[1].reset(R | MibRead, 8);
+          return kErrorOk;
+        }
+
+        if (operands[0].isMem() && BaseReg::isVec(operands[1])) {
+          out->_operands[0].reset(W | MibRead, 8);
+          out->_operands[1].reset(R, 8);
+          out->_operands[1].setReadByteMask(Support::lsbMask<uint64_t>(8) << 8);
+          return kErrorOk;
+        }
+      }
+      break;
+    }
+
+    case InstDB::RWInfo::kCategoryPunpcklxx: {
+      // Special case for 'punpcklbw|punpckldq|punpcklwd' instructions.
+      if (opCount == 2) {
+        if (Reg::isXmm(operands[0])) {
+          out->_operands[0].reset(X, 16);
+          out->_operands[0].setReadByteMask(0x0F0Fu); // NOTE(review): low 8 bytes would be 0x00FF - confirm this mask value.
+          out->_operands[0].setWriteByteMask(0xFFFFu);
+          out->_operands[1].reset(R, 16);
+          out->_operands[1].setReadByteMask(0x0F0Fu); // Operand 1 is read-only, so the mask restricts the read side (as in the MM variant).
+
+          if (Reg::isXmm(operands[1])) {
+            return kErrorOk;
+          }
+
+          if (operands[1].isMem()) {
+            out->_operands[1].addOpFlags(MibRead);
+            return kErrorOk;
+          }
+        }
+
+        if (Reg::isMm(operands[0])) {
+          out->_operands[0].reset(X, 8);
+          out->_operands[0].setReadByteMask(0x0Fu);
+          out->_operands[0].setWriteByteMask(0xFFu);
+          out->_operands[1].reset(R, 4);
+          out->_operands[1].setReadByteMask(0x0Fu);
+
+          if (Reg::isMm(operands[1])) {
+            return kErrorOk;
+          }
+
+          if (operands[1].isMem()) {
+            out->_operands[1].addOpFlags(MibRead);
+            return kErrorOk;
+          }
+        }
+      }
+      break;
+    }
+
+    case InstDB::RWInfo::kCategoryVmaskmov: {
+      // Special case for 'vmaskmovpd|vmaskmovps|vpmaskmovd|vpmaskmovq' instructions.
+      if (opCount == 3) {
+        if (BaseReg::isVec(operands[0]) && BaseReg::isVec(operands[1]) && operands[2].isMem()) {
+          out->_operands[0].reset(W, operands[0].size());
+          out->_operands[1].reset(R, operands[1].size());
+          out->_operands[2].reset(R | MibRead, operands[1].size());
+
+          rwZeroExtendAvxVec(out->_operands[0], operands[0].as<Vec>());
+          return kErrorOk;
+        }
+
+        if (operands[0].isMem() && BaseReg::isVec(operands[1]) && BaseReg::isVec(operands[2])) {
+          out->_operands[0].reset(X | MibRead, operands[1].size());
+          out->_operands[1].reset(R, operands[1].size());
+          out->_operands[2].reset(R, operands[2].size());
+          return kErrorOk;
+        }
+      }
+      break;
+    }
+
+    case InstDB::RWInfo::kCategoryVmovddup: {
+      // Special case for 'vmovddup' instruction. This instruction has an interesting semantic as 128-bit XMM
+      // version only uses 64-bit memory operand (m64), however, 256/512-bit versions use 256/512-bit memory
+      // operand, respectively.
+      if (opCount == 2) {
+        if (BaseReg::isVec(operands[0]) && BaseReg::isVec(operands[1])) {
+          uint32_t o0Size = operands[0].size();
+          uint32_t o1Size = o0Size == 16 ? 8 : o0Size;
+
+          out->_operands[0].reset(W, o0Size);
+          out->_operands[1].reset(R | RegM, o1Size);
+          out->_operands[1]._readByteMask &= 0x00FF00FF00FF00FFu;
+
+          rwZeroExtendAvxVec(out->_operands[0], operands[0].as<Vec>());
+          return rwHandleAVX512(inst, commonInfo, out);
+        }
+
+        if (BaseReg::isVec(operands[0]) && operands[1].isMem()) {
+          uint32_t o0Size = operands[0].size();
+          uint32_t o1Size = o0Size == 16 ? 8 : o0Size;
+
+          out->_operands[0].reset(W, o0Size);
+          out->_operands[1].reset(R | MibRead, o1Size);
+
+          rwZeroExtendAvxVec(out->_operands[0], operands[0].as<Vec>());
+          return rwHandleAVX512(inst, commonInfo, out);
+        }
+      }
+      break;
+    }
+
+    case InstDB::RWInfo::kCategoryVmovmskpd:
+    case InstDB::RWInfo::kCategoryVmovmskps: {
+      // Special case for 'vmovmskpd|vmovmskps' instructions.
+      if (opCount == 2) {
+        if (BaseReg::isGp(operands[0]) && BaseReg::isVec(operands[1])) {
+          out->_operands[0].reset(W, 1);
+          out->_operands[0].setExtendByteMask(Support::lsbMask<uint32_t>(nativeGpSize - 1) << 1);
+          out->_operands[1].reset(R, operands[1].size());
+          return kErrorOk;
+        }
+      }
+      break;
+    }
+
+    case InstDB::RWInfo::kCategoryVmov1_2:
+    case InstDB::RWInfo::kCategoryVmov1_4:
+    case InstDB::RWInfo::kCategoryVmov1_8: {
+      // Special case for instructions where the destination is 1:N (narrowing).
+      //
+      // Vmov1_2:
+      //   vcvtpd2dq|vcvttpd2dq
+      //   vcvtpd2udq|vcvttpd2udq
+      //   vcvtpd2ps|vcvtps2ph
+      //   vcvtqq2ps|vcvtuqq2ps
+      //   vpmovwb|vpmovswb|vpmovuswb
+      //   vpmovdw|vpmovsdw|vpmovusdw
+      //   vpmovqd|vpmovsqd|vpmovusqd
+      //
+      // Vmov1_4:
+      //   vpmovdb|vpmovsdb|vpmovusdb
+      //   vpmovqw|vpmovsqw|vpmovusqw
+      //
+      // Vmov1_8:
+      //   pmovmskb|vpmovmskb
+      //   vpmovqb|vpmovsqb|vpmovusqb
+      uint32_t shift = instRwInfo.category - InstDB::RWInfo::kCategoryVmov1_2 + 1;
+
+      if (opCount >= 2) {
+        if (opCount >= 3) {
+          if (opCount > 3)
+            return DebugUtils::errored(kErrorInvalidInstruction);
+          out->_operands[2].reset();
+        }
+
+        if (operands[0].isReg() && operands[1].isReg()) {
+          uint32_t size1 = operands[1].size();
+          uint32_t size0 = size1 >> shift;
+
+          out->_operands[0].reset(W, size0);
+          out->_operands[1].reset(R, size1);
+
+          if (instRmInfo.rmOpsMask & 0x1) {
+            out->_operands[0].addOpFlags(RegM);
+            out->_operands[0].setRmSize(size0);
+          }
+
+          if (instRmInfo.rmOpsMask & 0x2) {
+            out->_operands[1].addOpFlags(RegM);
+            out->_operands[1].setRmSize(size1);
+          }
+
+          // Handle 'pmovmskb|vpmovmskb'.
+          if (BaseReg::isGp(operands[0]))
+            rwZeroExtendGp(out->_operands[0], operands[0].as<Gp>(), nativeGpSize);
+
+          if (BaseReg::isVec(operands[0]))
+            rwZeroExtendAvxVec(out->_operands[0], operands[0].as<Vec>());
+
+          return rwHandleAVX512(inst, commonInfo, out);
+        }
+
+        if (operands[0].isReg() && operands[1].isMem()) {
+          uint32_t size1 = operands[1].size() ? operands[1].size() : uint32_t(16);
+          uint32_t size0 = size1 >> shift;
+
+          out->_operands[0].reset(W, size0);
+          out->_operands[1].reset(R | MibRead, size1);
+          return kErrorOk;
+        }
+
+        if (operands[0].isMem() && operands[1].isReg()) {
+          uint32_t size1 = operands[1].size();
+          uint32_t size0 = size1 >> shift;
+
+          out->_operands[0].reset(W | MibRead, size0);
+          out->_operands[1].reset(R, size1);
+
+          return rwHandleAVX512(inst, commonInfo, out);
+        }
+      }
+      break;
+    }
+
+    case InstDB::RWInfo::kCategoryVmov2_1:
+    case InstDB::RWInfo::kCategoryVmov4_1:
+    case InstDB::RWInfo::kCategoryVmov8_1: {
+      // Special case for instructions where the destination is N:1 (widening).
+      //
+      // Vmov2_1:
+      //   vcvtdq2pd|vcvtudq2pd
+      //   vcvtps2pd|vcvtph2ps
+      //   vcvtps2qq|vcvtps2uqq
+      //   vcvttps2qq|vcvttps2uqq
+      //   vpmovsxbw|vpmovzxbw
+      //   vpmovsxwd|vpmovzxwd
+      //   vpmovsxdq|vpmovzxdq
+      //
+      // Vmov4_1:
+      //   vpmovsxbd|vpmovzxbd
+      //   vpmovsxwq|vpmovzxwq
+      //
+      // Vmov8_1:
+      //   vpmovsxbq|vpmovzxbq
+      uint32_t shift = instRwInfo.category - InstDB::RWInfo::kCategoryVmov2_1 + 1;
+
+      if (opCount >= 2) {
+        if (opCount >= 3) {
+          if (opCount > 3)
+            return DebugUtils::errored(kErrorInvalidInstruction);
+          out->_operands[2].reset();
+        }
+
+        uint32_t size0 = operands[0].size();
+        uint32_t size1 = size0 >> shift;
+
+        out->_operands[0].reset(W, size0);
+        out->_operands[1].reset(R, size1);
+
+        if (operands[0].isReg() && operands[1].isReg()) {
+          if (instRmInfo.rmOpsMask & 0x1) {
+            out->_operands[0].addOpFlags(RegM);
+            out->_operands[0].setRmSize(size0);
+          }
+
+          if (instRmInfo.rmOpsMask & 0x2) {
+            out->_operands[1].addOpFlags(RegM);
+            out->_operands[1].setRmSize(size1);
+          }
+
+          return rwHandleAVX512(inst, commonInfo, out);
+        }
+
+        if (operands[0].isReg() && operands[1].isMem()) {
+          out->_operands[1].addOpFlags(MibRead);
+
+          return rwHandleAVX512(inst, commonInfo, out);
+        }
+      }
+      break;
+    }
+  }
+  // None of the special cases above matched the given operand combination.
+  return DebugUtils::errored(kErrorInvalidInstruction);
+}
+#endif // !ASMJIT_NO_INTROSPECTION
+
+// x86::InstInternal - QueryFeatures
+// =================================
+
+#ifndef ASMJIT_NO_INTROSPECTION
+struct RegAnalysis { // Summary of the registers referenced by a list of operands, see InstInternal_regAnalysis().
+  uint32_t regTypeMask; // OR of `Support::bitMask(type)` for every register operand and memory BASE/INDEX type seen.
+  uint32_t highVecUsed; // Non-zero if a vector register or a memory INDEX register has an id in range [16, 32).
+
+  inline bool hasRegType(RegType regType) const noexcept { // Tests whether `regType` was recorded in `regTypeMask`.
+    return Support::bitTest(regTypeMask, regType);
+  }
+};
+
+static RegAnalysis InstInternal_regAnalysis(const Operand_* operands, size_t opCount) noexcept {
+  RegAnalysis analysis { 0u, 0u };
+  for (const Operand_* op = operands; op != operands + opCount; ++op) {
+    if (op->isReg()) {
+      // Register operand: record its type and whether it's a vector register with id 16..31.
+      const BaseReg& reg = op->as<BaseReg>();
+      analysis.regTypeMask |= Support::bitMask(reg.type());
+      if (reg.isVec() && reg.id() >= 16 && reg.id() < 32)
+        analysis.highVecUsed = 1u;
+      continue;
+    }
+    if (!op->isMem()) continue;
+    // Memory operand: record BASE/INDEX register types and whether the INDEX id is 16..31.
+    const BaseMem& mem = op->as<BaseMem>();
+    if (mem.hasBaseReg())
+      analysis.regTypeMask |= Support::bitMask(mem.baseType());
+    if (!mem.hasIndexReg())
+      continue;
+    analysis.regTypeMask |= Support::bitMask(mem.indexType());
+    if (mem.indexId() >= 16 && mem.indexId() < 32)
+      analysis.highVecUsed = 1u;
+  }
+  return analysis;
+}
+
+static inline uint32_t InstInternal_usesAvx512(InstOptions instOptions, const RegOnly& extraReg, const RegAnalysis& regAnalysis) noexcept {
+  // Non-zero when any indicator is present: EVEX / AVX512Mask options, a {k} extra register, or a ZMM|K register type.
+  uint32_t indicators = uint32_t(instOptions & (InstOptions::kX86_Evex | InstOptions::kX86_AVX512Mask));
+  indicators |= uint32_t(extraReg.type() == RegType::kX86_KReg);
+  indicators |= regAnalysis.regTypeMask & Support::bitMask(RegType::kX86_Zmm, RegType::kX86_KReg);
+  return indicators;
+}
+
+Error InstInternal::queryFeatures(Arch arch, const BaseInst& inst, const Operand_* operands, size_t opCount, CpuFeatures* out) noexcept {
+  // Only called when `arch` matches X86 family.
+  DebugUtils::unused(arch);
+  ASMJIT_ASSERT(Environment::isFamilyX86(arch));
+
+  // Get the instruction data.
+  InstId instId = inst.id();
+  InstOptions options = inst.options();
+
+  if (ASMJIT_UNLIKELY(!Inst::isDefinedId(instId)))
+    return DebugUtils::errored(kErrorInvalidInstruction);
+
+  const InstDB::InstInfo& instInfo = InstDB::infoById(instId);
+  const InstDB::AdditionalInfo& additionalInfo = InstDB::_additionalInfoTable[instInfo._additionalInfoIndex];
+
+  const uint8_t* fData = additionalInfo.featuresBegin();
+  const uint8_t* fEnd = additionalInfo.featuresEnd();
+
+  // Copy all features to `out`.
+  out->reset();
+  do {
+    uint32_t feature = fData[0];
+    if (!feature)
+      break;
+    out->add(feature);
+  } while (++fData != fEnd);
+
+  // Since AsmJit aggregates instructions that share the same name we have to
+  // deal with some special cases and also with MMX/SSE and AVX/AVX2 overlaps.
+  if (fData != additionalInfo.featuresBegin()) {
+    RegAnalysis regAnalysis = InstInternal_regAnalysis(operands, opCount);
+
+    // Handle MMX vs SSE overlap.
+    if (out->has(CpuFeatures::X86::kMMX) || out->has(CpuFeatures::X86::kMMX2)) {
+      // Only instructions defined by SSE and SSE2 overlap. Instructions introduced by newer instruction sets like
+      // SSE3+ don't state MMX as they require SSE3+.
+      if (out->has(CpuFeatures::X86::kSSE) || out->has(CpuFeatures::X86::kSSE2)) {
+        if (!regAnalysis.hasRegType(RegType::kX86_Xmm)) {
+          // The instruction doesn't use XMM register(s), thus it's MMX/MMX2 only.
+          out->remove(CpuFeatures::X86::kSSE);
+          out->remove(CpuFeatures::X86::kSSE2);
+          out->remove(CpuFeatures::X86::kSSE4_1);
+        }
+        else {
+          out->remove(CpuFeatures::X86::kMMX);
+          out->remove(CpuFeatures::X86::kMMX2);
+        }
+
+        // Special case: PEXTRW instruction is MMX/SSE2 instruction. However, MMX/SSE version cannot access memory
+        // (only register to register extract) so when SSE4.1 introduced the whole family of PEXTR/PINSR instructions
+        // they also introduced PEXTRW with a new opcode 0x15 that can extract directly to memory. This instruction
+        // is, of course, not compatible with MMX/SSE2 and would #UD if SSE4.1 is not supported.
+        if (instId == Inst::kIdPextrw) {
+          if (opCount >= 1 && operands[0].isMem())
+            out->remove(CpuFeatures::X86::kSSE2);
+          else
+            out->remove(CpuFeatures::X86::kSSE4_1);
+        }
+      }
+    }
+
+    // Handle PCLMULQDQ vs VPCLMULQDQ.
+    if (out->has(CpuFeatures::X86::kVPCLMULQDQ)) {
+      if (regAnalysis.hasRegType(RegType::kX86_Zmm) || Support::test(options, InstOptions::kX86_Evex)) {
+        // AVX512_F & VPCLMULQDQ.
+        out->remove(CpuFeatures::X86::kAVX, CpuFeatures::X86::kPCLMULQDQ);
+      }
+      else if (regAnalysis.hasRegType(RegType::kX86_Ymm)) {
+        out->remove(CpuFeatures::X86::kAVX512_F, CpuFeatures::X86::kAVX512_VL);
+      }
+      else {
+        // AVX & PCLMULQDQ.
+        out->remove(CpuFeatures::X86::kAVX512_F, CpuFeatures::X86::kAVX512_VL, CpuFeatures::X86::kVPCLMULQDQ);
+      }
+    }
+
+    // Handle AVX vs AVX2 overlap.
+    if (out->has(CpuFeatures::X86::kAVX) && out->has(CpuFeatures::X86::kAVX2)) {
+      bool isAVX2 = true;
+      // Special case: VBROADCASTSS and VBROADCASTSD were introduced in AVX, but only version that uses memory as a
+      // source operand. AVX2 then added support for register source operand.
+      if (instId == Inst::kIdVbroadcastss || instId == Inst::kIdVbroadcastsd) {
+        if (opCount > 1 && operands[1].isMem())
+          isAVX2 = false;
+      }
+      else {
+        // AVX instruction set doesn't support integer operations on YMM registers as these were later introduced by
+        // AVX2. In our case we have to check if YMM register(s) are in use and if that is the case this is an AVX2
+        // instruction.
+        if (!(regAnalysis.regTypeMask & Support::bitMask(RegType::kX86_Ymm, RegType::kX86_Zmm)))
+          isAVX2 = false;
+      }
+
+      if (isAVX2)
+        out->remove(CpuFeatures::X86::kAVX);
+      else
+        out->remove(CpuFeatures::X86::kAVX2);
+    }
+
+    // Handle AVX|AVX2|FMA|F16C vs AVX512 overlap.
+    if (out->has(CpuFeatures::X86::kAVX) || out->has(CpuFeatures::X86::kAVX2) || out->has(CpuFeatures::X86::kFMA) || out->has(CpuFeatures::X86::kF16C)) {
+      // Only AVX512-F|BW|DQ allow to encode AVX/AVX2/FMA/F16C instructions
+      if (out->has(CpuFeatures::X86::kAVX512_F) || out->has(CpuFeatures::X86::kAVX512_BW) || out->has(CpuFeatures::X86::kAVX512_DQ)) {
+        uint32_t usesAvx512 = InstInternal_usesAvx512(options, inst.extraReg(), regAnalysis);
+        uint32_t mustUseEvex = 0;
+
+        switch (instId) {
+          // Special case: VPSLLDQ and VPSRLDQ instructions only allow `reg, reg, imm` combination in AVX|AVX2 mode,
+          // then AVX-512 introduced `reg, reg/mem, imm` combination that uses EVEX prefix. This means that if the
+          // second operand is memory then this is AVX-512_BW instruction and not AVX/AVX2 instruction.
+          case Inst::kIdVpslldq:
+          case Inst::kIdVpsrldq:
+            mustUseEvex = opCount >= 2 && operands[1].isMem();
+            break;
+
+          // Special case: VPBROADCAST[B|D|Q|W] only supports r32/r64 with EVEX prefix.
+          case Inst::kIdVpbroadcastb:
+          case Inst::kIdVpbroadcastd:
+          case Inst::kIdVpbroadcastq:
+          case Inst::kIdVpbroadcastw:
+            mustUseEvex = opCount >= 2 && x86::Reg::isGp(operands[1]);
+            break;
+
+          // Special case: VPERMPD - AVX2 vs AVX512-F case.
+          case Inst::kIdVpermpd:
+            mustUseEvex = opCount >= 3 && !operands[2].isImm();
+            break;
+
+          // Special case: VPERMQ - AVX2 vs AVX512-F case.
+          case Inst::kIdVpermq:
+            mustUseEvex = opCount >= 3 && (operands[1].isMem() || !operands[2].isImm());
+            break;
+        }
+
+        if (!(usesAvx512 | mustUseEvex | regAnalysis.highVecUsed))
+          out->remove(CpuFeatures::X86::kAVX512_F, CpuFeatures::X86::kAVX512_BW, CpuFeatures::X86::kAVX512_DQ, CpuFeatures::X86::kAVX512_VL);
+        else
+          out->remove(CpuFeatures::X86::kAVX, CpuFeatures::X86::kAVX2, CpuFeatures::X86::kFMA, CpuFeatures::X86::kF16C);
+      }
+    }
+
+    // Handle AVX_VNNI vs AVX512_VNNI overlap.
+    if (out->has(CpuFeatures::X86::kAVX512_VNNI)) {
+      // By default the AVX512_VNNI instruction should be used, because it was introduced first. However, VEX|VEX3
+      // prefix can be used to force AVX_VNNI instead.
+      uint32_t usesAvx512 = InstInternal_usesAvx512(options, inst.extraReg(), regAnalysis);
+
+      if (!usesAvx512 && Support::test(options, InstOptions::kX86_Vex | InstOptions::kX86_Vex3))
+        out->remove(CpuFeatures::X86::kAVX512_VNNI, CpuFeatures::X86::kAVX512_VL);
+      else
+        out->remove(CpuFeatures::X86::kAVX_VNNI);
+    }
+
+    // Clear AVX512_VL if ZMM register is used.
+    if (regAnalysis.hasRegType(RegType::kX86_Zmm))
+      out->remove(CpuFeatures::X86::kAVX512_VL);
+  }
+
+  return kErrorOk;
+}
+#endif // !ASMJIT_NO_INTROSPECTION
+
+// x86::InstInternal - Tests
+// =========================
+
+#if defined(ASMJIT_TEST)
+UNIT(x86_inst_api_text) {
+  // Round-trip check: instruction id -> name -> instruction id must give back the id it started from.
+  INFO("Matching all X86 instructions");
+  for (uint32_t instId = 1; instId < Inst::_kIdCount; instId++) {
+    StringTmp<128> instName;
+    Error nameErr = InstInternal::instIdToString(Arch::kX86, instId, instName);
+    EXPECT(nameErr == kErrorOk, "Failed to get the name of instruction #%u", instId);
+
+    uint32_t foundId = InstInternal::stringToInstId(Arch::kX86, instName.data(), instName.size());
+    StringTmp<128> foundName; // Name of the id that was actually found; only used by the failure message.
+    InstInternal::instIdToString(Arch::kX86, foundId, foundName);
+
+    EXPECT(instId == foundId,
+           "Instructions do not match \"%s\" (#%u) != \"%s\" (#%u)", instName.data(), instId, foundName.data(), foundId);
+  }
+}
+
+template<typename... Args>
+static Error queryRWInfoSimple(InstRWInfo* out, Arch arch, InstId instId, InstOptions options, Args&&... args) {
+  Operand_ ops[] = { std::forward<Args>(args)... }; // Operands packed in the order they were passed.
+  BaseInst query(instId);
+  query.addOptions(options);
+  return InstInternal::queryRWInfo(arch, query, ops, sizeof...(Args), out);
+}
+
+UNIT(x86_inst_api_rm_feature) {
+  INFO("Verifying whether RM/feature is reported correctly for PEXTRW instruction");
+  { // Each query must succeed before `rwi` is inspected. Expected: MM source -> no RM feature, XMM source -> SSE4.1.
+    InstRWInfo rwi;
+
+    EXPECT(queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdPextrw, InstOptions::kNone, eax, mm1, imm(1)) == kErrorOk);
+    EXPECT(rwi.rmFeature() == 0);
+
+    EXPECT(queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdPextrw, InstOptions::kNone, eax, xmm1, imm(1)) == kErrorOk);
+    EXPECT(rwi.rmFeature() == CpuFeatures::X86::kSSE4_1);
+  }
+
+  INFO("Verifying whether RM/feature is reported correctly for AVX512 shift instructions");
+  { // Expected: immediate counts report AVX512_F (D/Q forms) or AVX512_BW (W/DQ forms); register counts report none.
+    InstRWInfo rwi;
+
+    EXPECT(queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpslld, InstOptions::kNone, xmm1, xmm2, imm(8)) == kErrorOk);
+    EXPECT(rwi.rmFeature() == CpuFeatures::X86::kAVX512_F);
+
+    EXPECT(queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpsllq, InstOptions::kNone, ymm1, ymm2, imm(8)) == kErrorOk);
+    EXPECT(rwi.rmFeature() == CpuFeatures::X86::kAVX512_F);
+
+    EXPECT(queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpsrad, InstOptions::kNone, xmm1, xmm2, imm(8)) == kErrorOk);
+    EXPECT(rwi.rmFeature() == CpuFeatures::X86::kAVX512_F);
+
+    EXPECT(queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpsrld, InstOptions::kNone, ymm1, ymm2, imm(8)) == kErrorOk);
+    EXPECT(rwi.rmFeature() == CpuFeatures::X86::kAVX512_F);
+
+    EXPECT(queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpsrlq, InstOptions::kNone, xmm1, xmm2, imm(8)) == kErrorOk);
+    EXPECT(rwi.rmFeature() == CpuFeatures::X86::kAVX512_F);
+
+    EXPECT(queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpslldq, InstOptions::kNone, xmm1, xmm2, imm(8)) == kErrorOk);
+    EXPECT(rwi.rmFeature() == CpuFeatures::X86::kAVX512_BW);
+
+    EXPECT(queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpsllw, InstOptions::kNone, ymm1, ymm2, imm(8)) == kErrorOk);
+    EXPECT(rwi.rmFeature() == CpuFeatures::X86::kAVX512_BW);
+
+    EXPECT(queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpsraw, InstOptions::kNone, xmm1, xmm2, imm(8)) == kErrorOk);
+    EXPECT(rwi.rmFeature() == CpuFeatures::X86::kAVX512_BW);
+
+    EXPECT(queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpsrldq, InstOptions::kNone, ymm1, ymm2, imm(8)) == kErrorOk);
+    EXPECT(rwi.rmFeature() == CpuFeatures::X86::kAVX512_BW);
+
+    EXPECT(queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpsrlw, InstOptions::kNone, xmm1, xmm2, imm(8)) == kErrorOk);
+    EXPECT(rwi.rmFeature() == CpuFeatures::X86::kAVX512_BW);
+
+    EXPECT(queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpslld, InstOptions::kNone, xmm1, xmm2, xmm3) == kErrorOk);
+    EXPECT(rwi.rmFeature() == 0);
+
+    EXPECT(queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpsllw, InstOptions::kNone, xmm1, xmm2, xmm3) == kErrorOk);
+    EXPECT(rwi.rmFeature() == 0);
+  }
+}
+#endif
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // !ASMJIT_NO_X86
diff --git a/lib/lepton/asmjit/x86/x86instapi_p.h b/lib/lepton/asmjit/x86/x86instapi_p.h
new file mode 100644
index 0000000000..56f7fb928f
--- /dev/null
+++ b/lib/lepton/asmjit/x86/x86instapi_p.h
@@ -0,0 +1,41 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_X86_X86INSTAPI_P_H_INCLUDED
+#define ASMJIT_X86_X86INSTAPI_P_H_INCLUDED
+
+#include "../core/inst.h"
+#include "../core/operand.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(x86)
+
+//! \cond INTERNAL
+//! \addtogroup asmjit_x86
+//! \{
+
+namespace InstInternal { // Declarations only; which groups exist depends on the ASMJIT_NO_* build switches below.
+
+#ifndef ASMJIT_NO_TEXT // Instruction id <-> name conversion, declared only when instruction names are not disabled.
+Error ASMJIT_CDECL instIdToString(Arch arch, InstId instId, String& output) noexcept;
+InstId ASMJIT_CDECL stringToInstId(Arch arch, const char* s, size_t len) noexcept;
+#endif // !ASMJIT_NO_TEXT
+
+#ifndef ASMJIT_NO_VALIDATION // Instruction + operand validation, declared only when validation is not disabled.
+Error ASMJIT_CDECL validate(Arch arch, const BaseInst& inst, const Operand_* operands, size_t opCount, ValidationFlags validationFlags) noexcept;
+#endif // !ASMJIT_NO_VALIDATION
+
+#ifndef ASMJIT_NO_INTROSPECTION // Queries that report their result through `out` and return an `Error` code.
+Error ASMJIT_CDECL queryRWInfo(Arch arch, const BaseInst& inst, const Operand_* operands, size_t opCount, InstRWInfo* out) noexcept;
+Error ASMJIT_CDECL queryFeatures(Arch arch, const BaseInst& inst, const Operand_* operands, size_t opCount, CpuFeatures* out) noexcept;
+#endif // !ASMJIT_NO_INTROSPECTION
+
+} // {InstInternal}
+
+//! \}
+//! \endcond
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // ASMJIT_X86_X86INSTAPI_P_H_INCLUDED
diff --git a/lib/lepton/asmjit/x86/x86instdb.cpp b/lib/lepton/asmjit/x86/x86instdb.cpp
new file mode 100644
index 0000000000..ee22526c34
--- /dev/null
+++ b/lib/lepton/asmjit/x86/x86instdb.cpp
@@ -0,0 +1,4427 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+// ----------------------------------------------------------------------------
+// IMPORTANT: AsmJit now uses an external instruction database to populate
+// static tables within this file. Perform the following steps to regenerate
+// all tables enclosed by ${...}:
+//
+//   1. Install node.js environment <https://nodejs.org>
+//   2. Go to asmjit/tools directory
+//   3. Get the latest asmdb from <https://github.com/asmjit/asmdb> and
+//      copy/link the `asmdb` directory to `asmjit/tools/asmdb`.
+//   4. Execute `node tablegen-x86.js`
+//
+// Instruction encoding and opcodes were added to the `x86inst.cpp` database
+// manually in the past and they are not updated by the script as it became
+// tricky. However, everything else is updated including instruction operands
+// and tables required to validate them, instruction read/write information
+// (including registers and flags), and all indexes to all tables.
+// ----------------------------------------------------------------------------
+
+#include "../core/api-build_p.h"
+#if !defined(ASMJIT_NO_X86)
+
+#include "../core/cpuinfo.h"
+#include "../core/misc_p.h"
+#include "../core/support.h"
+#include "../x86/x86instdb_p.h"
+#include "../x86/x86opcode_p.h"
+#include "../x86/x86operand.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(x86)
+
+// x86::InstDB - InstInfo
+// ======================
+
+// Instruction opcode definitions - `O`, `V` and `E` OR their encoded fields together through `O_ENCODE`:
+//   - `O` encodes X86|MMX|SSE instructions (last argument picks `Opcode::kModRM_*`); `O_FPU` builds `kFPU_*` opcodes.
+//   - `V` encodes VEX|XOP|EVEX instructions (last argument picks `Opcode::kCDTT_*`).
+//   - `E` encodes EVEX instructions only (same fields as `V`, with `Opcode::kMM_ForceEvex` set).
+#define O_ENCODE(PREFIX, OPCODE, O, L, W, EvexW, N, TT) ((PREFIX) | (OPCODE) | (O) | (L) | (W) | (EvexW) | (N) | (TT))
+
+#define O(PREFIX, OPCODE, ModO, LL, W, EvexW, N, ModRM) (O_ENCODE(Opcode::k##PREFIX, 0x##OPCODE, Opcode::kModO_##ModO, Opcode::kLL_##LL, Opcode::kW_##W, Opcode::kEvex_W_##EvexW, Opcode::kCDSHL_##N, Opcode::kModRM_##ModRM))
+#define V(PREFIX, OPCODE, ModO, LL, W, EvexW, N, TT) (O_ENCODE(Opcode::k##PREFIX, 0x##OPCODE, Opcode::kModO_##ModO, Opcode::kLL_##LL, Opcode::kW_##W, Opcode::kEvex_W_##EvexW, Opcode::kCDSHL_##N, Opcode::kCDTT_##TT))
+#define E(PREFIX, OPCODE, ModO, LL, W, EvexW, N, TT) (O_ENCODE(Opcode::k##PREFIX, 0x##OPCODE, Opcode::kModO_##ModO, Opcode::kLL_##LL, Opcode::kW_##W, Opcode::kEvex_W_##EvexW, Opcode::kCDSHL_##N, Opcode::kCDTT_##TT) | Opcode::kMM_ForceEvex)
+#define O_FPU(PREFIX, OPCODE, ModO) (Opcode::kFPU_##PREFIX | (0x##OPCODE & 0xFFu) | ((0x##OPCODE >> 8) << Opcode::kFPU_2B_Shift) | Opcode::kModO_##ModO)
+
+// Don't store `_nameDataIndex` if instruction names are disabled (`ASMJIT_NO_TEXT`). Since some APIs can
+// still use `_nameDataIndex`, it's much safer to store zero than to leave it undefined in that case.
+#ifndef ASMJIT_NO_TEXT
+  #define NAME_DATA_INDEX(Index) Index
+#else
+  #define NAME_DATA_INDEX(Index) 0
+#endif
+
+// Builds one `InstDB::InstInfo` initializer; `id` and `opcode1` are accepted but not stored, `opcode0` keeps its low byte.
+#define INST(id, encoding, opcode0, opcode1, mainOpcodeIndex, altOpcodeIndex, nameDataIndex, commonInfoIndex, additionalInfoIndex) { \
+  uint32_t(NAME_DATA_INDEX(nameDataIndex)), \
+  uint32_t(commonInfoIndex),                \
+  uint32_t(additionalInfoIndex),            \
+  uint8_t(InstDB::kEncoding##encoding),     \
+  uint8_t((opcode0) & 0xFFu),               \
+  uint8_t(mainOpcodeIndex),                 \
+  uint8_t(altOpcodeIndex)                   \
+}
+
+const InstDB::InstInfo InstDB::_instInfoTable[] = {
+  /*--------------------+--------------------+------------------+--------+------------------+--------+----+----+------+----+----+
+  |    Instruction      |    Instruction     |    Main Opcode   |  EVEX  |Alternative Opcode|  EVEX  |Op0X|Op1X|Name-X|IdxA|IdxB|
+  |     Id & Name       |      Encoding      |  (pp+mmm|op/o|L|w|W|N|TT.)|--(pp+mmm|op/o|L|w|W|N|TT.)|     (auto-generated)     |
+  +---------------------+--------------------+---------+----+-+-+-+-+----+---------+----+-+-+-+-+----+----+----+------+----+---*/
+  // ${InstInfo:Begin}
+  INST(None             , None               , 0                         , 0                         , 0  , 0  , 0    , 0  , 0  ), // #0
+  INST(Aaa              , X86Op_xAX          , O(000000,37,_,_,_,_,_,_  ), 0                         , 0  , 0  , 1    , 1  , 1  ), // #1
+  INST(Aad              , X86I_xAX           , O(000000,D5,_,_,_,_,_,_  ), 0                         , 0  , 0  , 5    , 2  , 1  ), // #2
+  INST(Aam              , X86I_xAX           , O(000000,D4,_,_,_,_,_,_  ), 0                         , 0  , 0  , 9    , 2  , 1  ), // #3
+  INST(Aas              , X86Op_xAX          , O(000000,3F,_,_,_,_,_,_  ), 0                         , 0  , 0  , 13   , 1  , 1  ), // #4
+  INST(Adc              , X86Arith           , O(000000,10,2,_,x,_,_,_  ), 0                         , 1  , 0  , 17   , 3  , 2  ), // #5
+  INST(Adcx             , X86Rm              , O(660F38,F6,_,_,x,_,_,_  ), 0                         , 2  , 0  , 21   , 4  , 3  ), // #6
+  INST(Add              , X86Arith           , O(000000,00,0,_,x,_,_,_  ), 0                         , 0  , 0  , 3146 , 3  , 1  ), // #7
+  INST(Addpd            , ExtRm              , O(660F00,58,_,_,_,_,_,_  ), 0                         , 3  , 0  , 5788 , 5  , 4  ), // #8
+  INST(Addps            , ExtRm              , O(000F00,58,_,_,_,_,_,_  ), 0                         , 4  , 0  , 5800 , 5  , 5  ), // #9
+  INST(Addsd            , ExtRm              , O(F20F00,58,_,_,_,_,_,_  ), 0                         , 5  , 0  , 6118 , 6  , 4  ), // #10
+  INST(Addss            , ExtRm              , O(F30F00,58,_,_,_,_,_,_  ), 0                         , 6  , 0  , 3283 , 7  , 5  ), // #11
+  INST(Addsubpd         , ExtRm              , O(660F00,D0,_,_,_,_,_,_  ), 0                         , 3  , 0  , 5410 , 5  , 6  ), // #12
+  INST(Addsubps         , ExtRm              , O(F20F00,D0,_,_,_,_,_,_  ), 0                         , 5  , 0  , 5422 , 5  , 6  ), // #13
+  INST(Adox             , X86Rm              , O(F30F38,F6,_,_,x,_,_,_  ), 0                         , 7  , 0  , 26   , 4  , 7  ), // #14
+  INST(Aesdec           , ExtRm              , O(660F38,DE,_,_,_,_,_,_  ), 0                         , 2  , 0  , 3352 , 5  , 8  ), // #15
+  INST(Aesdeclast       , ExtRm              , O(660F38,DF,_,_,_,_,_,_  ), 0                         , 2  , 0  , 3360 , 5  , 8  ), // #16
+  INST(Aesenc           , ExtRm              , O(660F38,DC,_,_,_,_,_,_  ), 0                         , 2  , 0  , 3372 , 5  , 8  ), // #17
+  INST(Aesenclast       , ExtRm              , O(660F38,DD,_,_,_,_,_,_  ), 0                         , 2  , 0  , 3380 , 5  , 8  ), // #18
+  INST(Aesimc           , ExtRm              , O(660F38,DB,_,_,_,_,_,_  ), 0                         , 2  , 0  , 3392 , 5  , 8  ), // #19
+  INST(Aeskeygenassist  , ExtRmi             , O(660F3A,DF,_,_,_,_,_,_  ), 0                         , 8  , 0  , 3400 , 8  , 8  ), // #20
+  INST(And              , X86Arith           , O(000000,20,4,_,x,_,_,_  ), 0                         , 9  , 0  , 2525 , 9  , 1  ), // #21
+  INST(Andn             , VexRvm_Wx          , V(000F38,F2,_,0,x,_,_,_  ), 0                         , 10 , 0  , 7789 , 10 , 9  ), // #22
+  INST(Andnpd           , ExtRm              , O(660F00,55,_,_,_,_,_,_  ), 0                         , 3  , 0  , 3433 , 5  , 4  ), // #23
+  INST(Andnps           , ExtRm              , O(000F00,55,_,_,_,_,_,_  ), 0                         , 4  , 0  , 3441 , 5  , 5  ), // #24
+  INST(Andpd            , ExtRm              , O(660F00,54,_,_,_,_,_,_  ), 0                         , 3  , 0  , 4745 , 11 , 4  ), // #25
+  INST(Andps            , ExtRm              , O(000F00,54,_,_,_,_,_,_  ), 0                         , 4  , 0  , 4755 , 11 , 5  ), // #26
+  INST(Arpl             , X86Mr_NoSize       , O(000000,63,_,_,_,_,_,_  ), 0                         , 0  , 0  , 31   , 12 , 10 ), // #27
+  INST(Bextr            , VexRmv_Wx          , V(000F38,F7,_,0,x,_,_,_  ), 0                         , 10 , 0  , 36   , 13 , 9  ), // #28
+  INST(Blcfill          , VexVm_Wx           , V(XOP_M9,01,1,0,x,_,_,_  ), 0                         , 11 , 0  , 42   , 14 , 11 ), // #29
+  INST(Blci             , VexVm_Wx           , V(XOP_M9,02,6,0,x,_,_,_  ), 0                         , 12 , 0  , 50   , 14 , 11 ), // #30
+  INST(Blcic            , VexVm_Wx           , V(XOP_M9,01,5,0,x,_,_,_  ), 0                         , 13 , 0  , 55   , 14 , 11 ), // #31
+  INST(Blcmsk           , VexVm_Wx           , V(XOP_M9,02,1,0,x,_,_,_  ), 0                         , 11 , 0  , 61   , 14 , 11 ), // #32
+  INST(Blcs             , VexVm_Wx           , V(XOP_M9,01,3,0,x,_,_,_  ), 0                         , 14 , 0  , 68   , 14 , 11 ), // #33
+  INST(Blendpd          , ExtRmi             , O(660F3A,0D,_,_,_,_,_,_  ), 0                         , 8  , 0  , 3483 , 8  , 12 ), // #34
+  INST(Blendps          , ExtRmi             , O(660F3A,0C,_,_,_,_,_,_  ), 0                         , 8  , 0  , 3492 , 8  , 12 ), // #35
+  INST(Blendvpd         , ExtRm_XMM0         , O(660F38,15,_,_,_,_,_,_  ), 0                         , 2  , 0  , 3501 , 15 , 12 ), // #36
+  INST(Blendvps         , ExtRm_XMM0         , O(660F38,14,_,_,_,_,_,_  ), 0                         , 2  , 0  , 3511 , 15 , 12 ), // #37
+  INST(Blsfill          , VexVm_Wx           , V(XOP_M9,01,2,0,x,_,_,_  ), 0                         , 15 , 0  , 73   , 14 , 11 ), // #38
+  INST(Blsi             , VexVm_Wx           , V(000F38,F3,3,0,x,_,_,_  ), 0                         , 16 , 0  , 81   , 14 , 9  ), // #39
+  INST(Blsic            , VexVm_Wx           , V(XOP_M9,01,6,0,x,_,_,_  ), 0                         , 12 , 0  , 86   , 14 , 11 ), // #40
+  INST(Blsmsk           , VexVm_Wx           , V(000F38,F3,2,0,x,_,_,_  ), 0                         , 17 , 0  , 92   , 14 , 9  ), // #41
+  INST(Blsr             , VexVm_Wx           , V(000F38,F3,1,0,x,_,_,_  ), 0                         , 18 , 0  , 99   , 14 , 9  ), // #42
+  INST(Bndcl            , X86Rm              , O(F30F00,1A,_,_,_,_,_,_  ), 0                         , 6  , 0  , 104  , 16 , 13 ), // #43
+  INST(Bndcn            , X86Rm              , O(F20F00,1B,_,_,_,_,_,_  ), 0                         , 5  , 0  , 110  , 16 , 13 ), // #44
+  INST(Bndcu            , X86Rm              , O(F20F00,1A,_,_,_,_,_,_  ), 0                         , 5  , 0  , 116  , 16 , 13 ), // #45
+  INST(Bndldx           , X86Rm              , O(000F00,1A,_,_,_,_,_,_  ), 0                         , 4  , 0  , 122  , 17 , 13 ), // #46
+  INST(Bndmk            , X86Rm              , O(F30F00,1B,_,_,_,_,_,_  ), 0                         , 6  , 0  , 129  , 18 , 13 ), // #47
+  INST(Bndmov           , X86Bndmov          , O(660F00,1A,_,_,_,_,_,_  ), O(660F00,1B,_,_,_,_,_,_  ), 3  , 1  , 135  , 19 , 13 ), // #48
+  INST(Bndstx           , X86Mr              , O(000F00,1B,_,_,_,_,_,_  ), 0                         , 4  , 0  , 142  , 20 , 13 ), // #49
+  INST(Bound            , X86Rm              , O(000000,62,_,_,_,_,_,_  ), 0                         , 0  , 0  , 149  , 21 , 0  ), // #50
+  INST(Bsf              , X86Rm              , O(000F00,BC,_,_,x,_,_,_  ), 0                         , 4  , 0  , 155  , 22 , 1  ), // #51
+  INST(Bsr              , X86Rm              , O(000F00,BD,_,_,x,_,_,_  ), 0                         , 4  , 0  , 159  , 22 , 1  ), // #52
+  INST(Bswap            , X86Bswap           , O(000F00,C8,_,_,x,_,_,_  ), 0                         , 4  , 0  , 163  , 23 , 0  ), // #53
+  INST(Bt               , X86Bt              , O(000F00,A3,_,_,x,_,_,_  ), O(000F00,BA,4,_,x,_,_,_  ), 4  , 2  , 169  , 24 , 14 ), // #54
+  INST(Btc              , X86Bt              , O(000F00,BB,_,_,x,_,_,_  ), O(000F00,BA,7,_,x,_,_,_  ), 4  , 3  , 172  , 25 , 14 ), // #55
+  INST(Btr              , X86Bt              , O(000F00,B3,_,_,x,_,_,_  ), O(000F00,BA,6,_,x,_,_,_  ), 4  , 4  , 176  , 25 , 14 ), // #56
+  INST(Bts              , X86Bt              , O(000F00,AB,_,_,x,_,_,_  ), O(000F00,BA,5,_,x,_,_,_  ), 4  , 5  , 180  , 25 , 14 ), // #57
+  INST(Bzhi             , VexRmv_Wx          , V(000F38,F5,_,0,x,_,_,_  ), 0                         , 10 , 0  , 184  , 13 , 15 ), // #58
+  INST(Call             , X86Call            , O(000000,FF,2,_,_,_,_,_  ), 0                         , 1  , 0  , 3038 , 26 , 1  ), // #59
+  INST(Cbw              , X86Op_xAX          , O(660000,98,_,_,_,_,_,_  ), 0                         , 19 , 0  , 189  , 27 , 0  ), // #60
+  INST(Cdq              , X86Op_xDX_xAX      , O(000000,99,_,_,_,_,_,_  ), 0                         , 0  , 0  , 193  , 28 , 0  ), // #61
+  INST(Cdqe             , X86Op_xAX          , O(000000,98,_,_,1,_,_,_  ), 0                         , 20 , 0  , 197  , 29 , 0  ), // #62
+  INST(Clac             , X86Op              , O(000F01,CA,_,_,_,_,_,_  ), 0                         , 21 , 0  , 202  , 30 , 16 ), // #63
+  INST(Clc              , X86Op              , O(000000,F8,_,_,_,_,_,_  ), 0                         , 0  , 0  , 207  , 30 , 17 ), // #64
+  INST(Cld              , X86Op              , O(000000,FC,_,_,_,_,_,_  ), 0                         , 0  , 0  , 211  , 30 , 18 ), // #65
+  INST(Cldemote         , X86M_Only          , O(000F00,1C,0,_,_,_,_,_  ), 0                         , 4  , 0  , 215  , 31 , 19 ), // #66
+  INST(Clflush          , X86M_Only          , O(000F00,AE,7,_,_,_,_,_  ), 0                         , 22 , 0  , 224  , 31 , 20 ), // #67
+  INST(Clflushopt       , X86M_Only          , O(660F00,AE,7,_,_,_,_,_  ), 0                         , 23 , 0  , 232  , 31 , 21 ), // #68
+  INST(Clgi             , X86Op              , O(000F01,DD,_,_,_,_,_,_  ), 0                         , 21 , 0  , 243  , 30 , 22 ), // #69
+  INST(Cli              , X86Op              , O(000000,FA,_,_,_,_,_,_  ), 0                         , 0  , 0  , 248  , 30 , 23 ), // #70
+  INST(Clrssbsy         , X86M_Only          , O(F30F00,AE,6,_,_,_,_,_  ), 0                         , 24 , 0  , 252  , 32 , 24 ), // #71
+  INST(Clts             , X86Op              , O(000F00,06,_,_,_,_,_,_  ), 0                         , 4  , 0  , 261  , 30 , 0  ), // #72
+  INST(Clui             , X86Op              , O(F30F01,EE,_,_,_,_,_,_  ), 0                         , 25 , 0  , 266  , 33 , 25 ), // #73
+  INST(Clwb             , X86M_Only          , O(660F00,AE,6,_,_,_,_,_  ), 0                         , 26 , 0  , 271  , 31 , 26 ), // #74
+  INST(Clzero           , X86Op_MemZAX       , O(000F01,FC,_,_,_,_,_,_  ), 0                         , 21 , 0  , 276  , 34 , 27 ), // #75
+  INST(Cmc              , X86Op              , O(000000,F5,_,_,_,_,_,_  ), 0                         , 0  , 0  , 283  , 30 , 28 ), // #76
+  INST(Cmova            , X86Rm              , O(000F00,47,_,_,x,_,_,_  ), 0                         , 4  , 0  , 287  , 22 , 29 ), // #77
+  INST(Cmovae           , X86Rm              , O(000F00,43,_,_,x,_,_,_  ), 0                         , 4  , 0  , 293  , 22 , 30 ), // #78
+  INST(Cmovb            , X86Rm              , O(000F00,42,_,_,x,_,_,_  ), 0                         , 4  , 0  , 648  , 22 , 30 ), // #79
+  INST(Cmovbe           , X86Rm              , O(000F00,46,_,_,x,_,_,_  ), 0                         , 4  , 0  , 655  , 22 , 29 ), // #80
+  INST(Cmovc            , X86Rm              , O(000F00,42,_,_,x,_,_,_  ), 0                         , 4  , 0  , 300  , 22 , 30 ), // #81
+  INST(Cmove            , X86Rm              , O(000F00,44,_,_,x,_,_,_  ), 0                         , 4  , 0  , 663  , 22 , 31 ), // #82
+  INST(Cmovg            , X86Rm              , O(000F00,4F,_,_,x,_,_,_  ), 0                         , 4  , 0  , 306  , 22 , 32 ), // #83
+  INST(Cmovge           , X86Rm              , O(000F00,4D,_,_,x,_,_,_  ), 0                         , 4  , 0  , 312  , 22 , 33 ), // #84
+  INST(Cmovl            , X86Rm              , O(000F00,4C,_,_,x,_,_,_  ), 0                         , 4  , 0  , 319  , 22 , 33 ), // #85
+  INST(Cmovle           , X86Rm              , O(000F00,4E,_,_,x,_,_,_  ), 0                         , 4  , 0  , 325  , 22 , 32 ), // #86
+  INST(Cmovna           , X86Rm              , O(000F00,46,_,_,x,_,_,_  ), 0                         , 4  , 0  , 332  , 22 , 29 ), // #87
+  INST(Cmovnae          , X86Rm              , O(000F00,42,_,_,x,_,_,_  ), 0                         , 4  , 0  , 339  , 22 , 30 ), // #88
+  INST(Cmovnb           , X86Rm              , O(000F00,43,_,_,x,_,_,_  ), 0                         , 4  , 0  , 670  , 22 , 30 ), // #89
+  INST(Cmovnbe          , X86Rm              , O(000F00,47,_,_,x,_,_,_  ), 0                         , 4  , 0  , 678  , 22 , 29 ), // #90
+  INST(Cmovnc           , X86Rm              , O(000F00,43,_,_,x,_,_,_  ), 0                         , 4  , 0  , 347  , 22 , 30 ), // #91
+  INST(Cmovne           , X86Rm              , O(000F00,45,_,_,x,_,_,_  ), 0                         , 4  , 0  , 687  , 22 , 31 ), // #92
+  INST(Cmovng           , X86Rm              , O(000F00,4E,_,_,x,_,_,_  ), 0                         , 4  , 0  , 354  , 22 , 32 ), // #93
+  INST(Cmovnge          , X86Rm              , O(000F00,4C,_,_,x,_,_,_  ), 0                         , 4  , 0  , 361  , 22 , 33 ), // #94
+  INST(Cmovnl           , X86Rm              , O(000F00,4D,_,_,x,_,_,_  ), 0                         , 4  , 0  , 369  , 22 , 33 ), // #95
+  INST(Cmovnle          , X86Rm              , O(000F00,4F,_,_,x,_,_,_  ), 0                         , 4  , 0  , 376  , 22 , 32 ), // #96
+  INST(Cmovno           , X86Rm              , O(000F00,41,_,_,x,_,_,_  ), 0                         , 4  , 0  , 384  , 22 , 34 ), // #97
+  INST(Cmovnp           , X86Rm              , O(000F00,4B,_,_,x,_,_,_  ), 0                         , 4  , 0  , 391  , 22 , 35 ), // #98
+  INST(Cmovns           , X86Rm              , O(000F00,49,_,_,x,_,_,_  ), 0                         , 4  , 0  , 398  , 22 , 36 ), // #99
+  INST(Cmovnz           , X86Rm              , O(000F00,45,_,_,x,_,_,_  ), 0                         , 4  , 0  , 405  , 22 , 31 ), // #100
+  INST(Cmovo            , X86Rm              , O(000F00,40,_,_,x,_,_,_  ), 0                         , 4  , 0  , 412  , 22 , 34 ), // #101
+  INST(Cmovp            , X86Rm              , O(000F00,4A,_,_,x,_,_,_  ), 0                         , 4  , 0  , 418  , 22 , 35 ), // #102
+  INST(Cmovpe           , X86Rm              , O(000F00,4A,_,_,x,_,_,_  ), 0                         , 4  , 0  , 424  , 22 , 35 ), // #103
+  INST(Cmovpo           , X86Rm              , O(000F00,4B,_,_,x,_,_,_  ), 0                         , 4  , 0  , 431  , 22 , 35 ), // #104
+  INST(Cmovs            , X86Rm              , O(000F00,48,_,_,x,_,_,_  ), 0                         , 4  , 0  , 438  , 22 , 36 ), // #105
+  INST(Cmovz            , X86Rm              , O(000F00,44,_,_,x,_,_,_  ), 0                         , 4  , 0  , 444  , 22 , 31 ), // #106
+  INST(Cmp              , X86Arith           , O(000000,38,7,_,x,_,_,_  ), 0                         , 27 , 0  , 450  , 35 , 1  ), // #107
+  INST(Cmppd            , ExtRmi             , O(660F00,C2,_,_,_,_,_,_  ), 0                         , 3  , 0  , 3737 , 8  , 4  ), // #108
+  INST(Cmpps            , ExtRmi             , O(000F00,C2,_,_,_,_,_,_  ), 0                         , 4  , 0  , 3751 , 8  , 5  ), // #109
+  INST(Cmps             , X86StrMm           , O(000000,A6,_,_,_,_,_,_  ), 0                         , 0  , 0  , 454  , 36 , 37 ), // #110
+  INST(Cmpsd            , ExtRmi             , O(F20F00,C2,_,_,_,_,_,_  ), 0                         , 5  , 0  , 3758 , 37 , 4  ), // #111
+  INST(Cmpss            , ExtRmi             , O(F30F00,C2,_,_,_,_,_,_  ), 0                         , 6  , 0  , 3772 , 38 , 5  ), // #112
+  INST(Cmpxchg          , X86Cmpxchg         , O(000F00,B0,_,_,x,_,_,_  ), 0                         , 4  , 0  , 459  , 39 , 38 ), // #113
+  INST(Cmpxchg16b       , X86Cmpxchg8b_16b   , O(000F00,C7,1,_,1,_,_,_  ), 0                         , 28 , 0  , 467  , 40 , 39 ), // #114
+  INST(Cmpxchg8b        , X86Cmpxchg8b_16b   , O(000F00,C7,1,_,_,_,_,_  ), 0                         , 29 , 0  , 478  , 41 , 40 ), // #115
+  INST(Comisd           , ExtRm              , O(660F00,2F,_,_,_,_,_,_  ), 0                         , 3  , 0  , 11391, 6  , 41 ), // #116
+  INST(Comiss           , ExtRm              , O(000F00,2F,_,_,_,_,_,_  ), 0                         , 4  , 0  , 11409, 7  , 42 ), // #117
+  INST(Cpuid            , X86Op              , O(000F00,A2,_,_,_,_,_,_  ), 0                         , 4  , 0  , 488  , 42 , 43 ), // #118
+  INST(Cqo              , X86Op_xDX_xAX      , O(000000,99,_,_,1,_,_,_  ), 0                         , 20 , 0  , 494  , 43 , 0  ), // #119
+  INST(Crc32            , X86Crc             , O(F20F38,F0,_,_,x,_,_,_  ), 0                         , 30 , 0  , 498  , 44 , 44 ), // #120
+  INST(Cvtdq2pd         , ExtRm              , O(F30F00,E6,_,_,_,_,_,_  ), 0                         , 6  , 0  , 3827 , 6  , 4  ), // #121
+  INST(Cvtdq2ps         , ExtRm              , O(000F00,5B,_,_,_,_,_,_  ), 0                         , 4  , 0  , 3847 , 5  , 4  ), // #122
+  INST(Cvtpd2dq         , ExtRm              , O(F20F00,E6,_,_,_,_,_,_  ), 0                         , 5  , 0  , 3886 , 5  , 4  ), // #123
+  INST(Cvtpd2pi         , ExtRm              , O(660F00,2D,_,_,_,_,_,_  ), 0                         , 3  , 0  , 504  , 45 , 4  ), // #124
+  INST(Cvtpd2ps         , ExtRm              , O(660F00,5A,_,_,_,_,_,_  ), 0                         , 3  , 0  , 3906 , 5  , 4  ), // #125
+  INST(Cvtpi2pd         , ExtRm              , O(660F00,2A,_,_,_,_,_,_  ), 0                         , 3  , 0  , 513  , 46 , 4  ), // #126
+  INST(Cvtpi2ps         , ExtRm              , O(000F00,2A,_,_,_,_,_,_  ), 0                         , 4  , 0  , 522  , 46 , 5  ), // #127
+  INST(Cvtps2dq         , ExtRm              , O(660F00,5B,_,_,_,_,_,_  ), 0                         , 3  , 0  , 4040 , 5  , 4  ), // #128
+  INST(Cvtps2pd         , ExtRm              , O(000F00,5A,_,_,_,_,_,_  ), 0                         , 4  , 0  , 4050 , 6  , 4  ), // #129
+  INST(Cvtps2pi         , ExtRm              , O(000F00,2D,_,_,_,_,_,_  ), 0                         , 4  , 0  , 531  , 47 , 5  ), // #130
+  INST(Cvtsd2si         , ExtRm_Wx_GpqOnly   , O(F20F00,2D,_,_,x,_,_,_  ), 0                         , 5  , 0  , 4153 , 48 , 4  ), // #131
+  INST(Cvtsd2ss         , ExtRm              , O(F20F00,5A,_,_,_,_,_,_  ), 0                         , 5  , 0  , 4163 , 6  , 4  ), // #132
+  INST(Cvtsi2sd         , ExtRm_Wx           , O(F20F00,2A,_,_,x,_,_,_  ), 0                         , 5  , 0  , 4225 , 49 , 4  ), // #133
+  INST(Cvtsi2ss         , ExtRm_Wx           , O(F30F00,2A,_,_,x,_,_,_  ), 0                         , 6  , 0  , 4245 , 49 , 5  ), // #134
+  INST(Cvtss2sd         , ExtRm              , O(F30F00,5A,_,_,_,_,_,_  ), 0                         , 6  , 0  , 4255 , 7  , 4  ), // #135
+  INST(Cvtss2si         , ExtRm_Wx_GpqOnly   , O(F30F00,2D,_,_,x,_,_,_  ), 0                         , 6  , 0  , 4275 , 50 , 5  ), // #136
+  INST(Cvttpd2dq        , ExtRm              , O(660F00,E6,_,_,_,_,_,_  ), 0                         , 3  , 0  , 4296 , 5  , 4  ), // #137
+  INST(Cvttpd2pi        , ExtRm              , O(660F00,2C,_,_,_,_,_,_  ), 0                         , 3  , 0  , 540  , 45 , 4  ), // #138
+  INST(Cvttps2dq        , ExtRm              , O(F30F00,5B,_,_,_,_,_,_  ), 0                         , 6  , 0  , 4409 , 5  , 4  ), // #139
+  INST(Cvttps2pi        , ExtRm              , O(000F00,2C,_,_,_,_,_,_  ), 0                         , 4  , 0  , 550  , 47 , 5  ), // #140
+  INST(Cvttsd2si        , ExtRm_Wx_GpqOnly   , O(F20F00,2C,_,_,x,_,_,_  ), 0                         , 5  , 0  , 4455 , 48 , 4  ), // #141
+  INST(Cvttss2si        , ExtRm_Wx_GpqOnly   , O(F30F00,2C,_,_,x,_,_,_  ), 0                         , 6  , 0  , 4501 , 50 , 5  ), // #142
+  INST(Cwd              , X86Op_xDX_xAX      , O(660000,99,_,_,_,_,_,_  ), 0                         , 19 , 0  , 560  , 51 , 0  ), // #143
+  INST(Cwde             , X86Op_xAX          , O(000000,98,_,_,_,_,_,_  ), 0                         , 0  , 0  , 564  , 52 , 0  ), // #144
+  INST(Daa              , X86Op              , O(000000,27,_,_,_,_,_,_  ), 0                         , 0  , 0  , 569  , 1  , 1  ), // #145
+  INST(Das              , X86Op              , O(000000,2F,_,_,_,_,_,_  ), 0                         , 0  , 0  , 573  , 1  , 1  ), // #146
+  INST(Dec              , X86IncDec          , O(000000,FE,1,_,x,_,_,_  ), O(000000,48,_,_,x,_,_,_  ), 31 , 6  , 3355 , 53 , 45 ), // #147
+  INST(Div              , X86M_GPB_MulDiv    , O(000000,F6,6,_,x,_,_,_  ), 0                         , 32 , 0  , 810  , 54 , 1  ), // #148
+  INST(Divpd            , ExtRm              , O(660F00,5E,_,_,_,_,_,_  ), 0                         , 3  , 0  , 4652 , 5  , 4  ), // #149
+  INST(Divps            , ExtRm              , O(000F00,5E,_,_,_,_,_,_  ), 0                         , 4  , 0  , 4666 , 5  , 5  ), // #150
+  INST(Divsd            , ExtRm              , O(F20F00,5E,_,_,_,_,_,_  ), 0                         , 5  , 0  , 4673 , 6  , 4  ), // #151
+  INST(Divss            , ExtRm              , O(F30F00,5E,_,_,_,_,_,_  ), 0                         , 6  , 0  , 4687 , 7  , 5  ), // #152
+  INST(Dppd             , ExtRmi             , O(660F3A,41,_,_,_,_,_,_  ), 0                         , 8  , 0  , 4704 , 8  , 12 ), // #153
+  INST(Dpps             , ExtRmi             , O(660F3A,40,_,_,_,_,_,_  ), 0                         , 8  , 0  , 4710 , 8  , 12 ), // #154
+  INST(Emms             , X86Op              , O(000F00,77,_,_,_,_,_,_  ), 0                         , 4  , 0  , 778  , 55 , 46 ), // #155
+  INST(Endbr32          , X86Op_Mod11RM      , O(F30F00,1E,7,_,_,_,_,3  ), 0                         , 33 , 0  , 577  , 30 , 47 ), // #156
+  INST(Endbr64          , X86Op_Mod11RM      , O(F30F00,1E,7,_,_,_,_,2  ), 0                         , 34 , 0  , 585  , 30 , 47 ), // #157
+  INST(Enqcmd           , X86EnqcmdMovdir64b , O(F20F38,F8,_,_,_,_,_,_  ), 0                         , 30 , 0  , 593  , 56 , 48 ), // #158
+  INST(Enqcmds          , X86EnqcmdMovdir64b , O(F30F38,F8,_,_,_,_,_,_  ), 0                         , 7  , 0  , 600  , 56 , 48 ), // #159
+  INST(Enter            , X86Enter           , O(000000,C8,_,_,_,_,_,_  ), 0                         , 0  , 0  , 3046 , 57 , 0  ), // #160
+  INST(Extractps        , ExtExtract         , O(660F3A,17,_,_,_,_,_,_  ), 0                         , 8  , 0  , 4900 , 58 , 12 ), // #161
+  INST(Extrq            , ExtExtrq           , O(660F00,79,_,_,_,_,_,_  ), O(660F00,78,0,_,_,_,_,_  ), 3  , 7  , 8625 , 59 , 49 ), // #162
+  INST(F2xm1            , FpuOp              , O_FPU(00,D9F0,_)          , 0                         , 35 , 0  , 608  , 30 , 0  ), // #163
+  INST(Fabs             , FpuOp              , O_FPU(00,D9E1,_)          , 0                         , 35 , 0  , 614  , 30 , 0  ), // #164
+  INST(Fadd             , FpuArith           , O_FPU(00,C0C0,0)          , 0                         , 36 , 0  , 2121 , 60 , 0  ), // #165
+  INST(Faddp            , FpuRDef            , O_FPU(00,DEC0,_)          , 0                         , 37 , 0  , 619  , 61 , 0  ), // #166
+  INST(Fbld             , X86M_Only          , O_FPU(00,00DF,4)          , 0                         , 38 , 0  , 625  , 62 , 0  ), // #167
+  INST(Fbstp            , X86M_Only          , O_FPU(00,00DF,6)          , 0                         , 39 , 0  , 630  , 62 , 0  ), // #168
+  INST(Fchs             , FpuOp              , O_FPU(00,D9E0,_)          , 0                         , 35 , 0  , 636  , 30 , 0  ), // #169
+  INST(Fclex            , FpuOp              , O_FPU(9B,DBE2,_)          , 0                         , 40 , 0  , 641  , 30 , 0  ), // #170
+  INST(Fcmovb           , FpuR               , O_FPU(00,DAC0,_)          , 0                         , 41 , 0  , 647  , 63 , 30 ), // #171
+  INST(Fcmovbe          , FpuR               , O_FPU(00,DAD0,_)          , 0                         , 41 , 0  , 654  , 63 , 29 ), // #172
+  INST(Fcmove           , FpuR               , O_FPU(00,DAC8,_)          , 0                         , 41 , 0  , 662  , 63 , 31 ), // #173
+  INST(Fcmovnb          , FpuR               , O_FPU(00,DBC0,_)          , 0                         , 42 , 0  , 669  , 63 , 30 ), // #174
+  INST(Fcmovnbe         , FpuR               , O_FPU(00,DBD0,_)          , 0                         , 42 , 0  , 677  , 63 , 29 ), // #175
+  INST(Fcmovne          , FpuR               , O_FPU(00,DBC8,_)          , 0                         , 42 , 0  , 686  , 63 , 31 ), // #176
+  INST(Fcmovnu          , FpuR               , O_FPU(00,DBD8,_)          , 0                         , 42 , 0  , 694  , 63 , 35 ), // #177
+  INST(Fcmovu           , FpuR               , O_FPU(00,DAD8,_)          , 0                         , 41 , 0  , 702  , 63 , 35 ), // #178
+  INST(Fcom             , FpuCom             , O_FPU(00,D0D0,2)          , 0                         , 43 , 0  , 709  , 64 , 0  ), // #179
+  INST(Fcomi            , FpuR               , O_FPU(00,DBF0,_)          , 0                         , 42 , 0  , 714  , 63 , 50 ), // #180
+  INST(Fcomip           , FpuR               , O_FPU(00,DFF0,_)          , 0                         , 44 , 0  , 720  , 63 , 50 ), // #181
+  INST(Fcomp            , FpuCom             , O_FPU(00,D8D8,3)          , 0                         , 45 , 0  , 727  , 64 , 0  ), // #182
+  INST(Fcompp           , FpuOp              , O_FPU(00,DED9,_)          , 0                         , 37 , 0  , 733  , 30 , 0  ), // #183
+  INST(Fcos             , FpuOp              , O_FPU(00,D9FF,_)          , 0                         , 35 , 0  , 740  , 30 , 0  ), // #184
+  INST(Fdecstp          , FpuOp              , O_FPU(00,D9F6,_)          , 0                         , 35 , 0  , 745  , 30 , 0  ), // #185
+  INST(Fdiv             , FpuArith           , O_FPU(00,F0F8,6)          , 0                         , 46 , 0  , 753  , 60 , 0  ), // #186
+  INST(Fdivp            , FpuRDef            , O_FPU(00,DEF8,_)          , 0                         , 37 , 0  , 758  , 61 , 0  ), // #187
+  INST(Fdivr            , FpuArith           , O_FPU(00,F8F0,7)          , 0                         , 47 , 0  , 764  , 60 , 0  ), // #188
+  INST(Fdivrp           , FpuRDef            , O_FPU(00,DEF0,_)          , 0                         , 37 , 0  , 770  , 61 , 0  ), // #189
+  INST(Femms            , X86Op              , O(000F00,0E,_,_,_,_,_,_  ), 0                         , 4  , 0  , 777  , 30 , 51 ), // #190
+  INST(Ffree            , FpuR               , O_FPU(00,DDC0,_)          , 0                         , 48 , 0  , 783  , 63 , 0  ), // #191
+  INST(Fiadd            , FpuM               , O_FPU(00,00DA,0)          , 0                         , 49 , 0  , 789  , 65 , 0  ), // #192
+  INST(Ficom            , FpuM               , O_FPU(00,00DA,2)          , 0                         , 50 , 0  , 795  , 65 , 0  ), // #193
+  INST(Ficomp           , FpuM               , O_FPU(00,00DA,3)          , 0                         , 51 , 0  , 801  , 65 , 0  ), // #194
+  INST(Fidiv            , FpuM               , O_FPU(00,00DA,6)          , 0                         , 39 , 0  , 808  , 65 , 0  ), // #195
+  INST(Fidivr           , FpuM               , O_FPU(00,00DA,7)          , 0                         , 52 , 0  , 814  , 65 , 0  ), // #196
+  INST(Fild             , FpuM               , O_FPU(00,00DB,0)          , O_FPU(00,00DF,5)          , 49 , 8  , 821  , 66 , 0  ), // #197
+  INST(Fimul            , FpuM               , O_FPU(00,00DA,1)          , 0                         , 53 , 0  , 826  , 65 , 0  ), // #198
+  INST(Fincstp          , FpuOp              , O_FPU(00,D9F7,_)          , 0                         , 35 , 0  , 832  , 30 , 0  ), // #199
+  INST(Finit            , FpuOp              , O_FPU(9B,DBE3,_)          , 0                         , 40 , 0  , 840  , 30 , 0  ), // #200
+  INST(Fist             , FpuM               , O_FPU(00,00DB,2)          , 0                         , 50 , 0  , 846  , 65 , 0  ), // #201
+  INST(Fistp            , FpuM               , O_FPU(00,00DB,3)          , O_FPU(00,00DF,7)          , 51 , 9  , 851  , 66 , 0  ), // #202
+  INST(Fisttp           , FpuM               , O_FPU(00,00DB,1)          , O_FPU(00,00DD,1)          , 53 , 10 , 857  , 66 , 6  ), // #203
+  INST(Fisub            , FpuM               , O_FPU(00,00DA,4)          , 0                         , 38 , 0  , 864  , 65 , 0  ), // #204
+  INST(Fisubr           , FpuM               , O_FPU(00,00DA,5)          , 0                         , 54 , 0  , 870  , 65 , 0  ), // #205
+  INST(Fld              , FpuFldFst          , O_FPU(00,00D9,0)          , O_FPU(00,00DB,5)          , 49 , 11 , 877  , 67 , 0  ), // #206
+  INST(Fld1             , FpuOp              , O_FPU(00,D9E8,_)          , 0                         , 35 , 0  , 881  , 30 , 0  ), // #207
+  INST(Fldcw            , X86M_Only          , O_FPU(00,00D9,5)          , 0                         , 54 , 0  , 886  , 68 , 0  ), // #208
+  INST(Fldenv           , X86M_Only          , O_FPU(00,00D9,4)          , 0                         , 38 , 0  , 892  , 69 , 0  ), // #209
+  INST(Fldl2e           , FpuOp              , O_FPU(00,D9EA,_)          , 0                         , 35 , 0  , 899  , 30 , 0  ), // #210
+  INST(Fldl2t           , FpuOp              , O_FPU(00,D9E9,_)          , 0                         , 35 , 0  , 906  , 30 , 0  ), // #211
+  INST(Fldlg2           , FpuOp              , O_FPU(00,D9EC,_)          , 0                         , 35 , 0  , 913  , 30 , 0  ), // #212
+  INST(Fldln2           , FpuOp              , O_FPU(00,D9ED,_)          , 0                         , 35 , 0  , 920  , 30 , 0  ), // #213
+  INST(Fldpi            , FpuOp              , O_FPU(00,D9EB,_)          , 0                         , 35 , 0  , 927  , 30 , 0  ), // #214
+  INST(Fldz             , FpuOp              , O_FPU(00,D9EE,_)          , 0                         , 35 , 0  , 933  , 30 , 0  ), // #215
+  INST(Fmul             , FpuArith           , O_FPU(00,C8C8,1)          , 0                         , 55 , 0  , 2163 , 60 , 0  ), // #216
+  INST(Fmulp            , FpuRDef            , O_FPU(00,DEC8,_)          , 0                         , 37 , 0  , 938  , 61 , 0  ), // #217
+  INST(Fnclex           , FpuOp              , O_FPU(00,DBE2,_)          , 0                         , 42 , 0  , 944  , 30 , 0  ), // #218
+  INST(Fninit           , FpuOp              , O_FPU(00,DBE3,_)          , 0                         , 42 , 0  , 951  , 30 , 0  ), // #219
+  INST(Fnop             , FpuOp              , O_FPU(00,D9D0,_)          , 0                         , 35 , 0  , 958  , 30 , 0  ), // #220
+  INST(Fnsave           , X86M_Only          , O_FPU(00,00DD,6)          , 0                         , 39 , 0  , 963  , 69 , 0  ), // #221
+  INST(Fnstcw           , X86M_Only          , O_FPU(00,00D9,7)          , 0                         , 52 , 0  , 970  , 68 , 0  ), // #222
+  INST(Fnstenv          , X86M_Only          , O_FPU(00,00D9,6)          , 0                         , 39 , 0  , 977  , 69 , 0  ), // #223
+  INST(Fnstsw           , FpuStsw            , O_FPU(00,00DD,7)          , O_FPU(00,DFE0,_)          , 52 , 12 , 985  , 70 , 0  ), // #224
+  INST(Fpatan           , FpuOp              , O_FPU(00,D9F3,_)          , 0                         , 35 , 0  , 992  , 30 , 0  ), // #225
+  INST(Fprem            , FpuOp              , O_FPU(00,D9F8,_)          , 0                         , 35 , 0  , 999  , 30 , 0  ), // #226
+  INST(Fprem1           , FpuOp              , O_FPU(00,D9F5,_)          , 0                         , 35 , 0  , 1005 , 30 , 0  ), // #227
+  INST(Fptan            , FpuOp              , O_FPU(00,D9F2,_)          , 0                         , 35 , 0  , 1012 , 30 , 0  ), // #228
+  INST(Frndint          , FpuOp              , O_FPU(00,D9FC,_)          , 0                         , 35 , 0  , 1018 , 30 , 0  ), // #229
+  INST(Frstor           , X86M_Only          , O_FPU(00,00DD,4)          , 0                         , 38 , 0  , 1026 , 69 , 0  ), // #230
+  INST(Fsave            , X86M_Only          , O_FPU(9B,00DD,6)          , 0                         , 56 , 0  , 1033 , 69 , 0  ), // #231
+  INST(Fscale           , FpuOp              , O_FPU(00,D9FD,_)          , 0                         , 35 , 0  , 1039 , 30 , 0  ), // #232
+  INST(Fsin             , FpuOp              , O_FPU(00,D9FE,_)          , 0                         , 35 , 0  , 1046 , 30 , 0  ), // #233
+  INST(Fsincos          , FpuOp              , O_FPU(00,D9FB,_)          , 0                         , 35 , 0  , 1051 , 30 , 0  ), // #234
+  INST(Fsqrt            , FpuOp              , O_FPU(00,D9FA,_)          , 0                         , 35 , 0  , 1059 , 30 , 0  ), // #235
+  INST(Fst              , FpuFldFst          , O_FPU(00,00D9,2)          , 0                         , 50 , 0  , 1065 , 71 , 0  ), // #236
+  INST(Fstcw            , X86M_Only          , O_FPU(9B,00D9,7)          , 0                         , 57 , 0  , 1069 , 68 , 0  ), // #237
+  INST(Fstenv           , X86M_Only          , O_FPU(9B,00D9,6)          , 0                         , 56 , 0  , 1075 , 69 , 0  ), // #238
+  INST(Fstp             , FpuFldFst          , O_FPU(00,00D9,3)          , O(000000,DB,7,_,_,_,_,_  ), 51 , 13 , 1082 , 67 , 0  ), // #239
+  INST(Fstsw            , FpuStsw            , O_FPU(9B,00DD,7)          , O_FPU(9B,DFE0,_)          , 57 , 14 , 1087 , 70 , 0  ), // #240
+  INST(Fsub             , FpuArith           , O_FPU(00,E0E8,4)          , 0                         , 58 , 0  , 2241 , 60 , 0  ), // #241
+  INST(Fsubp            , FpuRDef            , O_FPU(00,DEE8,_)          , 0                         , 37 , 0  , 1093 , 61 , 0  ), // #242
+  INST(Fsubr            , FpuArith           , O_FPU(00,E8E0,5)          , 0                         , 59 , 0  , 2247 , 60 , 0  ), // #243
+  INST(Fsubrp           , FpuRDef            , O_FPU(00,DEE0,_)          , 0                         , 37 , 0  , 1099 , 61 , 0  ), // #244
+  INST(Ftst             , FpuOp              , O_FPU(00,D9E4,_)          , 0                         , 35 , 0  , 1106 , 30 , 0  ), // #245
+  INST(Fucom            , FpuRDef            , O_FPU(00,DDE0,_)          , 0                         , 48 , 0  , 1111 , 61 , 0  ), // #246
+  INST(Fucomi           , FpuR               , O_FPU(00,DBE8,_)          , 0                         , 42 , 0  , 1117 , 63 , 50 ), // #247
+  INST(Fucomip          , FpuR               , O_FPU(00,DFE8,_)          , 0                         , 44 , 0  , 1124 , 63 , 50 ), // #248
+  INST(Fucomp           , FpuRDef            , O_FPU(00,DDE8,_)          , 0                         , 48 , 0  , 1132 , 61 , 0  ), // #249
+  INST(Fucompp          , FpuOp              , O_FPU(00,DAE9,_)          , 0                         , 41 , 0  , 1139 , 30 , 0  ), // #250
+  INST(Fwait            , X86Op              , O_FPU(00,009B,_)          , 0                         , 49 , 0  , 1147 , 30 , 0  ), // #251
+  INST(Fxam             , FpuOp              , O_FPU(00,D9E5,_)          , 0                         , 35 , 0  , 1153 , 30 , 0  ), // #252
+  INST(Fxch             , FpuR               , O_FPU(00,D9C8,_)          , 0                         , 35 , 0  , 1158 , 61 , 0  ), // #253
+  INST(Fxrstor          , X86M_Only          , O(000F00,AE,1,_,_,_,_,_  ), 0                         , 29 , 0  , 1163 , 69 , 52 ), // #254
+  INST(Fxrstor64        , X86M_Only          , O(000F00,AE,1,_,1,_,_,_  ), 0                         , 28 , 0  , 1171 , 72 , 52 ), // #255
+  INST(Fxsave           , X86M_Only          , O(000F00,AE,0,_,_,_,_,_  ), 0                         , 4  , 0  , 1181 , 69 , 52 ), // #256
+  INST(Fxsave64         , X86M_Only          , O(000F00,AE,0,_,1,_,_,_  ), 0                         , 60 , 0  , 1188 , 72 , 52 ), // #257
+  INST(Fxtract          , FpuOp              , O_FPU(00,D9F4,_)          , 0                         , 35 , 0  , 1197 , 30 , 0  ), // #258
+  INST(Fyl2x            , FpuOp              , O_FPU(00,D9F1,_)          , 0                         , 35 , 0  , 1205 , 30 , 0  ), // #259
+  INST(Fyl2xp1          , FpuOp              , O_FPU(00,D9F9,_)          , 0                         , 35 , 0  , 1211 , 30 , 0  ), // #260
+  INST(Getsec           , X86Op              , O(000F00,37,_,_,_,_,_,_  ), 0                         , 4  , 0  , 1219 , 30 , 53 ), // #261
+  INST(Gf2p8affineinvqb , ExtRmi             , O(660F3A,CF,_,_,_,_,_,_  ), 0                         , 8  , 0  , 6789 , 8  , 54 ), // #262
+  INST(Gf2p8affineqb    , ExtRmi             , O(660F3A,CE,_,_,_,_,_,_  ), 0                         , 8  , 0  , 6807 , 8  , 54 ), // #263
+  INST(Gf2p8mulb        , ExtRm              , O(660F38,CF,_,_,_,_,_,_  ), 0                         , 2  , 0  , 6822 , 5  , 54 ), // #264
+  INST(Haddpd           , ExtRm              , O(660F00,7C,_,_,_,_,_,_  ), 0                         , 3  , 0  , 6833 , 5  , 6  ), // #265
+  INST(Haddps           , ExtRm              , O(F20F00,7C,_,_,_,_,_,_  ), 0                         , 5  , 0  , 6841 , 5  , 6  ), // #266
+  INST(Hlt              , X86Op              , O(000000,F4,_,_,_,_,_,_  ), 0                         , 0  , 0  , 1226 , 30 , 0  ), // #267
+  INST(Hreset           , X86Op_Mod11RM_I8   , O(F30F3A,F0,0,_,_,_,_,_  ), 0                         , 61 , 0  , 1230 , 73 , 55 ), // #268
+  INST(Hsubpd           , ExtRm              , O(660F00,7D,_,_,_,_,_,_  ), 0                         , 3  , 0  , 6849 , 5  , 6  ), // #269
+  INST(Hsubps           , ExtRm              , O(F20F00,7D,_,_,_,_,_,_  ), 0                         , 5  , 0  , 6857 , 5  , 6  ), // #270
+  INST(Idiv             , X86M_GPB_MulDiv    , O(000000,F6,7,_,x,_,_,_  ), 0                         , 27 , 0  , 809  , 54 , 1  ), // #271
+  INST(Imul             , X86Imul            , O(000000,F6,5,_,x,_,_,_  ), 0                         , 62 , 0  , 827  , 74 , 1  ), // #272
+  INST(In               , X86In              , O(000000,EC,_,_,_,_,_,_  ), O(000000,E4,_,_,_,_,_,_  ), 0  , 15 , 11572, 75 , 0  ), // #273
+  INST(Inc              , X86IncDec          , O(000000,FE,0,_,x,_,_,_  ), O(000000,40,_,_,x,_,_,_  ), 0  , 16 , 1237 , 53 , 45 ), // #274
+  INST(Incsspd          , X86M               , O(F30F00,AE,5,_,0,_,_,_  ), 0                         , 63 , 0  , 1241 , 76 , 56 ), // #275
+  INST(Incsspq          , X86M               , O(F30F00,AE,5,_,1,_,_,_  ), 0                         , 64 , 0  , 1249 , 77 , 56 ), // #276
+  INST(Ins              , X86Ins             , O(000000,6C,_,_,_,_,_,_  ), 0                         , 0  , 0  , 1916 , 78 , 0  ), // #277
+  INST(Insertps         , ExtRmi             , O(660F3A,21,_,_,_,_,_,_  ), 0                         , 8  , 0  , 6993 , 38 , 12 ), // #278
+  INST(Insertq          , ExtInsertq         , O(F20F00,79,_,_,_,_,_,_  ), O(F20F00,78,_,_,_,_,_,_  ), 5  , 17 , 1257 , 79 , 49 ), // #279
+  INST(Int              , X86Int             , O(000000,CD,_,_,_,_,_,_  ), 0                         , 0  , 0  , 1022 , 80 , 0  ), // #280
+  INST(Int3             , X86Op              , O(000000,CC,_,_,_,_,_,_  ), 0                         , 0  , 0  , 1265 , 30 , 0  ), // #281
+  INST(Into             , X86Op              , O(000000,CE,_,_,_,_,_,_  ), 0                         , 0  , 0  , 1270 , 81 , 57 ), // #282
+  INST(Invd             , X86Op              , O(000F00,08,_,_,_,_,_,_  ), 0                         , 4  , 0  , 11501, 30 , 43 ), // #283
+  INST(Invept           , X86Rm_NoSize       , O(660F38,80,_,_,_,_,_,_  ), 0                         , 2  , 0  , 1275 , 82 , 58 ), // #284
+  INST(Invlpg           , X86M_Only          , O(000F00,01,7,_,_,_,_,_  ), 0                         , 22 , 0  , 1282 , 69 , 43 ), // #285
+  INST(Invlpga          , X86Op_xAddr        , O(000F01,DF,_,_,_,_,_,_  ), 0                         , 21 , 0  , 1289 , 83 , 22 ), // #286
+  INST(Invpcid          , X86Rm_NoSize       , O(660F38,82,_,_,_,_,_,_  ), 0                         , 2  , 0  , 1297 , 82 , 43 ), // #287
+  INST(Invvpid          , X86Rm_NoSize       , O(660F38,81,_,_,_,_,_,_  ), 0                         , 2  , 0  , 1305 , 82 , 58 ), // #288
+  INST(Iret             , X86Op              , O(660000,CF,_,_,_,_,_,_  ), 0                         , 19 , 0  , 3226 , 84 , 1  ), // #289
+  INST(Iretd            , X86Op              , O(000000,CF,_,_,_,_,_,_  ), 0                         , 0  , 0  , 1313 , 84 , 1  ), // #290
+  INST(Iretq            , X86Op              , O(000000,CF,_,_,1,_,_,_  ), 0                         , 20 , 0  , 1319 , 85 , 1  ), // #291
+  INST(Ja               , X86Jcc             , O(000F00,87,_,_,_,_,_,_  ), O(000000,77,_,_,_,_,_,_  ), 4  , 18 , 1325 , 86 , 59 ), // #292
+  INST(Jae              , X86Jcc             , O(000F00,83,_,_,_,_,_,_  ), O(000000,73,_,_,_,_,_,_  ), 4  , 19 , 1328 , 86 , 60 ), // #293
+  INST(Jb               , X86Jcc             , O(000F00,82,_,_,_,_,_,_  ), O(000000,72,_,_,_,_,_,_  ), 4  , 20 , 1332 , 86 , 60 ), // #294
+  INST(Jbe              , X86Jcc             , O(000F00,86,_,_,_,_,_,_  ), O(000000,76,_,_,_,_,_,_  ), 4  , 21 , 1335 , 86 , 59 ), // #295
+  INST(Jc               , X86Jcc             , O(000F00,82,_,_,_,_,_,_  ), O(000000,72,_,_,_,_,_,_  ), 4  , 20 , 1339 , 86 , 60 ), // #296
+  INST(Je               , X86Jcc             , O(000F00,84,_,_,_,_,_,_  ), O(000000,74,_,_,_,_,_,_  ), 4  , 22 , 1342 , 86 , 61 ), // #297
+  INST(Jecxz            , X86JecxzLoop       , 0                         , O(000000,E3,_,_,_,_,_,_  ), 0  , 23 , 1345 , 87 , 0  ), // #298
+  INST(Jg               , X86Jcc             , O(000F00,8F,_,_,_,_,_,_  ), O(000000,7F,_,_,_,_,_,_  ), 4  , 24 , 1351 , 86 , 62 ), // #299
+  INST(Jge              , X86Jcc             , O(000F00,8D,_,_,_,_,_,_  ), O(000000,7D,_,_,_,_,_,_  ), 4  , 25 , 1354 , 86 , 63 ), // #300
+  INST(Jl               , X86Jcc             , O(000F00,8C,_,_,_,_,_,_  ), O(000000,7C,_,_,_,_,_,_  ), 4  , 26 , 1358 , 86 , 63 ), // #301
+  INST(Jle              , X86Jcc             , O(000F00,8E,_,_,_,_,_,_  ), O(000000,7E,_,_,_,_,_,_  ), 4  , 27 , 1361 , 86 , 62 ), // #302
+  INST(Jmp              , X86Jmp             , O(000000,FF,4,_,_,_,_,_  ), O(000000,EB,_,_,_,_,_,_  ), 9  , 28 , 1861 , 88 , 0  ), // #303
+  INST(Jna              , X86Jcc             , O(000F00,86,_,_,_,_,_,_  ), O(000000,76,_,_,_,_,_,_  ), 4  , 21 , 1365 , 86 , 59 ), // #304
+  INST(Jnae             , X86Jcc             , O(000F00,82,_,_,_,_,_,_  ), O(000000,72,_,_,_,_,_,_  ), 4  , 20 , 1369 , 86 , 60 ), // #305
+  INST(Jnb              , X86Jcc             , O(000F00,83,_,_,_,_,_,_  ), O(000000,73,_,_,_,_,_,_  ), 4  , 19 , 1374 , 86 , 60 ), // #306
+  INST(Jnbe             , X86Jcc             , O(000F00,87,_,_,_,_,_,_  ), O(000000,77,_,_,_,_,_,_  ), 4  , 18 , 1378 , 86 , 59 ), // #307
+  INST(Jnc              , X86Jcc             , O(000F00,83,_,_,_,_,_,_  ), O(000000,73,_,_,_,_,_,_  ), 4  , 19 , 1383 , 86 , 60 ), // #308
+  INST(Jne              , X86Jcc             , O(000F00,85,_,_,_,_,_,_  ), O(000000,75,_,_,_,_,_,_  ), 4  , 29 , 1387 , 86 , 61 ), // #309
+  INST(Jng              , X86Jcc             , O(000F00,8E,_,_,_,_,_,_  ), O(000000,7E,_,_,_,_,_,_  ), 4  , 27 , 1391 , 86 , 62 ), // #310
+  INST(Jnge             , X86Jcc             , O(000F00,8C,_,_,_,_,_,_  ), O(000000,7C,_,_,_,_,_,_  ), 4  , 26 , 1395 , 86 , 63 ), // #311
+  INST(Jnl              , X86Jcc             , O(000F00,8D,_,_,_,_,_,_  ), O(000000,7D,_,_,_,_,_,_  ), 4  , 25 , 1400 , 86 , 63 ), // #312
+  INST(Jnle             , X86Jcc             , O(000F00,8F,_,_,_,_,_,_  ), O(000000,7F,_,_,_,_,_,_  ), 4  , 24 , 1404 , 86 , 62 ), // #313
+  INST(Jno              , X86Jcc             , O(000F00,81,_,_,_,_,_,_  ), O(000000,71,_,_,_,_,_,_  ), 4  , 30 , 1409 , 86 , 57 ), // #314
+  INST(Jnp              , X86Jcc             , O(000F00,8B,_,_,_,_,_,_  ), O(000000,7B,_,_,_,_,_,_  ), 4  , 31 , 1413 , 86 , 64 ), // #315
+  INST(Jns              , X86Jcc             , O(000F00,89,_,_,_,_,_,_  ), O(000000,79,_,_,_,_,_,_  ), 4  , 32 , 1417 , 86 , 65 ), // #316
+  INST(Jnz              , X86Jcc             , O(000F00,85,_,_,_,_,_,_  ), O(000000,75,_,_,_,_,_,_  ), 4  , 29 , 1421 , 86 , 61 ), // #317
+  INST(Jo               , X86Jcc             , O(000F00,80,_,_,_,_,_,_  ), O(000000,70,_,_,_,_,_,_  ), 4  , 33 , 1425 , 86 , 57 ), // #318
+  INST(Jp               , X86Jcc             , O(000F00,8A,_,_,_,_,_,_  ), O(000000,7A,_,_,_,_,_,_  ), 4  , 34 , 1428 , 86 , 64 ), // #319
+  INST(Jpe              , X86Jcc             , O(000F00,8A,_,_,_,_,_,_  ), O(000000,7A,_,_,_,_,_,_  ), 4  , 34 , 1431 , 86 , 64 ), // #320
+  INST(Jpo              , X86Jcc             , O(000F00,8B,_,_,_,_,_,_  ), O(000000,7B,_,_,_,_,_,_  ), 4  , 31 , 1435 , 86 , 64 ), // #321
+  INST(Js               , X86Jcc             , O(000F00,88,_,_,_,_,_,_  ), O(000000,78,_,_,_,_,_,_  ), 4  , 35 , 1439 , 86 , 65 ), // #322
+  INST(Jz               , X86Jcc             , O(000F00,84,_,_,_,_,_,_  ), O(000000,74,_,_,_,_,_,_  ), 4  , 22 , 1442 , 86 , 61 ), // #323
+  INST(Kaddb            , VexRvm             , V(660F00,4A,_,1,0,_,_,_  ), 0                         , 65 , 0  , 1445 , 89 , 66 ), // #324
+  INST(Kaddd            , VexRvm             , V(660F00,4A,_,1,1,_,_,_  ), 0                         , 66 , 0  , 1451 , 89 , 67 ), // #325
+  INST(Kaddq            , VexRvm             , V(000F00,4A,_,1,1,_,_,_  ), 0                         , 67 , 0  , 1457 , 89 , 67 ), // #326
+  INST(Kaddw            , VexRvm             , V(000F00,4A,_,1,0,_,_,_  ), 0                         , 68 , 0  , 1463 , 89 , 66 ), // #327
+  INST(Kandb            , VexRvm             , V(660F00,41,_,1,0,_,_,_  ), 0                         , 65 , 0  , 1469 , 89 , 66 ), // #328
+  INST(Kandd            , VexRvm             , V(660F00,41,_,1,1,_,_,_  ), 0                         , 66 , 0  , 1475 , 89 , 67 ), // #329
+  INST(Kandnb           , VexRvm             , V(660F00,42,_,1,0,_,_,_  ), 0                         , 65 , 0  , 1481 , 89 , 66 ), // #330
+  INST(Kandnd           , VexRvm             , V(660F00,42,_,1,1,_,_,_  ), 0                         , 66 , 0  , 1488 , 89 , 67 ), // #331
+  INST(Kandnq           , VexRvm             , V(000F00,42,_,1,1,_,_,_  ), 0                         , 67 , 0  , 1495 , 89 , 67 ), // #332
+  INST(Kandnw           , VexRvm             , V(000F00,42,_,1,0,_,_,_  ), 0                         , 68 , 0  , 1502 , 89 , 68 ), // #333
+  INST(Kandq            , VexRvm             , V(000F00,41,_,1,1,_,_,_  ), 0                         , 67 , 0  , 1509 , 89 , 67 ), // #334
+  INST(Kandw            , VexRvm             , V(000F00,41,_,1,0,_,_,_  ), 0                         , 68 , 0  , 1515 , 89 , 68 ), // #335
+  INST(Kmovb            , VexKmov            , V(660F00,90,_,0,0,_,_,_  ), V(660F00,92,_,0,0,_,_,_  ), 69 , 36 , 1521 , 90 , 69 ), // #336
+  INST(Kmovd            , VexKmov            , V(660F00,90,_,0,1,_,_,_  ), V(F20F00,92,_,0,0,_,_,_  ), 70 , 37 , 9105 , 91 , 70 ), // #337
+  INST(Kmovq            , VexKmov            , V(000F00,90,_,0,1,_,_,_  ), V(F20F00,92,_,0,1,_,_,_  ), 71 , 38 , 9116 , 92 , 70 ), // #338
+  INST(Kmovw            , VexKmov            , V(000F00,90,_,0,0,_,_,_  ), V(000F00,92,_,0,0,_,_,_  ), 72 , 39 , 1527 , 93 , 71 ), // #339
+  INST(Knotb            , VexRm              , V(660F00,44,_,0,0,_,_,_  ), 0                         , 69 , 0  , 1533 , 94 , 66 ), // #340
+  INST(Knotd            , VexRm              , V(660F00,44,_,0,1,_,_,_  ), 0                         , 70 , 0  , 1539 , 94 , 67 ), // #341
+  INST(Knotq            , VexRm              , V(000F00,44,_,0,1,_,_,_  ), 0                         , 71 , 0  , 1545 , 94 , 67 ), // #342
+  INST(Knotw            , VexRm              , V(000F00,44,_,0,0,_,_,_  ), 0                         , 72 , 0  , 1551 , 94 , 68 ), // #343
+  INST(Korb             , VexRvm             , V(660F00,45,_,1,0,_,_,_  ), 0                         , 65 , 0  , 1557 , 89 , 66 ), // #344
+  INST(Kord             , VexRvm             , V(660F00,45,_,1,1,_,_,_  ), 0                         , 66 , 0  , 1562 , 89 , 67 ), // #345
+  INST(Korq             , VexRvm             , V(000F00,45,_,1,1,_,_,_  ), 0                         , 67 , 0  , 1567 , 89 , 67 ), // #346
+  INST(Kortestb         , VexRm              , V(660F00,98,_,0,0,_,_,_  ), 0                         , 69 , 0  , 1572 , 94 , 72 ), // #347
+  INST(Kortestd         , VexRm              , V(660F00,98,_,0,1,_,_,_  ), 0                         , 70 , 0  , 1581 , 94 , 73 ), // #348
+  INST(Kortestq         , VexRm              , V(000F00,98,_,0,1,_,_,_  ), 0                         , 71 , 0  , 1590 , 94 , 73 ), // #349
+  INST(Kortestw         , VexRm              , V(000F00,98,_,0,0,_,_,_  ), 0                         , 72 , 0  , 1599 , 94 , 74 ), // #350
+  INST(Korw             , VexRvm             , V(000F00,45,_,1,0,_,_,_  ), 0                         , 68 , 0  , 1608 , 89 , 68 ), // #351
+  INST(Kshiftlb         , VexRmi             , V(660F3A,32,_,0,0,_,_,_  ), 0                         , 73 , 0  , 1613 , 95 , 66 ), // #352
+  INST(Kshiftld         , VexRmi             , V(660F3A,33,_,0,0,_,_,_  ), 0                         , 73 , 0  , 1622 , 95 , 67 ), // #353
+  INST(Kshiftlq         , VexRmi             , V(660F3A,33,_,0,1,_,_,_  ), 0                         , 74 , 0  , 1631 , 95 , 67 ), // #354
+  INST(Kshiftlw         , VexRmi             , V(660F3A,32,_,0,1,_,_,_  ), 0                         , 74 , 0  , 1640 , 95 , 68 ), // #355
+  INST(Kshiftrb         , VexRmi             , V(660F3A,30,_,0,0,_,_,_  ), 0                         , 73 , 0  , 1649 , 95 , 66 ), // #356
+  INST(Kshiftrd         , VexRmi             , V(660F3A,31,_,0,0,_,_,_  ), 0                         , 73 , 0  , 1658 , 95 , 67 ), // #357
+  INST(Kshiftrq         , VexRmi             , V(660F3A,31,_,0,1,_,_,_  ), 0                         , 74 , 0  , 1667 , 95 , 67 ), // #358
+  INST(Kshiftrw         , VexRmi             , V(660F3A,30,_,0,1,_,_,_  ), 0                         , 74 , 0  , 1676 , 95 , 68 ), // #359
+  INST(Ktestb           , VexRm              , V(660F00,99,_,0,0,_,_,_  ), 0                         , 69 , 0  , 1685 , 94 , 72 ), // #360
+  INST(Ktestd           , VexRm              , V(660F00,99,_,0,1,_,_,_  ), 0                         , 70 , 0  , 1692 , 94 , 73 ), // #361
+  INST(Ktestq           , VexRm              , V(000F00,99,_,0,1,_,_,_  ), 0                         , 71 , 0  , 1699 , 94 , 73 ), // #362
+  INST(Ktestw           , VexRm              , V(000F00,99,_,0,0,_,_,_  ), 0                         , 72 , 0  , 1706 , 94 , 72 ), // #363
+  INST(Kunpckbw         , VexRvm             , V(660F00,4B,_,1,0,_,_,_  ), 0                         , 65 , 0  , 1713 , 89 , 68 ), // #364
+  INST(Kunpckdq         , VexRvm             , V(000F00,4B,_,1,1,_,_,_  ), 0                         , 67 , 0  , 1722 , 89 , 67 ), // #365
+  INST(Kunpckwd         , VexRvm             , V(000F00,4B,_,1,0,_,_,_  ), 0                         , 68 , 0  , 1731 , 89 , 67 ), // #366
+  INST(Kxnorb           , VexRvm             , V(660F00,46,_,1,0,_,_,_  ), 0                         , 65 , 0  , 1740 , 96 , 66 ), // #367
+  INST(Kxnord           , VexRvm             , V(660F00,46,_,1,1,_,_,_  ), 0                         , 66 , 0  , 1747 , 96 , 67 ), // #368
+  INST(Kxnorq           , VexRvm             , V(000F00,46,_,1,1,_,_,_  ), 0                         , 67 , 0  , 1754 , 96 , 67 ), // #369
+  INST(Kxnorw           , VexRvm             , V(000F00,46,_,1,0,_,_,_  ), 0                         , 68 , 0  , 1761 , 96 , 68 ), // #370
+  INST(Kxorb            , VexRvm             , V(660F00,47,_,1,0,_,_,_  ), 0                         , 65 , 0  , 1768 , 96 , 66 ), // #371
+  INST(Kxord            , VexRvm             , V(660F00,47,_,1,1,_,_,_  ), 0                         , 66 , 0  , 1774 , 96 , 67 ), // #372
+  INST(Kxorq            , VexRvm             , V(000F00,47,_,1,1,_,_,_  ), 0                         , 67 , 0  , 1780 , 96 , 67 ), // #373
+  INST(Kxorw            , VexRvm             , V(000F00,47,_,1,0,_,_,_  ), 0                         , 68 , 0  , 1786 , 96 , 68 ), // #374
+  INST(Lahf             , X86Op              , O(000000,9F,_,_,_,_,_,_  ), 0                         , 0  , 0  , 1792 , 97 , 75 ), // #375
+  INST(Lar              , X86Rm              , O(000F00,02,_,_,_,_,_,_  ), 0                         , 4  , 0  , 1797 , 98 , 10 ), // #376
+  INST(Lcall            , X86LcallLjmp       , O(000000,FF,3,_,_,_,_,_  ), O(000000,9A,_,_,_,_,_,_  ), 75 , 40 , 1801 , 99 , 1  ), // #377
+  INST(Lddqu            , ExtRm              , O(F20F00,F0,_,_,_,_,_,_  ), 0                         , 5  , 0  , 7003 , 100, 6  ), // #378
+  INST(Ldmxcsr          , X86M_Only          , O(000F00,AE,2,_,_,_,_,_  ), 0                         , 76 , 0  , 7010 , 101, 5  ), // #379
+  INST(Lds              , X86Rm              , O(000000,C5,_,_,_,_,_,_  ), 0                         , 0  , 0  , 1807 , 102, 0  ), // #380
+  INST(Ldtilecfg        , AmxCfg             , V(000F38,49,_,0,0,_,_,_  ), 0                         , 10 , 0  , 1811 , 103, 76 ), // #381
+  INST(Lea              , X86Lea             , O(000000,8D,_,_,x,_,_,_  ), 0                         , 0  , 0  , 1821 , 104, 0  ), // #382
+  INST(Leave            , X86Op              , O(000000,C9,_,_,_,_,_,_  ), 0                         , 0  , 0  , 1825 , 30 , 0  ), // #383
+  INST(Les              , X86Rm              , O(000000,C4,_,_,_,_,_,_  ), 0                         , 0  , 0  , 1831 , 102, 0  ), // #384
+  INST(Lfence           , X86Fence           , O(000F00,AE,5,_,_,_,_,_  ), 0                         , 77 , 0  , 1835 , 30 , 4  ), // #385
+  INST(Lfs              , X86Rm              , O(000F00,B4,_,_,_,_,_,_  ), 0                         , 4  , 0  , 1842 , 105, 0  ), // #386
+  INST(Lgdt             , X86M_Only          , O(000F00,01,2,_,_,_,_,_  ), 0                         , 76 , 0  , 1846 , 69 , 0  ), // #387
+  INST(Lgs              , X86Rm              , O(000F00,B5,_,_,_,_,_,_  ), 0                         , 4  , 0  , 1851 , 105, 0  ), // #388
+  INST(Lidt             , X86M_Only          , O(000F00,01,3,_,_,_,_,_  ), 0                         , 78 , 0  , 1855 , 69 , 0  ), // #389
+  INST(Ljmp             , X86LcallLjmp       , O(000000,FF,5,_,_,_,_,_  ), O(000000,EA,_,_,_,_,_,_  ), 62 , 41 , 1860 , 106, 0  ), // #390
+  INST(Lldt             , X86M_NoSize        , O(000F00,00,2,_,_,_,_,_  ), 0                         , 76 , 0  , 1865 , 107, 0  ), // #391
+  INST(Llwpcb           , VexR_Wx            , V(XOP_M9,12,0,0,x,_,_,_  ), 0                         , 79 , 0  , 1870 , 108, 77 ), // #392
+  INST(Lmsw             , X86M_NoSize        , O(000F00,01,6,_,_,_,_,_  ), 0                         , 80 , 0  , 1877 , 107, 0  ), // #393
+  INST(Lods             , X86StrRm           , O(000000,AC,_,_,_,_,_,_  ), 0                         , 0  , 0  , 1882 , 109, 78 ), // #394
+  INST(Loop             , X86JecxzLoop       , 0                         , O(000000,E2,_,_,_,_,_,_  ), 0  , 42 , 1887 , 110, 0  ), // #395
+  INST(Loope            , X86JecxzLoop       , 0                         , O(000000,E1,_,_,_,_,_,_  ), 0  , 43 , 1892 , 110, 61 ), // #396
+  INST(Loopne           , X86JecxzLoop       , 0                         , O(000000,E0,_,_,_,_,_,_  ), 0  , 44 , 1898 , 110, 61 ), // #397
+  INST(Lsl              , X86Rm              , O(000F00,03,_,_,_,_,_,_  ), 0                         , 4  , 0  , 1905 , 111, 10 ), // #398
+  INST(Lss              , X86Rm              , O(000F00,B2,_,_,_,_,_,_  ), 0                         , 4  , 0  , 7556 , 105, 0  ), // #399
+  INST(Ltr              , X86M_NoSize        , O(000F00,00,3,_,_,_,_,_  ), 0                         , 78 , 0  , 1909 , 107, 0  ), // #400
+  INST(Lwpins           , VexVmi4_Wx         , V(XOP_MA,12,0,0,x,_,_,_  ), 0                         , 81 , 0  , 1913 , 112, 77 ), // #401
+  INST(Lwpval           , VexVmi4_Wx         , V(XOP_MA,12,1,0,x,_,_,_  ), 0                         , 82 , 0  , 1920 , 112, 77 ), // #402
+  INST(Lzcnt            , X86Rm_Raw66H       , O(F30F00,BD,_,_,x,_,_,_  ), 0                         , 6  , 0  , 1927 , 22 , 79 ), // #403
+  INST(Maskmovdqu       , ExtRm_ZDI          , O(660F00,F7,_,_,_,_,_,_  ), 0                         , 3  , 0  , 7019 , 113, 4  ), // #404
+  INST(Maskmovq         , ExtRm_ZDI          , O(000F00,F7,_,_,_,_,_,_  ), 0                         , 4  , 0  , 9113 , 114, 80 ), // #405
+  INST(Maxpd            , ExtRm              , O(660F00,5F,_,_,_,_,_,_  ), 0                         , 3  , 0  , 7053 , 5  , 4  ), // #406
+  INST(Maxps            , ExtRm              , O(000F00,5F,_,_,_,_,_,_  ), 0                         , 4  , 0  , 7067 , 5  , 5  ), // #407
+  INST(Maxsd            , ExtRm              , O(F20F00,5F,_,_,_,_,_,_  ), 0                         , 5  , 0  , 9132 , 6  , 4  ), // #408
+  INST(Maxss            , ExtRm              , O(F30F00,5F,_,_,_,_,_,_  ), 0                         , 6  , 0  , 7088 , 7  , 5  ), // #409
+  INST(Mcommit          , X86Op              , O(F30F01,FA,_,_,_,_,_,_  ), 0                         , 25 , 0  , 1933 , 30 , 81 ), // #410
+  INST(Mfence           , X86Fence           , O(000F00,AE,6,_,_,_,_,_  ), 0                         , 80 , 0  , 1941 , 30 , 4  ), // #411
+  INST(Minpd            , ExtRm              , O(660F00,5D,_,_,_,_,_,_  ), 0                         , 3  , 0  , 7117 , 5  , 4  ), // #412
+  INST(Minps            , ExtRm              , O(000F00,5D,_,_,_,_,_,_  ), 0                         , 4  , 0  , 7131 , 5  , 5  ), // #413
+  INST(Minsd            , ExtRm              , O(F20F00,5D,_,_,_,_,_,_  ), 0                         , 5  , 0  , 9196 , 6  , 4  ), // #414
+  INST(Minss            , ExtRm              , O(F30F00,5D,_,_,_,_,_,_  ), 0                         , 6  , 0  , 7152 , 7  , 5  ), // #415
+  INST(Monitor          , X86Op              , O(000F01,C8,_,_,_,_,_,_  ), 0                         , 21 , 0  , 3232 , 115, 82 ), // #416
+  INST(Monitorx         , X86Op              , O(000F01,FA,_,_,_,_,_,_  ), 0                         , 21 , 0  , 1948 , 115, 83 ), // #417
+  INST(Mov              , X86Mov             , 0                         , 0                         , 0  , 0  , 138  , 116, 84 ), // #418
+  INST(Movabs           , X86Movabs          , 0                         , 0                         , 0  , 0  , 1957 , 117, 0  ), // #419
+  INST(Movapd           , ExtMov             , O(660F00,28,_,_,_,_,_,_  ), O(660F00,29,_,_,_,_,_,_  ), 3  , 45 , 7183 , 118, 85 ), // #420
+  INST(Movaps           , ExtMov             , O(000F00,28,_,_,_,_,_,_  ), O(000F00,29,_,_,_,_,_,_  ), 4  , 46 , 7191 , 118, 86 ), // #421
+  INST(Movbe            , ExtMovbe           , O(000F38,F0,_,_,x,_,_,_  ), O(000F38,F1,_,_,x,_,_,_  ), 83 , 47 , 656  , 119, 87 ), // #422
+  INST(Movd             , ExtMovd            , O(000F00,6E,_,_,_,_,_,_  ), O(000F00,7E,_,_,_,_,_,_  ), 4  , 48 , 9106 , 120, 88 ), // #423
+  INST(Movddup          , ExtMov             , O(F20F00,12,_,_,_,_,_,_  ), 0                         , 5  , 0  , 7205 , 6  , 6  ), // #424
+  INST(Movdir64b        , X86EnqcmdMovdir64b , O(660F38,F8,_,_,_,_,_,_  ), 0                         , 2  , 0  , 1964 , 121, 89 ), // #425
+  INST(Movdiri          , X86MovntiMovdiri   , O(000F38,F9,_,_,_,_,_,_  ), 0                         , 83 , 0  , 1974 , 122, 90 ), // #426
+  INST(Movdq2q          , ExtMov             , O(F20F00,D6,_,_,_,_,_,_  ), 0                         , 5  , 0  , 1982 , 123, 4  ), // #427
+  INST(Movdqa           , ExtMov             , O(660F00,6F,_,_,_,_,_,_  ), O(660F00,7F,_,_,_,_,_,_  ), 3  , 49 , 7214 , 118, 85 ), // #428
+  INST(Movdqu           , ExtMov             , O(F30F00,6F,_,_,_,_,_,_  ), O(F30F00,7F,_,_,_,_,_,_  ), 6  , 50 , 7023 , 118, 85 ), // #429
+  INST(Movhlps          , ExtMov             , O(000F00,12,_,_,_,_,_,_  ), 0                         , 4  , 0  , 7289 , 124, 5  ), // #430
+  INST(Movhpd           , ExtMov             , O(660F00,16,_,_,_,_,_,_  ), O(660F00,17,_,_,_,_,_,_  ), 3  , 51 , 7298 , 125, 4  ), // #431
+  INST(Movhps           , ExtMov             , O(000F00,16,_,_,_,_,_,_  ), O(000F00,17,_,_,_,_,_,_  ), 4  , 52 , 7306 , 125, 5  ), // #432
+  INST(Movlhps          , ExtMov             , O(000F00,16,_,_,_,_,_,_  ), 0                         , 4  , 0  , 7314 , 124, 5  ), // #433
+  INST(Movlpd           , ExtMov             , O(660F00,12,_,_,_,_,_,_  ), O(660F00,13,_,_,_,_,_,_  ), 3  , 53 , 7323 , 125, 4  ), // #434
+  INST(Movlps           , ExtMov             , O(000F00,12,_,_,_,_,_,_  ), O(000F00,13,_,_,_,_,_,_  ), 4  , 54 , 7331 , 125, 5  ), // #435
+  INST(Movmskpd         , ExtMov             , O(660F00,50,_,_,_,_,_,_  ), 0                         , 3  , 0  , 7339 , 126, 4  ), // #436
+  INST(Movmskps         , ExtMov             , O(000F00,50,_,_,_,_,_,_  ), 0                         , 4  , 0  , 7349 , 126, 5  ), // #437
+  INST(Movntdq          , ExtMov             , 0                         , O(660F00,E7,_,_,_,_,_,_  ), 0  , 55 , 7359 , 127, 4  ), // #438
+  INST(Movntdqa         , ExtMov             , O(660F38,2A,_,_,_,_,_,_  ), 0                         , 2  , 0  , 7368 , 100, 12 ), // #439
+  INST(Movnti           , X86MovntiMovdiri   , O(000F00,C3,_,_,x,_,_,_  ), 0                         , 4  , 0  , 1990 , 122, 4  ), // #440
+  INST(Movntpd          , ExtMov             , 0                         , O(660F00,2B,_,_,_,_,_,_  ), 0  , 56 , 7378 , 127, 4  ), // #441
+  INST(Movntps          , ExtMov             , 0                         , O(000F00,2B,_,_,_,_,_,_  ), 0  , 57 , 7387 , 127, 5  ), // #442
+  INST(Movntq           , ExtMov             , 0                         , O(000F00,E7,_,_,_,_,_,_  ), 0  , 58 , 1997 , 128, 80 ), // #443
+  INST(Movntsd          , ExtMov             , 0                         , O(F20F00,2B,_,_,_,_,_,_  ), 0  , 59 , 2004 , 129, 49 ), // #444
+  INST(Movntss          , ExtMov             , 0                         , O(F30F00,2B,_,_,_,_,_,_  ), 0  , 60 , 2012 , 130, 49 ), // #445
+  INST(Movq             , ExtMovq            , O(000F00,6E,_,_,x,_,_,_  ), O(000F00,7E,_,_,x,_,_,_  ), 4  , 48 , 9117 , 131, 91 ), // #446
+  INST(Movq2dq          , ExtRm              , O(F30F00,D6,_,_,_,_,_,_  ), 0                         , 6  , 0  , 2020 , 132, 4  ), // #447
+  INST(Movs             , X86StrMm           , O(000000,A4,_,_,_,_,_,_  ), 0                         , 0  , 0  , 439  , 133, 78 ), // #448
+  INST(Movsd            , ExtMov             , O(F20F00,10,_,_,_,_,_,_  ), O(F20F00,11,_,_,_,_,_,_  ), 5  , 61 , 7402 , 134, 85 ), // #449
+  INST(Movshdup         , ExtRm              , O(F30F00,16,_,_,_,_,_,_  ), 0                         , 6  , 0  , 7416 , 5  , 6  ), // #450
+  INST(Movsldup         , ExtRm              , O(F30F00,12,_,_,_,_,_,_  ), 0                         , 6  , 0  , 7426 , 5  , 6  ), // #451
+  INST(Movss            , ExtMov             , O(F30F00,10,_,_,_,_,_,_  ), O(F30F00,11,_,_,_,_,_,_  ), 6  , 62 , 7436 , 135, 86 ), // #452
+  INST(Movsx            , X86MovsxMovzx      , O(000F00,BE,_,_,x,_,_,_  ), 0                         , 4  , 0  , 2028 , 136, 0  ), // #453
+  INST(Movsxd           , X86Rm              , O(000000,63,_,_,x,_,_,_  ), 0                         , 0  , 0  , 2034 , 137, 0  ), // #454
+  INST(Movupd           , ExtMov             , O(660F00,10,_,_,_,_,_,_  ), O(660F00,11,_,_,_,_,_,_  ), 3  , 63 , 7443 , 118, 85 ), // #455
+  INST(Movups           , ExtMov             , O(000F00,10,_,_,_,_,_,_  ), O(000F00,11,_,_,_,_,_,_  ), 4  , 64 , 7451 , 118, 86 ), // #456
+  INST(Movzx            , X86MovsxMovzx      , O(000F00,B6,_,_,x,_,_,_  ), 0                         , 4  , 0  , 2041 , 136, 0  ), // #457
+  INST(Mpsadbw          , ExtRmi             , O(660F3A,42,_,_,_,_,_,_  ), 0                         , 8  , 0  , 7465 , 8  , 12 ), // #458
+  INST(Mul              , X86M_GPB_MulDiv    , O(000000,F6,4,_,x,_,_,_  ), 0                         , 9  , 0  , 828  , 54 , 1  ), // #459
+  INST(Mulpd            , ExtRm              , O(660F00,59,_,_,_,_,_,_  ), 0                         , 3  , 0  , 7519 , 5  , 4  ), // #460
+  INST(Mulps            , ExtRm              , O(000F00,59,_,_,_,_,_,_  ), 0                         , 4  , 0  , 7533 , 5  , 5  ), // #461
+  INST(Mulsd            , ExtRm              , O(F20F00,59,_,_,_,_,_,_  ), 0                         , 5  , 0  , 7540 , 6  , 4  ), // #462
+  INST(Mulss            , ExtRm              , O(F30F00,59,_,_,_,_,_,_  ), 0                         , 6  , 0  , 7554 , 7  , 5  ), // #463
+  INST(Mulx             , VexRvm_ZDX_Wx      , V(F20F38,F6,_,0,x,_,_,_  ), 0                         , 84 , 0  , 2047 , 138, 92 ), // #464
+  INST(Mwait            , X86Op              , O(000F01,C9,_,_,_,_,_,_  ), 0                         , 21 , 0  , 3241 , 139, 82 ), // #465
+  INST(Mwaitx           , X86Op              , O(000F01,FB,_,_,_,_,_,_  ), 0                         , 21 , 0  , 2052 , 140, 83 ), // #466
+  INST(Neg              , X86M_GPB           , O(000000,F6,3,_,x,_,_,_  ), 0                         , 75 , 0  , 2059 , 141, 1  ), // #467
+  INST(Nop              , X86M_Nop           , O(000000,90,_,_,_,_,_,_  ), 0                         , 0  , 0  , 959  , 142, 0  ), // #468
+  INST(Not              , X86M_GPB           , O(000000,F6,2,_,x,_,_,_  ), 0                         , 1  , 0  , 2063 , 141, 0  ), // #469
+  INST(Or               , X86Arith           , O(000000,08,1,_,x,_,_,_  ), 0                         , 31 , 0  , 3237 , 143, 1  ), // #470
+  INST(Orpd             , ExtRm              , O(660F00,56,_,_,_,_,_,_  ), 0                         , 3  , 0  , 11458, 11 , 4  ), // #471
+  INST(Orps             , ExtRm              , O(000F00,56,_,_,_,_,_,_  ), 0                         , 4  , 0  , 11465, 11 , 5  ), // #472
+  INST(Out              , X86Out             , O(000000,EE,_,_,_,_,_,_  ), O(000000,E6,_,_,_,_,_,_  ), 0  , 65 , 2067 , 144, 0  ), // #473
+  INST(Outs             , X86Outs            , O(000000,6E,_,_,_,_,_,_  ), 0                         , 0  , 0  , 2071 , 145, 0  ), // #474
+  INST(Pabsb            , ExtRm_P            , O(000F38,1C,_,_,_,_,_,_  ), 0                         , 83 , 0  , 7636 , 146, 93 ), // #475
+  INST(Pabsd            , ExtRm_P            , O(000F38,1E,_,_,_,_,_,_  ), 0                         , 83 , 0  , 7643 , 146, 93 ), // #476
+  INST(Pabsw            , ExtRm_P            , O(000F38,1D,_,_,_,_,_,_  ), 0                         , 83 , 0  , 7657 , 146, 93 ), // #477
+  INST(Packssdw         , ExtRm_P            , O(000F00,6B,_,_,_,_,_,_  ), 0                         , 4  , 0  , 7664 , 146, 88 ), // #478
+  INST(Packsswb         , ExtRm_P            , O(000F00,63,_,_,_,_,_,_  ), 0                         , 4  , 0  , 7674 , 146, 88 ), // #479
+  INST(Packusdw         , ExtRm              , O(660F38,2B,_,_,_,_,_,_  ), 0                         , 2  , 0  , 7684 , 5  , 12 ), // #480
+  INST(Packuswb         , ExtRm_P            , O(000F00,67,_,_,_,_,_,_  ), 0                         , 4  , 0  , 7694 , 146, 88 ), // #481
+  INST(Paddb            , ExtRm_P            , O(000F00,FC,_,_,_,_,_,_  ), 0                         , 4  , 0  , 7704 , 146, 88 ), // #482
+  INST(Paddd            , ExtRm_P            , O(000F00,FE,_,_,_,_,_,_  ), 0                         , 4  , 0  , 7711 , 146, 88 ), // #483
+  INST(Paddq            , ExtRm_P            , O(000F00,D4,_,_,_,_,_,_  ), 0                         , 4  , 0  , 7718 , 146, 4  ), // #484
+  INST(Paddsb           , ExtRm_P            , O(000F00,EC,_,_,_,_,_,_  ), 0                         , 4  , 0  , 7725 , 146, 88 ), // #485
+  INST(Paddsw           , ExtRm_P            , O(000F00,ED,_,_,_,_,_,_  ), 0                         , 4  , 0  , 7733 , 146, 88 ), // #486
+  INST(Paddusb          , ExtRm_P            , O(000F00,DC,_,_,_,_,_,_  ), 0                         , 4  , 0  , 7741 , 146, 88 ), // #487
+  INST(Paddusw          , ExtRm_P            , O(000F00,DD,_,_,_,_,_,_  ), 0                         , 4  , 0  , 7750 , 146, 88 ), // #488
+  INST(Paddw            , ExtRm_P            , O(000F00,FD,_,_,_,_,_,_  ), 0                         , 4  , 0  , 7759 , 146, 88 ), // #489
+  INST(Palignr          , ExtRmi_P           , O(000F3A,0F,_,_,_,_,_,_  ), 0                         , 85 , 0  , 7766 , 147, 6  ), // #490
+  INST(Pand             , ExtRm_P            , O(000F00,DB,_,_,_,_,_,_  ), 0                         , 4  , 0  , 7775 , 148, 88 ), // #491
+  INST(Pandn            , ExtRm_P            , O(000F00,DF,_,_,_,_,_,_  ), 0                         , 4  , 0  , 7788 , 149, 88 ), // #492
+  INST(Pause            , X86Op              , O(F30000,90,_,_,_,_,_,_  ), 0                         , 86 , 0  , 3195 , 30 , 0  ), // #493
+  INST(Pavgb            , ExtRm_P            , O(000F00,E0,_,_,_,_,_,_  ), 0                         , 4  , 0  , 7818 , 146, 94 ), // #494
+  INST(Pavgusb          , Ext3dNow           , O(000F0F,BF,_,_,_,_,_,_  ), 0                         , 87 , 0  , 2076 , 150, 51 ), // #495
+  INST(Pavgw            , ExtRm_P            , O(000F00,E3,_,_,_,_,_,_  ), 0                         , 4  , 0  , 7825 , 146, 94 ), // #496
+  INST(Pblendvb         , ExtRm_XMM0         , O(660F38,10,_,_,_,_,_,_  ), 0                         , 2  , 0  , 7881 , 15 , 12 ), // #497
+  INST(Pblendw          , ExtRmi             , O(660F3A,0E,_,_,_,_,_,_  ), 0                         , 8  , 0  , 7891 , 8  , 12 ), // #498
+  INST(Pclmulqdq        , ExtRmi             , O(660F3A,44,_,_,_,_,_,_  ), 0                         , 8  , 0  , 7984 , 8  , 95 ), // #499
+  INST(Pcmpeqb          , ExtRm_P            , O(000F00,74,_,_,_,_,_,_  ), 0                         , 4  , 0  , 8016 , 149, 88 ), // #500
+  INST(Pcmpeqd          , ExtRm_P            , O(000F00,76,_,_,_,_,_,_  ), 0                         , 4  , 0  , 8025 , 149, 88 ), // #501
+  INST(Pcmpeqq          , ExtRm              , O(660F38,29,_,_,_,_,_,_  ), 0                         , 2  , 0  , 8034 , 151, 12 ), // #502
+  INST(Pcmpeqw          , ExtRm_P            , O(000F00,75,_,_,_,_,_,_  ), 0                         , 4  , 0  , 8043 , 149, 88 ), // #503
+  INST(Pcmpestri        , ExtRmi             , O(660F3A,61,_,_,_,_,_,_  ), 0                         , 8  , 0  , 8052 , 152, 96 ), // #504
+  INST(Pcmpestrm        , ExtRmi             , O(660F3A,60,_,_,_,_,_,_  ), 0                         , 8  , 0  , 8063 , 153, 96 ), // #505
+  INST(Pcmpgtb          , ExtRm_P            , O(000F00,64,_,_,_,_,_,_  ), 0                         , 4  , 0  , 8074 , 149, 88 ), // #506
+  INST(Pcmpgtd          , ExtRm_P            , O(000F00,66,_,_,_,_,_,_  ), 0                         , 4  , 0  , 8083 , 149, 88 ), // #507
+  INST(Pcmpgtq          , ExtRm              , O(660F38,37,_,_,_,_,_,_  ), 0                         , 2  , 0  , 8092 , 151, 44 ), // #508
+  INST(Pcmpgtw          , ExtRm_P            , O(000F00,65,_,_,_,_,_,_  ), 0                         , 4  , 0  , 8101 , 149, 88 ), // #509
+  INST(Pcmpistri        , ExtRmi             , O(660F3A,63,_,_,_,_,_,_  ), 0                         , 8  , 0  , 8110 , 154, 96 ), // #510
+  INST(Pcmpistrm        , ExtRmi             , O(660F3A,62,_,_,_,_,_,_  ), 0                         , 8  , 0  , 8121 , 155, 96 ), // #511
+  INST(Pconfig          , X86Op              , O(000F01,C5,_,_,_,_,_,_  ), 0                         , 21 , 0  , 2084 , 30 , 97 ), // #512
+  INST(Pdep             , VexRvm_Wx          , V(F20F38,F5,_,0,x,_,_,_  ), 0                         , 84 , 0  , 2092 , 10 , 92 ), // #513
+  INST(Pext             , VexRvm_Wx          , V(F30F38,F5,_,0,x,_,_,_  ), 0                         , 88 , 0  , 2097 , 10 , 92 ), // #514
+  INST(Pextrb           , ExtExtract         , O(000F3A,14,_,_,_,_,_,_  ), 0                         , 85 , 0  , 8608 , 156, 12 ), // #515
+  INST(Pextrd           , ExtExtract         , O(000F3A,16,_,_,_,_,_,_  ), 0                         , 85 , 0  , 8616 , 58 , 12 ), // #516
+  INST(Pextrq           , ExtExtract         , O(000F3A,16,_,_,1,_,_,_  ), 0                         , 89 , 0  , 8624 , 157, 12 ), // #517
+  INST(Pextrw           , ExtPextrw          , O(000F00,C5,_,_,_,_,_,_  ), O(000F3A,15,_,_,_,_,_,_  ), 4  , 66 , 8632 , 158, 98 ), // #518
+  INST(Pf2id            , Ext3dNow           , O(000F0F,1D,_,_,_,_,_,_  ), 0                         , 87 , 0  , 2102 , 150, 51 ), // #519
+  INST(Pf2iw            , Ext3dNow           , O(000F0F,1C,_,_,_,_,_,_  ), 0                         , 87 , 0  , 2108 , 150, 99 ), // #520
+  INST(Pfacc            , Ext3dNow           , O(000F0F,AE,_,_,_,_,_,_  ), 0                         , 87 , 0  , 2114 , 150, 51 ), // #521
+  INST(Pfadd            , Ext3dNow           , O(000F0F,9E,_,_,_,_,_,_  ), 0                         , 87 , 0  , 2120 , 150, 51 ), // #522
+  INST(Pfcmpeq          , Ext3dNow           , O(000F0F,B0,_,_,_,_,_,_  ), 0                         , 87 , 0  , 2126 , 150, 51 ), // #523
+  INST(Pfcmpge          , Ext3dNow           , O(000F0F,90,_,_,_,_,_,_  ), 0                         , 87 , 0  , 2134 , 150, 51 ), // #524
+  INST(Pfcmpgt          , Ext3dNow           , O(000F0F,A0,_,_,_,_,_,_  ), 0                         , 87 , 0  , 2142 , 150, 51 ), // #525
+  INST(Pfmax            , Ext3dNow           , O(000F0F,A4,_,_,_,_,_,_  ), 0                         , 87 , 0  , 2150 , 150, 51 ), // #526
+  INST(Pfmin            , Ext3dNow           , O(000F0F,94,_,_,_,_,_,_  ), 0                         , 87 , 0  , 2156 , 150, 51 ), // #527
+  INST(Pfmul            , Ext3dNow           , O(000F0F,B4,_,_,_,_,_,_  ), 0                         , 87 , 0  , 2162 , 150, 51 ), // #528
+  INST(Pfnacc           , Ext3dNow           , O(000F0F,8A,_,_,_,_,_,_  ), 0                         , 87 , 0  , 2168 , 150, 99 ), // #529
+  INST(Pfpnacc          , Ext3dNow           , O(000F0F,8E,_,_,_,_,_,_  ), 0                         , 87 , 0  , 2175 , 150, 99 ), // #530
+  INST(Pfrcp            , Ext3dNow           , O(000F0F,96,_,_,_,_,_,_  ), 0                         , 87 , 0  , 2183 , 150, 51 ), // #531
+  INST(Pfrcpit1         , Ext3dNow           , O(000F0F,A6,_,_,_,_,_,_  ), 0                         , 87 , 0  , 2189 , 150, 51 ), // #532
+  INST(Pfrcpit2         , Ext3dNow           , O(000F0F,B6,_,_,_,_,_,_  ), 0                         , 87 , 0  , 2198 , 150, 51 ), // #533
+  INST(Pfrcpv           , Ext3dNow           , O(000F0F,86,_,_,_,_,_,_  ), 0                         , 87 , 0  , 2207 , 150, 100), // #534
+  INST(Pfrsqit1         , Ext3dNow           , O(000F0F,A7,_,_,_,_,_,_  ), 0                         , 87 , 0  , 2214 , 150, 51 ), // #535
+  INST(Pfrsqrt          , Ext3dNow           , O(000F0F,97,_,_,_,_,_,_  ), 0                         , 87 , 0  , 2223 , 150, 51 ), // #536
+  INST(Pfrsqrtv         , Ext3dNow           , O(000F0F,87,_,_,_,_,_,_  ), 0                         , 87 , 0  , 2231 , 150, 100), // #537
+  INST(Pfsub            , Ext3dNow           , O(000F0F,9A,_,_,_,_,_,_  ), 0                         , 87 , 0  , 2240 , 150, 51 ), // #538
+  INST(Pfsubr           , Ext3dNow           , O(000F0F,AA,_,_,_,_,_,_  ), 0                         , 87 , 0  , 2246 , 150, 51 ), // #539
+  INST(Phaddd           , ExtRm_P            , O(000F38,02,_,_,_,_,_,_  ), 0                         , 83 , 0  , 8711 , 146, 93 ), // #540
+  INST(Phaddsw          , ExtRm_P            , O(000F38,03,_,_,_,_,_,_  ), 0                         , 83 , 0  , 8728 , 146, 93 ), // #541
+  INST(Phaddw           , ExtRm_P            , O(000F38,01,_,_,_,_,_,_  ), 0                         , 83 , 0  , 8797 , 146, 93 ), // #542
+  INST(Phminposuw       , ExtRm              , O(660F38,41,_,_,_,_,_,_  ), 0                         , 2  , 0  , 8823 , 5  , 12 ), // #543
+  INST(Phsubd           , ExtRm_P            , O(000F38,06,_,_,_,_,_,_  ), 0                         , 83 , 0  , 8844 , 146, 93 ), // #544
+  INST(Phsubsw          , ExtRm_P            , O(000F38,07,_,_,_,_,_,_  ), 0                         , 83 , 0  , 8861 , 146, 93 ), // #545
+  INST(Phsubw           , ExtRm_P            , O(000F38,05,_,_,_,_,_,_  ), 0                         , 83 , 0  , 8870 , 146, 93 ), // #546
+  INST(Pi2fd            , Ext3dNow           , O(000F0F,0D,_,_,_,_,_,_  ), 0                         , 87 , 0  , 2253 , 150, 51 ), // #547
+  INST(Pi2fw            , Ext3dNow           , O(000F0F,0C,_,_,_,_,_,_  ), 0                         , 87 , 0  , 2259 , 150, 99 ), // #548
+  INST(Pinsrb           , ExtRmi             , O(660F3A,20,_,_,_,_,_,_  ), 0                         , 8  , 0  , 8887 , 159, 12 ), // #549
+  INST(Pinsrd           , ExtRmi             , O(660F3A,22,_,_,_,_,_,_  ), 0                         , 8  , 0  , 8895 , 160, 12 ), // #550
+  INST(Pinsrq           , ExtRmi             , O(660F3A,22,_,_,1,_,_,_  ), 0                         , 90 , 0  , 8903 , 161, 12 ), // #551
+  INST(Pinsrw           , ExtRmi_P           , O(000F00,C4,_,_,_,_,_,_  ), 0                         , 4  , 0  , 8911 , 162, 94 ), // #552
+  INST(Pmaddubsw        , ExtRm_P            , O(000F38,04,_,_,_,_,_,_  ), 0                         , 83 , 0  , 9081 , 146, 93 ), // #553
+  INST(Pmaddwd          , ExtRm_P            , O(000F00,F5,_,_,_,_,_,_  ), 0                         , 4  , 0  , 9092 , 146, 88 ), // #554
+  INST(Pmaxsb           , ExtRm              , O(660F38,3C,_,_,_,_,_,_  ), 0                         , 2  , 0  , 9123 , 11 , 12 ), // #555
+  INST(Pmaxsd           , ExtRm              , O(660F38,3D,_,_,_,_,_,_  ), 0                         , 2  , 0  , 9131 , 11 , 12 ), // #556
+  INST(Pmaxsw           , ExtRm_P            , O(000F00,EE,_,_,_,_,_,_  ), 0                         , 4  , 0  , 9147 , 148, 94 ), // #557
+  INST(Pmaxub           , ExtRm_P            , O(000F00,DE,_,_,_,_,_,_  ), 0                         , 4  , 0  , 9155 , 148, 94 ), // #558
+  INST(Pmaxud           , ExtRm              , O(660F38,3F,_,_,_,_,_,_  ), 0                         , 2  , 0  , 9163 , 11 , 12 ), // #559
+  INST(Pmaxuw           , ExtRm              , O(660F38,3E,_,_,_,_,_,_  ), 0                         , 2  , 0  , 9179 , 11 , 12 ), // #560
+  INST(Pminsb           , ExtRm              , O(660F38,38,_,_,_,_,_,_  ), 0                         , 2  , 0  , 9187 , 11 , 12 ), // #561
+  INST(Pminsd           , ExtRm              , O(660F38,39,_,_,_,_,_,_  ), 0                         , 2  , 0  , 9195 , 11 , 12 ), // #562
+  INST(Pminsw           , ExtRm_P            , O(000F00,EA,_,_,_,_,_,_  ), 0                         , 4  , 0  , 9211 , 148, 94 ), // #563
+  INST(Pminub           , ExtRm_P            , O(000F00,DA,_,_,_,_,_,_  ), 0                         , 4  , 0  , 9219 , 148, 94 ), // #564
+  INST(Pminud           , ExtRm              , O(660F38,3B,_,_,_,_,_,_  ), 0                         , 2  , 0  , 9227 , 11 , 12 ), // #565
+  INST(Pminuw           , ExtRm              , O(660F38,3A,_,_,_,_,_,_  ), 0                         , 2  , 0  , 9243 , 11 , 12 ), // #566
+  INST(Pmovmskb         , ExtRm_P            , O(000F00,D7,_,_,_,_,_,_  ), 0                         , 4  , 0  , 9321 , 163, 94 ), // #567
+  INST(Pmovsxbd         , ExtRm              , O(660F38,21,_,_,_,_,_,_  ), 0                         , 2  , 0  , 9418 , 7  , 12 ), // #568
+  INST(Pmovsxbq         , ExtRm              , O(660F38,22,_,_,_,_,_,_  ), 0                         , 2  , 0  , 9428 , 164, 12 ), // #569
+  INST(Pmovsxbw         , ExtRm              , O(660F38,20,_,_,_,_,_,_  ), 0                         , 2  , 0  , 9438 , 6  , 12 ), // #570
+  INST(Pmovsxdq         , ExtRm              , O(660F38,25,_,_,_,_,_,_  ), 0                         , 2  , 0  , 9448 , 6  , 12 ), // #571
+  INST(Pmovsxwd         , ExtRm              , O(660F38,23,_,_,_,_,_,_  ), 0                         , 2  , 0  , 9458 , 6  , 12 ), // #572
+  INST(Pmovsxwq         , ExtRm              , O(660F38,24,_,_,_,_,_,_  ), 0                         , 2  , 0  , 9468 , 7  , 12 ), // #573
+  INST(Pmovzxbd         , ExtRm              , O(660F38,31,_,_,_,_,_,_  ), 0                         , 2  , 0  , 9555 , 7  , 12 ), // #574
+  INST(Pmovzxbq         , ExtRm              , O(660F38,32,_,_,_,_,_,_  ), 0                         , 2  , 0  , 9565 , 164, 12 ), // #575
+  INST(Pmovzxbw         , ExtRm              , O(660F38,30,_,_,_,_,_,_  ), 0                         , 2  , 0  , 9575 , 6  , 12 ), // #576
+  INST(Pmovzxdq         , ExtRm              , O(660F38,35,_,_,_,_,_,_  ), 0                         , 2  , 0  , 9585 , 6  , 12 ), // #577
+  INST(Pmovzxwd         , ExtRm              , O(660F38,33,_,_,_,_,_,_  ), 0                         , 2  , 0  , 9595 , 6  , 12 ), // #578
+  INST(Pmovzxwq         , ExtRm              , O(660F38,34,_,_,_,_,_,_  ), 0                         , 2  , 0  , 9605 , 7  , 12 ), // #579
+  INST(Pmuldq           , ExtRm              , O(660F38,28,_,_,_,_,_,_  ), 0                         , 2  , 0  , 9615 , 5  , 12 ), // #580
+  INST(Pmulhrsw         , ExtRm_P            , O(000F38,0B,_,_,_,_,_,_  ), 0                         , 83 , 0  , 9623 , 146, 93 ), // #581
+  INST(Pmulhrw          , Ext3dNow           , O(000F0F,B7,_,_,_,_,_,_  ), 0                         , 87 , 0  , 2265 , 150, 51 ), // #582
+  INST(Pmulhuw          , ExtRm_P            , O(000F00,E4,_,_,_,_,_,_  ), 0                         , 4  , 0  , 9633 , 146, 94 ), // #583
+  INST(Pmulhw           , ExtRm_P            , O(000F00,E5,_,_,_,_,_,_  ), 0                         , 4  , 0  , 9642 , 146, 88 ), // #584
+  INST(Pmulld           , ExtRm              , O(660F38,40,_,_,_,_,_,_  ), 0                         , 2  , 0  , 9650 , 5  , 12 ), // #585
+  INST(Pmullw           , ExtRm_P            , O(000F00,D5,_,_,_,_,_,_  ), 0                         , 4  , 0  , 9666 , 146, 88 ), // #586
+  INST(Pmuludq          , ExtRm_P            , O(000F00,F4,_,_,_,_,_,_  ), 0                         , 4  , 0  , 9689 , 146, 4  ), // #587
+  INST(Pop              , X86Pop             , O(000000,8F,0,_,_,_,_,_  ), O(000000,58,_,_,_,_,_,_  ), 0  , 67 , 2273 , 165, 0  ), // #588
+  INST(Popa             , X86Op              , O(660000,61,_,_,_,_,_,_  ), 0                         , 19 , 0  , 2277 , 81 , 0  ), // #589
+  INST(Popad            , X86Op              , O(000000,61,_,_,_,_,_,_  ), 0                         , 0  , 0  , 2282 , 81 , 0  ), // #590
+  INST(Popcnt           , X86Rm_Raw66H       , O(F30F00,B8,_,_,x,_,_,_  ), 0                         , 6  , 0  , 2288 , 22 , 101), // #591
+  INST(Popf             , X86Op              , O(660000,9D,_,_,_,_,_,_  ), 0                         , 19 , 0  , 2295 , 30 , 102), // #592
+  INST(Popfd            , X86Op              , O(000000,9D,_,_,_,_,_,_  ), 0                         , 0  , 0  , 2300 , 81 , 102), // #593
+  INST(Popfq            , X86Op              , O(000000,9D,_,_,_,_,_,_  ), 0                         , 0  , 0  , 2306 , 33 , 102), // #594
+  INST(Por              , ExtRm_P            , O(000F00,EB,_,_,_,_,_,_  ), 0                         , 4  , 0  , 9734 , 148, 88 ), // #595
+  INST(Prefetch         , X86M_Only          , O(000F00,0D,0,_,_,_,_,_  ), 0                         , 4  , 0  , 2312 , 31 , 51 ), // #596
+  INST(Prefetchnta      , X86M_Only          , O(000F00,18,0,_,_,_,_,_  ), 0                         , 4  , 0  , 2321 , 31 , 80 ), // #597
+  INST(Prefetcht0       , X86M_Only          , O(000F00,18,1,_,_,_,_,_  ), 0                         , 29 , 0  , 2333 , 31 , 80 ), // #598
+  INST(Prefetcht1       , X86M_Only          , O(000F00,18,2,_,_,_,_,_  ), 0                         , 76 , 0  , 2344 , 31 , 80 ), // #599
+  INST(Prefetcht2       , X86M_Only          , O(000F00,18,3,_,_,_,_,_  ), 0                         , 78 , 0  , 2355 , 31 , 80 ), // #600
+  INST(Prefetchw        , X86M_Only          , O(000F00,0D,1,_,_,_,_,_  ), 0                         , 29 , 0  , 2366 , 31 , 103), // #601
+  INST(Prefetchwt1      , X86M_Only          , O(000F00,0D,2,_,_,_,_,_  ), 0                         , 76 , 0  , 2376 , 31 , 104), // #602
+  INST(Psadbw           , ExtRm_P            , O(000F00,F6,_,_,_,_,_,_  ), 0                         , 4  , 0  , 4644 , 146, 94 ), // #603
+  INST(Pshufb           , ExtRm_P            , O(000F38,00,_,_,_,_,_,_  ), 0                         , 83 , 0  , 10060, 146, 93 ), // #604
+  INST(Pshufd           , ExtRmi             , O(660F00,70,_,_,_,_,_,_  ), 0                         , 3  , 0  , 10081, 8  , 4  ), // #605
+  INST(Pshufhw          , ExtRmi             , O(F30F00,70,_,_,_,_,_,_  ), 0                         , 6  , 0  , 10089, 8  , 4  ), // #606
+  INST(Pshuflw          , ExtRmi             , O(F20F00,70,_,_,_,_,_,_  ), 0                         , 5  , 0  , 10098, 8  , 4  ), // #607
+  INST(Pshufw           , ExtRmi_P           , O(000F00,70,_,_,_,_,_,_  ), 0                         , 4  , 0  , 2388 , 166, 80 ), // #608
+  INST(Psignb           , ExtRm_P            , O(000F38,08,_,_,_,_,_,_  ), 0                         , 83 , 0  , 10107, 146, 93 ), // #609
+  INST(Psignd           , ExtRm_P            , O(000F38,0A,_,_,_,_,_,_  ), 0                         , 83 , 0  , 10115, 146, 93 ), // #610
+  INST(Psignw           , ExtRm_P            , O(000F38,09,_,_,_,_,_,_  ), 0                         , 83 , 0  , 10123, 146, 93 ), // #611
+  INST(Pslld            , ExtRmRi_P          , O(000F00,F2,_,_,_,_,_,_  ), O(000F00,72,6,_,_,_,_,_  ), 4  , 68 , 10131, 167, 88 ), // #612
+  INST(Pslldq           , ExtRmRi            , 0                         , O(660F00,73,7,_,_,_,_,_  ), 0  , 69 , 10138, 168, 4  ), // #613
+  INST(Psllq            , ExtRmRi_P          , O(000F00,F3,_,_,_,_,_,_  ), O(000F00,73,6,_,_,_,_,_  ), 4  , 70 , 10146, 167, 88 ), // #614
+  INST(Psllw            , ExtRmRi_P          , O(000F00,F1,_,_,_,_,_,_  ), O(000F00,71,6,_,_,_,_,_  ), 4  , 71 , 10177, 167, 88 ), // #615
+  INST(Psmash           , X86Op              , O(F30F01,FF,_,_,_,_,_,_  ), 0                         , 25 , 0  , 2395 , 33 , 105), // #616
+  INST(Psrad            , ExtRmRi_P          , O(000F00,E2,_,_,_,_,_,_  ), O(000F00,72,4,_,_,_,_,_  ), 4  , 72 , 10184, 167, 88 ), // #617
+  INST(Psraw            , ExtRmRi_P          , O(000F00,E1,_,_,_,_,_,_  ), O(000F00,71,4,_,_,_,_,_  ), 4  , 73 , 10222, 167, 88 ), // #618
+  INST(Psrld            , ExtRmRi_P          , O(000F00,D2,_,_,_,_,_,_  ), O(000F00,72,2,_,_,_,_,_  ), 4  , 74 , 10229, 167, 88 ), // #619
+  INST(Psrldq           , ExtRmRi            , 0                         , O(660F00,73,3,_,_,_,_,_  ), 0  , 75 , 10236, 168, 4  ), // #620
+  INST(Psrlq            , ExtRmRi_P          , O(000F00,D3,_,_,_,_,_,_  ), O(000F00,73,2,_,_,_,_,_  ), 4  , 76 , 10244, 167, 88 ), // #621
+  INST(Psrlw            , ExtRmRi_P          , O(000F00,D1,_,_,_,_,_,_  ), O(000F00,71,2,_,_,_,_,_  ), 4  , 77 , 10275, 167, 88 ), // #622
+  INST(Psubb            , ExtRm_P            , O(000F00,F8,_,_,_,_,_,_  ), 0                         , 4  , 0  , 10282, 149, 88 ), // #623
+  INST(Psubd            , ExtRm_P            , O(000F00,FA,_,_,_,_,_,_  ), 0                         , 4  , 0  , 10289, 149, 88 ), // #624
+  INST(Psubq            , ExtRm_P            , O(000F00,FB,_,_,_,_,_,_  ), 0                         , 4  , 0  , 10296, 149, 4  ), // #625
+  INST(Psubsb           , ExtRm_P            , O(000F00,E8,_,_,_,_,_,_  ), 0                         , 4  , 0  , 10303, 149, 88 ), // #626
+  INST(Psubsw           , ExtRm_P            , O(000F00,E9,_,_,_,_,_,_  ), 0                         , 4  , 0  , 10311, 149, 88 ), // #627
+  INST(Psubusb          , ExtRm_P            , O(000F00,D8,_,_,_,_,_,_  ), 0                         , 4  , 0  , 10319, 149, 88 ), // #628
+  INST(Psubusw          , ExtRm_P            , O(000F00,D9,_,_,_,_,_,_  ), 0                         , 4  , 0  , 10328, 149, 88 ), // #629
+  INST(Psubw            , ExtRm_P            , O(000F00,F9,_,_,_,_,_,_  ), 0                         , 4  , 0  , 10337, 149, 88 ), // #630
+  INST(Pswapd           , Ext3dNow           , O(000F0F,BB,_,_,_,_,_,_  ), 0                         , 87 , 0  , 2402 , 150, 99 ), // #631
+  INST(Ptest            , ExtRm              , O(660F38,17,_,_,_,_,_,_  ), 0                         , 2  , 0  , 10366, 5  , 106), // #632
+  INST(Ptwrite          , X86M               , O(F30F00,AE,4,_,_,_,_,_  ), 0                         , 91 , 0  , 2409 , 169, 107), // #633
+  INST(Punpckhbw        , ExtRm_P            , O(000F00,68,_,_,_,_,_,_  ), 0                         , 4  , 0  , 10449, 146, 88 ), // #634
+  INST(Punpckhdq        , ExtRm_P            , O(000F00,6A,_,_,_,_,_,_  ), 0                         , 4  , 0  , 10460, 146, 88 ), // #635
+  INST(Punpckhqdq       , ExtRm              , O(660F00,6D,_,_,_,_,_,_  ), 0                         , 3  , 0  , 10471, 5  , 4  ), // #636
+  INST(Punpckhwd        , ExtRm_P            , O(000F00,69,_,_,_,_,_,_  ), 0                         , 4  , 0  , 10483, 146, 88 ), // #637
+  INST(Punpcklbw        , ExtRm_P            , O(000F00,60,_,_,_,_,_,_  ), 0                         , 4  , 0  , 10494, 170, 88 ), // #638
+  INST(Punpckldq        , ExtRm_P            , O(000F00,62,_,_,_,_,_,_  ), 0                         , 4  , 0  , 10505, 170, 88 ), // #639
+  INST(Punpcklqdq       , ExtRm              , O(660F00,6C,_,_,_,_,_,_  ), 0                         , 3  , 0  , 10516, 5  , 4  ), // #640
+  INST(Punpcklwd        , ExtRm_P            , O(000F00,61,_,_,_,_,_,_  ), 0                         , 4  , 0  , 10528, 170, 88 ), // #641
+  INST(Push             , X86Push            , O(000000,FF,6,_,_,_,_,_  ), O(000000,50,_,_,_,_,_,_  ), 32 , 78 , 2417 , 171, 0  ), // #642
+  INST(Pusha            , X86Op              , O(660000,60,_,_,_,_,_,_  ), 0                         , 19 , 0  , 2422 , 81 , 0  ), // #643
+  INST(Pushad           , X86Op              , O(000000,60,_,_,_,_,_,_  ), 0                         , 0  , 0  , 2428 , 81 , 0  ), // #644
+  INST(Pushf            , X86Op              , O(660000,9C,_,_,_,_,_,_  ), 0                         , 19 , 0  , 2435 , 30 , 108), // #645
+  INST(Pushfd           , X86Op              , O(000000,9C,_,_,_,_,_,_  ), 0                         , 0  , 0  , 2441 , 81 , 108), // #646
+  INST(Pushfq           , X86Op              , O(000000,9C,_,_,_,_,_,_  ), 0                         , 0  , 0  , 2448 , 33 , 108), // #647
+  INST(Pvalidate        , X86Op              , O(F20F01,FF,_,_,_,_,_,_  ), 0                         , 92 , 0  , 2455 , 30 , 109), // #648
+  INST(Pxor             , ExtRm_P            , O(000F00,EF,_,_,_,_,_,_  ), 0                         , 4  , 0  , 10539, 149, 88 ), // #649
+  INST(Rcl              , X86Rot             , O(000000,D0,2,_,x,_,_,_  ), 0                         , 1  , 0  , 2465 , 172, 110), // #650
+  INST(Rcpps            , ExtRm              , O(000F00,53,_,_,_,_,_,_  ), 0                         , 4  , 0  , 10674, 5  , 5  ), // #651
+  INST(Rcpss            , ExtRm              , O(F30F00,53,_,_,_,_,_,_  ), 0                         , 6  , 0  , 10688, 7  , 5  ), // #652
+  INST(Rcr              , X86Rot             , O(000000,D0,3,_,x,_,_,_  ), 0                         , 75 , 0  , 2469 , 172, 110), // #653
+  INST(Rdfsbase         , X86M               , O(F30F00,AE,0,_,x,_,_,_  ), 0                         , 6  , 0  , 2473 , 173, 111), // #654
+  INST(Rdgsbase         , X86M               , O(F30F00,AE,1,_,x,_,_,_  ), 0                         , 93 , 0  , 2482 , 173, 111), // #655
+  INST(Rdmsr            , X86Op              , O(000F00,32,_,_,_,_,_,_  ), 0                         , 4  , 0  , 2491 , 174, 112), // #656
+  INST(Rdpid            , X86R_Native        , O(F30F00,C7,7,_,_,_,_,_  ), 0                         , 94 , 0  , 2497 , 175, 113), // #657
+  INST(Rdpkru           , X86Op              , O(000F01,EE,_,_,_,_,_,_  ), 0                         , 21 , 0  , 2503 , 174, 114), // #658
+  INST(Rdpmc            , X86Op              , O(000F00,33,_,_,_,_,_,_  ), 0                         , 4  , 0  , 2510 , 174, 0  ), // #659
+  INST(Rdpru            , X86Op              , O(000F01,FD,_,_,_,_,_,_  ), 0                         , 21 , 0  , 2516 , 174, 115), // #660
+  INST(Rdrand           , X86M               , O(000F00,C7,6,_,x,_,_,_  ), 0                         , 80 , 0  , 2522 , 23 , 116), // #661
+  INST(Rdseed           , X86M               , O(000F00,C7,7,_,x,_,_,_  ), 0                         , 22 , 0  , 2529 , 23 , 117), // #662
+  INST(Rdsspd           , X86M               , O(F30F00,1E,1,_,_,_,_,_  ), 0                         , 93 , 0  , 2536 , 76 , 56 ), // #663
+  INST(Rdsspq           , X86M               , O(F30F00,1E,1,_,_,_,_,_  ), 0                         , 93 , 0  , 2543 , 77 , 56 ), // #664
+  INST(Rdtsc            , X86Op              , O(000F00,31,_,_,_,_,_,_  ), 0                         , 4  , 0  , 2550 , 28 , 118), // #665
+  INST(Rdtscp           , X86Op              , O(000F01,F9,_,_,_,_,_,_  ), 0                         , 21 , 0  , 2556 , 174, 119), // #666
+  INST(Ret              , X86Ret             , O(000000,C2,_,_,_,_,_,_  ), 0                         , 0  , 0  , 3072 , 176, 0  ), // #667
+  INST(Retf             , X86Ret             , O(000000,CA,_,_,x,_,_,_  ), 0                         , 0  , 0  , 2563 , 177, 0  ), // #668
+  INST(Rmpadjust        , X86Op              , O(F30F01,FE,_,_,_,_,_,_  ), 0                         , 25 , 0  , 2568 , 33 , 105), // #669
+  INST(Rmpupdate        , X86Op              , O(F20F01,FE,_,_,_,_,_,_  ), 0                         , 92 , 0  , 2578 , 33 , 105), // #670
+  INST(Rol              , X86Rot             , O(000000,D0,0,_,x,_,_,_  ), 0                         , 0  , 0  , 2588 , 172, 120), // #671
+  INST(Ror              , X86Rot             , O(000000,D0,1,_,x,_,_,_  ), 0                         , 31 , 0  , 2592 , 172, 120), // #672
+  INST(Rorx             , VexRmi_Wx          , V(F20F3A,F0,_,0,x,_,_,_  ), 0                         , 95 , 0  , 2596 , 178, 92 ), // #673
+  INST(Roundpd          , ExtRmi             , O(660F3A,09,_,_,_,_,_,_  ), 0                         , 8  , 0  , 10827, 8  , 12 ), // #674
+  INST(Roundps          , ExtRmi             , O(660F3A,08,_,_,_,_,_,_  ), 0                         , 8  , 0  , 10836, 8  , 12 ), // #675
+  INST(Roundsd          , ExtRmi             , O(660F3A,0B,_,_,_,_,_,_  ), 0                         , 8  , 0  , 10845, 37 , 12 ), // #676
+  INST(Roundss          , ExtRmi             , O(660F3A,0A,_,_,_,_,_,_  ), 0                         , 8  , 0  , 10854, 38 , 12 ), // #677
+  INST(Rsm              , X86Op              , O(000F00,AA,_,_,_,_,_,_  ), 0                         , 4  , 0  , 2601 , 81 , 1  ), // #678
+  INST(Rsqrtps          , ExtRm              , O(000F00,52,_,_,_,_,_,_  ), 0                         , 4  , 0  , 10960, 5  , 5  ), // #679
+  INST(Rsqrtss          , ExtRm              , O(F30F00,52,_,_,_,_,_,_  ), 0                         , 6  , 0  , 10978, 7  , 5  ), // #680
+  INST(Rstorssp         , X86M_Only          , O(F30F00,01,5,_,_,_,_,_  ), 0                         , 63 , 0  , 2605 , 32 , 24 ), // #681
+  INST(Sahf             , X86Op              , O(000000,9E,_,_,_,_,_,_  ), 0                         , 0  , 0  , 2614 , 97 , 121), // #682
+  INST(Sal              , X86Rot             , O(000000,D0,4,_,x,_,_,_  ), 0                         , 9  , 0  , 2619 , 172, 1  ), // #683
+  INST(Sar              , X86Rot             , O(000000,D0,7,_,x,_,_,_  ), 0                         , 27 , 0  , 2623 , 172, 1  ), // #684
+  INST(Sarx             , VexRmv_Wx          , V(F30F38,F7,_,0,x,_,_,_  ), 0                         , 88 , 0  , 2627 , 13 , 92 ), // #685
+  INST(Saveprevssp      , X86Op              , O(F30F01,EA,_,_,_,_,_,_  ), 0                         , 25 , 0  , 2632 , 30 , 24 ), // #686
+  INST(Sbb              , X86Arith           , O(000000,18,3,_,x,_,_,_  ), 0                         , 75 , 0  , 2644 , 179, 2  ), // #687
+  INST(Scas             , X86StrRm           , O(000000,AE,_,_,_,_,_,_  ), 0                         , 0  , 0  , 2648 , 180, 37 ), // #688
+  INST(Senduipi         , X86M_NoSize        , O(F30F00,C7,6,_,_,_,_,_  ), 0                         , 24 , 0  , 2653 , 77 , 25 ), // #689
+  INST(Serialize        , X86Op              , O(000F01,E8,_,_,_,_,_,_  ), 0                         , 21 , 0  , 2662 , 30 , 122), // #690
+  INST(Seta             , X86Set             , O(000F00,97,_,_,_,_,_,_  ), 0                         , 4  , 0  , 2672 , 181, 59 ), // #691
+  INST(Setae            , X86Set             , O(000F00,93,_,_,_,_,_,_  ), 0                         , 4  , 0  , 2677 , 181, 60 ), // #692
+  INST(Setb             , X86Set             , O(000F00,92,_,_,_,_,_,_  ), 0                         , 4  , 0  , 2683 , 181, 60 ), // #693
+  INST(Setbe            , X86Set             , O(000F00,96,_,_,_,_,_,_  ), 0                         , 4  , 0  , 2688 , 181, 59 ), // #694
+  INST(Setc             , X86Set             , O(000F00,92,_,_,_,_,_,_  ), 0                         , 4  , 0  , 2694 , 181, 60 ), // #695
+  INST(Sete             , X86Set             , O(000F00,94,_,_,_,_,_,_  ), 0                         , 4  , 0  , 2699 , 181, 61 ), // #696
+  INST(Setg             , X86Set             , O(000F00,9F,_,_,_,_,_,_  ), 0                         , 4  , 0  , 2704 , 181, 62 ), // #697
+  INST(Setge            , X86Set             , O(000F00,9D,_,_,_,_,_,_  ), 0                         , 4  , 0  , 2709 , 181, 63 ), // #698
+  INST(Setl             , X86Set             , O(000F00,9C,_,_,_,_,_,_  ), 0                         , 4  , 0  , 2715 , 181, 63 ), // #699
+  INST(Setle            , X86Set             , O(000F00,9E,_,_,_,_,_,_  ), 0                         , 4  , 0  , 2720 , 181, 62 ), // #700
+  INST(Setna            , X86Set             , O(000F00,96,_,_,_,_,_,_  ), 0                         , 4  , 0  , 2726 , 181, 59 ), // #701
+  INST(Setnae           , X86Set             , O(000F00,92,_,_,_,_,_,_  ), 0                         , 4  , 0  , 2732 , 181, 60 ), // #702
+  INST(Setnb            , X86Set             , O(000F00,93,_,_,_,_,_,_  ), 0                         , 4  , 0  , 2739 , 181, 60 ), // #703
+  INST(Setnbe           , X86Set             , O(000F00,97,_,_,_,_,_,_  ), 0                         , 4  , 0  , 2745 , 181, 59 ), // #704
+  INST(Setnc            , X86Set             , O(000F00,93,_,_,_,_,_,_  ), 0                         , 4  , 0  , 2752 , 181, 60 ), // #705
+  INST(Setne            , X86Set             , O(000F00,95,_,_,_,_,_,_  ), 0                         , 4  , 0  , 2758 , 181, 61 ), // #706
+  INST(Setng            , X86Set             , O(000F00,9E,_,_,_,_,_,_  ), 0                         , 4  , 0  , 2764 , 181, 62 ), // #707
+  INST(Setnge           , X86Set             , O(000F00,9C,_,_,_,_,_,_  ), 0                         , 4  , 0  , 2770 , 181, 63 ), // #708
+  INST(Setnl            , X86Set             , O(000F00,9D,_,_,_,_,_,_  ), 0                         , 4  , 0  , 2777 , 181, 63 ), // #709
+  INST(Setnle           , X86Set             , O(000F00,9F,_,_,_,_,_,_  ), 0                         , 4  , 0  , 2783 , 181, 62 ), // #710
+  INST(Setno            , X86Set             , O(000F00,91,_,_,_,_,_,_  ), 0                         , 4  , 0  , 2790 , 181, 57 ), // #711
+  INST(Setnp            , X86Set             , O(000F00,9B,_,_,_,_,_,_  ), 0                         , 4  , 0  , 2796 , 181, 64 ), // #712
+  INST(Setns            , X86Set             , O(000F00,99,_,_,_,_,_,_  ), 0                         , 4  , 0  , 2802 , 181, 65 ), // #713
+  INST(Setnz            , X86Set             , O(000F00,95,_,_,_,_,_,_  ), 0                         , 4  , 0  , 2808 , 181, 61 ), // #714
+  INST(Seto             , X86Set             , O(000F00,90,_,_,_,_,_,_  ), 0                         , 4  , 0  , 2814 , 181, 57 ), // #715
+  INST(Setp             , X86Set             , O(000F00,9A,_,_,_,_,_,_  ), 0                         , 4  , 0  , 2819 , 181, 64 ), // #716
+  INST(Setpe            , X86Set             , O(000F00,9A,_,_,_,_,_,_  ), 0                         , 4  , 0  , 2824 , 181, 64 ), // #717
+  INST(Setpo            , X86Set             , O(000F00,9B,_,_,_,_,_,_  ), 0                         , 4  , 0  , 2830 , 181, 64 ), // #718
+  INST(Sets             , X86Set             , O(000F00,98,_,_,_,_,_,_  ), 0                         , 4  , 0  , 2836 , 181, 65 ), // #719
+  INST(Setssbsy         , X86Op              , O(F30F01,E8,_,_,_,_,_,_  ), 0                         , 25 , 0  , 2841 , 30 , 56 ), // #720
+  INST(Setz             , X86Set             , O(000F00,94,_,_,_,_,_,_  ), 0                         , 4  , 0  , 2850 , 181, 61 ), // #721
+  INST(Sfence           , X86Fence           , O(000F00,AE,7,_,_,_,_,_  ), 0                         , 22 , 0  , 2855 , 30 , 80 ), // #722
+  INST(Sgdt             , X86M_Only          , O(000F00,01,0,_,_,_,_,_  ), 0                         , 4  , 0  , 2862 , 69 , 0  ), // #723
+  INST(Sha1msg1         , ExtRm              , O(000F38,C9,_,_,_,_,_,_  ), 0                         , 83 , 0  , 2867 , 5  , 123), // #724
+  INST(Sha1msg2         , ExtRm              , O(000F38,CA,_,_,_,_,_,_  ), 0                         , 83 , 0  , 2876 , 5  , 123), // #725
+  INST(Sha1nexte        , ExtRm              , O(000F38,C8,_,_,_,_,_,_  ), 0                         , 83 , 0  , 2885 , 5  , 123), // #726
+  INST(Sha1rnds4        , ExtRmi             , O(000F3A,CC,_,_,_,_,_,_  ), 0                         , 85 , 0  , 2895 , 8  , 123), // #727
+  INST(Sha256msg1       , ExtRm              , O(000F38,CC,_,_,_,_,_,_  ), 0                         , 83 , 0  , 2905 , 5  , 123), // #728
+  INST(Sha256msg2       , ExtRm              , O(000F38,CD,_,_,_,_,_,_  ), 0                         , 83 , 0  , 2916 , 5  , 123), // #729
+  INST(Sha256rnds2      , ExtRm_XMM0         , O(000F38,CB,_,_,_,_,_,_  ), 0                         , 83 , 0  , 2927 , 15 , 123), // #730
+  INST(Shl              , X86Rot             , O(000000,D0,4,_,x,_,_,_  ), 0                         , 9  , 0  , 2939 , 172, 1  ), // #731
+  INST(Shld             , X86ShldShrd        , O(000F00,A4,_,_,x,_,_,_  ), 0                         , 4  , 0  , 9938 , 182, 1  ), // #732
+  INST(Shlx             , VexRmv_Wx          , V(660F38,F7,_,0,x,_,_,_  ), 0                         , 96 , 0  , 2943 , 13 , 92 ), // #733
+  INST(Shr              , X86Rot             , O(000000,D0,5,_,x,_,_,_  ), 0                         , 62 , 0  , 2948 , 172, 1  ), // #734
+  INST(Shrd             , X86ShldShrd        , O(000F00,AC,_,_,x,_,_,_  ), 0                         , 4  , 0  , 2952 , 182, 1  ), // #735
+  INST(Shrx             , VexRmv_Wx          , V(F20F38,F7,_,0,x,_,_,_  ), 0                         , 84 , 0  , 2957 , 13 , 92 ), // #736
+  INST(Shufpd           , ExtRmi             , O(660F00,C6,_,_,_,_,_,_  ), 0                         , 3  , 0  , 11259, 8  , 4  ), // #737
+  INST(Shufps           , ExtRmi             , O(000F00,C6,_,_,_,_,_,_  ), 0                         , 4  , 0  , 11267, 8  , 5  ), // #738
+  INST(Sidt             , X86M_Only          , O(000F00,01,1,_,_,_,_,_  ), 0                         , 29 , 0  , 2962 , 69 , 0  ), // #739
+  INST(Skinit           , X86Op_xAX          , O(000F01,DE,_,_,_,_,_,_  ), 0                         , 21 , 0  , 2967 , 52 , 124), // #740
+  INST(Sldt             , X86M_NoMemSize     , O(000F00,00,0,_,_,_,_,_  ), 0                         , 4  , 0  , 2974 , 183, 0  ), // #741
+  INST(Slwpcb           , VexR_Wx            , V(XOP_M9,12,1,0,x,_,_,_  ), 0                         , 11 , 0  , 2979 , 108, 77 ), // #742
+  INST(Smsw             , X86M_NoMemSize     , O(000F00,01,4,_,_,_,_,_  ), 0                         , 97 , 0  , 2986 , 183, 0  ), // #743
+  INST(Sqrtpd           , ExtRm              , O(660F00,51,_,_,_,_,_,_  ), 0                         , 3  , 0  , 11275, 5  , 4  ), // #744
+  INST(Sqrtps           , ExtRm              , O(000F00,51,_,_,_,_,_,_  ), 0                         , 4  , 0  , 10961, 5  , 5  ), // #745
+  INST(Sqrtsd           , ExtRm              , O(F20F00,51,_,_,_,_,_,_  ), 0                         , 5  , 0  , 11299, 6  , 4  ), // #746
+  INST(Sqrtss           , ExtRm              , O(F30F00,51,_,_,_,_,_,_  ), 0                         , 6  , 0  , 10979, 7  , 5  ), // #747
+  INST(Stac             , X86Op              , O(000F01,CB,_,_,_,_,_,_  ), 0                         , 21 , 0  , 2991 , 30 , 16 ), // #748
+  INST(Stc              , X86Op              , O(000000,F9,_,_,_,_,_,_  ), 0                         , 0  , 0  , 2996 , 30 , 17 ), // #749
+  INST(Std              , X86Op              , O(000000,FD,_,_,_,_,_,_  ), 0                         , 0  , 0  , 7921 , 30 , 18 ), // #750
+  INST(Stgi             , X86Op              , O(000F01,DC,_,_,_,_,_,_  ), 0                         , 21 , 0  , 3000 , 30 , 124), // #751
+  INST(Sti              , X86Op              , O(000000,FB,_,_,_,_,_,_  ), 0                         , 0  , 0  , 3005 , 30 , 23 ), // #752
+  INST(Stmxcsr          , X86M_Only          , O(000F00,AE,3,_,_,_,_,_  ), 0                         , 78 , 0  , 11323, 101, 5  ), // #753
+  INST(Stos             , X86StrMr           , O(000000,AA,_,_,_,_,_,_  ), 0                         , 0  , 0  , 3009 , 184, 78 ), // #754
+  INST(Str              , X86M_NoMemSize     , O(000F00,00,1,_,_,_,_,_  ), 0                         , 29 , 0  , 3014 , 183, 0  ), // #755
+  INST(Sttilecfg        , AmxCfg             , V(660F38,49,_,0,0,_,_,_  ), 0                         , 96 , 0  , 3018 , 103, 76 ), // #756
+  INST(Stui             , X86Op              , O(F30F01,EF,_,_,_,_,_,_  ), 0                         , 25 , 0  , 3135 , 33 , 25 ), // #757
+  INST(Sub              , X86Arith           , O(000000,28,5,_,x,_,_,_  ), 0                         , 62 , 0  , 866  , 179, 1  ), // #758
+  INST(Subpd            , ExtRm              , O(660F00,5C,_,_,_,_,_,_  ), 0                         , 3  , 0  , 5413 , 5  , 4  ), // #759
+  INST(Subps            , ExtRm              , O(000F00,5C,_,_,_,_,_,_  ), 0                         , 4  , 0  , 5425 , 5  , 5  ), // #760
+  INST(Subsd            , ExtRm              , O(F20F00,5C,_,_,_,_,_,_  ), 0                         , 5  , 0  , 6392 , 6  , 4  ), // #761
+  INST(Subss            , ExtRm              , O(F30F00,5C,_,_,_,_,_,_  ), 0                         , 6  , 0  , 6402 , 7  , 5  ), // #762
+  INST(Swapgs           , X86Op              , O(000F01,F8,_,_,_,_,_,_  ), 0                         , 21 , 0  , 3028 , 33 , 0  ), // #763
+  INST(Syscall          , X86Op              , O(000F00,05,_,_,_,_,_,_  ), 0                         , 4  , 0  , 3035 , 33 , 0  ), // #764
+  INST(Sysenter         , X86Op              , O(000F00,34,_,_,_,_,_,_  ), 0                         , 4  , 0  , 3043 , 30 , 0  ), // #765
+  INST(Sysexit          , X86Op              , O(000F00,35,_,_,_,_,_,_  ), 0                         , 4  , 0  , 3052 , 30 , 0  ), // #766
+  INST(Sysexitq         , X86Op              , O(000F00,35,_,_,1,_,_,_  ), 0                         , 60 , 0  , 3060 , 30 , 0  ), // #767
+  INST(Sysret           , X86Op              , O(000F00,07,_,_,_,_,_,_  ), 0                         , 4  , 0  , 3069 , 33 , 0  ), // #768
+  INST(Sysretq          , X86Op              , O(000F00,07,_,_,1,_,_,_  ), 0                         , 60 , 0  , 3076 , 33 , 0  ), // #769
+  INST(T1mskc           , VexVm_Wx           , V(XOP_M9,01,7,0,x,_,_,_  ), 0                         , 98 , 0  , 3084 , 14 , 11 ), // #770
+  INST(Tdpbf16ps        , AmxRmv             , V(F30F38,5C,_,0,0,_,_,_  ), 0                         , 88 , 0  , 3091 , 185, 125), // #771
+  INST(Tdpbssd          , AmxRmv             , V(F20F38,5E,_,0,0,_,_,_  ), 0                         , 84 , 0  , 3101 , 185, 126), // #772
+  INST(Tdpbsud          , AmxRmv             , V(F30F38,5E,_,0,0,_,_,_  ), 0                         , 88 , 0  , 3109 , 185, 126), // #773
+  INST(Tdpbusd          , AmxRmv             , V(660F38,5E,_,0,0,_,_,_  ), 0                         , 96 , 0  , 3117 , 185, 126), // #774
+  INST(Tdpbuud          , AmxRmv             , V(000F38,5E,_,0,0,_,_,_  ), 0                         , 10 , 0  , 3125 , 185, 126), // #775
+  INST(Test             , X86Test            , O(000000,84,_,_,x,_,_,_  ), O(000000,F6,_,_,x,_,_,_  ), 0  , 79 , 10367, 186, 1  ), // #776
+  INST(Testui           , X86Op              , O(F30F01,ED,_,_,_,_,_,_  ), 0                         , 25 , 0  , 3133 , 33 , 127), // #777
+  INST(Tileloadd        , AmxRm              , V(F20F38,4B,_,0,0,_,_,_  ), 0                         , 84 , 0  , 3140 , 187, 76 ), // #778
+  INST(Tileloaddt1      , AmxRm              , V(660F38,4B,_,0,0,_,_,_  ), 0                         , 96 , 0  , 3150 , 187, 76 ), // #779
+  INST(Tilerelease      , VexOpMod           , V(000F38,49,0,0,0,_,_,_  ), 0                         , 10 , 0  , 3162 , 188, 76 ), // #780
+  INST(Tilestored       , AmxMr              , V(F30F38,4B,_,0,0,_,_,_  ), 0                         , 88 , 0  , 3174 , 189, 76 ), // #781
+  INST(Tilezero         , AmxR               , V(F20F38,49,_,0,0,_,_,_  ), 0                         , 84 , 0  , 3185 , 190, 76 ), // #782
+  INST(Tpause           , X86R32_EDX_EAX     , O(660F00,AE,6,_,_,_,_,_  ), 0                         , 26 , 0  , 3194 , 191, 128), // #783
+  INST(Tzcnt            , X86Rm_Raw66H       , O(F30F00,BC,_,_,x,_,_,_  ), 0                         , 6  , 0  , 3201 , 22 , 9  ), // #784
+  INST(Tzmsk            , VexVm_Wx           , V(XOP_M9,01,4,0,x,_,_,_  ), 0                         , 99 , 0  , 3207 , 14 , 11 ), // #785
+  INST(Ucomisd          , ExtRm              , O(660F00,2E,_,_,_,_,_,_  ), 0                         , 3  , 0  , 11390, 6  , 41 ), // #786
+  INST(Ucomiss          , ExtRm              , O(000F00,2E,_,_,_,_,_,_  ), 0                         , 4  , 0  , 11408, 7  , 42 ), // #787
+  INST(Ud0              , X86Rm              , O(000F00,FF,_,_,_,_,_,_  ), 0                         , 4  , 0  , 3213 , 192, 0  ), // #788
+  INST(Ud1              , X86Rm              , O(000F00,B9,_,_,_,_,_,_  ), 0                         , 4  , 0  , 3217 , 192, 0  ), // #789
+  INST(Ud2              , X86Op              , O(000F00,0B,_,_,_,_,_,_  ), 0                         , 4  , 0  , 3221 , 30 , 0  ), // #790
+  INST(Uiret            , X86Op              , O(F30F01,EC,_,_,_,_,_,_  ), 0                         , 25 , 0  , 3225 , 33 , 25 ), // #791
+  INST(Umonitor         , X86R_FromM         , O(F30F00,AE,6,_,_,_,_,_  ), 0                         , 24 , 0  , 3231 , 193, 129), // #792
+  INST(Umwait           , X86R32_EDX_EAX     , O(F20F00,AE,6,_,_,_,_,_  ), 0                         , 100, 0  , 3240 , 191, 128), // #793
+  INST(Unpckhpd         , ExtRm              , O(660F00,15,_,_,_,_,_,_  ), 0                         , 3  , 0  , 11417, 5  , 4  ), // #794
+  INST(Unpckhps         , ExtRm              , O(000F00,15,_,_,_,_,_,_  ), 0                         , 4  , 0  , 11427, 5  , 5  ), // #795
+  INST(Unpcklpd         , ExtRm              , O(660F00,14,_,_,_,_,_,_  ), 0                         , 3  , 0  , 11437, 5  , 4  ), // #796
+  INST(Unpcklps         , ExtRm              , O(000F00,14,_,_,_,_,_,_  ), 0                         , 4  , 0  , 11447, 5  , 5  ), // #797
+  INST(V4fmaddps        , VexRm_T1_4X        , E(F20F38,9A,_,2,_,0,4,T4X), 0                         , 101, 0  , 3247 , 194, 130), // #798
+  INST(V4fmaddss        , VexRm_T1_4X        , E(F20F38,9B,_,0,_,0,4,T4X), 0                         , 102, 0  , 3257 , 195, 130), // #799
+  INST(V4fnmaddps       , VexRm_T1_4X        , E(F20F38,AA,_,2,_,0,4,T4X), 0                         , 101, 0  , 3267 , 194, 130), // #800
+  INST(V4fnmaddss       , VexRm_T1_4X        , E(F20F38,AB,_,0,_,0,4,T4X), 0                         , 102, 0  , 3278 , 195, 130), // #801
+  INST(Vaddpd           , VexRvm_Lx          , V(660F00,58,_,x,I,1,4,FV ), 0                         , 103, 0  , 3289 , 196, 131), // #802
+  INST(Vaddph           , VexRvm_Lx          , E(00MAP5,58,_,_,_,0,4,FV ), 0                         , 104, 0  , 3296 , 197, 132), // #803
+  INST(Vaddps           , VexRvm_Lx          , V(000F00,58,_,x,I,0,4,FV ), 0                         , 105, 0  , 3303 , 198, 131), // #804
+  INST(Vaddsd           , VexRvm             , V(F20F00,58,_,I,I,1,3,T1S), 0                         , 106, 0  , 3310 , 199, 133), // #805
+  INST(Vaddsh           , VexRvm             , E(F3MAP5,58,_,_,_,0,1,T1S), 0                         , 107, 0  , 3317 , 200, 134), // #806
+  INST(Vaddss           , VexRvm             , V(F30F00,58,_,I,I,0,2,T1S), 0                         , 108, 0  , 3324 , 201, 133), // #807
+  INST(Vaddsubpd        , VexRvm_Lx          , V(660F00,D0,_,x,I,_,_,_  ), 0                         , 69 , 0  , 3331 , 202, 135), // #808
+  INST(Vaddsubps        , VexRvm_Lx          , V(F20F00,D0,_,x,I,_,_,_  ), 0                         , 109, 0  , 3341 , 202, 135), // #809
+  INST(Vaesdec          , VexRvm_Lx          , V(660F38,DE,_,x,I,_,4,FVM), 0                         , 110, 0  , 3351 , 203, 136), // #810
+  INST(Vaesdeclast      , VexRvm_Lx          , V(660F38,DF,_,x,I,_,4,FVM), 0                         , 110, 0  , 3359 , 203, 136), // #811
+  INST(Vaesenc          , VexRvm_Lx          , V(660F38,DC,_,x,I,_,4,FVM), 0                         , 110, 0  , 3371 , 203, 136), // #812
+  INST(Vaesenclast      , VexRvm_Lx          , V(660F38,DD,_,x,I,_,4,FVM), 0                         , 110, 0  , 3379 , 203, 136), // #813
+  INST(Vaesimc          , VexRm              , V(660F38,DB,_,0,I,_,_,_  ), 0                         , 96 , 0  , 3391 , 204, 137), // #814
+  INST(Vaeskeygenassist , VexRmi             , V(660F3A,DF,_,0,I,_,_,_  ), 0                         , 73 , 0  , 3399 , 205, 137), // #815
+  INST(Valignd          , VexRvmi_Lx         , E(660F3A,03,_,x,_,0,4,FV ), 0                         , 111, 0  , 3416 , 206, 138), // #816
+  INST(Valignq          , VexRvmi_Lx         , E(660F3A,03,_,x,_,1,4,FV ), 0                         , 112, 0  , 3424 , 207, 138), // #817
+  INST(Vandnpd          , VexRvm_Lx          , V(660F00,55,_,x,I,1,4,FV ), 0                         , 103, 0  , 3432 , 208, 139), // #818
+  INST(Vandnps          , VexRvm_Lx          , V(000F00,55,_,x,I,0,4,FV ), 0                         , 105, 0  , 3440 , 209, 139), // #819
+  INST(Vandpd           , VexRvm_Lx          , V(660F00,54,_,x,I,1,4,FV ), 0                         , 103, 0  , 3448 , 210, 139), // #820
+  INST(Vandps           , VexRvm_Lx          , V(000F00,54,_,x,I,0,4,FV ), 0                         , 105, 0  , 3455 , 211, 139), // #821
+  INST(Vblendmpd        , VexRvm_Lx          , E(660F38,65,_,x,_,1,4,FV ), 0                         , 113, 0  , 3462 , 212, 138), // #822
+  INST(Vblendmps        , VexRvm_Lx          , E(660F38,65,_,x,_,0,4,FV ), 0                         , 114, 0  , 3472 , 213, 138), // #823
+  INST(Vblendpd         , VexRvmi_Lx         , V(660F3A,0D,_,x,I,_,_,_  ), 0                         , 73 , 0  , 3482 , 214, 135), // #824
+  INST(Vblendps         , VexRvmi_Lx         , V(660F3A,0C,_,x,I,_,_,_  ), 0                         , 73 , 0  , 3491 , 214, 135), // #825
+  INST(Vblendvpd        , VexRvmr_Lx         , V(660F3A,4B,_,x,0,_,_,_  ), 0                         , 73 , 0  , 3500 , 215, 135), // #826
+  INST(Vblendvps        , VexRvmr_Lx         , V(660F3A,4A,_,x,0,_,_,_  ), 0                         , 73 , 0  , 3510 , 215, 135), // #827
+  INST(Vbroadcastf128   , VexRm              , V(660F38,1A,_,1,0,_,_,_  ), 0                         , 115, 0  , 3520 , 216, 135), // #828
+  INST(Vbroadcastf32x2  , VexRm_Lx           , E(660F38,19,_,x,_,0,3,T2 ), 0                         , 116, 0  , 3535 , 217, 140), // #829
+  INST(Vbroadcastf32x4  , VexRm_Lx           , E(660F38,1A,_,x,_,0,4,T4 ), 0                         , 117, 0  , 3551 , 218, 68 ), // #830
+  INST(Vbroadcastf32x8  , VexRm              , E(660F38,1B,_,2,_,0,5,T8 ), 0                         , 118, 0  , 3567 , 219, 66 ), // #831
+  INST(Vbroadcastf64x2  , VexRm_Lx           , E(660F38,1A,_,x,_,1,4,T2 ), 0                         , 119, 0  , 3583 , 218, 140), // #832
+  INST(Vbroadcastf64x4  , VexRm              , E(660F38,1B,_,2,_,1,5,T4 ), 0                         , 120, 0  , 3599 , 219, 68 ), // #833
+  INST(Vbroadcasti128   , VexRm              , V(660F38,5A,_,1,0,_,_,_  ), 0                         , 115, 0  , 3615 , 216, 141), // #834
+  INST(Vbroadcasti32x2  , VexRm_Lx           , E(660F38,59,_,x,_,0,3,T2 ), 0                         , 116, 0  , 3630 , 220, 140), // #835
+  INST(Vbroadcasti32x4  , VexRm_Lx           , E(660F38,5A,_,x,_,0,4,T4 ), 0                         , 117, 0  , 3646 , 218, 138), // #836
+  INST(Vbroadcasti32x8  , VexRm              , E(660F38,5B,_,2,_,0,5,T8 ), 0                         , 118, 0  , 3662 , 219, 66 ), // #837
+  INST(Vbroadcasti64x2  , VexRm_Lx           , E(660F38,5A,_,x,_,1,4,T2 ), 0                         , 119, 0  , 3678 , 218, 140), // #838
+  INST(Vbroadcasti64x4  , VexRm              , E(660F38,5B,_,2,_,1,5,T4 ), 0                         , 120, 0  , 3694 , 219, 68 ), // #839
+  INST(Vbroadcastsd     , VexRm_Lx           , V(660F38,19,_,x,0,1,3,T1S), 0                         , 121, 0  , 3710 , 221, 142), // #840
+  INST(Vbroadcastss     , VexRm_Lx           , V(660F38,18,_,x,0,0,2,T1S), 0                         , 122, 0  , 3723 , 222, 142), // #841
+  INST(Vcmppd           , VexRvmi_Lx_KEvex   , V(660F00,C2,_,x,I,1,4,FV ), 0                         , 103, 0  , 3736 , 223, 131), // #842
+  INST(Vcmpph           , VexRvmi_Lx_KEvex   , E(000F3A,C2,_,_,_,0,4,FV ), 0                         , 123, 0  , 3743 , 224, 132), // #843
+  INST(Vcmpps           , VexRvmi_Lx_KEvex   , V(000F00,C2,_,x,I,0,4,FV ), 0                         , 105, 0  , 3750 , 225, 131), // #844
+  INST(Vcmpsd           , VexRvmi_KEvex      , V(F20F00,C2,_,I,I,1,3,T1S), 0                         , 106, 0  , 3757 , 226, 133), // #845
+  INST(Vcmpsh           , VexRvmi_KEvex      , E(F30F3A,C2,_,_,_,0,1,T1S), 0                         , 124, 0  , 3764 , 227, 134), // #846
+  INST(Vcmpss           , VexRvmi_KEvex      , V(F30F00,C2,_,I,I,0,2,T1S), 0                         , 108, 0  , 3771 , 228, 133), // #847
+  INST(Vcomisd          , VexRm              , V(660F00,2F,_,I,I,1,3,T1S), 0                         , 125, 0  , 3778 , 229, 143), // #848
+  INST(Vcomish          , VexRm              , E(00MAP5,2F,_,_,_,0,1,T1S), 0                         , 126, 0  , 3786 , 230, 134), // #849
+  INST(Vcomiss          , VexRm              , V(000F00,2F,_,I,I,0,2,T1S), 0                         , 127, 0  , 3794 , 231, 143), // #850
+  INST(Vcompresspd      , VexMr_Lx           , E(660F38,8A,_,x,_,1,3,T1S), 0                         , 128, 0  , 3802 , 232, 138), // #851
+  INST(Vcompressps      , VexMr_Lx           , E(660F38,8A,_,x,_,0,2,T1S), 0                         , 129, 0  , 3814 , 232, 138), // #852
+  INST(Vcvtdq2pd        , VexRm_Lx           , V(F30F00,E6,_,x,I,0,3,HV ), 0                         , 130, 0  , 3826 , 233, 131), // #853
+  INST(Vcvtdq2ph        , VexRm_Lx           , E(00MAP5,5B,_,_,_,0,4,FV ), 0                         , 104, 0  , 3836 , 234, 132), // #854
+  INST(Vcvtdq2ps        , VexRm_Lx           , V(000F00,5B,_,x,I,0,4,FV ), 0                         , 105, 0  , 3846 , 235, 131), // #855
+  INST(Vcvtne2ps2bf16   , VexRvm_Lx          , E(F20F38,72,_,_,_,0,4,FV ), 0                         , 131, 0  , 3856 , 213, 144), // #856
+  INST(Vcvtneps2bf16    , VexRm_Lx_Narrow    , E(F30F38,72,_,_,_,0,4,FV ), 0                         , 132, 0  , 3871 , 236, 144), // #857
+  INST(Vcvtpd2dq        , VexRm_Lx_Narrow    , V(F20F00,E6,_,x,I,1,4,FV ), 0                         , 133, 0  , 3885 , 237, 131), // #858
+  INST(Vcvtpd2ph        , VexRm_Lx           , E(66MAP5,5A,_,_,_,1,4,FV ), 0                         , 134, 0  , 3895 , 238, 132), // #859
+  INST(Vcvtpd2ps        , VexRm_Lx_Narrow    , V(660F00,5A,_,x,I,1,4,FV ), 0                         , 103, 0  , 3905 , 237, 131), // #860
+  INST(Vcvtpd2qq        , VexRm_Lx           , E(660F00,7B,_,x,_,1,4,FV ), 0                         , 135, 0  , 3915 , 239, 140), // #861
+  INST(Vcvtpd2udq       , VexRm_Lx_Narrow    , E(000F00,79,_,x,_,1,4,FV ), 0                         , 136, 0  , 3925 , 240, 138), // #862
+  INST(Vcvtpd2uqq       , VexRm_Lx           , E(660F00,79,_,x,_,1,4,FV ), 0                         , 135, 0  , 3936 , 239, 140), // #863
+  INST(Vcvtph2dq        , VexRm_Lx           , E(66MAP5,5B,_,_,_,0,3,HV ), 0                         , 137, 0  , 3947 , 241, 132), // #864
+  INST(Vcvtph2pd        , VexRm_Lx           , E(00MAP5,5A,_,_,_,0,2,QV ), 0                         , 138, 0  , 3957 , 242, 132), // #865
+  INST(Vcvtph2ps        , VexRm_Lx           , V(660F38,13,_,x,0,0,3,HVM), 0                         , 139, 0  , 3967 , 243, 145), // #866
+  INST(Vcvtph2psx       , VexRm_Lx           , E(66MAP6,13,_,_,_,0,3,HV ), 0                         , 140, 0  , 3977 , 244, 132), // #867
+  INST(Vcvtph2qq        , VexRm_Lx           , E(66MAP5,7B,_,_,_,0,2,QV ), 0                         , 141, 0  , 3988 , 245, 132), // #868
+  INST(Vcvtph2udq       , VexRm_Lx           , E(00MAP5,79,_,_,_,0,3,HV ), 0                         , 142, 0  , 3998 , 241, 132), // #869
+  INST(Vcvtph2uqq       , VexRm_Lx           , E(66MAP5,79,_,_,_,0,2,QV ), 0                         , 141, 0  , 4009 , 245, 132), // #870
+  INST(Vcvtph2uw        , VexRm_Lx           , E(00MAP5,7D,_,_,_,0,4,FV ), 0                         , 104, 0  , 4020 , 246, 132), // #871
+  INST(Vcvtph2w         , VexRm_Lx           , E(66MAP5,7D,_,_,_,0,4,FV ), 0                         , 143, 0  , 4030 , 246, 132), // #872
+  INST(Vcvtps2dq        , VexRm_Lx           , V(660F00,5B,_,x,I,0,4,FV ), 0                         , 144, 0  , 4039 , 235, 131), // #873
+  INST(Vcvtps2pd        , VexRm_Lx           , V(000F00,5A,_,x,I,0,3,HV ), 0                         , 145, 0  , 4049 , 247, 131), // #874
+  INST(Vcvtps2ph        , VexMri_Lx          , V(660F3A,1D,_,x,0,0,3,HVM), 0                         , 146, 0  , 4059 , 248, 145), // #875
+  INST(Vcvtps2phx       , VexRm_Lx           , E(66MAP5,1D,_,_,_,0,4,FV ), 0                         , 143, 0  , 4069 , 234, 132), // #876
+  INST(Vcvtps2qq        , VexRm_Lx           , E(660F00,7B,_,x,_,0,3,HV ), 0                         , 147, 0  , 4080 , 249, 140), // #877
+  INST(Vcvtps2udq       , VexRm_Lx           , E(000F00,79,_,x,_,0,4,FV ), 0                         , 148, 0  , 4090 , 250, 138), // #878
+  INST(Vcvtps2uqq       , VexRm_Lx           , E(660F00,79,_,x,_,0,3,HV ), 0                         , 147, 0  , 4101 , 249, 140), // #879
+  INST(Vcvtqq2pd        , VexRm_Lx           , E(F30F00,E6,_,x,_,1,4,FV ), 0                         , 149, 0  , 4112 , 239, 140), // #880
+  INST(Vcvtqq2ph        , VexRm_Lx           , E(00MAP5,5B,_,_,_,1,4,FV ), 0                         , 150, 0  , 4122 , 238, 132), // #881
+  INST(Vcvtqq2ps        , VexRm_Lx_Narrow    , E(000F00,5B,_,x,_,1,4,FV ), 0                         , 136, 0  , 4132 , 240, 140), // #882
+  INST(Vcvtsd2sh        , VexRvm             , E(F2MAP5,5A,_,_,_,1,3,T1S), 0                         , 151, 0  , 4142 , 251, 134), // #883
+  INST(Vcvtsd2si        , VexRm_Wx           , V(F20F00,2D,_,I,x,x,3,T1F), 0                         , 152, 0  , 4152 , 252, 133), // #884
+  INST(Vcvtsd2ss        , VexRvm             , V(F20F00,5A,_,I,I,1,3,T1S), 0                         , 106, 0  , 4162 , 199, 133), // #885
+  INST(Vcvtsd2usi       , VexRm_Wx           , E(F20F00,79,_,I,_,x,3,T1F), 0                         , 153, 0  , 4172 , 253, 68 ), // #886
+  INST(Vcvtsh2sd        , VexRvm             , E(F3MAP5,5A,_,_,_,0,1,T1S), 0                         , 107, 0  , 4183 , 254, 134), // #887
+  INST(Vcvtsh2si        , VexRm_Wx           , E(F3MAP5,2D,_,_,_,x,1,T1S), 0                         , 107, 0  , 4193 , 255, 134), // #888
+  INST(Vcvtsh2ss        , VexRvm             , E(00MAP6,13,_,_,_,0,1,T1S), 0                         , 154, 0  , 4203 , 254, 134), // #889
+  INST(Vcvtsh2usi       , VexRm_Wx           , E(F3MAP5,79,_,_,_,x,1,T1S), 0                         , 107, 0  , 4213 , 255, 134), // #890
+  INST(Vcvtsi2sd        , VexRvm_Wx          , V(F20F00,2A,_,I,x,x,2,T1W), 0                         , 155, 0  , 4224 , 256, 133), // #891
+  INST(Vcvtsi2sh        , VexRvm_Wx          , E(F3MAP5,2A,_,_,_,x,2,T1W), 0                         , 156, 0  , 4234 , 257, 134), // #892
+  INST(Vcvtsi2ss        , VexRvm_Wx          , V(F30F00,2A,_,I,x,x,2,T1W), 0                         , 157, 0  , 4244 , 256, 133), // #893
+  INST(Vcvtss2sd        , VexRvm             , V(F30F00,5A,_,I,I,0,2,T1S), 0                         , 108, 0  , 4254 , 258, 133), // #894
+  INST(Vcvtss2sh        , VexRvm             , E(00MAP5,1D,_,_,_,0,2,T1S), 0                         , 158, 0  , 4264 , 259, 134), // #895
+  INST(Vcvtss2si        , VexRm_Wx           , V(F30F00,2D,_,I,x,x,2,T1F), 0                         , 108, 0  , 4274 , 260, 133), // #896
+  INST(Vcvtss2usi       , VexRm_Wx           , E(F30F00,79,_,I,_,x,2,T1F), 0                         , 159, 0  , 4284 , 261, 68 ), // #897
+  INST(Vcvttpd2dq       , VexRm_Lx_Narrow    , V(660F00,E6,_,x,I,1,4,FV ), 0                         , 103, 0  , 4295 , 262, 131), // #898
+  INST(Vcvttpd2qq       , VexRm_Lx           , E(660F00,7A,_,x,_,1,4,FV ), 0                         , 135, 0  , 4306 , 263, 138), // #899
+  INST(Vcvttpd2udq      , VexRm_Lx_Narrow    , E(000F00,78,_,x,_,1,4,FV ), 0                         , 136, 0  , 4317 , 264, 138), // #900
+  INST(Vcvttpd2uqq      , VexRm_Lx           , E(660F00,78,_,x,_,1,4,FV ), 0                         , 135, 0  , 4329 , 263, 140), // #901
+  INST(Vcvttph2dq       , VexRm_Lx           , E(F3MAP5,5B,_,_,_,0,3,HV ), 0                         , 160, 0  , 4341 , 244, 132), // #902
+  INST(Vcvttph2qq       , VexRm_Lx           , E(66MAP5,7A,_,_,_,0,2,QV ), 0                         , 141, 0  , 4352 , 242, 132), // #903
+  INST(Vcvttph2udq      , VexRm_Lx           , E(00MAP5,78,_,_,_,0,3,HV ), 0                         , 142, 0  , 4363 , 244, 132), // #904
+  INST(Vcvttph2uqq      , VexRm_Lx           , E(66MAP5,78,_,_,_,0,2,QV ), 0                         , 141, 0  , 4375 , 242, 132), // #905
+  INST(Vcvttph2uw       , VexRm_Lx           , E(00MAP5,7C,_,_,_,0,4,FV ), 0                         , 104, 0  , 4387 , 265, 132), // #906
+  INST(Vcvttph2w        , VexRm_Lx           , E(66MAP5,7C,_,_,_,0,4,FV ), 0                         , 143, 0  , 4398 , 265, 132), // #907
+  INST(Vcvttps2dq       , VexRm_Lx           , V(F30F00,5B,_,x,I,0,4,FV ), 0                         , 161, 0  , 4408 , 266, 131), // #908
+  INST(Vcvttps2qq       , VexRm_Lx           , E(660F00,7A,_,x,_,0,3,HV ), 0                         , 147, 0  , 4419 , 267, 140), // #909
+  INST(Vcvttps2udq      , VexRm_Lx           , E(000F00,78,_,x,_,0,4,FV ), 0                         , 148, 0  , 4430 , 268, 138), // #910
+  INST(Vcvttps2uqq      , VexRm_Lx           , E(660F00,78,_,x,_,0,3,HV ), 0                         , 147, 0  , 4442 , 267, 140), // #911
+  INST(Vcvttsd2si       , VexRm_Wx           , V(F20F00,2C,_,I,x,x,3,T1F), 0                         , 152, 0  , 4454 , 269, 133), // #912
+  INST(Vcvttsd2usi      , VexRm_Wx           , E(F20F00,78,_,I,_,x,3,T1F), 0                         , 153, 0  , 4465 , 270, 68 ), // #913
+  INST(Vcvttsh2si       , VexRm_Wx           , E(F3MAP5,2C,_,_,_,x,1,T1S), 0                         , 107, 0  , 4477 , 271, 134), // #914
+  INST(Vcvttsh2usi      , VexRm_Wx           , E(F3MAP5,78,_,_,_,x,1,T1S), 0                         , 107, 0  , 4488 , 271, 134), // #915
+  INST(Vcvttss2si       , VexRm_Wx           , V(F30F00,2C,_,I,x,x,2,T1F), 0                         , 108, 0  , 4500 , 272, 133), // #916
+  INST(Vcvttss2usi      , VexRm_Wx           , E(F30F00,78,_,I,_,x,2,T1F), 0                         , 159, 0  , 4511 , 273, 68 ), // #917
+  INST(Vcvtudq2pd       , VexRm_Lx           , E(F30F00,7A,_,x,_,0,3,HV ), 0                         , 162, 0  , 4523 , 274, 138), // #918
+  INST(Vcvtudq2ph       , VexRm_Lx           , E(F2MAP5,7A,_,_,_,0,4,FV ), 0                         , 163, 0  , 4534 , 234, 132), // #919
+  INST(Vcvtudq2ps       , VexRm_Lx           , E(F20F00,7A,_,x,_,0,4,FV ), 0                         , 164, 0  , 4545 , 250, 138), // #920
+  INST(Vcvtuqq2pd       , VexRm_Lx           , E(F30F00,7A,_,x,_,1,4,FV ), 0                         , 149, 0  , 4556 , 239, 140), // #921
+  INST(Vcvtuqq2ph       , VexRm_Lx           , E(F2MAP5,7A,_,_,_,1,4,FV ), 0                         , 165, 0  , 4567 , 238, 132), // #922
+  INST(Vcvtuqq2ps       , VexRm_Lx_Narrow    , E(F20F00,7A,_,x,_,1,4,FV ), 0                         , 166, 0  , 4578 , 240, 140), // #923
+  INST(Vcvtusi2sd       , VexRvm_Wx          , E(F20F00,7B,_,I,_,x,2,T1W), 0                         , 167, 0  , 4589 , 257, 68 ), // #924
+  INST(Vcvtusi2sh       , VexRvm_Wx          , E(F3MAP5,7B,_,_,_,x,2,T1W), 0                         , 156, 0  , 4600 , 257, 134), // #925
+  INST(Vcvtusi2ss       , VexRvm_Wx          , E(F30F00,7B,_,I,_,x,2,T1W), 0                         , 168, 0  , 4611 , 257, 68 ), // #926
+  INST(Vcvtuw2ph        , VexRm_Lx           , E(F2MAP5,7D,_,_,_,0,4,FV ), 0                         , 163, 0  , 4622 , 246, 132), // #927
+  INST(Vcvtw2ph         , VexRm_Lx           , E(F3MAP5,7D,_,_,_,0,4,FV ), 0                         , 169, 0  , 4632 , 246, 132), // #928
+  INST(Vdbpsadbw        , VexRvmi_Lx         , E(660F3A,42,_,x,_,0,4,FVM), 0                         , 111, 0  , 4641 , 275, 146), // #929
+  INST(Vdivpd           , VexRvm_Lx          , V(660F00,5E,_,x,I,1,4,FV ), 0                         , 103, 0  , 4651 , 196, 131), // #930
+  INST(Vdivph           , VexRvm_Lx          , E(00MAP5,5E,_,_,_,0,4,FV ), 0                         , 104, 0  , 4658 , 197, 132), // #931
+  INST(Vdivps           , VexRvm_Lx          , V(000F00,5E,_,x,I,0,4,FV ), 0                         , 105, 0  , 4665 , 198, 131), // #932
+  INST(Vdivsd           , VexRvm             , V(F20F00,5E,_,I,I,1,3,T1S), 0                         , 106, 0  , 4672 , 199, 133), // #933
+  INST(Vdivsh           , VexRvm             , E(F3MAP5,5E,_,_,_,0,1,T1S), 0                         , 107, 0  , 4679 , 200, 134), // #934
+  INST(Vdivss           , VexRvm             , V(F30F00,5E,_,I,I,0,2,T1S), 0                         , 108, 0  , 4686 , 201, 133), // #935
+  INST(Vdpbf16ps        , VexRvm_Lx          , E(F30F38,52,_,_,_,0,4,FV ), 0                         , 132, 0  , 4693 , 213, 144), // #936
+  INST(Vdppd            , VexRvmi_Lx         , V(660F3A,41,_,x,I,_,_,_  ), 0                         , 73 , 0  , 4703 , 276, 135), // #937
+  INST(Vdpps            , VexRvmi_Lx         , V(660F3A,40,_,x,I,_,_,_  ), 0                         , 73 , 0  , 4709 , 214, 135), // #938
+  INST(Verr             , X86M_NoSize        , O(000F00,00,4,_,_,_,_,_  ), 0                         , 97 , 0  , 4715 , 107, 10 ), // #939
+  INST(Verw             , X86M_NoSize        , O(000F00,00,5,_,_,_,_,_  ), 0                         , 77 , 0  , 4720 , 107, 10 ), // #940
+  INST(Vexp2pd          , VexRm              , E(660F38,C8,_,2,_,1,4,FV ), 0                         , 170, 0  , 4725 , 277, 147), // #941
+  INST(Vexp2ps          , VexRm              , E(660F38,C8,_,2,_,0,4,FV ), 0                         , 171, 0  , 4733 , 278, 147), // #942
+  INST(Vexpandpd        , VexRm_Lx           , E(660F38,88,_,x,_,1,3,T1S), 0                         , 128, 0  , 4741 , 279, 138), // #943
+  INST(Vexpandps        , VexRm_Lx           , E(660F38,88,_,x,_,0,2,T1S), 0                         , 129, 0  , 4751 , 279, 138), // #944
+  INST(Vextractf128     , VexMri             , V(660F3A,19,_,1,0,_,_,_  ), 0                         , 172, 0  , 4761 , 280, 135), // #945
+  INST(Vextractf32x4    , VexMri_Lx          , E(660F3A,19,_,x,_,0,4,T4 ), 0                         , 173, 0  , 4774 , 281, 138), // #946
+  INST(Vextractf32x8    , VexMri             , E(660F3A,1B,_,2,_,0,5,T8 ), 0                         , 174, 0  , 4788 , 282, 66 ), // #947
+  INST(Vextractf64x2    , VexMri_Lx          , E(660F3A,19,_,x,_,1,4,T2 ), 0                         , 175, 0  , 4802 , 281, 140), // #948
+  INST(Vextractf64x4    , VexMri             , E(660F3A,1B,_,2,_,1,5,T4 ), 0                         , 176, 0  , 4816 , 282, 68 ), // #949
+  INST(Vextracti128     , VexMri             , V(660F3A,39,_,1,0,_,_,_  ), 0                         , 172, 0  , 4830 , 280, 141), // #950
+  INST(Vextracti32x4    , VexMri_Lx          , E(660F3A,39,_,x,_,0,4,T4 ), 0                         , 173, 0  , 4843 , 281, 138), // #951
+  INST(Vextracti32x8    , VexMri             , E(660F3A,3B,_,2,_,0,5,T8 ), 0                         , 174, 0  , 4857 , 282, 66 ), // #952
+  INST(Vextracti64x2    , VexMri_Lx          , E(660F3A,39,_,x,_,1,4,T2 ), 0                         , 175, 0  , 4871 , 281, 140), // #953
+  INST(Vextracti64x4    , VexMri             , E(660F3A,3B,_,2,_,1,5,T4 ), 0                         , 176, 0  , 4885 , 282, 68 ), // #954
+  INST(Vextractps       , VexMri             , V(660F3A,17,_,0,I,I,2,T1S), 0                         , 177, 0  , 4899 , 283, 133), // #955
+  INST(Vfcmaddcph       , VexRvm_Lx          , E(F2MAP6,56,_,_,_,0,4,FV ), 0                         , 178, 0  , 4910 , 284, 132), // #956
+  INST(Vfcmaddcsh       , VexRvm             , E(F2MAP6,57,_,_,_,0,2,T1S), 0                         , 179, 0  , 4921 , 259, 132), // #957
+  INST(Vfcmulcph        , VexRvm_Lx          , E(F2MAP6,D6,_,_,_,0,4,FV ), 0                         , 178, 0  , 4932 , 284, 132), // #958
+  INST(Vfcmulcsh        , VexRvm             , E(F2MAP6,D7,_,_,_,0,2,T1S), 0                         , 179, 0  , 4942 , 259, 132), // #959
+  INST(Vfixupimmpd      , VexRvmi_Lx         , E(660F3A,54,_,x,_,1,4,FV ), 0                         , 112, 0  , 4952 , 285, 138), // #960
+  INST(Vfixupimmps      , VexRvmi_Lx         , E(660F3A,54,_,x,_,0,4,FV ), 0                         , 111, 0  , 4964 , 286, 138), // #961
+  INST(Vfixupimmsd      , VexRvmi            , E(660F3A,55,_,I,_,1,3,T1S), 0                         , 180, 0  , 4976 , 287, 68 ), // #962
+  INST(Vfixupimmss      , VexRvmi            , E(660F3A,55,_,I,_,0,2,T1S), 0                         , 181, 0  , 4988 , 288, 68 ), // #963
+  INST(Vfmadd132pd      , VexRvm_Lx          , V(660F38,98,_,x,1,1,4,FV ), 0                         , 182, 0  , 5000 , 196, 148), // #964
+  INST(Vfmadd132ph      , VexRvm_Lx          , E(66MAP6,98,_,_,_,0,4,FV ), 0                         , 183, 0  , 5012 , 197, 132), // #965
+  INST(Vfmadd132ps      , VexRvm_Lx          , V(660F38,98,_,x,0,0,4,FV ), 0                         , 110, 0  , 5024 , 198, 148), // #966
+  INST(Vfmadd132sd      , VexRvm             , V(660F38,99,_,I,1,1,3,T1S), 0                         , 184, 0  , 5036 , 199, 149), // #967
+  INST(Vfmadd132sh      , VexRvm             , E(66MAP6,99,_,_,_,0,1,T1S), 0                         , 185, 0  , 5048 , 200, 134), // #968
+  INST(Vfmadd132ss      , VexRvm             , V(660F38,99,_,I,0,0,2,T1S), 0                         , 122, 0  , 5060 , 201, 149), // #969
+  INST(Vfmadd213pd      , VexRvm_Lx          , V(660F38,A8,_,x,1,1,4,FV ), 0                         , 182, 0  , 5072 , 196, 148), // #970
+  INST(Vfmadd213ph      , VexRvm_Lx          , E(66MAP6,A8,_,_,_,0,4,FV ), 0                         , 183, 0  , 5084 , 197, 132), // #971
+  INST(Vfmadd213ps      , VexRvm_Lx          , V(660F38,A8,_,x,0,0,4,FV ), 0                         , 110, 0  , 5096 , 198, 148), // #972
+  INST(Vfmadd213sd      , VexRvm             , V(660F38,A9,_,I,1,1,3,T1S), 0                         , 184, 0  , 5108 , 199, 149), // #973
+  INST(Vfmadd213sh      , VexRvm             , E(66MAP6,A9,_,_,_,0,1,T1S), 0                         , 185, 0  , 5120 , 200, 134), // #974
+  INST(Vfmadd213ss      , VexRvm             , V(660F38,A9,_,I,0,0,2,T1S), 0                         , 122, 0  , 5132 , 201, 149), // #975
+  INST(Vfmadd231pd      , VexRvm_Lx          , V(660F38,B8,_,x,1,1,4,FV ), 0                         , 182, 0  , 5144 , 196, 148), // #976
+  INST(Vfmadd231ph      , VexRvm_Lx          , E(66MAP6,B8,_,_,_,0,4,FV ), 0                         , 183, 0  , 5156 , 197, 132), // #977
+  INST(Vfmadd231ps      , VexRvm_Lx          , V(660F38,B8,_,x,0,0,4,FV ), 0                         , 110, 0  , 5168 , 198, 148), // #978
+  INST(Vfmadd231sd      , VexRvm             , V(660F38,B9,_,I,1,1,3,T1S), 0                         , 184, 0  , 5180 , 199, 149), // #979
+  INST(Vfmadd231sh      , VexRvm             , E(66MAP6,B9,_,_,_,0,1,T1S), 0                         , 185, 0  , 5192 , 200, 134), // #980
+  INST(Vfmadd231ss      , VexRvm             , V(660F38,B9,_,I,0,0,2,T1S), 0                         , 122, 0  , 5204 , 201, 149), // #981
+  INST(Vfmaddcph        , VexRvm_Lx          , E(F3MAP6,56,_,_,_,0,4,FV ), 0                         , 186, 0  , 5216 , 284, 132), // #982
+  INST(Vfmaddcsh        , VexRvm             , E(F3MAP6,57,_,_,_,0,2,T1S), 0                         , 187, 0  , 5226 , 259, 132), // #983
+  INST(Vfmaddpd         , Fma4_Lx            , V(660F3A,69,_,x,x,_,_,_  ), 0                         , 73 , 0  , 5236 , 289, 150), // #984
+  INST(Vfmaddps         , Fma4_Lx            , V(660F3A,68,_,x,x,_,_,_  ), 0                         , 73 , 0  , 5245 , 289, 150), // #985
+  INST(Vfmaddsd         , Fma4               , V(660F3A,6B,_,0,x,_,_,_  ), 0                         , 73 , 0  , 5254 , 290, 150), // #986
+  INST(Vfmaddss         , Fma4               , V(660F3A,6A,_,0,x,_,_,_  ), 0                         , 73 , 0  , 5263 , 291, 150), // #987
+  INST(Vfmaddsub132pd   , VexRvm_Lx          , V(660F38,96,_,x,1,1,4,FV ), 0                         , 182, 0  , 5272 , 196, 148), // #988
+  INST(Vfmaddsub132ph   , VexRvm_Lx          , E(66MAP6,96,_,_,_,0,4,FV ), 0                         , 183, 0  , 5287 , 197, 132), // #989
+  INST(Vfmaddsub132ps   , VexRvm_Lx          , V(660F38,96,_,x,0,0,4,FV ), 0                         , 110, 0  , 5302 , 198, 148), // #990
+  INST(Vfmaddsub213pd   , VexRvm_Lx          , V(660F38,A6,_,x,1,1,4,FV ), 0                         , 182, 0  , 5317 , 196, 148), // #991
+  INST(Vfmaddsub213ph   , VexRvm_Lx          , E(66MAP6,A6,_,_,_,0,4,FV ), 0                         , 183, 0  , 5332 , 197, 132), // #992
+  INST(Vfmaddsub213ps   , VexRvm_Lx          , V(660F38,A6,_,x,0,0,4,FV ), 0                         , 110, 0  , 5347 , 198, 148), // #993
+  INST(Vfmaddsub231pd   , VexRvm_Lx          , V(660F38,B6,_,x,1,1,4,FV ), 0                         , 182, 0  , 5362 , 196, 148), // #994
+  INST(Vfmaddsub231ph   , VexRvm_Lx          , E(66MAP6,B6,_,_,_,0,4,FV ), 0                         , 183, 0  , 5377 , 197, 132), // #995
+  INST(Vfmaddsub231ps   , VexRvm_Lx          , V(660F38,B6,_,x,0,0,4,FV ), 0                         , 110, 0  , 5392 , 198, 148), // #996
+  INST(Vfmaddsubpd      , Fma4_Lx            , V(660F3A,5D,_,x,x,_,_,_  ), 0                         , 73 , 0  , 5407 , 289, 150), // #997
+  INST(Vfmaddsubps      , Fma4_Lx            , V(660F3A,5C,_,x,x,_,_,_  ), 0                         , 73 , 0  , 5419 , 289, 150), // #998
+  INST(Vfmsub132pd      , VexRvm_Lx          , V(660F38,9A,_,x,1,1,4,FV ), 0                         , 182, 0  , 5431 , 196, 148), // #999
+  INST(Vfmsub132ph      , VexRvm_Lx          , E(66MAP6,9A,_,_,_,0,4,FV ), 0                         , 183, 0  , 5443 , 197, 132), // #1000
+  INST(Vfmsub132ps      , VexRvm_Lx          , V(660F38,9A,_,x,0,0,4,FV ), 0                         , 110, 0  , 5455 , 198, 148), // #1001
+  INST(Vfmsub132sd      , VexRvm             , V(660F38,9B,_,I,1,1,3,T1S), 0                         , 184, 0  , 5467 , 199, 149), // #1002
+  INST(Vfmsub132sh      , VexRvm             , E(66MAP6,9B,_,_,_,0,1,T1S), 0                         , 185, 0  , 5479 , 200, 134), // #1003
+  INST(Vfmsub132ss      , VexRvm             , V(660F38,9B,_,I,0,0,2,T1S), 0                         , 122, 0  , 5491 , 201, 149), // #1004
+  INST(Vfmsub213pd      , VexRvm_Lx          , V(660F38,AA,_,x,1,1,4,FV ), 0                         , 182, 0  , 5503 , 196, 148), // #1005
+  INST(Vfmsub213ph      , VexRvm_Lx          , E(66MAP6,AA,_,_,_,0,4,FV ), 0                         , 183, 0  , 5515 , 197, 132), // #1006
+  INST(Vfmsub213ps      , VexRvm_Lx          , V(660F38,AA,_,x,0,0,4,FV ), 0                         , 110, 0  , 5527 , 198, 148), // #1007
+  INST(Vfmsub213sd      , VexRvm             , V(660F38,AB,_,I,1,1,3,T1S), 0                         , 184, 0  , 5539 , 199, 149), // #1008
+  INST(Vfmsub213sh      , VexRvm             , E(66MAP6,AB,_,_,_,0,1,T1S), 0                         , 185, 0  , 5551 , 200, 134), // #1009
+  INST(Vfmsub213ss      , VexRvm             , V(660F38,AB,_,I,0,0,2,T1S), 0                         , 122, 0  , 5563 , 201, 149), // #1010
+  INST(Vfmsub231pd      , VexRvm_Lx          , V(660F38,BA,_,x,1,1,4,FV ), 0                         , 182, 0  , 5575 , 196, 148), // #1011
+  INST(Vfmsub231ph      , VexRvm_Lx          , E(66MAP6,BA,_,_,_,0,4,FV ), 0                         , 183, 0  , 5587 , 197, 132), // #1012
+  INST(Vfmsub231ps      , VexRvm_Lx          , V(660F38,BA,_,x,0,0,4,FV ), 0                         , 110, 0  , 5599 , 198, 148), // #1013
+  INST(Vfmsub231sd      , VexRvm             , V(660F38,BB,_,I,1,1,3,T1S), 0                         , 184, 0  , 5611 , 199, 149), // #1014
+  INST(Vfmsub231sh      , VexRvm             , E(66MAP6,BB,_,_,_,0,1,T1S), 0                         , 185, 0  , 5623 , 200, 134), // #1015
+  INST(Vfmsub231ss      , VexRvm             , V(660F38,BB,_,I,0,0,2,T1S), 0                         , 122, 0  , 5635 , 201, 149), // #1016
+  INST(Vfmsubadd132pd   , VexRvm_Lx          , V(660F38,97,_,x,1,1,4,FV ), 0                         , 182, 0  , 5647 , 196, 148), // #1017
+  INST(Vfmsubadd132ph   , VexRvm_Lx          , E(66MAP6,97,_,_,_,0,4,FV ), 0                         , 183, 0  , 5662 , 197, 132), // #1018
+  INST(Vfmsubadd132ps   , VexRvm_Lx          , V(660F38,97,_,x,0,0,4,FV ), 0                         , 110, 0  , 5677 , 198, 148), // #1019
+  INST(Vfmsubadd213pd   , VexRvm_Lx          , V(660F38,A7,_,x,1,1,4,FV ), 0                         , 182, 0  , 5692 , 196, 148), // #1020
+  INST(Vfmsubadd213ph   , VexRvm_Lx          , E(66MAP6,A7,_,_,_,0,4,FV ), 0                         , 183, 0  , 5707 , 197, 132), // #1021
+  INST(Vfmsubadd213ps   , VexRvm_Lx          , V(660F38,A7,_,x,0,0,4,FV ), 0                         , 110, 0  , 5722 , 198, 148), // #1022
+  INST(Vfmsubadd231pd   , VexRvm_Lx          , V(660F38,B7,_,x,1,1,4,FV ), 0                         , 182, 0  , 5737 , 196, 148), // #1023
+  INST(Vfmsubadd231ph   , VexRvm_Lx          , E(66MAP6,B7,_,_,_,0,4,FV ), 0                         , 183, 0  , 5752 , 197, 132), // #1024
+  INST(Vfmsubadd231ps   , VexRvm_Lx          , V(660F38,B7,_,x,0,0,4,FV ), 0                         , 110, 0  , 5767 , 198, 148), // #1025
+  INST(Vfmsubaddpd      , Fma4_Lx            , V(660F3A,5F,_,x,x,_,_,_  ), 0                         , 73 , 0  , 5782 , 289, 150), // #1026
+  INST(Vfmsubaddps      , Fma4_Lx            , V(660F3A,5E,_,x,x,_,_,_  ), 0                         , 73 , 0  , 5794 , 289, 150), // #1027
+  INST(Vfmsubpd         , Fma4_Lx            , V(660F3A,6D,_,x,x,_,_,_  ), 0                         , 73 , 0  , 5806 , 289, 150), // #1028
+  INST(Vfmsubps         , Fma4_Lx            , V(660F3A,6C,_,x,x,_,_,_  ), 0                         , 73 , 0  , 5815 , 289, 150), // #1029
+  INST(Vfmsubsd         , Fma4               , V(660F3A,6F,_,0,x,_,_,_  ), 0                         , 73 , 0  , 5824 , 290, 150), // #1030
+  INST(Vfmsubss         , Fma4               , V(660F3A,6E,_,0,x,_,_,_  ), 0                         , 73 , 0  , 5833 , 291, 150), // #1031
+  INST(Vfmulcph         , VexRvm_Lx          , E(F3MAP6,D6,_,_,_,0,4,FV ), 0                         , 186, 0  , 5842 , 284, 132), // #1032
+  INST(Vfmulcsh         , VexRvm             , E(F3MAP6,D7,_,_,_,0,2,T1S), 0                         , 187, 0  , 5851 , 259, 132), // #1033
+  INST(Vfnmadd132pd     , VexRvm_Lx          , V(660F38,9C,_,x,1,1,4,FV ), 0                         , 182, 0  , 5860 , 196, 148), // #1034
+  INST(Vfnmadd132ph     , VexRvm_Lx          , E(66MAP6,9C,_,_,_,0,4,FV ), 0                         , 183, 0  , 5873 , 197, 132), // #1035
+  INST(Vfnmadd132ps     , VexRvm_Lx          , V(660F38,9C,_,x,0,0,4,FV ), 0                         , 110, 0  , 5886 , 198, 148), // #1036
+  INST(Vfnmadd132sd     , VexRvm             , V(660F38,9D,_,I,1,1,3,T1S), 0                         , 184, 0  , 5899 , 199, 149), // #1037
+  INST(Vfnmadd132sh     , VexRvm             , E(66MAP6,9D,_,_,_,0,1,T1S), 0                         , 185, 0  , 5912 , 200, 134), // #1038
+  INST(Vfnmadd132ss     , VexRvm             , V(660F38,9D,_,I,0,0,2,T1S), 0                         , 122, 0  , 5925 , 201, 149), // #1039
+  INST(Vfnmadd213pd     , VexRvm_Lx          , V(660F38,AC,_,x,1,1,4,FV ), 0                         , 182, 0  , 5938 , 196, 148), // #1040
+  INST(Vfnmadd213ph     , VexRvm_Lx          , E(66MAP6,AC,_,_,_,0,4,FV ), 0                         , 183, 0  , 5951 , 197, 132), // #1041
+  INST(Vfnmadd213ps     , VexRvm_Lx          , V(660F38,AC,_,x,0,0,4,FV ), 0                         , 110, 0  , 5964 , 198, 148), // #1042
+  INST(Vfnmadd213sd     , VexRvm             , V(660F38,AD,_,I,1,1,3,T1S), 0                         , 184, 0  , 5977 , 199, 149), // #1043
+  INST(Vfnmadd213sh     , VexRvm             , E(66MAP6,AD,_,_,_,0,1,T1S), 0                         , 185, 0  , 5990 , 200, 134), // #1044
+  INST(Vfnmadd213ss     , VexRvm             , V(660F38,AD,_,I,0,0,2,T1S), 0                         , 122, 0  , 6003 , 201, 149), // #1045
+  INST(Vfnmadd231pd     , VexRvm_Lx          , V(660F38,BC,_,x,1,1,4,FV ), 0                         , 182, 0  , 6016 , 196, 148), // #1046
+  INST(Vfnmadd231ph     , VexRvm_Lx          , E(66MAP6,BC,_,_,_,0,4,FV ), 0                         , 183, 0  , 6029 , 197, 132), // #1047
+  INST(Vfnmadd231ps     , VexRvm_Lx          , V(660F38,BC,_,x,0,0,4,FV ), 0                         , 110, 0  , 6042 , 198, 148), // #1048
+  INST(Vfnmadd231sd     , VexRvm             , V(660F38,BD,_,I,1,1,3,T1S), 0                         , 184, 0  , 6055 , 199, 149), // #1049
+  INST(Vfnmadd231sh     , VexRvm             , E(66MAP6,BD,_,_,_,0,1,T1S), 0                         , 185, 0  , 6068 , 200, 134), // #1050
+  INST(Vfnmadd231ss     , VexRvm             , V(660F38,BD,_,I,0,0,2,T1S), 0                         , 122, 0  , 6081 , 201, 149), // #1051
+  INST(Vfnmaddpd        , Fma4_Lx            , V(660F3A,79,_,x,x,_,_,_  ), 0                         , 73 , 0  , 6094 , 289, 150), // #1052
+  INST(Vfnmaddps        , Fma4_Lx            , V(660F3A,78,_,x,x,_,_,_  ), 0                         , 73 , 0  , 6104 , 289, 150), // #1053
+  INST(Vfnmaddsd        , Fma4               , V(660F3A,7B,_,0,x,_,_,_  ), 0                         , 73 , 0  , 6114 , 290, 150), // #1054
+  INST(Vfnmaddss        , Fma4               , V(660F3A,7A,_,0,x,_,_,_  ), 0                         , 73 , 0  , 6124 , 291, 150), // #1055
+  INST(Vfnmsub132pd     , VexRvm_Lx          , V(660F38,9E,_,x,1,1,4,FV ), 0                         , 182, 0  , 6134 , 196, 148), // #1056
+  INST(Vfnmsub132ph     , VexRvm_Lx          , E(66MAP6,9E,_,_,_,0,4,FV ), 0                         , 183, 0  , 6147 , 197, 132), // #1057
+  INST(Vfnmsub132ps     , VexRvm_Lx          , V(660F38,9E,_,x,0,0,4,FV ), 0                         , 110, 0  , 6160 , 198, 148), // #1058
+  INST(Vfnmsub132sd     , VexRvm             , V(660F38,9F,_,I,1,1,3,T1S), 0                         , 184, 0  , 6173 , 199, 149), // #1059
+  INST(Vfnmsub132sh     , VexRvm             , E(66MAP6,9F,_,_,_,0,1,T1S), 0                         , 185, 0  , 6186 , 200, 134), // #1060
+  INST(Vfnmsub132ss     , VexRvm             , V(660F38,9F,_,I,0,0,2,T1S), 0                         , 122, 0  , 6199 , 201, 149), // #1061
+  INST(Vfnmsub213pd     , VexRvm_Lx          , V(660F38,AE,_,x,1,1,4,FV ), 0                         , 182, 0  , 6212 , 196, 148), // #1062
+  INST(Vfnmsub213ph     , VexRvm_Lx          , E(66MAP6,AE,_,_,_,0,4,FV ), 0                         , 183, 0  , 6225 , 197, 132), // #1063
+  INST(Vfnmsub213ps     , VexRvm_Lx          , V(660F38,AE,_,x,0,0,4,FV ), 0                         , 110, 0  , 6238 , 198, 148), // #1064
+  INST(Vfnmsub213sd     , VexRvm             , V(660F38,AF,_,I,1,1,3,T1S), 0                         , 184, 0  , 6251 , 199, 149), // #1065
+  INST(Vfnmsub213sh     , VexRvm             , E(66MAP6,AF,_,_,_,0,1,T1S), 0                         , 185, 0  , 6264 , 200, 134), // #1066
+  INST(Vfnmsub213ss     , VexRvm             , V(660F38,AF,_,I,0,0,2,T1S), 0                         , 122, 0  , 6277 , 201, 149), // #1067
+  INST(Vfnmsub231pd     , VexRvm_Lx          , V(660F38,BE,_,x,1,1,4,FV ), 0                         , 182, 0  , 6290 , 196, 148), // #1068
+  INST(Vfnmsub231ph     , VexRvm_Lx          , E(66MAP6,BE,_,_,_,0,4,FV ), 0                         , 183, 0  , 6303 , 197, 132), // #1069
+  INST(Vfnmsub231ps     , VexRvm_Lx          , V(660F38,BE,_,x,0,0,4,FV ), 0                         , 110, 0  , 6316 , 198, 148), // #1070
+  INST(Vfnmsub231sd     , VexRvm             , V(660F38,BF,_,I,1,1,3,T1S), 0                         , 184, 0  , 6329 , 199, 149), // #1071
+  INST(Vfnmsub231sh     , VexRvm             , E(66MAP6,BF,_,_,_,0,1,T1S), 0                         , 185, 0  , 6342 , 200, 134), // #1072
+  INST(Vfnmsub231ss     , VexRvm             , V(660F38,BF,_,I,0,0,2,T1S), 0                         , 122, 0  , 6355 , 201, 149), // #1073
+  INST(Vfnmsubpd        , Fma4_Lx            , V(660F3A,7D,_,x,x,_,_,_  ), 0                         , 73 , 0  , 6368 , 289, 150), // #1074
+  INST(Vfnmsubps        , Fma4_Lx            , V(660F3A,7C,_,x,x,_,_,_  ), 0                         , 73 , 0  , 6378 , 289, 150), // #1075
+  INST(Vfnmsubsd        , Fma4               , V(660F3A,7F,_,0,x,_,_,_  ), 0                         , 73 , 0  , 6388 , 290, 150), // #1076
+  INST(Vfnmsubss        , Fma4               , V(660F3A,7E,_,0,x,_,_,_  ), 0                         , 73 , 0  , 6398 , 291, 150), // #1077
+  INST(Vfpclasspd       , VexRmi_Lx          , E(660F3A,66,_,x,_,1,4,FV ), 0                         , 112, 0  , 6408 , 292, 140), // #1078
+  INST(Vfpclassph       , VexRmi_Lx          , E(000F3A,66,_,_,_,0,4,FV ), 0                         , 123, 0  , 6419 , 293, 132), // #1079
+  INST(Vfpclassps       , VexRmi_Lx          , E(660F3A,66,_,x,_,0,4,FV ), 0                         , 111, 0  , 6430 , 294, 140), // #1080
+  INST(Vfpclasssd       , VexRmi             , E(660F3A,67,_,I,_,1,3,T1S), 0                         , 180, 0  , 6441 , 295, 66 ), // #1081
+  INST(Vfpclasssh       , VexRmi             , E(000F3A,67,_,_,_,0,1,T1S), 0                         , 188, 0  , 6452 , 296, 134), // #1082
+  INST(Vfpclassss       , VexRmi             , E(660F3A,67,_,I,_,0,2,T1S), 0                         , 181, 0  , 6463 , 297, 66 ), // #1083
+  INST(Vfrczpd          , VexRm_Lx           , V(XOP_M9,81,_,x,0,_,_,_  ), 0                         , 79 , 0  , 6474 , 298, 151), // #1084
+  INST(Vfrczps          , VexRm_Lx           , V(XOP_M9,80,_,x,0,_,_,_  ), 0                         , 79 , 0  , 6482 , 298, 151), // #1085
+  INST(Vfrczsd          , VexRm              , V(XOP_M9,83,_,0,0,_,_,_  ), 0                         , 79 , 0  , 6490 , 299, 151), // #1086
+  INST(Vfrczss          , VexRm              , V(XOP_M9,82,_,0,0,_,_,_  ), 0                         , 79 , 0  , 6498 , 300, 151), // #1087
+  INST(Vgatherdpd       , VexRmvRm_VM        , V(660F38,92,_,x,1,_,_,_  ), E(660F38,92,_,x,_,1,3,T1S), 189, 80 , 6506 , 301, 152), // #1088
+  INST(Vgatherdps       , VexRmvRm_VM        , V(660F38,92,_,x,0,_,_,_  ), E(660F38,92,_,x,_,0,2,T1S), 96 , 81 , 6517 , 302, 152), // #1089
+  INST(Vgatherpf0dpd    , VexM_VM            , E(660F38,C6,1,2,_,1,3,T1S), 0                         , 190, 0  , 6528 , 303, 153), // #1090
+  INST(Vgatherpf0dps    , VexM_VM            , E(660F38,C6,1,2,_,0,2,T1S), 0                         , 191, 0  , 6542 , 304, 153), // #1091
+  INST(Vgatherpf0qpd    , VexM_VM            , E(660F38,C7,1,2,_,1,3,T1S), 0                         , 190, 0  , 6556 , 305, 153), // #1092
+  INST(Vgatherpf0qps    , VexM_VM            , E(660F38,C7,1,2,_,0,2,T1S), 0                         , 191, 0  , 6570 , 305, 153), // #1093
+  INST(Vgatherpf1dpd    , VexM_VM            , E(660F38,C6,2,2,_,1,3,T1S), 0                         , 192, 0  , 6584 , 303, 153), // #1094
+  INST(Vgatherpf1dps    , VexM_VM            , E(660F38,C6,2,2,_,0,2,T1S), 0                         , 193, 0  , 6598 , 304, 153), // #1095
+  INST(Vgatherpf1qpd    , VexM_VM            , E(660F38,C7,2,2,_,1,3,T1S), 0                         , 192, 0  , 6612 , 305, 153), // #1096
+  INST(Vgatherpf1qps    , VexM_VM            , E(660F38,C7,2,2,_,0,2,T1S), 0                         , 193, 0  , 6626 , 305, 153), // #1097
+  INST(Vgatherqpd       , VexRmvRm_VM        , V(660F38,93,_,x,1,_,_,_  ), E(660F38,93,_,x,_,1,3,T1S), 189, 82 , 6640 , 306, 152), // #1098
+  INST(Vgatherqps       , VexRmvRm_VM        , V(660F38,93,_,x,0,_,_,_  ), E(660F38,93,_,x,_,0,2,T1S), 96 , 83 , 6651 , 307, 152), // #1099
+  INST(Vgetexppd        , VexRm_Lx           , E(660F38,42,_,x,_,1,4,FV ), 0                         , 113, 0  , 6662 , 263, 138), // #1100
+  INST(Vgetexpph        , VexRm_Lx           , E(66MAP6,42,_,_,_,0,4,FV ), 0                         , 183, 0  , 6672 , 265, 132), // #1101
+  INST(Vgetexpps        , VexRm_Lx           , E(660F38,42,_,x,_,0,4,FV ), 0                         , 114, 0  , 6682 , 268, 138), // #1102
+  INST(Vgetexpsd        , VexRvm             , E(660F38,43,_,I,_,1,3,T1S), 0                         , 128, 0  , 6692 , 308, 68 ), // #1103
+  INST(Vgetexpsh        , VexRvm             , E(66MAP6,43,_,_,_,0,1,T1S), 0                         , 185, 0  , 6702 , 254, 134), // #1104
+  INST(Vgetexpss        , VexRvm             , E(660F38,43,_,I,_,0,2,T1S), 0                         , 129, 0  , 6712 , 309, 68 ), // #1105
+  INST(Vgetmantpd       , VexRmi_Lx          , E(660F3A,26,_,x,_,1,4,FV ), 0                         , 112, 0  , 6722 , 310, 138), // #1106
+  INST(Vgetmantph       , VexRmi_Lx          , E(000F3A,26,_,_,_,0,4,FV ), 0                         , 123, 0  , 6733 , 311, 132), // #1107
+  INST(Vgetmantps       , VexRmi_Lx          , E(660F3A,26,_,x,_,0,4,FV ), 0                         , 111, 0  , 6744 , 312, 138), // #1108
+  INST(Vgetmantsd       , VexRvmi            , E(660F3A,27,_,I,_,1,3,T1S), 0                         , 180, 0  , 6755 , 287, 68 ), // #1109
+  INST(Vgetmantsh       , VexRvmi            , E(000F3A,27,_,_,_,0,1,T1S), 0                         , 188, 0  , 6766 , 313, 134), // #1110
+  INST(Vgetmantss       , VexRvmi            , E(660F3A,27,_,I,_,0,2,T1S), 0                         , 181, 0  , 6777 , 288, 68 ), // #1111
+  INST(Vgf2p8affineinvqb, VexRvmi_Lx         , V(660F3A,CF,_,x,1,1,4,FV ), 0                         , 194, 0  , 6788 , 314, 154), // #1112
+  INST(Vgf2p8affineqb   , VexRvmi_Lx         , V(660F3A,CE,_,x,1,1,4,FV ), 0                         , 194, 0  , 6806 , 314, 154), // #1113
+  INST(Vgf2p8mulb       , VexRvm_Lx          , V(660F38,CF,_,x,0,0,4,FV ), 0                         , 110, 0  , 6821 , 315, 154), // #1114
+  INST(Vhaddpd          , VexRvm_Lx          , V(660F00,7C,_,x,I,_,_,_  ), 0                         , 69 , 0  , 6832 , 202, 135), // #1115
+  INST(Vhaddps          , VexRvm_Lx          , V(F20F00,7C,_,x,I,_,_,_  ), 0                         , 109, 0  , 6840 , 202, 135), // #1116
+  INST(Vhsubpd          , VexRvm_Lx          , V(660F00,7D,_,x,I,_,_,_  ), 0                         , 69 , 0  , 6848 , 202, 135), // #1117
+  INST(Vhsubps          , VexRvm_Lx          , V(F20F00,7D,_,x,I,_,_,_  ), 0                         , 109, 0  , 6856 , 202, 135), // #1118
+  INST(Vinsertf128      , VexRvmi            , V(660F3A,18,_,1,0,_,_,_  ), 0                         , 172, 0  , 6864 , 316, 135), // #1119
+  INST(Vinsertf32x4     , VexRvmi_Lx         , E(660F3A,18,_,x,_,0,4,T4 ), 0                         , 173, 0  , 6876 , 317, 138), // #1120
+  INST(Vinsertf32x8     , VexRvmi            , E(660F3A,1A,_,2,_,0,5,T8 ), 0                         , 174, 0  , 6889 , 318, 66 ), // #1121
+  INST(Vinsertf64x2     , VexRvmi_Lx         , E(660F3A,18,_,x,_,1,4,T2 ), 0                         , 175, 0  , 6902 , 317, 140), // #1122
+  INST(Vinsertf64x4     , VexRvmi            , E(660F3A,1A,_,2,_,1,5,T4 ), 0                         , 176, 0  , 6915 , 318, 68 ), // #1123
+  INST(Vinserti128      , VexRvmi            , V(660F3A,38,_,1,0,_,_,_  ), 0                         , 172, 0  , 6928 , 316, 141), // #1124
+  INST(Vinserti32x4     , VexRvmi_Lx         , E(660F3A,38,_,x,_,0,4,T4 ), 0                         , 173, 0  , 6940 , 317, 138), // #1125
+  INST(Vinserti32x8     , VexRvmi            , E(660F3A,3A,_,2,_,0,5,T8 ), 0                         , 174, 0  , 6953 , 318, 66 ), // #1126
+  INST(Vinserti64x2     , VexRvmi_Lx         , E(660F3A,38,_,x,_,1,4,T2 ), 0                         , 175, 0  , 6966 , 317, 140), // #1127
+  INST(Vinserti64x4     , VexRvmi            , E(660F3A,3A,_,2,_,1,5,T4 ), 0                         , 176, 0  , 6979 , 318, 68 ), // #1128
+  INST(Vinsertps        , VexRvmi            , V(660F3A,21,_,0,I,0,2,T1S), 0                         , 177, 0  , 6992 , 319, 133), // #1129
+  INST(Vlddqu           , VexRm_Lx           , V(F20F00,F0,_,x,I,_,_,_  ), 0                         , 109, 0  , 7002 , 320, 135), // #1130
+  INST(Vldmxcsr         , VexM               , V(000F00,AE,2,0,I,_,_,_  ), 0                         , 195, 0  , 7009 , 321, 135), // #1131
+  INST(Vmaskmovdqu      , VexRm_ZDI          , V(660F00,F7,_,0,I,_,_,_  ), 0                         , 69 , 0  , 7018 , 322, 135), // #1132
+  INST(Vmaskmovpd       , VexRvmMvr_Lx       , V(660F38,2D,_,x,0,_,_,_  ), V(660F38,2F,_,x,0,_,_,_  ), 96 , 84 , 7030 , 323, 135), // #1133
+  INST(Vmaskmovps       , VexRvmMvr_Lx       , V(660F38,2C,_,x,0,_,_,_  ), V(660F38,2E,_,x,0,_,_,_  ), 96 , 85 , 7041 , 323, 135), // #1134
+  INST(Vmaxpd           , VexRvm_Lx          , V(660F00,5F,_,x,I,1,4,FV ), 0                         , 103, 0  , 7052 , 324, 131), // #1135
+  INST(Vmaxph           , VexRvm_Lx          , E(00MAP5,5F,_,_,_,0,4,FV ), 0                         , 104, 0  , 7059 , 325, 132), // #1136
+  INST(Vmaxps           , VexRvm_Lx          , V(000F00,5F,_,x,I,0,4,FV ), 0                         , 105, 0  , 7066 , 326, 131), // #1137
+  INST(Vmaxsd           , VexRvm             , V(F20F00,5F,_,I,I,1,3,T1S), 0                         , 106, 0  , 7073 , 327, 131), // #1138
+  INST(Vmaxsh           , VexRvm             , E(F3MAP5,5F,_,_,_,0,1,T1S), 0                         , 107, 0  , 7080 , 254, 134), // #1139
+  INST(Vmaxss           , VexRvm             , V(F30F00,5F,_,I,I,0,2,T1S), 0                         , 108, 0  , 7087 , 258, 131), // #1140
+  INST(Vmcall           , X86Op              , O(000F01,C1,_,_,_,_,_,_  ), 0                         , 21 , 0  , 7094 , 30 , 58 ), // #1141
+  INST(Vmclear          , X86M_Only          , O(660F00,C7,6,_,_,_,_,_  ), 0                         , 26 , 0  , 7101 , 32 , 58 ), // #1142
+  INST(Vmfunc           , X86Op              , O(000F01,D4,_,_,_,_,_,_  ), 0                         , 21 , 0  , 7109 , 30 , 58 ), // #1143
+  INST(Vminpd           , VexRvm_Lx          , V(660F00,5D,_,x,I,1,4,FV ), 0                         , 103, 0  , 7116 , 324, 131), // #1144
+  INST(Vminph           , VexRvm_Lx          , E(00MAP5,5D,_,_,_,0,4,FV ), 0                         , 104, 0  , 7123 , 325, 132), // #1145
+  INST(Vminps           , VexRvm_Lx          , V(000F00,5D,_,x,I,0,4,FV ), 0                         , 105, 0  , 7130 , 326, 131), // #1146
+  INST(Vminsd           , VexRvm             , V(F20F00,5D,_,I,I,1,3,T1S), 0                         , 106, 0  , 7137 , 327, 131), // #1147
+  INST(Vminsh           , VexRvm             , E(F3MAP5,5D,_,_,_,0,1,T1S), 0                         , 107, 0  , 7144 , 254, 134), // #1148
+  INST(Vminss           , VexRvm             , V(F30F00,5D,_,I,I,0,2,T1S), 0                         , 108, 0  , 7151 , 258, 131), // #1149
+  INST(Vmlaunch         , X86Op              , O(000F01,C2,_,_,_,_,_,_  ), 0                         , 21 , 0  , 7158 , 30 , 58 ), // #1150
+  INST(Vmload           , X86Op_xAX          , O(000F01,DA,_,_,_,_,_,_  ), 0                         , 21 , 0  , 7167 , 328, 22 ), // #1151
+  INST(Vmmcall          , X86Op              , O(000F01,D9,_,_,_,_,_,_  ), 0                         , 21 , 0  , 7174 , 30 , 22 ), // #1152
+  INST(Vmovapd          , VexRmMr_Lx         , V(660F00,28,_,x,I,1,4,FVM), V(660F00,29,_,x,I,1,4,FVM), 103, 86 , 7182 , 329, 155), // #1153
+  INST(Vmovaps          , VexRmMr_Lx         , V(000F00,28,_,x,I,0,4,FVM), V(000F00,29,_,x,I,0,4,FVM), 105, 87 , 7190 , 329, 155), // #1154
+  INST(Vmovd            , VexMovdMovq        , V(660F00,6E,_,0,0,0,2,T1S), V(660F00,7E,_,0,0,0,2,T1S), 196, 88 , 7198 , 330, 133), // #1155
+  INST(Vmovddup         , VexRm_Lx           , V(F20F00,12,_,x,I,1,3,DUP), 0                         , 197, 0  , 7204 , 331, 131), // #1156
+  INST(Vmovdqa          , VexRmMr_Lx         , V(660F00,6F,_,x,I,_,_,_  ), V(660F00,7F,_,x,I,_,_,_  ), 69 , 89 , 7213 , 332, 156), // #1157
+  INST(Vmovdqa32        , VexRmMr_Lx         , E(660F00,6F,_,x,_,0,4,FVM), E(660F00,7F,_,x,_,0,4,FVM), 198, 90 , 7221 , 333, 157), // #1158
+  INST(Vmovdqa64        , VexRmMr_Lx         , E(660F00,6F,_,x,_,1,4,FVM), E(660F00,7F,_,x,_,1,4,FVM), 135, 91 , 7231 , 333, 157), // #1159
+  INST(Vmovdqu          , VexRmMr_Lx         , V(F30F00,6F,_,x,I,_,_,_  ), V(F30F00,7F,_,x,I,_,_,_  ), 199, 92 , 7241 , 332, 156), // #1160
+  INST(Vmovdqu16        , VexRmMr_Lx         , E(F20F00,6F,_,x,_,1,4,FVM), E(F20F00,7F,_,x,_,1,4,FVM), 166, 93 , 7249 , 333, 158), // #1161
+  INST(Vmovdqu32        , VexRmMr_Lx         , E(F30F00,6F,_,x,_,0,4,FVM), E(F30F00,7F,_,x,_,0,4,FVM), 200, 94 , 7259 , 333, 157), // #1162
+  INST(Vmovdqu64        , VexRmMr_Lx         , E(F30F00,6F,_,x,_,1,4,FVM), E(F30F00,7F,_,x,_,1,4,FVM), 149, 95 , 7269 , 333, 157), // #1163
+  INST(Vmovdqu8         , VexRmMr_Lx         , E(F20F00,6F,_,x,_,0,4,FVM), E(F20F00,7F,_,x,_,0,4,FVM), 164, 96 , 7279 , 333, 158), // #1164
+  INST(Vmovhlps         , VexRvm             , V(000F00,12,_,0,I,0,_,_  ), 0                         , 72 , 0  , 7288 , 334, 133), // #1165
+  INST(Vmovhpd          , VexRvmMr           , V(660F00,16,_,0,I,1,3,T1S), V(660F00,17,_,0,I,1,3,T1S), 125, 97 , 7297 , 335, 133), // #1166
+  INST(Vmovhps          , VexRvmMr           , V(000F00,16,_,0,I,0,3,T2 ), V(000F00,17,_,0,I,0,3,T2 ), 201, 98 , 7305 , 335, 133), // #1167
+  INST(Vmovlhps         , VexRvm             , V(000F00,16,_,0,I,0,_,_  ), 0                         , 72 , 0  , 7313 , 334, 133), // #1168
+  INST(Vmovlpd          , VexRvmMr           , V(660F00,12,_,0,I,1,3,T1S), V(660F00,13,_,0,I,1,3,T1S), 125, 99 , 7322 , 335, 133), // #1169
+  INST(Vmovlps          , VexRvmMr           , V(000F00,12,_,0,I,0,3,T2 ), V(000F00,13,_,0,I,0,3,T2 ), 201, 100, 7330 , 335, 133), // #1170
+  INST(Vmovmskpd        , VexRm_Lx           , V(660F00,50,_,x,I,_,_,_  ), 0                         , 69 , 0  , 7338 , 336, 135), // #1171
+  INST(Vmovmskps        , VexRm_Lx           , V(000F00,50,_,x,I,_,_,_  ), 0                         , 72 , 0  , 7348 , 336, 135), // #1172
+  INST(Vmovntdq         , VexMr_Lx           , V(660F00,E7,_,x,I,0,4,FVM), 0                         , 144, 0  , 7358 , 337, 131), // #1173
+  INST(Vmovntdqa        , VexRm_Lx           , V(660F38,2A,_,x,I,0,4,FVM), 0                         , 110, 0  , 7367 , 338, 142), // #1174
+  INST(Vmovntpd         , VexMr_Lx           , V(660F00,2B,_,x,I,1,4,FVM), 0                         , 103, 0  , 7377 , 337, 131), // #1175
+  INST(Vmovntps         , VexMr_Lx           , V(000F00,2B,_,x,I,0,4,FVM), 0                         , 105, 0  , 7386 , 337, 131), // #1176
+  INST(Vmovq            , VexMovdMovq        , V(660F00,6E,_,0,I,1,3,T1S), V(660F00,7E,_,0,I,1,3,T1S), 125, 101, 7395 , 339, 159), // #1177
+  INST(Vmovsd           , VexMovssMovsd      , V(F20F00,10,_,I,I,1,3,T1S), V(F20F00,11,_,I,I,1,3,T1S), 106, 102, 7401 , 340, 159), // #1178
+  INST(Vmovsh           , VexMovssMovsd      , E(F3MAP5,10,_,I,_,0,1,T1S), E(F3MAP5,11,_,I,_,0,1,T1S), 107, 103, 7408 , 341, 134), // #1179
+  INST(Vmovshdup        , VexRm_Lx           , V(F30F00,16,_,x,I,0,4,FVM), 0                         , 161, 0  , 7415 , 342, 131), // #1180
+  INST(Vmovsldup        , VexRm_Lx           , V(F30F00,12,_,x,I,0,4,FVM), 0                         , 161, 0  , 7425 , 342, 131), // #1181
+  INST(Vmovss           , VexMovssMovsd      , V(F30F00,10,_,I,I,0,2,T1S), V(F30F00,11,_,I,I,0,2,T1S), 108, 104, 7435 , 343, 159), // #1182
+  INST(Vmovupd          , VexRmMr_Lx         , V(660F00,10,_,x,I,1,4,FVM), V(660F00,11,_,x,I,1,4,FVM), 103, 105, 7442 , 329, 155), // #1183
+  INST(Vmovups          , VexRmMr_Lx         , V(000F00,10,_,x,I,0,4,FVM), V(000F00,11,_,x,I,0,4,FVM), 105, 106, 7450 , 329, 155), // #1184
+  INST(Vmovw            , VexMovdMovq        , E(66MAP5,6E,_,0,_,I,1,T1S), E(66MAP5,7E,_,0,_,I,1,T1S), 202, 107, 7458 , 344, 134), // #1185
+  INST(Vmpsadbw         , VexRvmi_Lx         , V(660F3A,42,_,x,I,_,_,_  ), 0                         , 73 , 0  , 7464 , 214, 160), // #1186
+  INST(Vmptrld          , X86M_Only          , O(000F00,C7,6,_,_,_,_,_  ), 0                         , 80 , 0  , 7473 , 32 , 58 ), // #1187
+  INST(Vmptrst          , X86M_Only          , O(000F00,C7,7,_,_,_,_,_  ), 0                         , 22 , 0  , 7481 , 32 , 58 ), // #1188
+  INST(Vmread           , X86Mr_NoSize       , O(000F00,78,_,_,_,_,_,_  ), 0                         , 4  , 0  , 7489 , 345, 58 ), // #1189
+  INST(Vmresume         , X86Op              , O(000F01,C3,_,_,_,_,_,_  ), 0                         , 21 , 0  , 7496 , 30 , 58 ), // #1190
+  INST(Vmrun            , X86Op_xAX          , O(000F01,D8,_,_,_,_,_,_  ), 0                         , 21 , 0  , 7505 , 328, 22 ), // #1191
+  INST(Vmsave           , X86Op_xAX          , O(000F01,DB,_,_,_,_,_,_  ), 0                         , 21 , 0  , 7511 , 328, 22 ), // #1192
+  INST(Vmulpd           , VexRvm_Lx          , V(660F00,59,_,x,I,1,4,FV ), 0                         , 103, 0  , 7518 , 196, 131), // #1193
+  INST(Vmulph           , VexRvm_Lx          , E(00MAP5,59,_,_,_,0,4,FV ), 0                         , 104, 0  , 7525 , 197, 132), // #1194
+  INST(Vmulps           , VexRvm_Lx          , V(000F00,59,_,x,I,0,4,FV ), 0                         , 105, 0  , 7532 , 198, 131), // #1195
+  INST(Vmulsd           , VexRvm             , V(F20F00,59,_,I,I,1,3,T1S), 0                         , 106, 0  , 7539 , 199, 133), // #1196
+  INST(Vmulsh           , VexRvm             , E(F3MAP5,59,_,_,_,0,1,T1S), 0                         , 107, 0  , 7546 , 200, 134), // #1197
+  INST(Vmulss           , VexRvm             , V(F30F00,59,_,I,I,0,2,T1S), 0                         , 108, 0  , 7553 , 201, 133), // #1198
+  INST(Vmwrite          , X86Rm_NoSize       , O(000F00,79,_,_,_,_,_,_  ), 0                         , 4  , 0  , 7560 , 346, 58 ), // #1199
+  INST(Vmxon            , X86M_Only          , O(F30F00,C7,6,_,_,_,_,_  ), 0                         , 24 , 0  , 7568 , 32 , 58 ), // #1200
+  INST(Vorpd            , VexRvm_Lx          , V(660F00,56,_,x,I,1,4,FV ), 0                         , 103, 0  , 7574 , 210, 139), // #1201
+  INST(Vorps            , VexRvm_Lx          , V(000F00,56,_,x,I,0,4,FV ), 0                         , 105, 0  , 7580 , 211, 139), // #1202
+  INST(Vp2intersectd    , VexRvm_Lx_2xK      , E(F20F38,68,_,_,_,0,4,FV ), 0                         , 131, 0  , 7586 , 347, 161), // #1203
+  INST(Vp2intersectq    , VexRvm_Lx_2xK      , E(F20F38,68,_,_,_,1,4,FV ), 0                         , 203, 0  , 7600 , 348, 161), // #1204
+  INST(Vp4dpwssd        , VexRm_T1_4X        , E(F20F38,52,_,2,_,0,4,T4X), 0                         , 101, 0  , 7614 , 194, 162), // #1205
+  INST(Vp4dpwssds       , VexRm_T1_4X        , E(F20F38,53,_,2,_,0,4,T4X), 0                         , 101, 0  , 7624 , 194, 162), // #1206
+  INST(Vpabsb           , VexRm_Lx           , V(660F38,1C,_,x,I,_,4,FVM), 0                         , 110, 0  , 7635 , 342, 163), // #1207
+  INST(Vpabsd           , VexRm_Lx           , V(660F38,1E,_,x,I,0,4,FV ), 0                         , 110, 0  , 7642 , 349, 142), // #1208
+  INST(Vpabsq           , VexRm_Lx           , E(660F38,1F,_,x,_,1,4,FV ), 0                         , 113, 0  , 7649 , 350, 138), // #1209
+  INST(Vpabsw           , VexRm_Lx           , V(660F38,1D,_,x,I,_,4,FVM), 0                         , 110, 0  , 7656 , 342, 163), // #1210
+  INST(Vpackssdw        , VexRvm_Lx          , V(660F00,6B,_,x,I,0,4,FV ), 0                         , 144, 0  , 7663 , 209, 163), // #1211
+  INST(Vpacksswb        , VexRvm_Lx          , V(660F00,63,_,x,I,I,4,FVM), 0                         , 144, 0  , 7673 , 315, 163), // #1212
+  INST(Vpackusdw        , VexRvm_Lx          , V(660F38,2B,_,x,I,0,4,FV ), 0                         , 110, 0  , 7683 , 209, 163), // #1213
+  INST(Vpackuswb        , VexRvm_Lx          , V(660F00,67,_,x,I,I,4,FVM), 0                         , 144, 0  , 7693 , 315, 163), // #1214
+  INST(Vpaddb           , VexRvm_Lx          , V(660F00,FC,_,x,I,I,4,FVM), 0                         , 144, 0  , 7703 , 315, 163), // #1215
+  INST(Vpaddd           , VexRvm_Lx          , V(660F00,FE,_,x,I,0,4,FV ), 0                         , 144, 0  , 7710 , 209, 142), // #1216
+  INST(Vpaddq           , VexRvm_Lx          , V(660F00,D4,_,x,I,1,4,FV ), 0                         , 103, 0  , 7717 , 208, 142), // #1217
+  INST(Vpaddsb          , VexRvm_Lx          , V(660F00,EC,_,x,I,I,4,FVM), 0                         , 144, 0  , 7724 , 315, 163), // #1218
+  INST(Vpaddsw          , VexRvm_Lx          , V(660F00,ED,_,x,I,I,4,FVM), 0                         , 144, 0  , 7732 , 315, 163), // #1219
+  INST(Vpaddusb         , VexRvm_Lx          , V(660F00,DC,_,x,I,I,4,FVM), 0                         , 144, 0  , 7740 , 315, 163), // #1220
+  INST(Vpaddusw         , VexRvm_Lx          , V(660F00,DD,_,x,I,I,4,FVM), 0                         , 144, 0  , 7749 , 315, 163), // #1221
+  INST(Vpaddw           , VexRvm_Lx          , V(660F00,FD,_,x,I,I,4,FVM), 0                         , 144, 0  , 7758 , 315, 163), // #1222
+  INST(Vpalignr         , VexRvmi_Lx         , V(660F3A,0F,_,x,I,I,4,FVM), 0                         , 204, 0  , 7765 , 314, 163), // #1223
+  INST(Vpand            , VexRvm_Lx          , V(660F00,DB,_,x,I,_,_,_  ), 0                         , 69 , 0  , 7774 , 351, 160), // #1224
+  INST(Vpandd           , VexRvm_Lx          , E(660F00,DB,_,x,_,0,4,FV ), 0                         , 198, 0  , 7780 , 352, 138), // #1225
+  INST(Vpandn           , VexRvm_Lx          , V(660F00,DF,_,x,I,_,_,_  ), 0                         , 69 , 0  , 7787 , 353, 160), // #1226
+  INST(Vpandnd          , VexRvm_Lx          , E(660F00,DF,_,x,_,0,4,FV ), 0                         , 198, 0  , 7794 , 354, 138), // #1227
+  INST(Vpandnq          , VexRvm_Lx          , E(660F00,DF,_,x,_,1,4,FV ), 0                         , 135, 0  , 7802 , 355, 138), // #1228
+  INST(Vpandq           , VexRvm_Lx          , E(660F00,DB,_,x,_,1,4,FV ), 0                         , 135, 0  , 7810 , 356, 138), // #1229
+  INST(Vpavgb           , VexRvm_Lx          , V(660F00,E0,_,x,I,I,4,FVM), 0                         , 144, 0  , 7817 , 315, 163), // #1230
+  INST(Vpavgw           , VexRvm_Lx          , V(660F00,E3,_,x,I,I,4,FVM), 0                         , 144, 0  , 7824 , 315, 163), // #1231
+  INST(Vpblendd         , VexRvmi_Lx         , V(660F3A,02,_,x,0,_,_,_  ), 0                         , 73 , 0  , 7831 , 214, 141), // #1232
+  INST(Vpblendmb        , VexRvm_Lx          , E(660F38,66,_,x,_,0,4,FVM), 0                         , 114, 0  , 7840 , 357, 146), // #1233
+  INST(Vpblendmd        , VexRvm_Lx          , E(660F38,64,_,x,_,0,4,FV ), 0                         , 114, 0  , 7850 , 213, 138), // #1234
+  INST(Vpblendmq        , VexRvm_Lx          , E(660F38,64,_,x,_,1,4,FV ), 0                         , 113, 0  , 7860 , 212, 138), // #1235
+  INST(Vpblendmw        , VexRvm_Lx          , E(660F38,66,_,x,_,1,4,FVM), 0                         , 113, 0  , 7870 , 357, 146), // #1236
+  INST(Vpblendvb        , VexRvmr_Lx         , V(660F3A,4C,_,x,0,_,_,_  ), 0                         , 73 , 0  , 7880 , 215, 160), // #1237
+  INST(Vpblendw         , VexRvmi_Lx         , V(660F3A,0E,_,x,I,_,_,_  ), 0                         , 73 , 0  , 7890 , 214, 160), // #1238
+  INST(Vpbroadcastb     , VexRm_Lx_Bcst      , V(660F38,78,_,x,0,0,0,T1S), E(660F38,7A,_,x,0,0,0,T1S), 96 , 108, 7899 , 358, 164), // #1239
+  INST(Vpbroadcastd     , VexRm_Lx_Bcst      , V(660F38,58,_,x,0,0,2,T1S), E(660F38,7C,_,x,0,0,0,T1S), 122, 109, 7912 , 359, 152), // #1240
+  INST(Vpbroadcastmb2q  , VexRm_Lx           , E(F30F38,2A,_,x,_,1,_,_  ), 0                         , 205, 0  , 7925 , 360, 165), // #1241
+  INST(Vpbroadcastmw2d  , VexRm_Lx           , E(F30F38,3A,_,x,_,0,_,_  ), 0                         , 206, 0  , 7941 , 360, 165), // #1242
+  INST(Vpbroadcastq     , VexRm_Lx_Bcst      , V(660F38,59,_,x,0,1,3,T1S), E(660F38,7C,_,x,0,1,0,T1S), 121, 110, 7957 , 361, 152), // #1243
+  INST(Vpbroadcastw     , VexRm_Lx_Bcst      , V(660F38,79,_,x,0,0,1,T1S), E(660F38,7B,_,x,0,0,0,T1S), 207, 111, 7970 , 362, 164), // #1244
+  INST(Vpclmulqdq       , VexRvmi_Lx         , V(660F3A,44,_,x,I,_,4,FVM), 0                         , 204, 0  , 7983 , 363, 166), // #1245
+  INST(Vpcmov           , VexRvrmRvmr_Lx     , V(XOP_M8,A2,_,x,x,_,_,_  ), 0                         , 208, 0  , 7994 , 289, 151), // #1246
+  INST(Vpcmpb           , VexRvmi_Lx         , E(660F3A,3F,_,x,_,0,4,FVM), 0                         , 111, 0  , 8001 , 364, 146), // #1247
+  INST(Vpcmpd           , VexRvmi_Lx         , E(660F3A,1F,_,x,_,0,4,FV ), 0                         , 111, 0  , 8008 , 365, 138), // #1248
+  INST(Vpcmpeqb         , VexRvm_Lx_KEvex    , V(660F00,74,_,x,I,I,4,FV ), 0                         , 144, 0  , 8015 , 366, 163), // #1249
+  INST(Vpcmpeqd         , VexRvm_Lx_KEvex    , V(660F00,76,_,x,I,0,4,FVM), 0                         , 144, 0  , 8024 , 367, 142), // #1250
+  INST(Vpcmpeqq         , VexRvm_Lx_KEvex    , V(660F38,29,_,x,I,1,4,FVM), 0                         , 209, 0  , 8033 , 368, 142), // #1251
+  INST(Vpcmpeqw         , VexRvm_Lx_KEvex    , V(660F00,75,_,x,I,I,4,FV ), 0                         , 144, 0  , 8042 , 366, 163), // #1252
+  INST(Vpcmpestri       , VexRmi             , V(660F3A,61,_,0,I,_,_,_  ), 0                         , 73 , 0  , 8051 , 369, 167), // #1253
+  INST(Vpcmpestrm       , VexRmi             , V(660F3A,60,_,0,I,_,_,_  ), 0                         , 73 , 0  , 8062 , 370, 167), // #1254
+  INST(Vpcmpgtb         , VexRvm_Lx_KEvex    , V(660F00,64,_,x,I,I,4,FV ), 0                         , 144, 0  , 8073 , 366, 163), // #1255
+  INST(Vpcmpgtd         , VexRvm_Lx_KEvex    , V(660F00,66,_,x,I,0,4,FVM), 0                         , 144, 0  , 8082 , 367, 142), // #1256
+  INST(Vpcmpgtq         , VexRvm_Lx_KEvex    , V(660F38,37,_,x,I,1,4,FVM), 0                         , 209, 0  , 8091 , 368, 142), // #1257
+  INST(Vpcmpgtw         , VexRvm_Lx_KEvex    , V(660F00,65,_,x,I,I,4,FV ), 0                         , 144, 0  , 8100 , 366, 163), // #1258
+  INST(Vpcmpistri       , VexRmi             , V(660F3A,63,_,0,I,_,_,_  ), 0                         , 73 , 0  , 8109 , 371, 167), // #1259
+  INST(Vpcmpistrm       , VexRmi             , V(660F3A,62,_,0,I,_,_,_  ), 0                         , 73 , 0  , 8120 , 372, 167), // #1260
+  INST(Vpcmpq           , VexRvmi_Lx         , E(660F3A,1F,_,x,_,1,4,FV ), 0                         , 112, 0  , 8131 , 373, 138), // #1261
+  INST(Vpcmpub          , VexRvmi_Lx         , E(660F3A,3E,_,x,_,0,4,FVM), 0                         , 111, 0  , 8138 , 364, 146), // #1262
+  INST(Vpcmpud          , VexRvmi_Lx         , E(660F3A,1E,_,x,_,0,4,FV ), 0                         , 111, 0  , 8146 , 365, 138), // #1263
+  INST(Vpcmpuq          , VexRvmi_Lx         , E(660F3A,1E,_,x,_,1,4,FV ), 0                         , 112, 0  , 8154 , 373, 138), // #1264
+  INST(Vpcmpuw          , VexRvmi_Lx         , E(660F3A,3E,_,x,_,1,4,FVM), 0                         , 112, 0  , 8162 , 373, 146), // #1265
+  INST(Vpcmpw           , VexRvmi_Lx         , E(660F3A,3F,_,x,_,1,4,FVM), 0                         , 112, 0  , 8170 , 373, 146), // #1266
+  INST(Vpcomb           , VexRvmi            , V(XOP_M8,CC,_,0,0,_,_,_  ), 0                         , 208, 0  , 8177 , 276, 151), // #1267
+  INST(Vpcomd           , VexRvmi            , V(XOP_M8,CE,_,0,0,_,_,_  ), 0                         , 208, 0  , 8184 , 276, 151), // #1268
+  INST(Vpcompressb      , VexMr_Lx           , E(660F38,63,_,x,_,0,0,T1S), 0                         , 210, 0  , 8191 , 232, 168), // #1269
+  INST(Vpcompressd      , VexMr_Lx           , E(660F38,8B,_,x,_,0,2,T1S), 0                         , 129, 0  , 8203 , 232, 138), // #1270
+  INST(Vpcompressq      , VexMr_Lx           , E(660F38,8B,_,x,_,1,3,T1S), 0                         , 128, 0  , 8215 , 232, 138), // #1271
+  INST(Vpcompressw      , VexMr_Lx           , E(660F38,63,_,x,_,1,1,T1S), 0                         , 211, 0  , 8227 , 232, 168), // #1272
+  INST(Vpcomq           , VexRvmi            , V(XOP_M8,CF,_,0,0,_,_,_  ), 0                         , 208, 0  , 8239 , 276, 151), // #1273
+  INST(Vpcomub          , VexRvmi            , V(XOP_M8,EC,_,0,0,_,_,_  ), 0                         , 208, 0  , 8246 , 276, 151), // #1274
+  INST(Vpcomud          , VexRvmi            , V(XOP_M8,EE,_,0,0,_,_,_  ), 0                         , 208, 0  , 8254 , 276, 151), // #1275
+  INST(Vpcomuq          , VexRvmi            , V(XOP_M8,EF,_,0,0,_,_,_  ), 0                         , 208, 0  , 8262 , 276, 151), // #1276
+  INST(Vpcomuw          , VexRvmi            , V(XOP_M8,ED,_,0,0,_,_,_  ), 0                         , 208, 0  , 8270 , 276, 151), // #1277
+  INST(Vpcomw           , VexRvmi            , V(XOP_M8,CD,_,0,0,_,_,_  ), 0                         , 208, 0  , 8278 , 276, 151), // #1278
+  INST(Vpconflictd      , VexRm_Lx           , E(660F38,C4,_,x,_,0,4,FV ), 0                         , 114, 0  , 8285 , 374, 165), // #1279
+  INST(Vpconflictq      , VexRm_Lx           , E(660F38,C4,_,x,_,1,4,FV ), 0                         , 113, 0  , 8297 , 374, 165), // #1280
+  INST(Vpdpbusd         , VexRvm_Lx          , V(660F38,50,_,x,_,0,4,FV ), 0                         , 110, 0  , 8309 , 375, 169), // #1281
+  INST(Vpdpbusds        , VexRvm_Lx          , V(660F38,51,_,x,_,0,4,FV ), 0                         , 110, 0  , 8318 , 375, 169), // #1282
+  INST(Vpdpwssd         , VexRvm_Lx          , V(660F38,52,_,x,_,0,4,FV ), 0                         , 110, 0  , 8328 , 375, 169), // #1283
+  INST(Vpdpwssds        , VexRvm_Lx          , V(660F38,53,_,x,_,0,4,FV ), 0                         , 110, 0  , 8337 , 375, 169), // #1284
+  INST(Vperm2f128       , VexRvmi            , V(660F3A,06,_,1,0,_,_,_  ), 0                         , 172, 0  , 8347 , 376, 135), // #1285
+  INST(Vperm2i128       , VexRvmi            , V(660F3A,46,_,1,0,_,_,_  ), 0                         , 172, 0  , 8358 , 376, 141), // #1286
+  INST(Vpermb           , VexRvm_Lx          , E(660F38,8D,_,x,_,0,4,FVM), 0                         , 114, 0  , 8369 , 357, 170), // #1287
+  INST(Vpermd           , VexRvm_Lx          , V(660F38,36,_,x,0,0,4,FV ), 0                         , 110, 0  , 8376 , 377, 152), // #1288
+  INST(Vpermi2b         , VexRvm_Lx          , E(660F38,75,_,x,_,0,4,FVM), 0                         , 114, 0  , 8383 , 357, 170), // #1289
+  INST(Vpermi2d         , VexRvm_Lx          , E(660F38,76,_,x,_,0,4,FV ), 0                         , 114, 0  , 8392 , 213, 138), // #1290
+  INST(Vpermi2pd        , VexRvm_Lx          , E(660F38,77,_,x,_,1,4,FV ), 0                         , 113, 0  , 8401 , 212, 138), // #1291
+  INST(Vpermi2ps        , VexRvm_Lx          , E(660F38,77,_,x,_,0,4,FV ), 0                         , 114, 0  , 8411 , 213, 138), // #1292
+  INST(Vpermi2q         , VexRvm_Lx          , E(660F38,76,_,x,_,1,4,FV ), 0                         , 113, 0  , 8421 , 212, 138), // #1293
+  INST(Vpermi2w         , VexRvm_Lx          , E(660F38,75,_,x,_,1,4,FVM), 0                         , 113, 0  , 8430 , 357, 146), // #1294
+  INST(Vpermil2pd       , VexRvrmiRvmri_Lx   , V(660F3A,49,_,x,x,_,_,_  ), 0                         , 73 , 0  , 8439 , 378, 151), // #1295
+  INST(Vpermil2ps       , VexRvrmiRvmri_Lx   , V(660F3A,48,_,x,x,_,_,_  ), 0                         , 73 , 0  , 8450 , 378, 151), // #1296
+  INST(Vpermilpd        , VexRvmRmi_Lx       , V(660F38,0D,_,x,0,1,4,FV ), V(660F3A,05,_,x,0,1,4,FV ), 209, 112, 8461 , 379, 131), // #1297
+  INST(Vpermilps        , VexRvmRmi_Lx       , V(660F38,0C,_,x,0,0,4,FV ), V(660F3A,04,_,x,0,0,4,FV ), 110, 113, 8471 , 380, 131), // #1298
+  INST(Vpermpd          , VexRvmRmi_Lx       , E(660F38,16,_,x,1,1,4,FV ), V(660F3A,01,_,x,1,1,4,FV ), 212, 114, 8481 , 381, 152), // #1299
+  INST(Vpermps          , VexRvm_Lx          , V(660F38,16,_,x,0,0,4,FV ), 0                         , 110, 0  , 8489 , 377, 152), // #1300
+  INST(Vpermq           , VexRvmRmi_Lx       , E(660F38,36,_,x,_,1,4,FV ), V(660F3A,00,_,x,1,1,4,FV ), 113, 115, 8497 , 381, 152), // #1301
+  INST(Vpermt2b         , VexRvm_Lx          , E(660F38,7D,_,x,_,0,4,FVM), 0                         , 114, 0  , 8504 , 357, 170), // #1302
+  INST(Vpermt2d         , VexRvm_Lx          , E(660F38,7E,_,x,_,0,4,FV ), 0                         , 114, 0  , 8513 , 213, 138), // #1303
+  INST(Vpermt2pd        , VexRvm_Lx          , E(660F38,7F,_,x,_,1,4,FV ), 0                         , 113, 0  , 8522 , 212, 138), // #1304
+  INST(Vpermt2ps        , VexRvm_Lx          , E(660F38,7F,_,x,_,0,4,FV ), 0                         , 114, 0  , 8532 , 213, 138), // #1305
+  INST(Vpermt2q         , VexRvm_Lx          , E(660F38,7E,_,x,_,1,4,FV ), 0                         , 113, 0  , 8542 , 212, 138), // #1306
+  INST(Vpermt2w         , VexRvm_Lx          , E(660F38,7D,_,x,_,1,4,FVM), 0                         , 113, 0  , 8551 , 357, 146), // #1307
+  INST(Vpermw           , VexRvm_Lx          , E(660F38,8D,_,x,_,1,4,FVM), 0                         , 113, 0  , 8560 , 357, 146), // #1308
+  INST(Vpexpandb        , VexRm_Lx           , E(660F38,62,_,x,_,0,0,T1S), 0                         , 210, 0  , 8567 , 279, 168), // #1309
+  INST(Vpexpandd        , VexRm_Lx           , E(660F38,89,_,x,_,0,2,T1S), 0                         , 129, 0  , 8577 , 279, 138), // #1310
+  INST(Vpexpandq        , VexRm_Lx           , E(660F38,89,_,x,_,1,3,T1S), 0                         , 128, 0  , 8587 , 279, 138), // #1311
+  INST(Vpexpandw        , VexRm_Lx           , E(660F38,62,_,x,_,1,1,T1S), 0                         , 211, 0  , 8597 , 279, 168), // #1312
+  INST(Vpextrb          , VexMri             , V(660F3A,14,_,0,0,I,0,T1S), 0                         , 73 , 0  , 8607 , 382, 171), // #1313
+  INST(Vpextrd          , VexMri             , V(660F3A,16,_,0,0,0,2,T1S), 0                         , 177, 0  , 8615 , 283, 172), // #1314
+  INST(Vpextrq          , VexMri             , V(660F3A,16,_,0,1,1,3,T1S), 0                         , 213, 0  , 8623 , 383, 172), // #1315
+  INST(Vpextrw          , VexMri_Vpextrw     , V(660F3A,15,_,0,0,I,1,T1S), 0                         , 214, 0  , 8631 , 384, 171), // #1316
+  INST(Vpgatherdd       , VexRmvRm_VM        , V(660F38,90,_,x,0,_,_,_  ), E(660F38,90,_,x,_,0,2,T1S), 96 , 116, 8639 , 302, 152), // #1317
+  INST(Vpgatherdq       , VexRmvRm_VM        , V(660F38,90,_,x,1,_,_,_  ), E(660F38,90,_,x,_,1,3,T1S), 189, 117, 8650 , 301, 152), // #1318
+  INST(Vpgatherqd       , VexRmvRm_VM        , V(660F38,91,_,x,0,_,_,_  ), E(660F38,91,_,x,_,0,2,T1S), 96 , 118, 8661 , 307, 152), // #1319
+  INST(Vpgatherqq       , VexRmvRm_VM        , V(660F38,91,_,x,1,_,_,_  ), E(660F38,91,_,x,_,1,3,T1S), 189, 119, 8672 , 306, 152), // #1320
+  INST(Vphaddbd         , VexRm              , V(XOP_M9,C2,_,0,0,_,_,_  ), 0                         , 79 , 0  , 8683 , 204, 151), // #1321
+  INST(Vphaddbq         , VexRm              , V(XOP_M9,C3,_,0,0,_,_,_  ), 0                         , 79 , 0  , 8692 , 204, 151), // #1322
+  INST(Vphaddbw         , VexRm              , V(XOP_M9,C1,_,0,0,_,_,_  ), 0                         , 79 , 0  , 8701 , 204, 151), // #1323
+  INST(Vphaddd          , VexRvm_Lx          , V(660F38,02,_,x,I,_,_,_  ), 0                         , 96 , 0  , 8710 , 202, 160), // #1324
+  INST(Vphadddq         , VexRm              , V(XOP_M9,CB,_,0,0,_,_,_  ), 0                         , 79 , 0  , 8718 , 204, 151), // #1325
+  INST(Vphaddsw         , VexRvm_Lx          , V(660F38,03,_,x,I,_,_,_  ), 0                         , 96 , 0  , 8727 , 202, 160), // #1326
+  INST(Vphaddubd        , VexRm              , V(XOP_M9,D2,_,0,0,_,_,_  ), 0                         , 79 , 0  , 8736 , 204, 151), // #1327
+  INST(Vphaddubq        , VexRm              , V(XOP_M9,D3,_,0,0,_,_,_  ), 0                         , 79 , 0  , 8746 , 204, 151), // #1328
+  INST(Vphaddubw        , VexRm              , V(XOP_M9,D1,_,0,0,_,_,_  ), 0                         , 79 , 0  , 8756 , 204, 151), // #1329
+  INST(Vphaddudq        , VexRm              , V(XOP_M9,DB,_,0,0,_,_,_  ), 0                         , 79 , 0  , 8766 , 204, 151), // #1330
+  INST(Vphadduwd        , VexRm              , V(XOP_M9,D6,_,0,0,_,_,_  ), 0                         , 79 , 0  , 8776 , 204, 151), // #1331
+  INST(Vphadduwq        , VexRm              , V(XOP_M9,D7,_,0,0,_,_,_  ), 0                         , 79 , 0  , 8786 , 204, 151), // #1332
+  INST(Vphaddw          , VexRvm_Lx          , V(660F38,01,_,x,I,_,_,_  ), 0                         , 96 , 0  , 8796 , 202, 160), // #1333
+  INST(Vphaddwd         , VexRm              , V(XOP_M9,C6,_,0,0,_,_,_  ), 0                         , 79 , 0  , 8804 , 204, 151), // #1334
+  INST(Vphaddwq         , VexRm              , V(XOP_M9,C7,_,0,0,_,_,_  ), 0                         , 79 , 0  , 8813 , 204, 151), // #1335
+  INST(Vphminposuw      , VexRm              , V(660F38,41,_,0,I,_,_,_  ), 0                         , 96 , 0  , 8822 , 204, 135), // #1336
+  INST(Vphsubbw         , VexRm              , V(XOP_M9,E1,_,0,0,_,_,_  ), 0                         , 79 , 0  , 8834 , 204, 151), // #1337
+  INST(Vphsubd          , VexRvm_Lx          , V(660F38,06,_,x,I,_,_,_  ), 0                         , 96 , 0  , 8843 , 202, 160), // #1338
+  INST(Vphsubdq         , VexRm              , V(XOP_M9,E3,_,0,0,_,_,_  ), 0                         , 79 , 0  , 8851 , 204, 151), // #1339
+  INST(Vphsubsw         , VexRvm_Lx          , V(660F38,07,_,x,I,_,_,_  ), 0                         , 96 , 0  , 8860 , 202, 160), // #1340
+  INST(Vphsubw          , VexRvm_Lx          , V(660F38,05,_,x,I,_,_,_  ), 0                         , 96 , 0  , 8869 , 202, 160), // #1341
+  INST(Vphsubwd         , VexRm              , V(XOP_M9,E2,_,0,0,_,_,_  ), 0                         , 79 , 0  , 8877 , 204, 151), // #1342
+  INST(Vpinsrb          , VexRvmi            , V(660F3A,20,_,0,0,I,0,T1S), 0                         , 73 , 0  , 8886 , 385, 171), // #1343
+  INST(Vpinsrd          , VexRvmi            , V(660F3A,22,_,0,0,0,2,T1S), 0                         , 177, 0  , 8894 , 386, 172), // #1344
+  INST(Vpinsrq          , VexRvmi            , V(660F3A,22,_,0,1,1,3,T1S), 0                         , 213, 0  , 8902 , 387, 172), // #1345
+  INST(Vpinsrw          , VexRvmi            , V(660F00,C4,_,0,0,I,1,T1S), 0                         , 215, 0  , 8910 , 388, 171), // #1346
+  INST(Vplzcntd         , VexRm_Lx           , E(660F38,44,_,x,_,0,4,FV ), 0                         , 114, 0  , 8918 , 374, 165), // #1347
+  INST(Vplzcntq         , VexRm_Lx           , E(660F38,44,_,x,_,1,4,FV ), 0                         , 113, 0  , 8927 , 350, 165), // #1348
+  INST(Vpmacsdd         , VexRvmr            , V(XOP_M8,9E,_,0,0,_,_,_  ), 0                         , 208, 0  , 8936 , 389, 151), // #1349
+  INST(Vpmacsdqh        , VexRvmr            , V(XOP_M8,9F,_,0,0,_,_,_  ), 0                         , 208, 0  , 8945 , 389, 151), // #1350
+  INST(Vpmacsdql        , VexRvmr            , V(XOP_M8,97,_,0,0,_,_,_  ), 0                         , 208, 0  , 8955 , 389, 151), // #1351
+  INST(Vpmacssdd        , VexRvmr            , V(XOP_M8,8E,_,0,0,_,_,_  ), 0                         , 208, 0  , 8965 , 389, 151), // #1352
+  INST(Vpmacssdqh       , VexRvmr            , V(XOP_M8,8F,_,0,0,_,_,_  ), 0                         , 208, 0  , 8975 , 389, 151), // #1353
+  INST(Vpmacssdql       , VexRvmr            , V(XOP_M8,87,_,0,0,_,_,_  ), 0                         , 208, 0  , 8986 , 389, 151), // #1354
+  INST(Vpmacsswd        , VexRvmr            , V(XOP_M8,86,_,0,0,_,_,_  ), 0                         , 208, 0  , 8997 , 389, 151), // #1355
+  INST(Vpmacssww        , VexRvmr            , V(XOP_M8,85,_,0,0,_,_,_  ), 0                         , 208, 0  , 9007 , 389, 151), // #1356
+  INST(Vpmacswd         , VexRvmr            , V(XOP_M8,96,_,0,0,_,_,_  ), 0                         , 208, 0  , 9017 , 389, 151), // #1357
+  INST(Vpmacsww         , VexRvmr            , V(XOP_M8,95,_,0,0,_,_,_  ), 0                         , 208, 0  , 9026 , 389, 151), // #1358
+  INST(Vpmadcsswd       , VexRvmr            , V(XOP_M8,A6,_,0,0,_,_,_  ), 0                         , 208, 0  , 9035 , 389, 151), // #1359
+  INST(Vpmadcswd        , VexRvmr            , V(XOP_M8,B6,_,0,0,_,_,_  ), 0                         , 208, 0  , 9046 , 389, 151), // #1360
+  INST(Vpmadd52huq      , VexRvm_Lx          , E(660F38,B5,_,x,_,1,4,FV ), 0                         , 113, 0  , 9056 , 212, 173), // #1361
+  INST(Vpmadd52luq      , VexRvm_Lx          , E(660F38,B4,_,x,_,1,4,FV ), 0                         , 113, 0  , 9068 , 212, 173), // #1362
+  INST(Vpmaddubsw       , VexRvm_Lx          , V(660F38,04,_,x,I,I,4,FVM), 0                         , 110, 0  , 9080 , 315, 163), // #1363
+  INST(Vpmaddwd         , VexRvm_Lx          , V(660F00,F5,_,x,I,I,4,FVM), 0                         , 144, 0  , 9091 , 315, 163), // #1364
+  INST(Vpmaskmovd       , VexRvmMvr_Lx       , V(660F38,8C,_,x,0,_,_,_  ), V(660F38,8E,_,x,0,_,_,_  ), 96 , 120, 9100 , 323, 141), // #1365
+  INST(Vpmaskmovq       , VexRvmMvr_Lx       , V(660F38,8C,_,x,1,_,_,_  ), V(660F38,8E,_,x,1,_,_,_  ), 189, 121, 9111 , 323, 141), // #1366
+  INST(Vpmaxsb          , VexRvm_Lx          , V(660F38,3C,_,x,I,I,4,FVM), 0                         , 110, 0  , 9122 , 390, 163), // #1367
+  INST(Vpmaxsd          , VexRvm_Lx          , V(660F38,3D,_,x,I,0,4,FV ), 0                         , 110, 0  , 9130 , 211, 142), // #1368
+  INST(Vpmaxsq          , VexRvm_Lx          , E(660F38,3D,_,x,_,1,4,FV ), 0                         , 113, 0  , 9138 , 212, 138), // #1369
+  INST(Vpmaxsw          , VexRvm_Lx          , V(660F00,EE,_,x,I,I,4,FVM), 0                         , 144, 0  , 9146 , 390, 163), // #1370
+  INST(Vpmaxub          , VexRvm_Lx          , V(660F00,DE,_,x,I,I,4,FVM), 0                         , 144, 0  , 9154 , 390, 163), // #1371
+  INST(Vpmaxud          , VexRvm_Lx          , V(660F38,3F,_,x,I,0,4,FV ), 0                         , 110, 0  , 9162 , 211, 142), // #1372
+  INST(Vpmaxuq          , VexRvm_Lx          , E(660F38,3F,_,x,_,1,4,FV ), 0                         , 113, 0  , 9170 , 212, 138), // #1373
+  INST(Vpmaxuw          , VexRvm_Lx          , V(660F38,3E,_,x,I,I,4,FVM), 0                         , 110, 0  , 9178 , 390, 163), // #1374
+  INST(Vpminsb          , VexRvm_Lx          , V(660F38,38,_,x,I,I,4,FVM), 0                         , 110, 0  , 9186 , 390, 163), // #1375
+  INST(Vpminsd          , VexRvm_Lx          , V(660F38,39,_,x,I,0,4,FV ), 0                         , 110, 0  , 9194 , 211, 142), // #1376
+  INST(Vpminsq          , VexRvm_Lx          , E(660F38,39,_,x,_,1,4,FV ), 0                         , 113, 0  , 9202 , 212, 138), // #1377
+  INST(Vpminsw          , VexRvm_Lx          , V(660F00,EA,_,x,I,I,4,FVM), 0                         , 144, 0  , 9210 , 390, 163), // #1378
+  INST(Vpminub          , VexRvm_Lx          , V(660F00,DA,_,x,I,_,4,FVM), 0                         , 144, 0  , 9218 , 390, 163), // #1379
+  INST(Vpminud          , VexRvm_Lx          , V(660F38,3B,_,x,I,0,4,FV ), 0                         , 110, 0  , 9226 , 211, 142), // #1380
+  INST(Vpminuq          , VexRvm_Lx          , E(660F38,3B,_,x,_,1,4,FV ), 0                         , 113, 0  , 9234 , 212, 138), // #1381
+  INST(Vpminuw          , VexRvm_Lx          , V(660F38,3A,_,x,I,_,4,FVM), 0                         , 110, 0  , 9242 , 390, 163), // #1382
+  INST(Vpmovb2m         , VexRm_Lx           , E(F30F38,29,_,x,_,0,_,_  ), 0                         , 206, 0  , 9250 , 391, 146), // #1383
+  INST(Vpmovd2m         , VexRm_Lx           , E(F30F38,39,_,x,_,0,_,_  ), 0                         , 206, 0  , 9259 , 391, 140), // #1384
+  INST(Vpmovdb          , VexMr_Lx           , E(F30F38,31,_,x,_,0,2,QVM), 0                         , 216, 0  , 9268 , 392, 138), // #1385
+  INST(Vpmovdw          , VexMr_Lx           , E(F30F38,33,_,x,_,0,3,HVM), 0                         , 217, 0  , 9276 , 393, 138), // #1386
+  INST(Vpmovm2b         , VexRm_Lx           , E(F30F38,28,_,x,_,0,_,_  ), 0                         , 206, 0  , 9284 , 360, 146), // #1387
+  INST(Vpmovm2d         , VexRm_Lx           , E(F30F38,38,_,x,_,0,_,_  ), 0                         , 206, 0  , 9293 , 360, 140), // #1388
+  INST(Vpmovm2q         , VexRm_Lx           , E(F30F38,38,_,x,_,1,_,_  ), 0                         , 205, 0  , 9302 , 360, 140), // #1389
+  INST(Vpmovm2w         , VexRm_Lx           , E(F30F38,28,_,x,_,1,_,_  ), 0                         , 205, 0  , 9311 , 360, 146), // #1390
+  INST(Vpmovmskb        , VexRm_Lx           , V(660F00,D7,_,x,I,_,_,_  ), 0                         , 69 , 0  , 9320 , 336, 160), // #1391
+  INST(Vpmovq2m         , VexRm_Lx           , E(F30F38,39,_,x,_,1,_,_  ), 0                         , 205, 0  , 9330 , 391, 140), // #1392
+  INST(Vpmovqb          , VexMr_Lx           , E(F30F38,32,_,x,_,0,1,OVM), 0                         , 218, 0  , 9339 , 394, 138), // #1393
+  INST(Vpmovqd          , VexMr_Lx           , E(F30F38,35,_,x,_,0,3,HVM), 0                         , 217, 0  , 9347 , 393, 138), // #1394
+  INST(Vpmovqw          , VexMr_Lx           , E(F30F38,34,_,x,_,0,2,QVM), 0                         , 216, 0  , 9355 , 392, 138), // #1395
+  INST(Vpmovsdb         , VexMr_Lx           , E(F30F38,21,_,x,_,0,2,QVM), 0                         , 216, 0  , 9363 , 392, 138), // #1396
+  INST(Vpmovsdw         , VexMr_Lx           , E(F30F38,23,_,x,_,0,3,HVM), 0                         , 217, 0  , 9372 , 393, 138), // #1397
+  INST(Vpmovsqb         , VexMr_Lx           , E(F30F38,22,_,x,_,0,1,OVM), 0                         , 218, 0  , 9381 , 394, 138), // #1398
+  INST(Vpmovsqd         , VexMr_Lx           , E(F30F38,25,_,x,_,0,3,HVM), 0                         , 217, 0  , 9390 , 393, 138), // #1399
+  INST(Vpmovsqw         , VexMr_Lx           , E(F30F38,24,_,x,_,0,2,QVM), 0                         , 216, 0  , 9399 , 392, 138), // #1400
+  INST(Vpmovswb         , VexMr_Lx           , E(F30F38,20,_,x,_,0,3,HVM), 0                         , 217, 0  , 9408 , 393, 146), // #1401
+  INST(Vpmovsxbd        , VexRm_Lx           , V(660F38,21,_,x,I,I,2,QVM), 0                         , 219, 0  , 9417 , 395, 142), // #1402
+  INST(Vpmovsxbq        , VexRm_Lx           , V(660F38,22,_,x,I,I,1,OVM), 0                         , 220, 0  , 9427 , 396, 142), // #1403
+  INST(Vpmovsxbw        , VexRm_Lx           , V(660F38,20,_,x,I,I,3,HVM), 0                         , 139, 0  , 9437 , 397, 163), // #1404
+  INST(Vpmovsxdq        , VexRm_Lx           , V(660F38,25,_,x,I,0,3,HVM), 0                         , 139, 0  , 9447 , 397, 142), // #1405
+  INST(Vpmovsxwd        , VexRm_Lx           , V(660F38,23,_,x,I,I,3,HVM), 0                         , 139, 0  , 9457 , 397, 142), // #1406
+  INST(Vpmovsxwq        , VexRm_Lx           , V(660F38,24,_,x,I,I,2,QVM), 0                         , 219, 0  , 9467 , 395, 142), // #1407
+  INST(Vpmovusdb        , VexMr_Lx           , E(F30F38,11,_,x,_,0,2,QVM), 0                         , 216, 0  , 9477 , 392, 138), // #1408
+  INST(Vpmovusdw        , VexMr_Lx           , E(F30F38,13,_,x,_,0,3,HVM), 0                         , 217, 0  , 9487 , 393, 138), // #1409
+  INST(Vpmovusqb        , VexMr_Lx           , E(F30F38,12,_,x,_,0,1,OVM), 0                         , 218, 0  , 9497 , 394, 138), // #1410
+  INST(Vpmovusqd        , VexMr_Lx           , E(F30F38,15,_,x,_,0,3,HVM), 0                         , 217, 0  , 9507 , 393, 138), // #1411
+  INST(Vpmovusqw        , VexMr_Lx           , E(F30F38,14,_,x,_,0,2,QVM), 0                         , 216, 0  , 9517 , 392, 138), // #1412
+  INST(Vpmovuswb        , VexMr_Lx           , E(F30F38,10,_,x,_,0,3,HVM), 0                         , 217, 0  , 9527 , 393, 146), // #1413
+  INST(Vpmovw2m         , VexRm_Lx           , E(F30F38,29,_,x,_,1,_,_  ), 0                         , 205, 0  , 9537 , 391, 146), // #1414
+  INST(Vpmovwb          , VexMr_Lx           , E(F30F38,30,_,x,_,0,3,HVM), 0                         , 217, 0  , 9546 , 393, 146), // #1415
+  INST(Vpmovzxbd        , VexRm_Lx           , V(660F38,31,_,x,I,I,2,QVM), 0                         , 219, 0  , 9554 , 395, 142), // #1416
+  INST(Vpmovzxbq        , VexRm_Lx           , V(660F38,32,_,x,I,I,1,OVM), 0                         , 220, 0  , 9564 , 396, 142), // #1417
+  INST(Vpmovzxbw        , VexRm_Lx           , V(660F38,30,_,x,I,I,3,HVM), 0                         , 139, 0  , 9574 , 397, 163), // #1418
+  INST(Vpmovzxdq        , VexRm_Lx           , V(660F38,35,_,x,I,0,3,HVM), 0                         , 139, 0  , 9584 , 397, 142), // #1419
+  INST(Vpmovzxwd        , VexRm_Lx           , V(660F38,33,_,x,I,I,3,HVM), 0                         , 139, 0  , 9594 , 397, 142), // #1420
+  INST(Vpmovzxwq        , VexRm_Lx           , V(660F38,34,_,x,I,I,2,QVM), 0                         , 219, 0  , 9604 , 395, 142), // #1421
+  INST(Vpmuldq          , VexRvm_Lx          , V(660F38,28,_,x,I,1,4,FV ), 0                         , 209, 0  , 9614 , 208, 142), // #1422
+  INST(Vpmulhrsw        , VexRvm_Lx          , V(660F38,0B,_,x,I,I,4,FVM), 0                         , 110, 0  , 9622 , 315, 163), // #1423
+  INST(Vpmulhuw         , VexRvm_Lx          , V(660F00,E4,_,x,I,I,4,FVM), 0                         , 144, 0  , 9632 , 315, 163), // #1424
+  INST(Vpmulhw          , VexRvm_Lx          , V(660F00,E5,_,x,I,I,4,FVM), 0                         , 144, 0  , 9641 , 315, 163), // #1425
+  INST(Vpmulld          , VexRvm_Lx          , V(660F38,40,_,x,I,0,4,FV ), 0                         , 110, 0  , 9649 , 209, 142), // #1426
+  INST(Vpmullq          , VexRvm_Lx          , E(660F38,40,_,x,_,1,4,FV ), 0                         , 113, 0  , 9657 , 212, 140), // #1427
+  INST(Vpmullw          , VexRvm_Lx          , V(660F00,D5,_,x,I,I,4,FVM), 0                         , 144, 0  , 9665 , 315, 163), // #1428
+  INST(Vpmultishiftqb   , VexRvm_Lx          , E(660F38,83,_,x,_,1,4,FV ), 0                         , 113, 0  , 9673 , 212, 170), // #1429
+  INST(Vpmuludq         , VexRvm_Lx          , V(660F00,F4,_,x,I,1,4,FV ), 0                         , 103, 0  , 9688 , 208, 142), // #1430
+  INST(Vpopcntb         , VexRm_Lx           , E(660F38,54,_,x,_,0,4,FV ), 0                         , 114, 0  , 9697 , 279, 174), // #1431
+  INST(Vpopcntd         , VexRm_Lx           , E(660F38,55,_,x,_,0,4,FVM), 0                         , 114, 0  , 9706 , 374, 175), // #1432
+  INST(Vpopcntq         , VexRm_Lx           , E(660F38,55,_,x,_,1,4,FVM), 0                         , 113, 0  , 9715 , 350, 175), // #1433
+  INST(Vpopcntw         , VexRm_Lx           , E(660F38,54,_,x,_,1,4,FV ), 0                         , 113, 0  , 9724 , 279, 174), // #1434
+  INST(Vpor             , VexRvm_Lx          , V(660F00,EB,_,x,I,_,_,_  ), 0                         , 69 , 0  , 9733 , 351, 160), // #1435
+  INST(Vpord            , VexRvm_Lx          , E(660F00,EB,_,x,_,0,4,FV ), 0                         , 198, 0  , 9738 , 352, 138), // #1436
+  INST(Vporq            , VexRvm_Lx          , E(660F00,EB,_,x,_,1,4,FV ), 0                         , 135, 0  , 9744 , 356, 138), // #1437
+  INST(Vpperm           , VexRvrmRvmr        , V(XOP_M8,A3,_,0,x,_,_,_  ), 0                         , 208, 0  , 9750 , 398, 151), // #1438
+  INST(Vprold           , VexVmi_Lx          , E(660F00,72,1,x,_,0,4,FV ), 0                         , 221, 0  , 9757 , 399, 138), // #1439
+  INST(Vprolq           , VexVmi_Lx          , E(660F00,72,1,x,_,1,4,FV ), 0                         , 222, 0  , 9764 , 400, 138), // #1440
+  INST(Vprolvd          , VexRvm_Lx          , E(660F38,15,_,x,_,0,4,FV ), 0                         , 114, 0  , 9771 , 213, 138), // #1441
+  INST(Vprolvq          , VexRvm_Lx          , E(660F38,15,_,x,_,1,4,FV ), 0                         , 113, 0  , 9779 , 212, 138), // #1442
+  INST(Vprord           , VexVmi_Lx          , E(660F00,72,0,x,_,0,4,FV ), 0                         , 198, 0  , 9787 , 399, 138), // #1443
+  INST(Vprorq           , VexVmi_Lx          , E(660F00,72,0,x,_,1,4,FV ), 0                         , 135, 0  , 9794 , 400, 138), // #1444
+  INST(Vprorvd          , VexRvm_Lx          , E(660F38,14,_,x,_,0,4,FV ), 0                         , 114, 0  , 9801 , 213, 138), // #1445
+  INST(Vprorvq          , VexRvm_Lx          , E(660F38,14,_,x,_,1,4,FV ), 0                         , 113, 0  , 9809 , 212, 138), // #1446
+  INST(Vprotb           , VexRvmRmvRmi       , V(XOP_M9,90,_,0,x,_,_,_  ), V(XOP_M8,C0,_,0,x,_,_,_  ), 79 , 122, 9817 , 401, 151), // #1447
+  INST(Vprotd           , VexRvmRmvRmi       , V(XOP_M9,92,_,0,x,_,_,_  ), V(XOP_M8,C2,_,0,x,_,_,_  ), 79 , 123, 9824 , 401, 151), // #1448
+  INST(Vprotq           , VexRvmRmvRmi       , V(XOP_M9,93,_,0,x,_,_,_  ), V(XOP_M8,C3,_,0,x,_,_,_  ), 79 , 124, 9831 , 401, 151), // #1449
+  INST(Vprotw           , VexRvmRmvRmi       , V(XOP_M9,91,_,0,x,_,_,_  ), V(XOP_M8,C1,_,0,x,_,_,_  ), 79 , 125, 9838 , 401, 151), // #1450
+  INST(Vpsadbw          , VexRvm_Lx          , V(660F00,F6,_,x,I,I,4,FVM), 0                         , 144, 0  , 9845 , 203, 163), // #1451
+  INST(Vpscatterdd      , VexMr_VM           , E(660F38,A0,_,x,_,0,2,T1S), 0                         , 129, 0  , 9853 , 402, 138), // #1452
+  INST(Vpscatterdq      , VexMr_VM           , E(660F38,A0,_,x,_,1,3,T1S), 0                         , 128, 0  , 9865 , 403, 138), // #1453
+  INST(Vpscatterqd      , VexMr_VM           , E(660F38,A1,_,x,_,0,2,T1S), 0                         , 129, 0  , 9877 , 404, 138), // #1454
+  INST(Vpscatterqq      , VexMr_VM           , E(660F38,A1,_,x,_,1,3,T1S), 0                         , 128, 0  , 9889 , 405, 138), // #1455
+  INST(Vpshab           , VexRvmRmv          , V(XOP_M9,98,_,0,x,_,_,_  ), 0                         , 79 , 0  , 9901 , 406, 151), // #1456
+  INST(Vpshad           , VexRvmRmv          , V(XOP_M9,9A,_,0,x,_,_,_  ), 0                         , 79 , 0  , 9908 , 406, 151), // #1457
+  INST(Vpshaq           , VexRvmRmv          , V(XOP_M9,9B,_,0,x,_,_,_  ), 0                         , 79 , 0  , 9915 , 406, 151), // #1458
+  INST(Vpshaw           , VexRvmRmv          , V(XOP_M9,99,_,0,x,_,_,_  ), 0                         , 79 , 0  , 9922 , 406, 151), // #1459
+  INST(Vpshlb           , VexRvmRmv          , V(XOP_M9,94,_,0,x,_,_,_  ), 0                         , 79 , 0  , 9929 , 406, 151), // #1460
+  INST(Vpshld           , VexRvmRmv          , V(XOP_M9,96,_,0,x,_,_,_  ), 0                         , 79 , 0  , 9936 , 406, 151), // #1461
+  INST(Vpshldd          , VexRvmi_Lx         , E(660F3A,71,_,x,_,0,4,FV ), 0                         , 111, 0  , 9943 , 206, 168), // #1462
+  INST(Vpshldq          , VexRvmi_Lx         , E(660F3A,71,_,x,_,1,4,FV ), 0                         , 112, 0  , 9951 , 207, 168), // #1463
+  INST(Vpshldvd         , VexRvm_Lx          , E(660F38,71,_,x,_,0,4,FV ), 0                         , 114, 0  , 9959 , 213, 168), // #1464
+  INST(Vpshldvq         , VexRvm_Lx          , E(660F38,71,_,x,_,1,4,FV ), 0                         , 113, 0  , 9968 , 212, 168), // #1465
+  INST(Vpshldvw         , VexRvm_Lx          , E(660F38,70,_,x,_,1,4,FVM), 0                         , 113, 0  , 9977 , 357, 168), // #1466
+  INST(Vpshldw          , VexRvmi_Lx         , E(660F3A,70,_,x,_,1,4,FVM), 0                         , 112, 0  , 9986 , 275, 168), // #1467
+  INST(Vpshlq           , VexRvmRmv          , V(XOP_M9,97,_,0,x,_,_,_  ), 0                         , 79 , 0  , 9994 , 406, 151), // #1468
+  INST(Vpshlw           , VexRvmRmv          , V(XOP_M9,95,_,0,x,_,_,_  ), 0                         , 79 , 0  , 10001, 406, 151), // #1469
+  INST(Vpshrdd          , VexRvmi_Lx         , E(660F3A,73,_,x,_,0,4,FV ), 0                         , 111, 0  , 10008, 206, 168), // #1470
+  INST(Vpshrdq          , VexRvmi_Lx         , E(660F3A,73,_,x,_,1,4,FV ), 0                         , 112, 0  , 10016, 207, 168), // #1471
+  INST(Vpshrdvd         , VexRvm_Lx          , E(660F38,73,_,x,_,0,4,FV ), 0                         , 114, 0  , 10024, 213, 168), // #1472
+  INST(Vpshrdvq         , VexRvm_Lx          , E(660F38,73,_,x,_,1,4,FV ), 0                         , 113, 0  , 10033, 212, 168), // #1473
+  INST(Vpshrdvw         , VexRvm_Lx          , E(660F38,72,_,x,_,1,4,FVM), 0                         , 113, 0  , 10042, 357, 168), // #1474
+  INST(Vpshrdw          , VexRvmi_Lx         , E(660F3A,72,_,x,_,1,4,FVM), 0                         , 112, 0  , 10051, 275, 168), // #1475
+  INST(Vpshufb          , VexRvm_Lx          , V(660F38,00,_,x,I,I,4,FVM), 0                         , 110, 0  , 10059, 315, 163), // #1476
+  INST(Vpshufbitqmb     , VexRvm_Lx          , E(660F38,8F,_,x,0,0,4,FVM), 0                         , 114, 0  , 10067, 407, 174), // #1477
+  INST(Vpshufd          , VexRmi_Lx          , V(660F00,70,_,x,I,0,4,FV ), 0                         , 144, 0  , 10080, 408, 142), // #1478
+  INST(Vpshufhw         , VexRmi_Lx          , V(F30F00,70,_,x,I,I,4,FVM), 0                         , 161, 0  , 10088, 409, 163), // #1479
+  INST(Vpshuflw         , VexRmi_Lx          , V(F20F00,70,_,x,I,I,4,FVM), 0                         , 223, 0  , 10097, 409, 163), // #1480
+  INST(Vpsignb          , VexRvm_Lx          , V(660F38,08,_,x,I,_,_,_  ), 0                         , 96 , 0  , 10106, 202, 160), // #1481
+  INST(Vpsignd          , VexRvm_Lx          , V(660F38,0A,_,x,I,_,_,_  ), 0                         , 96 , 0  , 10114, 202, 160), // #1482
+  INST(Vpsignw          , VexRvm_Lx          , V(660F38,09,_,x,I,_,_,_  ), 0                         , 96 , 0  , 10122, 202, 160), // #1483
+  INST(Vpslld           , VexRvmVmi_Lx_MEvex , V(660F00,F2,_,x,I,0,4,128), V(660F00,72,6,x,I,0,4,FV ), 224, 126, 10130, 410, 142), // #1484
+  INST(Vpslldq          , VexVmi_Lx_MEvex    , V(660F00,73,7,x,I,I,4,FVM), 0                         , 225, 0  , 10137, 411, 163), // #1485
+  INST(Vpsllq           , VexRvmVmi_Lx_MEvex , V(660F00,F3,_,x,I,1,4,128), V(660F00,73,6,x,I,1,4,FV ), 226, 127, 10145, 412, 142), // #1486
+  INST(Vpsllvd          , VexRvm_Lx          , V(660F38,47,_,x,0,0,4,FV ), 0                         , 110, 0  , 10152, 209, 152), // #1487
+  INST(Vpsllvq          , VexRvm_Lx          , V(660F38,47,_,x,1,1,4,FV ), 0                         , 182, 0  , 10160, 208, 152), // #1488
+  INST(Vpsllvw          , VexRvm_Lx          , E(660F38,12,_,x,_,1,4,FVM), 0                         , 113, 0  , 10168, 357, 146), // #1489
+  INST(Vpsllw           , VexRvmVmi_Lx_MEvex , V(660F00,F1,_,x,I,I,4,128), V(660F00,71,6,x,I,I,4,FVM), 224, 128, 10176, 413, 163), // #1490
+  INST(Vpsrad           , VexRvmVmi_Lx_MEvex , V(660F00,E2,_,x,I,0,4,128), V(660F00,72,4,x,I,0,4,FV ), 224, 129, 10183, 410, 142), // #1491
+  INST(Vpsraq           , VexRvmVmi_Lx_MEvex , E(660F00,E2,_,x,_,1,4,128), E(660F00,72,4,x,_,1,4,FV ), 227, 130, 10190, 414, 138), // #1492
+  INST(Vpsravd          , VexRvm_Lx          , V(660F38,46,_,x,0,0,4,FV ), 0                         , 110, 0  , 10197, 209, 152), // #1493
+  INST(Vpsravq          , VexRvm_Lx          , E(660F38,46,_,x,_,1,4,FV ), 0                         , 113, 0  , 10205, 212, 138), // #1494
+  INST(Vpsravw          , VexRvm_Lx          , E(660F38,11,_,x,_,1,4,FVM), 0                         , 113, 0  , 10213, 357, 146), // #1495
+  INST(Vpsraw           , VexRvmVmi_Lx_MEvex , V(660F00,E1,_,x,I,I,4,128), V(660F00,71,4,x,I,I,4,FVM), 224, 131, 10221, 413, 163), // #1496
+  INST(Vpsrld           , VexRvmVmi_Lx_MEvex , V(660F00,D2,_,x,I,0,4,128), V(660F00,72,2,x,I,0,4,FV ), 224, 132, 10228, 410, 142), // #1497
+  INST(Vpsrldq          , VexVmi_Lx_MEvex    , V(660F00,73,3,x,I,I,4,FVM), 0                         , 228, 0  , 10235, 411, 163), // #1498
+  INST(Vpsrlq           , VexRvmVmi_Lx_MEvex , V(660F00,D3,_,x,I,1,4,128), V(660F00,73,2,x,I,1,4,FV ), 226, 133, 10243, 412, 142), // #1499
+  INST(Vpsrlvd          , VexRvm_Lx          , V(660F38,45,_,x,0,0,4,FV ), 0                         , 110, 0  , 10250, 209, 152), // #1500
+  INST(Vpsrlvq          , VexRvm_Lx          , V(660F38,45,_,x,1,1,4,FV ), 0                         , 182, 0  , 10258, 208, 152), // #1501
+  INST(Vpsrlvw          , VexRvm_Lx          , E(660F38,10,_,x,_,1,4,FVM), 0                         , 113, 0  , 10266, 357, 146), // #1502
+  INST(Vpsrlw           , VexRvmVmi_Lx_MEvex , V(660F00,D1,_,x,I,I,4,128), V(660F00,71,2,x,I,I,4,FVM), 224, 134, 10274, 413, 163), // #1503
+  INST(Vpsubb           , VexRvm_Lx          , V(660F00,F8,_,x,I,I,4,FVM), 0                         , 144, 0  , 10281, 415, 163), // #1504
+  INST(Vpsubd           , VexRvm_Lx          , V(660F00,FA,_,x,I,0,4,FV ), 0                         , 144, 0  , 10288, 416, 142), // #1505
+  INST(Vpsubq           , VexRvm_Lx          , V(660F00,FB,_,x,I,1,4,FV ), 0                         , 103, 0  , 10295, 417, 142), // #1506
+  INST(Vpsubsb          , VexRvm_Lx          , V(660F00,E8,_,x,I,I,4,FVM), 0                         , 144, 0  , 10302, 415, 163), // #1507
+  INST(Vpsubsw          , VexRvm_Lx          , V(660F00,E9,_,x,I,I,4,FVM), 0                         , 144, 0  , 10310, 415, 163), // #1508
+  INST(Vpsubusb         , VexRvm_Lx          , V(660F00,D8,_,x,I,I,4,FVM), 0                         , 144, 0  , 10318, 415, 163), // #1509
+  INST(Vpsubusw         , VexRvm_Lx          , V(660F00,D9,_,x,I,I,4,FVM), 0                         , 144, 0  , 10327, 415, 163), // #1510
+  INST(Vpsubw           , VexRvm_Lx          , V(660F00,F9,_,x,I,I,4,FVM), 0                         , 144, 0  , 10336, 415, 163), // #1511
+  INST(Vpternlogd       , VexRvmi_Lx         , E(660F3A,25,_,x,_,0,4,FV ), 0                         , 111, 0  , 10343, 206, 138), // #1512
+  INST(Vpternlogq       , VexRvmi_Lx         , E(660F3A,25,_,x,_,1,4,FV ), 0                         , 112, 0  , 10354, 207, 138), // #1513
+  INST(Vptest           , VexRm_Lx           , V(660F38,17,_,x,I,_,_,_  ), 0                         , 96 , 0  , 10365, 298, 167), // #1514
+  INST(Vptestmb         , VexRvm_Lx          , E(660F38,26,_,x,_,0,4,FVM), 0                         , 114, 0  , 10372, 407, 146), // #1515
+  INST(Vptestmd         , VexRvm_Lx          , E(660F38,27,_,x,_,0,4,FV ), 0                         , 114, 0  , 10381, 418, 138), // #1516
+  INST(Vptestmq         , VexRvm_Lx          , E(660F38,27,_,x,_,1,4,FV ), 0                         , 113, 0  , 10390, 419, 138), // #1517
+  INST(Vptestmw         , VexRvm_Lx          , E(660F38,26,_,x,_,1,4,FVM), 0                         , 113, 0  , 10399, 407, 146), // #1518
+  INST(Vptestnmb        , VexRvm_Lx          , E(F30F38,26,_,x,_,0,4,FVM), 0                         , 132, 0  , 10408, 407, 146), // #1519
+  INST(Vptestnmd        , VexRvm_Lx          , E(F30F38,27,_,x,_,0,4,FV ), 0                         , 132, 0  , 10418, 418, 138), // #1520
+  INST(Vptestnmq        , VexRvm_Lx          , E(F30F38,27,_,x,_,1,4,FV ), 0                         , 229, 0  , 10428, 419, 138), // #1521
+  INST(Vptestnmw        , VexRvm_Lx          , E(F30F38,26,_,x,_,1,4,FVM), 0                         , 229, 0  , 10438, 407, 146), // #1522
+  INST(Vpunpckhbw       , VexRvm_Lx          , V(660F00,68,_,x,I,I,4,FVM), 0                         , 144, 0  , 10448, 315, 163), // #1523
+  INST(Vpunpckhdq       , VexRvm_Lx          , V(660F00,6A,_,x,I,0,4,FV ), 0                         , 144, 0  , 10459, 209, 142), // #1524
+  INST(Vpunpckhqdq      , VexRvm_Lx          , V(660F00,6D,_,x,I,1,4,FV ), 0                         , 103, 0  , 10470, 208, 142), // #1525
+  INST(Vpunpckhwd       , VexRvm_Lx          , V(660F00,69,_,x,I,I,4,FVM), 0                         , 144, 0  , 10482, 315, 163), // #1526
+  INST(Vpunpcklbw       , VexRvm_Lx          , V(660F00,60,_,x,I,I,4,FVM), 0                         , 144, 0  , 10493, 315, 163), // #1527
+  INST(Vpunpckldq       , VexRvm_Lx          , V(660F00,62,_,x,I,0,4,FV ), 0                         , 144, 0  , 10504, 209, 142), // #1528
+  INST(Vpunpcklqdq      , VexRvm_Lx          , V(660F00,6C,_,x,I,1,4,FV ), 0                         , 103, 0  , 10515, 208, 142), // #1529
+  INST(Vpunpcklwd       , VexRvm_Lx          , V(660F00,61,_,x,I,I,4,FVM), 0                         , 144, 0  , 10527, 315, 163), // #1530
+  INST(Vpxor            , VexRvm_Lx          , V(660F00,EF,_,x,I,_,_,_  ), 0                         , 69 , 0  , 10538, 353, 160), // #1531
+  INST(Vpxord           , VexRvm_Lx          , E(660F00,EF,_,x,_,0,4,FV ), 0                         , 198, 0  , 10544, 354, 138), // #1532
+  INST(Vpxorq           , VexRvm_Lx          , E(660F00,EF,_,x,_,1,4,FV ), 0                         , 135, 0  , 10551, 355, 138), // #1533
+  INST(Vrangepd         , VexRvmi_Lx         , E(660F3A,50,_,x,_,1,4,FV ), 0                         , 112, 0  , 10558, 285, 140), // #1534
+  INST(Vrangeps         , VexRvmi_Lx         , E(660F3A,50,_,x,_,0,4,FV ), 0                         , 111, 0  , 10567, 286, 140), // #1535
+  INST(Vrangesd         , VexRvmi            , E(660F3A,51,_,I,_,1,3,T1S), 0                         , 180, 0  , 10576, 287, 66 ), // #1536
+  INST(Vrangess         , VexRvmi            , E(660F3A,51,_,I,_,0,2,T1S), 0                         , 181, 0  , 10585, 288, 66 ), // #1537
+  INST(Vrcp14pd         , VexRm_Lx           , E(660F38,4C,_,x,_,1,4,FV ), 0                         , 113, 0  , 10594, 350, 138), // #1538
+  INST(Vrcp14ps         , VexRm_Lx           , E(660F38,4C,_,x,_,0,4,FV ), 0                         , 114, 0  , 10603, 374, 138), // #1539
+  INST(Vrcp14sd         , VexRvm             , E(660F38,4D,_,I,_,1,3,T1S), 0                         , 128, 0  , 10612, 420, 68 ), // #1540
+  INST(Vrcp14ss         , VexRvm             , E(660F38,4D,_,I,_,0,2,T1S), 0                         , 129, 0  , 10621, 421, 68 ), // #1541
+  INST(Vrcp28pd         , VexRm              , E(660F38,CA,_,2,_,1,4,FV ), 0                         , 170, 0  , 10630, 277, 147), // #1542
+  INST(Vrcp28ps         , VexRm              , E(660F38,CA,_,2,_,0,4,FV ), 0                         , 171, 0  , 10639, 278, 147), // #1543
+  INST(Vrcp28sd         , VexRvm             , E(660F38,CB,_,I,_,1,3,T1S), 0                         , 128, 0  , 10648, 308, 147), // #1544
+  INST(Vrcp28ss         , VexRvm             , E(660F38,CB,_,I,_,0,2,T1S), 0                         , 129, 0  , 10657, 309, 147), // #1545
+  INST(Vrcpph           , VexRm_Lx           , E(66MAP6,4C,_,_,_,0,4,FV ), 0                         , 183, 0  , 10666, 422, 134), // #1546
+  INST(Vrcpps           , VexRm_Lx           , V(000F00,53,_,x,I,_,_,_  ), 0                         , 72 , 0  , 10673, 298, 135), // #1547
+  INST(Vrcpsh           , VexRvm             , E(66MAP6,4D,_,_,_,0,1,T1S), 0                         , 185, 0  , 10680, 423, 134), // #1548
+  INST(Vrcpss           , VexRvm             , V(F30F00,53,_,I,I,_,_,_  ), 0                         , 199, 0  , 10687, 424, 135), // #1549
+  INST(Vreducepd        , VexRmi_Lx          , E(660F3A,56,_,x,_,1,4,FV ), 0                         , 112, 0  , 10694, 400, 140), // #1550
+  INST(Vreduceph        , VexRmi_Lx          , E(000F3A,56,_,_,_,0,4,FV ), 0                         , 123, 0  , 10704, 311, 132), // #1551
+  INST(Vreduceps        , VexRmi_Lx          , E(660F3A,56,_,x,_,0,4,FV ), 0                         , 111, 0  , 10714, 399, 140), // #1552
+  INST(Vreducesd        , VexRvmi            , E(660F3A,57,_,I,_,1,3,T1S), 0                         , 180, 0  , 10724, 425, 66 ), // #1553
+  INST(Vreducesh        , VexRvmi            , E(000F3A,57,_,_,_,0,1,T1S), 0                         , 188, 0  , 10734, 313, 134), // #1554
+  INST(Vreducess        , VexRvmi            , E(660F3A,57,_,I,_,0,2,T1S), 0                         , 181, 0  , 10744, 426, 66 ), // #1555
+  INST(Vrndscalepd      , VexRmi_Lx          , E(660F3A,09,_,x,_,1,4,FV ), 0                         , 112, 0  , 10754, 310, 138), // #1556
+  INST(Vrndscaleph      , VexRmi_Lx          , E(000F3A,08,_,_,_,0,4,FV ), 0                         , 123, 0  , 10766, 311, 132), // #1557
+  INST(Vrndscaleps      , VexRmi_Lx          , E(660F3A,08,_,x,_,0,4,FV ), 0                         , 111, 0  , 10778, 312, 138), // #1558
+  INST(Vrndscalesd      , VexRvmi            , E(660F3A,0B,_,I,_,1,3,T1S), 0                         , 180, 0  , 10790, 287, 68 ), // #1559
+  INST(Vrndscalesh      , VexRvmi            , E(000F3A,0A,_,_,_,0,1,T1S), 0                         , 188, 0  , 10802, 313, 134), // #1560
+  INST(Vrndscaless      , VexRvmi            , E(660F3A,0A,_,I,_,0,2,T1S), 0                         , 181, 0  , 10814, 288, 68 ), // #1561
+  INST(Vroundpd         , VexRmi_Lx          , V(660F3A,09,_,x,I,_,_,_  ), 0                         , 73 , 0  , 10826, 427, 135), // #1562
+  INST(Vroundps         , VexRmi_Lx          , V(660F3A,08,_,x,I,_,_,_  ), 0                         , 73 , 0  , 10835, 427, 135), // #1563
+  INST(Vroundsd         , VexRvmi            , V(660F3A,0B,_,I,I,_,_,_  ), 0                         , 73 , 0  , 10844, 428, 135), // #1564
+  INST(Vroundss         , VexRvmi            , V(660F3A,0A,_,I,I,_,_,_  ), 0                         , 73 , 0  , 10853, 429, 135), // #1565
+  INST(Vrsqrt14pd       , VexRm_Lx           , E(660F38,4E,_,x,_,1,4,FV ), 0                         , 113, 0  , 10862, 350, 138), // #1566
+  INST(Vrsqrt14ps       , VexRm_Lx           , E(660F38,4E,_,x,_,0,4,FV ), 0                         , 114, 0  , 10873, 374, 138), // #1567
+  INST(Vrsqrt14sd       , VexRvm             , E(660F38,4F,_,I,_,1,3,T1S), 0                         , 128, 0  , 10884, 420, 68 ), // #1568
+  INST(Vrsqrt14ss       , VexRvm             , E(660F38,4F,_,I,_,0,2,T1S), 0                         , 129, 0  , 10895, 421, 68 ), // #1569
+  INST(Vrsqrt28pd       , VexRm              , E(660F38,CC,_,2,_,1,4,FV ), 0                         , 170, 0  , 10906, 277, 147), // #1570
+  INST(Vrsqrt28ps       , VexRm              , E(660F38,CC,_,2,_,0,4,FV ), 0                         , 171, 0  , 10917, 278, 147), // #1571
+  INST(Vrsqrt28sd       , VexRvm             , E(660F38,CD,_,I,_,1,3,T1S), 0                         , 128, 0  , 10928, 308, 147), // #1572
+  INST(Vrsqrt28ss       , VexRvm             , E(660F38,CD,_,I,_,0,2,T1S), 0                         , 129, 0  , 10939, 309, 147), // #1573
+  INST(Vrsqrtph         , VexRm_Lx           , E(66MAP6,4E,_,_,_,0,4,FV ), 0                         , 183, 0  , 10950, 422, 132), // #1574
+  INST(Vrsqrtps         , VexRm_Lx           , V(000F00,52,_,x,I,_,_,_  ), 0                         , 72 , 0  , 10959, 298, 135), // #1575
+  INST(Vrsqrtsh         , VexRvm             , E(66MAP6,4F,_,_,_,0,1,T1S), 0                         , 185, 0  , 10968, 423, 134), // #1576
+  INST(Vrsqrtss         , VexRvm             , V(F30F00,52,_,I,I,_,_,_  ), 0                         , 199, 0  , 10977, 424, 135), // #1577
+  INST(Vscalefpd        , VexRvm_Lx          , E(660F38,2C,_,x,_,1,4,FV ), 0                         , 113, 0  , 10986, 430, 138), // #1578
+  INST(Vscalefph        , VexRvm_Lx          , E(66MAP6,2C,_,_,_,0,4,FV ), 0                         , 183, 0  , 10996, 197, 132), // #1579
+  INST(Vscalefps        , VexRvm_Lx          , E(660F38,2C,_,x,_,0,4,FV ), 0                         , 114, 0  , 11006, 284, 138), // #1580
+  INST(Vscalefsd        , VexRvm             , E(660F38,2D,_,I,_,1,3,T1S), 0                         , 128, 0  , 11016, 251, 68 ), // #1581
+  INST(Vscalefsh        , VexRvm             , E(66MAP6,2D,_,_,_,0,1,T1S), 0                         , 185, 0  , 11026, 200, 134), // #1582
+  INST(Vscalefss        , VexRvm             , E(660F38,2D,_,I,_,0,2,T1S), 0                         , 129, 0  , 11036, 259, 68 ), // #1583
+  INST(Vscatterdpd      , VexMr_VM           , E(660F38,A2,_,x,_,1,3,T1S), 0                         , 128, 0  , 11046, 403, 138), // #1584
+  INST(Vscatterdps      , VexMr_VM           , E(660F38,A2,_,x,_,0,2,T1S), 0                         , 129, 0  , 11058, 402, 138), // #1585
+  INST(Vscatterpf0dpd   , VexM_VM            , E(660F38,C6,5,2,_,1,3,T1S), 0                         , 230, 0  , 11070, 303, 153), // #1586
+  INST(Vscatterpf0dps   , VexM_VM            , E(660F38,C6,5,2,_,0,2,T1S), 0                         , 231, 0  , 11085, 304, 153), // #1587
+  INST(Vscatterpf0qpd   , VexM_VM            , E(660F38,C7,5,2,_,1,3,T1S), 0                         , 230, 0  , 11100, 305, 153), // #1588
+  INST(Vscatterpf0qps   , VexM_VM            , E(660F38,C7,5,2,_,0,2,T1S), 0                         , 231, 0  , 11115, 305, 153), // #1589
+  INST(Vscatterpf1dpd   , VexM_VM            , E(660F38,C6,6,2,_,1,3,T1S), 0                         , 232, 0  , 11130, 303, 153), // #1590
+  INST(Vscatterpf1dps   , VexM_VM            , E(660F38,C6,6,2,_,0,2,T1S), 0                         , 233, 0  , 11145, 304, 153), // #1591
+  INST(Vscatterpf1qpd   , VexM_VM            , E(660F38,C7,6,2,_,1,3,T1S), 0                         , 232, 0  , 11160, 305, 153), // #1592
+  INST(Vscatterpf1qps   , VexM_VM            , E(660F38,C7,6,2,_,0,2,T1S), 0                         , 233, 0  , 11175, 305, 153), // #1593
+  INST(Vscatterqpd      , VexMr_VM           , E(660F38,A3,_,x,_,1,3,T1S), 0                         , 128, 0  , 11190, 405, 138), // #1594
+  INST(Vscatterqps      , VexMr_VM           , E(660F38,A3,_,x,_,0,2,T1S), 0                         , 129, 0  , 11202, 404, 138), // #1595
+  INST(Vshuff32x4       , VexRvmi_Lx         , E(660F3A,23,_,x,_,0,4,FV ), 0                         , 111, 0  , 11214, 431, 138), // #1596
+  INST(Vshuff64x2       , VexRvmi_Lx         , E(660F3A,23,_,x,_,1,4,FV ), 0                         , 112, 0  , 11225, 432, 138), // #1597
+  INST(Vshufi32x4       , VexRvmi_Lx         , E(660F3A,43,_,x,_,0,4,FV ), 0                         , 111, 0  , 11236, 431, 138), // #1598
+  INST(Vshufi64x2       , VexRvmi_Lx         , E(660F3A,43,_,x,_,1,4,FV ), 0                         , 112, 0  , 11247, 432, 138), // #1599
+  INST(Vshufpd          , VexRvmi_Lx         , V(660F00,C6,_,x,I,1,4,FV ), 0                         , 103, 0  , 11258, 433, 131), // #1600
+  INST(Vshufps          , VexRvmi_Lx         , V(000F00,C6,_,x,I,0,4,FV ), 0                         , 105, 0  , 11266, 434, 131), // #1601
+  INST(Vsqrtpd          , VexRm_Lx           , V(660F00,51,_,x,I,1,4,FV ), 0                         , 103, 0  , 11274, 435, 131), // #1602
+  INST(Vsqrtph          , VexRm_Lx           , E(00MAP5,51,_,_,_,0,4,FV ), 0                         , 104, 0  , 11282, 246, 132), // #1603
+  INST(Vsqrtps          , VexRm_Lx           , V(000F00,51,_,x,I,0,4,FV ), 0                         , 105, 0  , 11290, 235, 131), // #1604
+  INST(Vsqrtsd          , VexRvm             , V(F20F00,51,_,I,I,1,3,T1S), 0                         , 106, 0  , 11298, 199, 133), // #1605
+  INST(Vsqrtsh          , VexRvm             , E(F3MAP5,51,_,_,_,0,1,T1S), 0                         , 107, 0  , 11306, 200, 134), // #1606
+  INST(Vsqrtss          , VexRvm             , V(F30F00,51,_,I,I,0,2,T1S), 0                         , 108, 0  , 11314, 201, 133), // #1607
+  INST(Vstmxcsr         , VexM               , V(000F00,AE,3,0,I,_,_,_  ), 0                         , 234, 0  , 11322, 321, 135), // #1608
+  INST(Vsubpd           , VexRvm_Lx          , V(660F00,5C,_,x,I,1,4,FV ), 0                         , 103, 0  , 11331, 196, 131), // #1609
+  INST(Vsubph           , VexRvm_Lx          , E(00MAP5,5C,_,_,_,0,4,FV ), 0                         , 104, 0  , 11338, 197, 132), // #1610
+  INST(Vsubps           , VexRvm_Lx          , V(000F00,5C,_,x,I,0,4,FV ), 0                         , 105, 0  , 11345, 198, 131), // #1611
+  INST(Vsubsd           , VexRvm             , V(F20F00,5C,_,I,I,1,3,T1S), 0                         , 106, 0  , 11352, 199, 133), // #1612
+  INST(Vsubsh           , VexRvm             , E(F3MAP5,5C,_,_,_,0,1,T1S), 0                         , 107, 0  , 11359, 200, 134), // #1613
+  INST(Vsubss           , VexRvm             , V(F30F00,5C,_,I,I,0,2,T1S), 0                         , 108, 0  , 11366, 201, 133), // #1614
+  INST(Vtestpd          , VexRm_Lx           , V(660F38,0F,_,x,0,_,_,_  ), 0                         , 96 , 0  , 11373, 298, 167), // #1615
+  INST(Vtestps          , VexRm_Lx           , V(660F38,0E,_,x,0,_,_,_  ), 0                         , 96 , 0  , 11381, 298, 167), // #1616
+  INST(Vucomisd         , VexRm              , V(660F00,2E,_,I,I,1,3,T1S), 0                         , 125, 0  , 11389, 229, 143), // #1617
+  INST(Vucomish         , VexRm              , E(00MAP5,2E,_,_,_,0,1,T1S), 0                         , 126, 0  , 11398, 230, 134), // #1618
+  INST(Vucomiss         , VexRm              , V(000F00,2E,_,I,I,0,2,T1S), 0                         , 127, 0  , 11407, 231, 143), // #1619
+  INST(Vunpckhpd        , VexRvm_Lx          , V(660F00,15,_,x,I,1,4,FV ), 0                         , 103, 0  , 11416, 208, 131), // #1620
+  INST(Vunpckhps        , VexRvm_Lx          , V(000F00,15,_,x,I,0,4,FV ), 0                         , 105, 0  , 11426, 209, 131), // #1621
+  INST(Vunpcklpd        , VexRvm_Lx          , V(660F00,14,_,x,I,1,4,FV ), 0                         , 103, 0  , 11436, 208, 131), // #1622
+  INST(Vunpcklps        , VexRvm_Lx          , V(000F00,14,_,x,I,0,4,FV ), 0                         , 105, 0  , 11446, 209, 131), // #1623
+  INST(Vxorpd           , VexRvm_Lx          , V(660F00,57,_,x,I,1,4,FV ), 0                         , 103, 0  , 11456, 417, 139), // #1624
+  INST(Vxorps           , VexRvm_Lx          , V(000F00,57,_,x,I,0,4,FV ), 0                         , 105, 0  , 11463, 416, 139), // #1625
+  INST(Vzeroall         , VexOp              , V(000F00,77,_,1,I,_,_,_  ), 0                         , 68 , 0  , 11470, 436, 135), // #1626
+  INST(Vzeroupper       , VexOp              , V(000F00,77,_,0,I,_,_,_  ), 0                         , 72 , 0  , 11479, 436, 135), // #1627
+  INST(Wbinvd           , X86Op              , O(000F00,09,_,_,_,_,_,_  ), 0                         , 4  , 0  , 11490, 30 , 0  ), // #1628
+  INST(Wbnoinvd         , X86Op              , O(F30F00,09,_,_,_,_,_,_  ), 0                         , 6  , 0  , 11497, 30 , 176), // #1629
+  INST(Wrfsbase         , X86M               , O(F30F00,AE,2,_,x,_,_,_  ), 0                         , 235, 0  , 11506, 173, 111), // #1630
+  INST(Wrgsbase         , X86M               , O(F30F00,AE,3,_,x,_,_,_  ), 0                         , 236, 0  , 11515, 173, 111), // #1631
+  INST(Wrmsr            , X86Op              , O(000F00,30,_,_,_,_,_,_  ), 0                         , 4  , 0  , 11524, 174, 112), // #1632
+  INST(Wrssd            , X86Mr              , O(000F38,F6,_,_,_,_,_,_  ), 0                         , 83 , 0  , 11530, 437, 56 ), // #1633
+  INST(Wrssq            , X86Mr              , O(000F38,F6,_,_,1,_,_,_  ), 0                         , 237, 0  , 11536, 438, 56 ), // #1634
+  INST(Wrussd           , X86Mr              , O(660F38,F5,_,_,_,_,_,_  ), 0                         , 2  , 0  , 11542, 437, 56 ), // #1635
+  INST(Wrussq           , X86Mr              , O(660F38,F5,_,_,1,_,_,_  ), 0                         , 238, 0  , 11549, 438, 56 ), // #1636
+  INST(Xabort           , X86Op_Mod11RM_I8   , O(000000,C6,7,_,_,_,_,_  ), 0                         , 27 , 0  , 11556, 80 , 177), // #1637
+  INST(Xadd             , X86Xadd            , O(000F00,C0,_,_,x,_,_,_  ), 0                         , 4  , 0  , 11563, 439, 38 ), // #1638
+  INST(Xbegin           , X86JmpRel          , O(000000,C7,7,_,_,_,_,_  ), 0                         , 27 , 0  , 11568, 440, 177), // #1639
+  INST(Xchg             , X86Xchg            , O(000000,86,_,_,x,_,_,_  ), 0                         , 0  , 0  , 462  , 441, 0  ), // #1640
+  INST(Xend             , X86Op              , O(000F01,D5,_,_,_,_,_,_  ), 0                         , 21 , 0  , 11575, 30 , 177), // #1641
+  INST(Xgetbv           , X86Op              , O(000F01,D0,_,_,_,_,_,_  ), 0                         , 21 , 0  , 11580, 174, 178), // #1642
+  INST(Xlatb            , X86Op              , O(000000,D7,_,_,_,_,_,_  ), 0                         , 0  , 0  , 11587, 30 , 0  ), // #1643
+  INST(Xor              , X86Arith           , O(000000,30,6,_,x,_,_,_  ), 0                         , 32 , 0  , 10540, 179, 1  ), // #1644
+  INST(Xorpd            , ExtRm              , O(660F00,57,_,_,_,_,_,_  ), 0                         , 3  , 0  , 11457, 151, 4  ), // #1645
+  INST(Xorps            , ExtRm              , O(000F00,57,_,_,_,_,_,_  ), 0                         , 4  , 0  , 11464, 151, 5  ), // #1646
+  INST(Xresldtrk        , X86Op              , O(F20F01,E9,_,_,_,_,_,_  ), 0                         , 92 , 0  , 11593, 30 , 179), // #1647
+  INST(Xrstor           , X86M_Only_EDX_EAX  , O(000F00,AE,5,_,_,_,_,_  ), 0                         , 77 , 0  , 1164 , 442, 178), // #1648
+  INST(Xrstor64         , X86M_Only_EDX_EAX  , O(000F00,AE,5,_,1,_,_,_  ), 0                         , 239, 0  , 1172 , 443, 178), // #1649
+  INST(Xrstors          , X86M_Only_EDX_EAX  , O(000F00,C7,3,_,_,_,_,_  ), 0                         , 78 , 0  , 11603, 442, 180), // #1650
+  INST(Xrstors64        , X86M_Only_EDX_EAX  , O(000F00,C7,3,_,1,_,_,_  ), 0                         , 240, 0  , 11611, 443, 180), // #1651
+  INST(Xsave            , X86M_Only_EDX_EAX  , O(000F00,AE,4,_,_,_,_,_  ), 0                         , 97 , 0  , 1182 , 442, 178), // #1652
+  INST(Xsave64          , X86M_Only_EDX_EAX  , O(000F00,AE,4,_,1,_,_,_  ), 0                         , 241, 0  , 1189 , 443, 178), // #1653
+  INST(Xsavec           , X86M_Only_EDX_EAX  , O(000F00,C7,4,_,_,_,_,_  ), 0                         , 97 , 0  , 11621, 442, 181), // #1654
+  INST(Xsavec64         , X86M_Only_EDX_EAX  , O(000F00,C7,4,_,1,_,_,_  ), 0                         , 241, 0  , 11628, 443, 181), // #1655
+  INST(Xsaveopt         , X86M_Only_EDX_EAX  , O(000F00,AE,6,_,_,_,_,_  ), 0                         , 80 , 0  , 11637, 442, 182), // #1656
+  INST(Xsaveopt64       , X86M_Only_EDX_EAX  , O(000F00,AE,6,_,1,_,_,_  ), 0                         , 242, 0  , 11646, 443, 182), // #1657
+  INST(Xsaves           , X86M_Only_EDX_EAX  , O(000F00,C7,5,_,_,_,_,_  ), 0                         , 77 , 0  , 11657, 442, 180), // #1658
+  INST(Xsaves64         , X86M_Only_EDX_EAX  , O(000F00,C7,5,_,1,_,_,_  ), 0                         , 239, 0  , 11664, 443, 180), // #1659
+  INST(Xsetbv           , X86Op              , O(000F01,D1,_,_,_,_,_,_  ), 0                         , 21 , 0  , 11673, 174, 178), // #1660
+  INST(Xsusldtrk        , X86Op              , O(F20F01,E8,_,_,_,_,_,_  ), 0                         , 92 , 0  , 11680, 30 , 179), // #1661
+  INST(Xtest            , X86Op              , O(000F01,D6,_,_,_,_,_,_  ), 0                         , 21 , 0  , 11690, 30 , 183)  // #1662
+  // ${InstInfo:End}
+};
+#undef NAME_DATA_INDEX
+#undef INST
+
+// x86::InstDB - Opcode Tables
+// ===========================
+
+// ${MainOpcodeTable:Begin}
+// ------------------- Automatically generated, do not edit -------------------
+const uint32_t InstDB::_mainOpcodeTable[] = {
+  O(000000,00,0,0,0,0,0,0   ), // #0 [ref=56x]
+  O(000000,00,2,0,0,0,0,0   ), // #1 [ref=4x]
+  O(660F38,00,0,0,0,0,0,0   ), // #2 [ref=43x]
+  O(660F00,00,0,0,0,0,0,0   ), // #3 [ref=38x]
+  O(000F00,00,0,0,0,0,0,0   ), // #4 [ref=231x]
+  O(F20F00,00,0,0,0,0,0,0   ), // #5 [ref=24x]
+  O(F30F00,00,0,0,0,0,0,0   ), // #6 [ref=29x]
+  O(F30F38,00,0,0,0,0,0,0   ), // #7 [ref=2x]
+  O(660F3A,00,0,0,0,0,0,0   ), // #8 [ref=22x]
+  O(000000,00,4,0,0,0,0,0   ), // #9 [ref=5x]
+  V(000F38,00,0,0,0,0,0,None), // #10 [ref=6x]
+  V(XOP_M9,00,1,0,0,0,0,None), // #11 [ref=3x]
+  V(XOP_M9,00,6,0,0,0,0,None), // #12 [ref=2x]
+  V(XOP_M9,00,5,0,0,0,0,None), // #13 [ref=1x]
+  V(XOP_M9,00,3,0,0,0,0,None), // #14 [ref=1x]
+  V(XOP_M9,00,2,0,0,0,0,None), // #15 [ref=1x]
+  V(000F38,00,3,0,0,0,0,None), // #16 [ref=1x]
+  V(000F38,00,2,0,0,0,0,None), // #17 [ref=1x]
+  V(000F38,00,1,0,0,0,0,None), // #18 [ref=1x]
+  O(660000,00,0,0,0,0,0,0   ), // #19 [ref=7x]
+  O(000000,00,0,0,1,0,0,0   ), // #20 [ref=3x]
+  O(000F01,00,0,0,0,0,0,0   ), // #21 [ref=29x]
+  O(000F00,00,7,0,0,0,0,0   ), // #22 [ref=5x]
+  O(660F00,00,7,0,0,0,0,0   ), // #23 [ref=1x]
+  O(F30F00,00,6,0,0,0,0,0   ), // #24 [ref=4x]
+  O(F30F01,00,0,0,0,0,0,0   ), // #25 [ref=9x]
+  O(660F00,00,6,0,0,0,0,0   ), // #26 [ref=3x]
+  O(000000,00,7,0,0,0,0,0   ), // #27 [ref=5x]
+  O(000F00,00,1,0,1,0,0,0   ), // #28 [ref=2x]
+  O(000F00,00,1,0,0,0,0,0   ), // #29 [ref=6x]
+  O(F20F38,00,0,0,0,0,0,0   ), // #30 [ref=2x]
+  O(000000,00,1,0,0,0,0,0   ), // #31 [ref=3x]
+  O(000000,00,6,0,0,0,0,0   ), // #32 [ref=3x]
+  O(F30F00,00,7,0,0,0,0,3   ), // #33 [ref=1x]
+  O(F30F00,00,7,0,0,0,0,2   ), // #34 [ref=1x]
+  O_FPU(00,D900,0)           , // #35 [ref=29x]
+  O_FPU(00,C000,0)           , // #36 [ref=1x]
+  O_FPU(00,DE00,0)           , // #37 [ref=7x]
+  O_FPU(00,0000,4)           , // #38 [ref=4x]
+  O_FPU(00,0000,6)           , // #39 [ref=4x]
+  O_FPU(9B,DB00,0)           , // #40 [ref=2x]
+  O_FPU(00,DA00,0)           , // #41 [ref=5x]
+  O_FPU(00,DB00,0)           , // #42 [ref=8x]
+  O_FPU(00,D000,2)           , // #43 [ref=1x]
+  O_FPU(00,DF00,0)           , // #44 [ref=2x]
+  O_FPU(00,D800,3)           , // #45 [ref=1x]
+  O_FPU(00,F000,6)           , // #46 [ref=1x]
+  O_FPU(00,F800,7)           , // #47 [ref=1x]
+  O_FPU(00,DD00,0)           , // #48 [ref=3x]
+  O_FPU(00,0000,0)           , // #49 [ref=4x]
+  O_FPU(00,0000,2)           , // #50 [ref=3x]
+  O_FPU(00,0000,3)           , // #51 [ref=3x]
+  O_FPU(00,0000,7)           , // #52 [ref=3x]
+  O_FPU(00,0000,1)           , // #53 [ref=2x]
+  O_FPU(00,0000,5)           , // #54 [ref=2x]
+  O_FPU(00,C800,1)           , // #55 [ref=1x]
+  O_FPU(9B,0000,6)           , // #56 [ref=2x]
+  O_FPU(9B,0000,7)           , // #57 [ref=2x]
+  O_FPU(00,E000,4)           , // #58 [ref=1x]
+  O_FPU(00,E800,5)           , // #59 [ref=1x]
+  O(000F00,00,0,0,1,0,0,0   ), // #60 [ref=3x]
+  O(F30F3A,00,0,0,0,0,0,0   ), // #61 [ref=1x]
+  O(000000,00,5,0,0,0,0,0   ), // #62 [ref=4x]
+  O(F30F00,00,5,0,0,0,0,0   ), // #63 [ref=2x]
+  O(F30F00,00,5,0,1,0,0,0   ), // #64 [ref=1x]
+  V(660F00,00,0,1,0,0,0,None), // #65 [ref=7x]
+  V(660F00,00,0,1,1,0,0,None), // #66 [ref=6x]
+  V(000F00,00,0,1,1,0,0,None), // #67 [ref=7x]
+  V(000F00,00,0,1,0,0,0,None), // #68 [ref=8x]
+  V(660F00,00,0,0,0,0,0,None), // #69 [ref=15x]
+  V(660F00,00,0,0,1,0,0,None), // #70 [ref=4x]
+  V(000F00,00,0,0,1,0,0,None), // #71 [ref=4x]
+  V(000F00,00,0,0,0,0,0,None), // #72 [ref=10x]
+  V(660F3A,00,0,0,0,0,0,None), // #73 [ref=47x]
+  V(660F3A,00,0,0,1,0,0,None), // #74 [ref=4x]
+  O(000000,00,3,0,0,0,0,0   ), // #75 [ref=4x]
+  O(000F00,00,2,0,0,0,0,0   ), // #76 [ref=5x]
+  O(000F00,00,5,0,0,0,0,0   ), // #77 [ref=4x]
+  O(000F00,00,3,0,0,0,0,0   ), // #78 [ref=5x]
+  V(XOP_M9,00,0,0,0,0,0,None), // #79 [ref=32x]
+  O(000F00,00,6,0,0,0,0,0   ), // #80 [ref=5x]
+  V(XOP_MA,00,0,0,0,0,0,None), // #81 [ref=1x]
+  V(XOP_MA,00,1,0,0,0,0,None), // #82 [ref=1x]
+  O(000F38,00,0,0,0,0,0,0   ), // #83 [ref=24x]
+  V(F20F38,00,0,0,0,0,0,None), // #84 [ref=6x]
+  O(000F3A,00,0,0,0,0,0,0   ), // #85 [ref=4x]
+  O(F30000,00,0,0,0,0,0,0   ), // #86 [ref=1x]
+  O(000F0F,00,0,0,0,0,0,0   ), // #87 [ref=26x]
+  V(F30F38,00,0,0,0,0,0,None), // #88 [ref=5x]
+  O(000F3A,00,0,0,1,0,0,0   ), // #89 [ref=1x]
+  O(660F3A,00,0,0,1,0,0,0   ), // #90 [ref=1x]
+  O(F30F00,00,4,0,0,0,0,0   ), // #91 [ref=1x]
+  O(F20F01,00,0,0,0,0,0,0   ), // #92 [ref=4x]
+  O(F30F00,00,1,0,0,0,0,0   ), // #93 [ref=3x]
+  O(F30F00,00,7,0,0,0,0,0   ), // #94 [ref=1x]
+  V(F20F3A,00,0,0,0,0,0,None), // #95 [ref=1x]
+  V(660F38,00,0,0,0,0,0,None), // #96 [ref=26x]
+  O(000F00,00,4,0,0,0,0,0   ), // #97 [ref=4x]
+  V(XOP_M9,00,7,0,0,0,0,None), // #98 [ref=1x]
+  V(XOP_M9,00,4,0,0,0,0,None), // #99 [ref=1x]
+  O(F20F00,00,6,0,0,0,0,0   ), // #100 [ref=1x]
+  E(F20F38,00,0,2,0,0,4,None), // #101 [ref=4x]
+  E(F20F38,00,0,0,0,0,4,None), // #102 [ref=2x]
+  V(660F00,00,0,0,0,1,4,ByLL), // #103 [ref=25x]
+  E(00MAP5,00,0,0,0,0,4,ByLL), // #104 [ref=10x]
+  V(000F00,00,0,0,0,0,4,ByLL), // #105 [ref=19x]
+  V(F20F00,00,0,0,0,1,3,None), // #106 [ref=10x]
+  E(F3MAP5,00,0,0,0,0,1,None), // #107 [ref=13x]
+  V(F30F00,00,0,0,0,0,2,None), // #108 [ref=12x]
+  V(F20F00,00,0,0,0,0,0,None), // #109 [ref=4x]
+  V(660F38,00,0,0,0,0,4,ByLL), // #110 [ref=50x]
+  E(660F3A,00,0,0,0,0,4,ByLL), // #111 [ref=17x]
+  E(660F3A,00,0,0,0,1,4,ByLL), // #112 [ref=18x]
+  E(660F38,00,0,0,0,1,4,ByLL), // #113 [ref=40x]
+  E(660F38,00,0,0,0,0,4,ByLL), // #114 [ref=25x]
+  V(660F38,00,0,1,0,0,0,None), // #115 [ref=2x]
+  E(660F38,00,0,0,0,0,3,None), // #116 [ref=2x]
+  E(660F38,00,0,0,0,0,4,None), // #117 [ref=2x]
+  E(660F38,00,0,2,0,0,5,None), // #118 [ref=2x]
+  E(660F38,00,0,0,0,1,4,None), // #119 [ref=2x]
+  E(660F38,00,0,2,0,1,5,None), // #120 [ref=2x]
+  V(660F38,00,0,0,0,1,3,None), // #121 [ref=2x]
+  V(660F38,00,0,0,0,0,2,None), // #122 [ref=14x]
+  E(000F3A,00,0,0,0,0,4,ByLL), // #123 [ref=5x]
+  E(F30F3A,00,0,0,0,0,1,None), // #124 [ref=1x]
+  V(660F00,00,0,0,0,1,3,None), // #125 [ref=5x]
+  E(00MAP5,00,0,0,0,0,1,None), // #126 [ref=2x]
+  V(000F00,00,0,0,0,0,2,None), // #127 [ref=2x]
+  E(660F38,00,0,0,0,1,3,None), // #128 [ref=14x]
+  E(660F38,00,0,0,0,0,2,None), // #129 [ref=14x]
+  V(F30F00,00,0,0,0,0,3,ByLL), // #130 [ref=1x]
+  E(F20F38,00,0,0,0,0,4,ByLL), // #131 [ref=2x]
+  E(F30F38,00,0,0,0,0,4,ByLL), // #132 [ref=4x]
+  V(F20F00,00,0,0,0,1,4,ByLL), // #133 [ref=1x]
+  E(66MAP5,00,0,0,0,1,4,ByLL), // #134 [ref=1x]
+  E(660F00,00,0,0,0,1,4,ByLL), // #135 [ref=10x]
+  E(000F00,00,0,0,0,1,4,ByLL), // #136 [ref=3x]
+  E(66MAP5,00,0,0,0,0,3,ByLL), // #137 [ref=1x]
+  E(00MAP5,00,0,0,0,0,2,ByLL), // #138 [ref=1x]
+  V(660F38,00,0,0,0,0,3,ByLL), // #139 [ref=7x]
+  E(66MAP6,00,0,0,0,0,3,ByLL), // #140 [ref=1x]
+  E(66MAP5,00,0,0,0,0,2,ByLL), // #141 [ref=4x]
+  E(00MAP5,00,0,0,0,0,3,ByLL), // #142 [ref=2x]
+  E(66MAP5,00,0,0,0,0,4,ByLL), // #143 [ref=3x]
+  V(660F00,00,0,0,0,0,4,ByLL), // #144 [ref=43x]
+  V(000F00,00,0,0,0,0,3,ByLL), // #145 [ref=1x]
+  V(660F3A,00,0,0,0,0,3,ByLL), // #146 [ref=1x]
+  E(660F00,00,0,0,0,0,3,ByLL), // #147 [ref=4x]
+  E(000F00,00,0,0,0,0,4,ByLL), // #148 [ref=2x]
+  E(F30F00,00,0,0,0,1,4,ByLL), // #149 [ref=3x]
+  E(00MAP5,00,0,0,0,1,4,ByLL), // #150 [ref=1x]
+  E(F2MAP5,00,0,0,0,1,3,None), // #151 [ref=1x]
+  V(F20F00,00,0,0,0,0,3,None), // #152 [ref=2x]
+  E(F20F00,00,0,0,0,0,3,None), // #153 [ref=2x]
+  E(00MAP6,00,0,0,0,0,1,None), // #154 [ref=1x]
+  V(F20F00,00,0,0,0,0,2,T1W ), // #155 [ref=1x]
+  E(F3MAP5,00,0,0,0,0,2,T1W ), // #156 [ref=2x]
+  V(F30F00,00,0,0,0,0,2,T1W ), // #157 [ref=1x]
+  E(00MAP5,00,0,0,0,0,2,None), // #158 [ref=1x]
+  E(F30F00,00,0,0,0,0,2,None), // #159 [ref=2x]
+  E(F3MAP5,00,0,0,0,0,3,ByLL), // #160 [ref=1x]
+  V(F30F00,00,0,0,0,0,4,ByLL), // #161 [ref=4x]
+  E(F30F00,00,0,0,0,0,3,ByLL), // #162 [ref=1x]
+  E(F2MAP5,00,0,0,0,0,4,ByLL), // #163 [ref=2x]
+  E(F20F00,00,0,0,0,0,4,ByLL), // #164 [ref=2x]
+  E(F2MAP5,00,0,0,0,1,4,ByLL), // #165 [ref=1x]
+  E(F20F00,00,0,0,0,1,4,ByLL), // #166 [ref=2x]
+  E(F20F00,00,0,0,0,0,2,T1W ), // #167 [ref=1x]
+  E(F30F00,00,0,0,0,0,2,T1W ), // #168 [ref=1x]
+  E(F3MAP5,00,0,0,0,0,4,ByLL), // #169 [ref=1x]
+  E(660F38,00,0,2,0,1,4,ByLL), // #170 [ref=3x]
+  E(660F38,00,0,2,0,0,4,ByLL), // #171 [ref=3x]
+  V(660F3A,00,0,1,0,0,0,None), // #172 [ref=6x]
+  E(660F3A,00,0,0,0,0,4,None), // #173 [ref=4x]
+  E(660F3A,00,0,2,0,0,5,None), // #174 [ref=4x]
+  E(660F3A,00,0,0,0,1,4,None), // #175 [ref=4x]
+  E(660F3A,00,0,2,0,1,5,None), // #176 [ref=4x]
+  V(660F3A,00,0,0,0,0,2,None), // #177 [ref=4x]
+  E(F2MAP6,00,0,0,0,0,4,ByLL), // #178 [ref=2x]
+  E(F2MAP6,00,0,0,0,0,2,None), // #179 [ref=2x]
+  E(660F3A,00,0,0,0,1,3,None), // #180 [ref=6x]
+  E(660F3A,00,0,0,0,0,2,None), // #181 [ref=6x]
+  V(660F38,00,0,0,1,1,4,ByLL), // #182 [ref=20x]
+  E(66MAP6,00,0,0,0,0,4,ByLL), // #183 [ref=22x]
+  V(660F38,00,0,0,1,1,3,None), // #184 [ref=12x]
+  E(66MAP6,00,0,0,0,0,1,None), // #185 [ref=16x]
+  E(F3MAP6,00,0,0,0,0,4,ByLL), // #186 [ref=2x]
+  E(F3MAP6,00,0,0,0,0,2,None), // #187 [ref=2x]
+  E(000F3A,00,0,0,0,0,1,None), // #188 [ref=4x]
+  V(660F38,00,0,0,1,0,0,None), // #189 [ref=5x]
+  E(660F38,00,1,2,0,1,3,None), // #190 [ref=2x]
+  E(660F38,00,1,2,0,0,2,None), // #191 [ref=2x]
+  E(660F38,00,2,2,0,1,3,None), // #192 [ref=2x]
+  E(660F38,00,2,2,0,0,2,None), // #193 [ref=2x]
+  V(660F3A,00,0,0,1,1,4,ByLL), // #194 [ref=2x]
+  V(000F00,00,2,0,0,0,0,None), // #195 [ref=1x]
+  V(660F00,00,0,0,0,0,2,None), // #196 [ref=1x]
+  V(F20F00,00,0,0,0,1,3,DUP ), // #197 [ref=1x]
+  E(660F00,00,0,0,0,0,4,ByLL), // #198 [ref=6x]
+  V(F30F00,00,0,0,0,0,0,None), // #199 [ref=3x]
+  E(F30F00,00,0,0,0,0,4,ByLL), // #200 [ref=1x]
+  V(000F00,00,0,0,0,0,3,None), // #201 [ref=2x]
+  E(66MAP5,00,0,0,0,0,1,None), // #202 [ref=1x]
+  E(F20F38,00,0,0,0,1,4,ByLL), // #203 [ref=1x]
+  V(660F3A,00,0,0,0,0,4,ByLL), // #204 [ref=2x]
+  E(F30F38,00,0,0,0,1,0,None), // #205 [ref=5x]
+  E(F30F38,00,0,0,0,0,0,None), // #206 [ref=5x]
+  V(660F38,00,0,0,0,0,1,None), // #207 [ref=1x]
+  V(XOP_M8,00,0,0,0,0,0,None), // #208 [ref=22x]
+  V(660F38,00,0,0,0,1,4,ByLL), // #209 [ref=4x]
+  E(660F38,00,0,0,0,0,0,None), // #210 [ref=2x]
+  E(660F38,00,0,0,0,1,1,None), // #211 [ref=2x]
+  E(660F38,00,0,0,1,1,4,ByLL), // #212 [ref=1x]
+  V(660F3A,00,0,0,1,1,3,None), // #213 [ref=2x]
+  V(660F3A,00,0,0,0,0,1,None), // #214 [ref=1x]
+  V(660F00,00,0,0,0,0,1,None), // #215 [ref=1x]
+  E(F30F38,00,0,0,0,0,2,ByLL), // #216 [ref=6x]
+  E(F30F38,00,0,0,0,0,3,ByLL), // #217 [ref=9x]
+  E(F30F38,00,0,0,0,0,1,ByLL), // #218 [ref=3x]
+  V(660F38,00,0,0,0,0,2,ByLL), // #219 [ref=4x]
+  V(660F38,00,0,0,0,0,1,ByLL), // #220 [ref=2x]
+  E(660F00,00,1,0,0,0,4,ByLL), // #221 [ref=1x]
+  E(660F00,00,1,0,0,1,4,ByLL), // #222 [ref=1x]
+  V(F20F00,00,0,0,0,0,4,ByLL), // #223 [ref=1x]
+  V(660F00,00,0,0,0,0,4,None), // #224 [ref=6x]
+  V(660F00,00,7,0,0,0,4,ByLL), // #225 [ref=1x]
+  V(660F00,00,0,0,0,1,4,None), // #226 [ref=2x]
+  E(660F00,00,0,0,0,1,4,None), // #227 [ref=1x]
+  V(660F00,00,3,0,0,0,4,ByLL), // #228 [ref=1x]
+  E(F30F38,00,0,0,0,1,4,ByLL), // #229 [ref=2x]
+  E(660F38,00,5,2,0,1,3,None), // #230 [ref=2x]
+  E(660F38,00,5,2,0,0,2,None), // #231 [ref=2x]
+  E(660F38,00,6,2,0,1,3,None), // #232 [ref=2x]
+  E(660F38,00,6,2,0,0,2,None), // #233 [ref=2x]
+  V(000F00,00,3,0,0,0,0,None), // #234 [ref=1x]
+  O(F30F00,00,2,0,0,0,0,0   ), // #235 [ref=1x]
+  O(F30F00,00,3,0,0,0,0,0   ), // #236 [ref=1x]
+  O(000F38,00,0,0,1,0,0,0   ), // #237 [ref=1x]
+  O(660F38,00,0,0,1,0,0,0   ), // #238 [ref=1x]
+  O(000F00,00,5,0,1,0,0,0   ), // #239 [ref=2x]
+  O(000F00,00,3,0,1,0,0,0   ), // #240 [ref=1x]
+  O(000F00,00,4,0,1,0,0,0   ), // #241 [ref=2x]
+  O(000F00,00,6,0,1,0,0,0   )  // #242 [ref=1x]
+};
+// ----------------------------------------------------------------------------
+// ${MainOpcodeTable:End}
+
+// ${AltOpcodeTable:Begin}
+// ------------------- Automatically generated, do not edit -------------------
+const uint32_t InstDB::_altOpcodeTable[] = {
+  O(000000,00,0,0,0,0,0,0   ), // #0 [ref=1514x]
+  O(660F00,1B,0,0,0,0,0,0   ), // #1 [ref=1x]
+  O(000F00,BA,4,0,0,0,0,0   ), // #2 [ref=1x]
+  O(000F00,BA,7,0,0,0,0,0   ), // #3 [ref=1x]
+  O(000F00,BA,6,0,0,0,0,0   ), // #4 [ref=1x]
+  O(000F00,BA,5,0,0,0,0,0   ), // #5 [ref=1x]
+  O(000000,48,0,0,0,0,0,0   ), // #6 [ref=1x]
+  O(660F00,78,0,0,0,0,0,0   ), // #7 [ref=1x]
+  O_FPU(00,00DF,5)           , // #8 [ref=1x]
+  O_FPU(00,00DF,7)           , // #9 [ref=1x]
+  O_FPU(00,00DD,1)           , // #10 [ref=1x]
+  O_FPU(00,00DB,5)           , // #11 [ref=1x]
+  O_FPU(00,DFE0,0)           , // #12 [ref=1x]
+  O(000000,DB,7,0,0,0,0,0   ), // #13 [ref=1x]
+  O_FPU(9B,DFE0,0)           , // #14 [ref=1x]
+  O(000000,E4,0,0,0,0,0,0   ), // #15 [ref=1x]
+  O(000000,40,0,0,0,0,0,0   ), // #16 [ref=1x]
+  O(F20F00,78,0,0,0,0,0,0   ), // #17 [ref=1x]
+  O(000000,77,0,0,0,0,0,0   ), // #18 [ref=2x]
+  O(000000,73,0,0,0,0,0,0   ), // #19 [ref=3x]
+  O(000000,72,0,0,0,0,0,0   ), // #20 [ref=3x]
+  O(000000,76,0,0,0,0,0,0   ), // #21 [ref=2x]
+  O(000000,74,0,0,0,0,0,0   ), // #22 [ref=2x]
+  O(000000,E3,0,0,0,0,0,0   ), // #23 [ref=1x]
+  O(000000,7F,0,0,0,0,0,0   ), // #24 [ref=2x]
+  O(000000,7D,0,0,0,0,0,0   ), // #25 [ref=2x]
+  O(000000,7C,0,0,0,0,0,0   ), // #26 [ref=2x]
+  O(000000,7E,0,0,0,0,0,0   ), // #27 [ref=2x]
+  O(000000,EB,0,0,0,0,0,0   ), // #28 [ref=1x]
+  O(000000,75,0,0,0,0,0,0   ), // #29 [ref=2x]
+  O(000000,71,0,0,0,0,0,0   ), // #30 [ref=1x]
+  O(000000,7B,0,0,0,0,0,0   ), // #31 [ref=2x]
+  O(000000,79,0,0,0,0,0,0   ), // #32 [ref=1x]
+  O(000000,70,0,0,0,0,0,0   ), // #33 [ref=1x]
+  O(000000,7A,0,0,0,0,0,0   ), // #34 [ref=2x]
+  O(000000,78,0,0,0,0,0,0   ), // #35 [ref=1x]
+  V(660F00,92,0,0,0,0,0,None), // #36 [ref=1x]
+  V(F20F00,92,0,0,0,0,0,None), // #37 [ref=1x]
+  V(F20F00,92,0,0,1,0,0,None), // #38 [ref=1x]
+  V(000F00,92,0,0,0,0,0,None), // #39 [ref=1x]
+  O(000000,9A,0,0,0,0,0,0   ), // #40 [ref=1x]
+  O(000000,EA,0,0,0,0,0,0   ), // #41 [ref=1x]
+  O(000000,E2,0,0,0,0,0,0   ), // #42 [ref=1x]
+  O(000000,E1,0,0,0,0,0,0   ), // #43 [ref=1x]
+  O(000000,E0,0,0,0,0,0,0   ), // #44 [ref=1x]
+  O(660F00,29,0,0,0,0,0,0   ), // #45 [ref=1x]
+  O(000F00,29,0,0,0,0,0,0   ), // #46 [ref=1x]
+  O(000F38,F1,0,0,0,0,0,0   ), // #47 [ref=1x]
+  O(000F00,7E,0,0,0,0,0,0   ), // #48 [ref=2x]
+  O(660F00,7F,0,0,0,0,0,0   ), // #49 [ref=1x]
+  O(F30F00,7F,0,0,0,0,0,0   ), // #50 [ref=1x]
+  O(660F00,17,0,0,0,0,0,0   ), // #51 [ref=1x]
+  O(000F00,17,0,0,0,0,0,0   ), // #52 [ref=1x]
+  O(660F00,13,0,0,0,0,0,0   ), // #53 [ref=1x]
+  O(000F00,13,0,0,0,0,0,0   ), // #54 [ref=1x]
+  O(660F00,E7,0,0,0,0,0,0   ), // #55 [ref=1x]
+  O(660F00,2B,0,0,0,0,0,0   ), // #56 [ref=1x]
+  O(000F00,2B,0,0,0,0,0,0   ), // #57 [ref=1x]
+  O(000F00,E7,0,0,0,0,0,0   ), // #58 [ref=1x]
+  O(F20F00,2B,0,0,0,0,0,0   ), // #59 [ref=1x]
+  O(F30F00,2B,0,0,0,0,0,0   ), // #60 [ref=1x]
+  O(F20F00,11,0,0,0,0,0,0   ), // #61 [ref=1x]
+  O(F30F00,11,0,0,0,0,0,0   ), // #62 [ref=1x]
+  O(660F00,11,0,0,0,0,0,0   ), // #63 [ref=1x]
+  O(000F00,11,0,0,0,0,0,0   ), // #64 [ref=1x]
+  O(000000,E6,0,0,0,0,0,0   ), // #65 [ref=1x]
+  O(000F3A,15,0,0,0,0,0,0   ), // #66 [ref=1x]
+  O(000000,58,0,0,0,0,0,0   ), // #67 [ref=1x]
+  O(000F00,72,6,0,0,0,0,0   ), // #68 [ref=1x]
+  O(660F00,73,7,0,0,0,0,0   ), // #69 [ref=1x]
+  O(000F00,73,6,0,0,0,0,0   ), // #70 [ref=1x]
+  O(000F00,71,6,0,0,0,0,0   ), // #71 [ref=1x]
+  O(000F00,72,4,0,0,0,0,0   ), // #72 [ref=1x]
+  O(000F00,71,4,0,0,0,0,0   ), // #73 [ref=1x]
+  O(000F00,72,2,0,0,0,0,0   ), // #74 [ref=1x]
+  O(660F00,73,3,0,0,0,0,0   ), // #75 [ref=1x]
+  O(000F00,73,2,0,0,0,0,0   ), // #76 [ref=1x]
+  O(000F00,71,2,0,0,0,0,0   ), // #77 [ref=1x]
+  O(000000,50,0,0,0,0,0,0   ), // #78 [ref=1x]
+  O(000000,F6,0,0,0,0,0,0   ), // #79 [ref=1x]
+  E(660F38,92,0,0,0,1,3,None), // #80 [ref=1x]
+  E(660F38,92,0,0,0,0,2,None), // #81 [ref=1x]
+  E(660F38,93,0,0,0,1,3,None), // #82 [ref=1x]
+  E(660F38,93,0,0,0,0,2,None), // #83 [ref=1x]
+  V(660F38,2F,0,0,0,0,0,None), // #84 [ref=1x]
+  V(660F38,2E,0,0,0,0,0,None), // #85 [ref=1x]
+  V(660F00,29,0,0,0,1,4,ByLL), // #86 [ref=1x]
+  V(000F00,29,0,0,0,0,4,ByLL), // #87 [ref=1x]
+  V(660F00,7E,0,0,0,0,2,None), // #88 [ref=1x]
+  V(660F00,7F,0,0,0,0,0,None), // #89 [ref=1x]
+  E(660F00,7F,0,0,0,0,4,ByLL), // #90 [ref=1x]
+  E(660F00,7F,0,0,0,1,4,ByLL), // #91 [ref=1x]
+  V(F30F00,7F,0,0,0,0,0,None), // #92 [ref=1x]
+  E(F20F00,7F,0,0,0,1,4,ByLL), // #93 [ref=1x]
+  E(F30F00,7F,0,0,0,0,4,ByLL), // #94 [ref=1x]
+  E(F30F00,7F,0,0,0,1,4,ByLL), // #95 [ref=1x]
+  E(F20F00,7F,0,0,0,0,4,ByLL), // #96 [ref=1x]
+  V(660F00,17,0,0,0,1,3,None), // #97 [ref=1x]
+  V(000F00,17,0,0,0,0,3,None), // #98 [ref=1x]
+  V(660F00,13,0,0,0,1,3,None), // #99 [ref=1x]
+  V(000F00,13,0,0,0,0,3,None), // #100 [ref=1x]
+  V(660F00,7E,0,0,0,1,3,None), // #101 [ref=1x]
+  V(F20F00,11,0,0,0,1,3,None), // #102 [ref=1x]
+  E(F3MAP5,11,0,0,0,0,1,None), // #103 [ref=1x]
+  V(F30F00,11,0,0,0,0,2,None), // #104 [ref=1x]
+  V(660F00,11,0,0,0,1,4,ByLL), // #105 [ref=1x]
+  V(000F00,11,0,0,0,0,4,ByLL), // #106 [ref=1x]
+  E(66MAP5,7E,0,0,0,0,1,None), // #107 [ref=1x]
+  E(660F38,7A,0,0,0,0,0,None), // #108 [ref=1x]
+  E(660F38,7C,0,0,0,0,0,None), // #109 [ref=1x]
+  E(660F38,7C,0,0,0,1,0,None), // #110 [ref=1x]
+  E(660F38,7B,0,0,0,0,0,None), // #111 [ref=1x]
+  V(660F3A,05,0,0,0,1,4,ByLL), // #112 [ref=1x]
+  V(660F3A,04,0,0,0,0,4,ByLL), // #113 [ref=1x]
+  V(660F3A,01,0,0,1,1,4,ByLL), // #114 [ref=1x]
+  V(660F3A,00,0,0,1,1,4,ByLL), // #115 [ref=1x]
+  E(660F38,90,0,0,0,0,2,None), // #116 [ref=1x]
+  E(660F38,90,0,0,0,1,3,None), // #117 [ref=1x]
+  E(660F38,91,0,0,0,0,2,None), // #118 [ref=1x]
+  E(660F38,91,0,0,0,1,3,None), // #119 [ref=1x]
+  V(660F38,8E,0,0,0,0,0,None), // #120 [ref=1x]
+  V(660F38,8E,0,0,1,0,0,None), // #121 [ref=1x]
+  V(XOP_M8,C0,0,0,0,0,0,None), // #122 [ref=1x]
+  V(XOP_M8,C2,0,0,0,0,0,None), // #123 [ref=1x]
+  V(XOP_M8,C3,0,0,0,0,0,None), // #124 [ref=1x]
+  V(XOP_M8,C1,0,0,0,0,0,None), // #125 [ref=1x]
+  V(660F00,72,6,0,0,0,4,ByLL), // #126 [ref=1x]
+  V(660F00,73,6,0,0,1,4,ByLL), // #127 [ref=1x]
+  V(660F00,71,6,0,0,0,4,ByLL), // #128 [ref=1x]
+  V(660F00,72,4,0,0,0,4,ByLL), // #129 [ref=1x]
+  E(660F00,72,4,0,0,1,4,ByLL), // #130 [ref=1x]
+  V(660F00,71,4,0,0,0,4,ByLL), // #131 [ref=1x]
+  V(660F00,72,2,0,0,0,4,ByLL), // #132 [ref=1x]
+  V(660F00,73,2,0,0,1,4,ByLL), // #133 [ref=1x]
+  V(660F00,71,2,0,0,0,4,ByLL)  // #134 [ref=1x]
+};
+// ----------------------------------------------------------------------------
+// ${AltOpcodeTable:End}
+
+#undef O
+#undef V
+#undef E
+#undef O_FPU
+
+// x86::InstDB - CommonInfoTable
+// =============================
+
+// ${InstCommonTable:Begin}
+// ------------------- Automatically generated, do not edit -------------------
+#define F(VAL) uint32_t(InstDB::InstFlags::k##VAL)
+#define X(VAL) uint32_t(InstDB::Avx512Flags::k##VAL)
+#define CONTROL_FLOW(VAL) uint8_t(InstControlFlow::k##VAL)
+#define SAME_REG_HINT(VAL) uint8_t(InstSameRegHint::k##VAL)
+const InstDB::CommonInfo InstDB::_commonInfoTable[] = {
+  { 0                                                 , 0                             , 0  , 0 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #0 [ref=1x]
+  { 0                                                 , 0                             , 383, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #1 [ref=4x]
+  { 0                                                 , 0                             , 384, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #2 [ref=2x]
+  { F(Lock)|F(XAcquire)|F(XRelease)                   , 0                             , 16 , 12, CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #3 [ref=2x]
+  { 0                                                 , 0                             , 180, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #4 [ref=2x]
+  { F(Vec)                                            , 0                             , 79 , 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #5 [ref=54x]
+  { F(Vec)                                            , 0                             , 106, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #6 [ref=19x]
+  { F(Vec)                                            , 0                             , 212, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #7 [ref=16x]
+  { F(Vec)                                            , 0                             , 221, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #8 [ref=20x]
+  { F(Lock)|F(XAcquire)|F(XRelease)                   , 0                             , 28 , 11, CONTROL_FLOW(Regular), SAME_REG_HINT(RO)}, // #9 [ref=1x]
+  { F(Vex)                                            , 0                             , 275, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #10 [ref=3x]
+  { F(Vec)                                            , 0                             , 79 , 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(RO)}, // #11 [ref=12x]
+  { 0                                                 , 0                             , 385, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #12 [ref=1x]
+  { F(Vex)                                            , 0                             , 277, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #13 [ref=5x]
+  { F(Vex)                                            , 0                             , 180, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #14 [ref=12x]
+  { F(Vec)                                            , 0                             , 386, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #15 [ref=4x]
+  { 0                                                 , 0                             , 279, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #16 [ref=3x]
+  { F(Mib)                                            , 0                             , 387, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #17 [ref=1x]
+  { 0                                                 , 0                             , 388, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #18 [ref=1x]
+  { 0                                                 , 0                             , 281, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #19 [ref=1x]
+  { F(Mib)                                            , 0                             , 389, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #20 [ref=1x]
+  { 0                                                 , 0                             , 283, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #21 [ref=1x]
+  { 0                                                 , 0                             , 179, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #22 [ref=35x]
+  { 0                                                 , 0                             , 390, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #23 [ref=3x]
+  { 0                                                 , 0                             , 123, 4 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #24 [ref=1x]
+  { F(Lock)|F(XAcquire)|F(XRelease)                   , 0                             , 123, 4 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #25 [ref=3x]
+  { F(Rep)|F(RepIgnored)                              , 0                             , 285, 2 , CONTROL_FLOW(Call), SAME_REG_HINT(None)}, // #26 [ref=1x]
+  { 0                                                 , 0                             , 391, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #27 [ref=1x]
+  { 0                                                 , 0                             , 392, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #28 [ref=2x]
+  { 0                                                 , 0                             , 364, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #29 [ref=1x]
+  { 0                                                 , 0                             , 108, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #30 [ref=83x]
+  { 0                                                 , 0                             , 393, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #31 [ref=11x]
+  { 0                                                 , 0                             , 394, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #32 [ref=6x]
+  { 0                                                 , 0                             , 395, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #33 [ref=13x]
+  { 0                                                 , 0                             , 396, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #34 [ref=1x]
+  { 0                                                 , 0                             , 16 , 12, CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #35 [ref=1x]
+  { F(Rep)                                            , 0                             , 127, 4 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #36 [ref=1x]
+  { F(Vec)                                            , 0                             , 397, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #37 [ref=2x]
+  { F(Vec)                                            , 0                             , 398, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #38 [ref=3x]
+  { F(Lock)|F(XAcquire)|F(XRelease)                   , 0                             , 131, 4 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #39 [ref=1x]
+  { F(Lock)|F(XAcquire)|F(XRelease)                   , 0                             , 399, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #40 [ref=1x]
+  { F(Lock)|F(XAcquire)|F(XRelease)                   , 0                             , 400, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #41 [ref=1x]
+  { 0                                                 , 0                             , 401, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #42 [ref=1x]
+  { 0                                                 , 0                             , 402, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #43 [ref=1x]
+  { 0                                                 , 0                             , 287, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #44 [ref=1x]
+  { F(Mmx)|F(Vec)                                     , 0                             , 403, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #45 [ref=2x]
+  { F(Mmx)|F(Vec)                                     , 0                             , 404, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #46 [ref=2x]
+  { F(Mmx)|F(Vec)                                     , 0                             , 405, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #47 [ref=2x]
+  { F(Vec)                                            , 0                             , 406, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #48 [ref=2x]
+  { F(Vec)                                            , 0                             , 407, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #49 [ref=2x]
+  { F(Vec)                                            , 0                             , 408, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #50 [ref=2x]
+  { 0                                                 , 0                             , 409, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #51 [ref=1x]
+  { 0                                                 , 0                             , 410, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #52 [ref=2x]
+  { F(Lock)|F(XAcquire)|F(XRelease)                   , 0                             , 289, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #53 [ref=2x]
+  { 0                                                 , 0                             , 39 , 4 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #54 [ref=3x]
+  { F(Mmx)                                            , 0                             , 108, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #55 [ref=1x]
+  { 0                                                 , 0                             , 291, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #56 [ref=2x]
+  { 0                                                 , 0                             , 411, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #57 [ref=1x]
+  { F(Vec)                                            , 0                             , 412, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #58 [ref=2x]
+  { F(Vec)                                            , 0                             , 293, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #59 [ref=1x]
+  { F(FpuM32)|F(FpuM64)                               , 0                             , 182, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #60 [ref=6x]
+  { 0                                                 , 0                             , 295, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #61 [ref=9x]
+  { F(FpuM80)                                         , 0                             , 413, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #62 [ref=2x]
+  { 0                                                 , 0                             , 296, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #63 [ref=13x]
+  { F(FpuM32)|F(FpuM64)                               , 0                             , 297, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #64 [ref=2x]
+  { F(FpuM16)|F(FpuM32)                               , 0                             , 414, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #65 [ref=9x]
+  { F(FpuM16)|F(FpuM32)|F(FpuM64)                     , 0                             , 415, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #66 [ref=3x]
+  { F(FpuM32)|F(FpuM64)|F(FpuM80)                     , 0                             , 416, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #67 [ref=2x]
+  { F(FpuM16)                                         , 0                             , 417, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #68 [ref=3x]
+  { 0                                                 , 0                             , 418, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #69 [ref=13x]
+  { F(FpuM16)                                         , 0                             , 419, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #70 [ref=2x]
+  { F(FpuM32)|F(FpuM64)                               , 0                             , 298, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #71 [ref=1x]
+  { 0                                                 , 0                             , 420, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #72 [ref=2x]
+  { 0                                                 , 0                             , 421, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #73 [ref=1x]
+  { 0                                                 , 0                             , 39 , 10, CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #74 [ref=1x]
+  { 0                                                 , 0                             , 422, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #75 [ref=1x]
+  { 0                                                 , 0                             , 423, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #76 [ref=2x]
+  { 0                                                 , 0                             , 348, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #77 [ref=3x]
+  { F(Rep)                                            , 0                             , 424, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #78 [ref=1x]
+  { F(Vec)                                            , 0                             , 299, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #79 [ref=1x]
+  { 0                                                 , 0                             , 425, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #80 [ref=2x]
+  { 0                                                 , 0                             , 426, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #81 [ref=8x]
+  { 0                                                 , 0                             , 301, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #82 [ref=3x]
+  { 0                                                 , 0                             , 303, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #83 [ref=1x]
+  { 0                                                 , 0                             , 108, 1 , CONTROL_FLOW(Return), SAME_REG_HINT(None)}, // #84 [ref=2x]
+  { 0                                                 , 0                             , 395, 1 , CONTROL_FLOW(Return), SAME_REG_HINT(None)}, // #85 [ref=1x]
+  { F(Rep)|F(RepIgnored)                              , 0                             , 305, 2 , CONTROL_FLOW(Branch), SAME_REG_HINT(None)}, // #86 [ref=30x]
+  { F(Rep)|F(RepIgnored)                              , 0                             , 307, 2 , CONTROL_FLOW(Branch), SAME_REG_HINT(None)}, // #87 [ref=1x]
+  { F(Rep)|F(RepIgnored)                              , 0                             , 309, 2 , CONTROL_FLOW(Jump), SAME_REG_HINT(None)}, // #88 [ref=1x]
+  { F(Vex)                                            , 0                             , 427, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #89 [ref=19x]
+  { F(Vex)                                            , 0                             , 311, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #90 [ref=1x]
+  { F(Vex)                                            , 0                             , 313, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #91 [ref=1x]
+  { F(Vex)                                            , 0                             , 315, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #92 [ref=1x]
+  { F(Vex)                                            , 0                             , 317, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #93 [ref=1x]
+  { F(Vex)                                            , 0                             , 428, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #94 [ref=12x]
+  { F(Vex)                                            , 0                             , 429, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #95 [ref=8x]
+  { F(Vex)                                            , 0                             , 427, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #96 [ref=8x]
+  { 0                                                 , 0                             , 430, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #97 [ref=2x]
+  { 0                                                 , 0                             , 319, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #98 [ref=1x]
+  { 0                                                 , 0                             , 321, 2 , CONTROL_FLOW(Call), SAME_REG_HINT(None)}, // #99 [ref=1x]
+  { F(Vec)                                            , 0                             , 230, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #100 [ref=2x]
+  { 0                                                 , 0                             , 431, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #101 [ref=2x]
+  { 0                                                 , 0                             , 323, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #102 [ref=2x]
+  { F(Vex)                                            , 0                             , 432, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #103 [ref=2x]
+  { 0                                                 , 0                             , 433, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #104 [ref=1x]
+  { 0                                                 , 0                             , 185, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #105 [ref=3x]
+  { 0                                                 , 0                             , 321, 2 , CONTROL_FLOW(Jump), SAME_REG_HINT(None)}, // #106 [ref=1x]
+  { 0                                                 , 0                             , 434, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #107 [ref=5x]
+  { F(Vex)                                            , 0                             , 435, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #108 [ref=2x]
+  { F(Rep)                                            , 0                             , 135, 4 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #109 [ref=1x]
+  { 0                                                 , 0                             , 307, 2 , CONTROL_FLOW(Branch), SAME_REG_HINT(None)}, // #110 [ref=3x]
+  { 0                                                 , 0                             , 325, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #111 [ref=1x]
+  { F(Vex)                                            , 0                             , 436, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #112 [ref=2x]
+  { F(Vec)                                            , 0                             , 437, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #113 [ref=1x]
+  { F(Mmx)                                            , 0                             , 438, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #114 [ref=1x]
+  { 0                                                 , 0                             , 439, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #115 [ref=2x]
+  { F(XRelease)                                       , 0                             , 0  , 16, CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #116 [ref=1x]
+  { 0                                                 , 0                             , 49 , 9 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #117 [ref=1x]
+  { F(Vec)                                            , 0                             , 79 , 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #118 [ref=6x]
+  { 0                                                 , 0                             , 73 , 6 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #119 [ref=1x]
+  { F(Mmx)|F(Vec)                                     , 0                             , 327, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #120 [ref=1x]
+  { 0                                                 , 0                             , 440, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #121 [ref=1x]
+  { 0                                                 , 0                             , 77 , 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #122 [ref=2x]
+  { F(Mmx)|F(Vec)                                     , 0                             , 441, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #123 [ref=1x]
+  { F(Vec)                                            , 0                             , 294, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #124 [ref=2x]
+  { F(Vec)                                            , 0                             , 236, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #125 [ref=4x]
+  { F(Vec)                                            , 0                             , 442, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #126 [ref=2x]
+  { F(Vec)                                            , 0                             , 80 , 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #127 [ref=3x]
+  { F(Mmx)                                            , 0                             , 443, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #128 [ref=1x]
+  { F(Vec)                                            , 0                             , 107, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #129 [ref=1x]
+  { F(Vec)                                            , 0                             , 242, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #130 [ref=1x]
+  { F(Mmx)|F(Vec)                                     , 0                             , 103, 5 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #131 [ref=1x]
+  { F(Mmx)|F(Vec)                                     , 0                             , 444, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #132 [ref=1x]
+  { F(Rep)                                            , 0                             , 139, 4 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #133 [ref=1x]
+  { F(Vec)                                            , 0                             , 106, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #134 [ref=1x]
+  { F(Vec)                                            , 0                             , 329, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #135 [ref=1x]
+  { 0                                                 , 0                             , 331, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #136 [ref=2x]
+  { 0                                                 , 0                             , 333, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #137 [ref=1x]
+  { F(Vex)                                            , 0                             , 335, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #138 [ref=1x]
+  { 0                                                 , 0                             , 445, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #139 [ref=1x]
+  { 0                                                 , 0                             , 446, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #140 [ref=1x]
+  { F(Lock)|F(XAcquire)|F(XRelease)                   , 0                             , 290, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #141 [ref=2x]
+  { 0                                                 , 0                             , 108, 5 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #142 [ref=1x]
+  { F(Lock)|F(XAcquire)|F(XRelease)                   , 0                             , 16 , 12, CONTROL_FLOW(Regular), SAME_REG_HINT(RO)}, // #143 [ref=1x]
+  { 0                                                 , 0                             , 447, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #144 [ref=1x]
+  { F(Rep)                                            , 0                             , 448, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #145 [ref=1x]
+  { F(Mmx)|F(Vec)                                     , 0                             , 337, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #146 [ref=37x]
+  { F(Mmx)|F(Vec)                                     , 0                             , 339, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #147 [ref=1x]
+  { F(Mmx)|F(Vec)                                     , 0                             , 337, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(RO)}, // #148 [ref=6x]
+  { F(Mmx)|F(Vec)                                     , 0                             , 337, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #149 [ref=16x]
+  { F(Mmx)                                            , 0                             , 337, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #150 [ref=26x]
+  { F(Vec)                                            , 0                             , 79 , 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #151 [ref=4x]
+  { F(Vec)                                            , 0                             , 449, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #152 [ref=1x]
+  { F(Vec)                                            , 0                             , 450, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #153 [ref=1x]
+  { F(Vec)                                            , 0                             , 451, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #154 [ref=1x]
+  { F(Vec)                                            , 0                             , 452, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #155 [ref=1x]
+  { F(Vec)                                            , 0                             , 453, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #156 [ref=1x]
+  { F(Vec)                                            , 0                             , 454, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #157 [ref=1x]
+  { F(Mmx)|F(Vec)                                     , 0                             , 341, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #158 [ref=1x]
+  { F(Vec)                                            , 0                             , 455, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #159 [ref=1x]
+  { F(Vec)                                            , 0                             , 456, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #160 [ref=1x]
+  { F(Vec)                                            , 0                             , 457, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #161 [ref=1x]
+  { F(Mmx)|F(Vec)                                     , 0                             , 458, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #162 [ref=1x]
+  { F(Mmx)|F(Vec)                                     , 0                             , 459, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #163 [ref=1x]
+  { F(Vec)                                            , 0                             , 263, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #164 [ref=2x]
+  { 0                                                 , 0                             , 143, 4 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #165 [ref=1x]
+  { F(Mmx)                                            , 0                             , 339, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #166 [ref=1x]
+  { F(Mmx)|F(Vec)                                     , 0                             , 343, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #167 [ref=8x]
+  { F(Vec)                                            , 0                             , 460, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #168 [ref=2x]
+  { 0                                                 , 0                             , 461, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #169 [ref=1x]
+  { F(Mmx)|F(Vec)                                     , 0                             , 345, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #170 [ref=3x]
+  { 0                                                 , 0                             , 147, 4 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #171 [ref=1x]
+  { 0                                                 , 0                             , 462, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #172 [ref=8x]
+  { 0                                                 , 0                             , 463, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #173 [ref=4x]
+  { 0                                                 , 0                             , 464, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #174 [ref=8x]
+  { 0                                                 , 0                             , 347, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #175 [ref=1x]
+  { F(Rep)|F(RepIgnored)                              , 0                             , 349, 2 , CONTROL_FLOW(Return), SAME_REG_HINT(None)}, // #176 [ref=1x]
+  { 0                                                 , 0                             , 349, 2 , CONTROL_FLOW(Return), SAME_REG_HINT(None)}, // #177 [ref=1x]
+  { F(Vex)                                            , 0                             , 351, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #178 [ref=1x]
+  { F(Lock)|F(XAcquire)|F(XRelease)                   , 0                             , 16 , 12, CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #179 [ref=3x]
+  { F(Rep)                                            , 0                             , 151, 4 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #180 [ref=1x]
+  { 0                                                 , 0                             , 465, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #181 [ref=30x]
+  { 0                                                 , 0                             , 188, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #182 [ref=2x]
+  { 0                                                 , 0                             , 466, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #183 [ref=3x]
+  { F(Rep)                                            , 0                             , 155, 4 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #184 [ref=1x]
+  { F(Vex)                                            , 0                             , 467, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #185 [ref=5x]
+  { 0                                                 , 0                             , 66 , 7 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #186 [ref=1x]
+  { F(Tsib)|F(Vex)                                    , 0                             , 468, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #187 [ref=2x]
+  { F(Vex)                                            , 0                             , 395, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #188 [ref=1x]
+  { F(Tsib)|F(Vex)                                    , 0                             , 469, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #189 [ref=1x]
+  { F(Vex)                                            , 0                             , 470, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #190 [ref=1x]
+  { 0                                                 , 0                             , 471, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #191 [ref=2x]
+  { 0                                                 , 0                             , 180, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #192 [ref=2x]
+  { 0                                                 , 0                             , 472, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #193 [ref=1x]
+  { F(Evex)|F(Vec)                                    , X(K)|X(T4X)|X(Z)              , 473, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #194 [ref=4x]
+  { F(Evex)|F(Vec)                                    , X(K)|X(T4X)|X(Z)              , 474, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #195 [ref=2x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(B64)|X(ER)|X(K)|X(SAE)|X(Z) , 191, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #196 [ref=22x]
+  { F(Evex)|F(Vec)                                    , X(B16)|X(ER)|X(K)|X(SAE)|X(Z) , 191, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #197 [ref=23x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(B32)|X(ER)|X(K)|X(SAE)|X(Z) , 191, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #198 [ref=22x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(ER)|X(K)|X(SAE)|X(Z)        , 475, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #199 [ref=18x]
+  { F(Evex)|F(Vec)                                    , X(ER)|X(K)|X(SAE)|X(Z)        , 476, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #200 [ref=18x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(ER)|X(K)|X(SAE)|X(Z)        , 477, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #201 [ref=17x]
+  { F(Vec)|F(Vex)                                     , 0                             , 191, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #202 [ref=15x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , 0                             , 191, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #203 [ref=5x]
+  { F(Vec)|F(Vex)                                     , 0                             , 79 , 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #204 [ref=17x]
+  { F(Vec)|F(Vex)                                     , 0                             , 221, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #205 [ref=1x]
+  { F(Evex)|F(Vec)                                    , X(B32)|X(K)|X(Z)              , 194, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #206 [ref=4x]
+  { F(Evex)|F(Vec)                                    , X(B64)|X(K)|X(Z)              , 194, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #207 [ref=4x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(B64)|X(K)|X(Z)              , 191, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #208 [ref=10x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(B32)|X(K)|X(Z)              , 191, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #209 [ref=12x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(B64)|X(K)|X(Z)              , 191, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(RO)}, // #210 [ref=2x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(B32)|X(K)|X(Z)              , 191, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(RO)}, // #211 [ref=6x]
+  { F(Evex)|F(Vec)                                    , X(B64)|X(K)|X(Z)              , 191, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #212 [ref=19x]
+  { F(Evex)|F(Vec)                                    , X(B32)|X(K)|X(Z)              , 191, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #213 [ref=12x]
+  { F(Vec)|F(Vex)                                     , 0                             , 194, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #214 [ref=6x]
+  { F(Vec)|F(Vex)                                     , 0                             , 353, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #215 [ref=3x]
+  { F(EvexTransformable)|F(Vec)|F(Vex)                , 0                             , 478, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #216 [ref=2x]
+  { F(Evex)|F(Vec)                                    , X(K)|X(Z)                     , 479, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #217 [ref=1x]
+  { F(Evex)|F(Vec)                                    , X(K)|X(Z)                     , 480, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #218 [ref=4x]
+  { F(Evex)|F(Vec)                                    , X(K)|X(Z)                     , 481, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #219 [ref=4x]
+  { F(Evex)|F(Vec)                                    , X(K)|X(Z)                     , 482, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #220 [ref=1x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(K)|X(Z)                     , 479, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #221 [ref=1x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(K)|X(Z)                     , 483, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #222 [ref=1x]
+  { F(Evex)|F(EvexKReg)|F(Vec)|F(Vex)                 , X(B64)|X(K)|X(SAE)|X(Z)       , 197, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #223 [ref=1x]
+  { F(Evex)|F(Vec)                                    , X(B16)|X(K)|X(SAE)            , 200, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #224 [ref=1x]
+  { F(Evex)|F(EvexKReg)|F(Vec)|F(Vex)                 , X(B32)|X(K)|X(SAE)|X(Z)       , 197, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #225 [ref=1x]
+  { F(Evex)|F(EvexKReg)|F(Vec)|F(Vex)                 , X(K)|X(SAE)|X(Z)              , 484, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #226 [ref=1x]
+  { F(Evex)|F(Vec)                                    , X(K)|X(SAE)                   , 485, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #227 [ref=1x]
+  { F(Evex)|F(EvexKReg)|F(Vec)|F(Vex)                 , X(K)|X(SAE)|X(Z)              , 486, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #228 [ref=1x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(SAE)                        , 106, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #229 [ref=2x]
+  { F(Evex)|F(Vec)                                    , X(SAE)                        , 263, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #230 [ref=2x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(SAE)                        , 212, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #231 [ref=2x]
+  { F(Evex)|F(Vec)                                    , X(K)|X(Z)                     , 203, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #232 [ref=6x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(B32)|X(K)|X(Z)              , 206, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #233 [ref=1x]
+  { F(Evex)|F(Vec)                                    , X(B32)|X(ER)|X(K)|X(SAE)|X(Z) , 355, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #234 [ref=3x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(B32)|X(ER)|X(K)|X(SAE)|X(Z) , 209, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #235 [ref=3x]
+  { F(Evex)|F(Vec)                                    , X(B32)|X(K)|X(Z)              , 355, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #236 [ref=1x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(B64)|X(ER)|X(K)|X(SAE)|X(Z) , 355, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #237 [ref=2x]
+  { F(Evex)|F(Vec)                                    , X(B64)|X(ER)|X(K)|X(SAE)|X(Z) , 487, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #238 [ref=3x]
+  { F(Evex)|F(Vec)                                    , X(B64)|X(ER)|X(K)|X(SAE)|X(Z) , 209, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #239 [ref=4x]
+  { F(Evex)|F(Vec)                                    , X(B64)|X(ER)|X(K)|X(SAE)|X(Z) , 355, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #240 [ref=3x]
+  { F(Evex)|F(Vec)                                    , X(B16)|X(ER)|X(K)|X(SAE)|X(Z) , 206, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #241 [ref=2x]
+  { F(Evex)|F(Vec)                                    , X(B16)|X(K)|X(SAE)|X(Z)       , 212, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #242 [ref=3x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(K)|X(SAE)|X(Z)              , 206, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #243 [ref=1x]
+  { F(Evex)|F(Vec)                                    , X(B16)|X(K)|X(SAE)|X(Z)       , 206, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #244 [ref=3x]
+  { F(Evex)|F(Vec)                                    , X(B16)|X(ER)|X(K)|X(SAE)|X(Z) , 212, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #245 [ref=2x]
+  { F(Evex)|F(Vec)                                    , X(B16)|X(ER)|X(K)|X(SAE)|X(Z) , 209, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #246 [ref=5x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(B32)|X(ER)|X(K)|X(SAE)|X(Z) , 206, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #247 [ref=1x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(K)|X(SAE)|X(Z)              , 215, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #248 [ref=1x]
+  { F(Evex)|F(Vec)                                    , X(B32)|X(ER)|X(K)|X(SAE)|X(Z) , 206, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #249 [ref=2x]
+  { F(Evex)|F(Vec)                                    , X(B32)|X(ER)|X(K)|X(SAE)|X(Z) , 209, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #250 [ref=2x]
+  { F(Evex)|F(Vec)                                    , X(ER)|X(K)|X(SAE)|X(Z)        , 475, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #251 [ref=2x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(ER)|X(SAE)                  , 406, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #252 [ref=1x]
+  { F(Evex)|F(Vec)                                    , X(ER)|X(SAE)                  , 406, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #253 [ref=1x]
+  { F(Evex)|F(Vec)                                    , X(K)|X(SAE)|X(Z)              , 476, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #254 [ref=5x]
+  { F(Evex)|F(Vec)                                    , X(ER)|X(SAE)                  , 488, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #255 [ref=2x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(ER)|X(SAE)                  , 489, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #256 [ref=2x]
+  { F(Evex)|F(Vec)                                    , X(ER)|X(SAE)                  , 489, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #257 [ref=4x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(K)|X(SAE)|X(Z)              , 477, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #258 [ref=3x]
+  { F(Evex)|F(Vec)                                    , X(ER)|X(K)|X(SAE)|X(Z)        , 477, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #259 [ref=6x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(ER)|X(SAE)                  , 408, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #260 [ref=1x]
+  { F(Evex)|F(Vec)                                    , X(ER)|X(SAE)                  , 408, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #261 [ref=1x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(B64)|X(K)|X(SAE)|X(Z)       , 355, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #262 [ref=1x]
+  { F(Evex)|F(Vec)                                    , X(B64)|X(K)|X(SAE)|X(Z)       , 209, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #263 [ref=3x]
+  { F(Evex)|F(Vec)                                    , X(B64)|X(K)|X(SAE)|X(Z)       , 355, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #264 [ref=1x]
+  { F(Evex)|F(Vec)                                    , X(B16)|X(K)|X(SAE)|X(Z)       , 209, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #265 [ref=3x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(B32)|X(K)|X(SAE)|X(Z)       , 209, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #266 [ref=1x]
+  { F(Evex)|F(Vec)                                    , X(B32)|X(K)|X(SAE)|X(Z)       , 206, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #267 [ref=2x]
+  { F(Evex)|F(Vec)                                    , X(B32)|X(K)|X(SAE)|X(Z)       , 209, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #268 [ref=2x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(SAE)                        , 406, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #269 [ref=1x]
+  { F(Evex)|F(Vec)                                    , X(SAE)                        , 406, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #270 [ref=1x]
+  { F(Evex)|F(Vec)                                    , X(SAE)                        , 488, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #271 [ref=2x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(SAE)                        , 408, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #272 [ref=1x]
+  { F(Evex)|F(Vec)                                    , X(SAE)                        , 408, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #273 [ref=1x]
+  { F(Evex)|F(Vec)                                    , X(B32)|X(K)|X(Z)              , 206, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #274 [ref=1x]
+  { F(Evex)|F(Vec)                                    , X(K)|X(Z)                     , 194, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #275 [ref=3x]
+  { F(Vec)|F(Vex)                                     , 0                             , 194, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #276 [ref=9x]
+  { F(Evex)|F(Vec)                                    , X(B64)|X(K)|X(SAE)|X(Z)       , 83 , 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #277 [ref=3x]
+  { F(Evex)|F(Vec)                                    , X(B32)|X(K)|X(SAE)|X(Z)       , 83 , 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #278 [ref=3x]
+  { F(Evex)|F(Vec)                                    , X(K)|X(Z)                     , 209, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #279 [ref=8x]
+  { F(EvexTransformable)|F(Vec)|F(Vex)                , 0                             , 216, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #280 [ref=2x]
+  { F(Evex)|F(Vec)                                    , X(K)|X(Z)                     , 490, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #281 [ref=4x]
+  { F(Evex)|F(Vec)                                    , X(K)|X(Z)                     , 217, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #282 [ref=4x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , 0                             , 412, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #283 [ref=2x]
+  { F(Evex)|F(Vec)                                    , X(B32)|X(ER)|X(K)|X(SAE)|X(Z) , 191, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #284 [ref=5x]
+  { F(Evex)|F(Vec)                                    , X(B64)|X(K)|X(SAE)|X(Z)       , 194, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #285 [ref=2x]
+  { F(Evex)|F(Vec)                                    , X(B32)|X(K)|X(SAE)|X(Z)       , 194, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #286 [ref=2x]
+  { F(Evex)|F(Vec)                                    , X(K)|X(SAE)|X(Z)              , 491, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #287 [ref=4x]
+  { F(Evex)|F(Vec)                                    , X(K)|X(SAE)|X(Z)              , 492, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #288 [ref=4x]
+  { F(Vec)|F(Vex)                                     , 0                             , 159, 4 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #289 [ref=13x]
+  { F(Vec)|F(Vex)                                     , 0                             , 357, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #290 [ref=4x]
+  { F(Vec)|F(Vex)                                     , 0                             , 359, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #291 [ref=4x]
+  { F(Evex)|F(Vec)                                    , X(B64)|X(K)                   , 493, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #292 [ref=1x]
+  { F(Evex)|F(Vec)                                    , X(B16)|X(K)                   , 493, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #293 [ref=1x]
+  { F(Evex)|F(Vec)                                    , X(B32)|X(K)                   , 493, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #294 [ref=1x]
+  { F(Evex)|F(Vec)                                    , X(K)                          , 494, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #295 [ref=1x]
+  { F(Evex)|F(Vec)                                    , X(K)                          , 495, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #296 [ref=1x]
+  { F(Evex)|F(Vec)                                    , X(K)                          , 496, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #297 [ref=1x]
+  { F(Vec)|F(Vex)                                     , 0                             , 209, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #298 [ref=7x]
+  { F(Vec)|F(Vex)                                     , 0                             , 106, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #299 [ref=1x]
+  { F(Vec)|F(Vex)                                     , 0                             , 212, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #300 [ref=1x]
+  { F(Evex)|F(EvexTwoOp)|F(Vec)|F(Vex)|F(Vsib)        , X(K)                          , 163, 4 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #301 [ref=2x]
+  { F(Evex)|F(EvexTwoOp)|F(Vec)|F(Vex)|F(Vsib)        , X(K)                          , 113, 5 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #302 [ref=2x]
+  { F(Evex)|F(Vsib)                                   , X(K)                          , 497, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #303 [ref=4x]
+  { F(Evex)|F(Vsib)                                   , X(K)                          , 498, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #304 [ref=4x]
+  { F(Evex)|F(Vsib)                                   , X(K)                          , 499, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #305 [ref=8x]
+  { F(Evex)|F(EvexTwoOp)|F(Vec)|F(Vex)|F(Vsib)        , X(K)                          , 118, 5 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #306 [ref=2x]
+  { F(Evex)|F(EvexTwoOp)|F(Vec)|F(Vex)|F(Vsib)        , X(K)                          , 218, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #307 [ref=2x]
+  { F(Evex)|F(Vec)                                    , X(K)|X(SAE)|X(Z)              , 475, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #308 [ref=3x]
+  { F(Evex)|F(Vec)                                    , X(K)|X(SAE)|X(Z)              , 477, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #309 [ref=3x]
+  { F(Evex)|F(Vec)                                    , X(B64)|X(K)|X(SAE)|X(Z)       , 221, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #310 [ref=2x]
+  { F(Evex)|F(Vec)                                    , X(B16)|X(K)|X(SAE)|X(Z)       , 221, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #311 [ref=3x]
+  { F(Evex)|F(Vec)                                    , X(B32)|X(K)|X(SAE)|X(Z)       , 221, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #312 [ref=2x]
+  { F(Evex)|F(Vec)                                    , X(K)|X(SAE)|X(Z)              , 500, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #313 [ref=3x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(K)|X(Z)                     , 194, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #314 [ref=3x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(K)|X(Z)                     , 191, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #315 [ref=22x]
+  { F(EvexTransformable)|F(Vec)|F(Vex)                , 0                             , 361, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #316 [ref=2x]
+  { F(Evex)|F(Vec)                                    , X(K)|X(Z)                     , 361, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #317 [ref=4x]
+  { F(Evex)|F(Vec)                                    , X(K)|X(Z)                     , 501, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #318 [ref=4x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , 0                             , 492, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #319 [ref=1x]
+  { F(Vec)|F(Vex)                                     , 0                             , 230, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #320 [ref=1x]
+  { F(Vex)                                            , 0                             , 431, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #321 [ref=2x]
+  { F(Vec)|F(Vex)                                     , 0                             , 437, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #322 [ref=1x]
+  { F(Vec)|F(Vex)                                     , 0                             , 167, 4 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #323 [ref=4x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(B64)|X(K)|X(SAE)|X(Z)       , 191, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #324 [ref=2x]
+  { F(Evex)|F(Vec)                                    , X(B16)|X(K)|X(SAE)|X(Z)       , 191, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #325 [ref=2x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(B32)|X(K)|X(SAE)|X(Z)       , 191, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #326 [ref=2x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(K)|X(SAE)|X(Z)              , 475, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #327 [ref=2x]
+  { 0                                                 , 0                             , 363, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #328 [ref=3x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(K)|X(Z)                     , 79 , 6 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #329 [ref=4x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , 0                             , 365, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #330 [ref=1x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(K)|X(Z)                     , 224, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #331 [ref=1x]
+  { F(EvexTransformable)|F(Vec)|F(Vex)                , 0                             , 79 , 4 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #332 [ref=2x]
+  { F(Evex)|F(Vec)                                    , X(K)|X(Z)                     , 79 , 6 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #333 [ref=6x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , 0                             , 238, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #334 [ref=2x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , 0                             , 367, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #335 [ref=4x]
+  { F(Vec)|F(Vex)                                     , 0                             , 502, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #336 [ref=3x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , 0                             , 227, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #337 [ref=3x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , 0                             , 230, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #338 [ref=1x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , 0                             , 233, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #339 [ref=1x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(K)|X(Z)                     , 236, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #340 [ref=1x]
+  { F(Evex)|F(Vec)                                    , X(K)|X(Z)                     , 239, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #341 [ref=1x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(K)|X(Z)                     , 209, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #342 [ref=4x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(K)|X(Z)                     , 242, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #343 [ref=1x]
+  { F(Evex)|F(Vec)                                    , X(K)|X(Z)                     , 369, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #344 [ref=1x]
+  { 0                                                 , 0                             , 371, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #345 [ref=1x]
+  { 0                                                 , 0                             , 373, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #346 [ref=1x]
+  { F(Evex)|F(Vec)                                    , X(B32)                        , 245, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #347 [ref=1x]
+  { F(Evex)|F(Vec)                                    , X(B64)                        , 245, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #348 [ref=1x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(B32)|X(K)|X(Z)              , 209, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #349 [ref=1x]
+  { F(Evex)|F(Vec)                                    , X(B64)|X(K)|X(Z)              , 209, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #350 [ref=5x]
+  { F(EvexTransformable)|F(Vec)|F(Vex)                , 0                             , 191, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(RO)}, // #351 [ref=2x]
+  { F(Evex)|F(Vec)                                    , X(B32)|X(K)|X(Z)              , 191, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(RO)}, // #352 [ref=2x]
+  { F(EvexTransformable)|F(Vec)|F(Vex)                , 0                             , 191, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #353 [ref=2x]
+  { F(Evex)|F(Vec)                                    , X(B32)|X(K)|X(Z)              , 191, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #354 [ref=2x]
+  { F(Evex)|F(Vec)                                    , X(B64)|X(K)|X(Z)              , 191, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #355 [ref=2x]
+  { F(Evex)|F(Vec)                                    , X(B64)|X(K)|X(Z)              , 191, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(RO)}, // #356 [ref=2x]
+  { F(Evex)|F(Vec)                                    , X(K)|X(Z)                     , 191, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #357 [ref=13x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(K)|X(Z)                     , 503, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #358 [ref=1x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(K)|X(Z)                     , 504, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #359 [ref=1x]
+  { F(Evex)|F(Vec)                                    , 0                             , 505, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #360 [ref=6x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(K)|X(Z)                     , 248, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #361 [ref=1x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(K)|X(Z)                     , 506, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #362 [ref=1x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , 0                             , 194, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #363 [ref=1x]
+  { F(Evex)|F(Vec)                                    , X(K)                          , 200, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #364 [ref=2x]
+  { F(Evex)|F(Vec)                                    , X(B32)|X(K)                   , 200, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #365 [ref=2x]
+  { F(Evex)|F(EvexKReg)|F(Vec)|F(Vex)                 , X(K)                          , 251, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #366 [ref=4x]
+  { F(Evex)|F(EvexKReg)|F(Vec)|F(Vex)                 , X(B32)|X(K)                   , 251, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #367 [ref=2x]
+  { F(Evex)|F(EvexKReg)|F(Vec)|F(Vex)                 , X(B64)|X(K)                   , 251, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #368 [ref=2x]
+  { F(Vec)|F(Vex)                                     , 0                             , 449, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #369 [ref=1x]
+  { F(Vec)|F(Vex)                                     , 0                             , 450, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #370 [ref=1x]
+  { F(Vec)|F(Vex)                                     , 0                             , 451, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #371 [ref=1x]
+  { F(Vec)|F(Vex)                                     , 0                             , 452, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #372 [ref=1x]
+  { F(Evex)|F(Vec)                                    , X(B64)|X(K)                   , 200, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #373 [ref=4x]
+  { F(Evex)|F(Vec)                                    , X(B32)|X(K)|X(Z)              , 209, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #374 [ref=6x]
+  { F(Evex)|F(EvexCompat)|F(PreferEvex)|F(Vec)|F(Vex) , X(B32)|X(K)|X(Z)              , 191, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #375 [ref=4x]
+  { F(Vec)|F(Vex)                                     , 0                             , 195, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #376 [ref=2x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(B32)|X(K)|X(Z)              , 192, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #377 [ref=2x]
+  { F(Vec)|F(Vex)                                     , 0                             , 171, 4 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #378 [ref=2x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(B64)|X(K)|X(Z)              , 85 , 6 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #379 [ref=1x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(B32)|X(K)|X(Z)              , 85 , 6 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #380 [ref=1x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(B64)|X(K)|X(Z)              , 175, 4 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #381 [ref=2x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , 0                             , 453, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #382 [ref=1x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , 0                             , 454, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #383 [ref=1x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , 0                             , 507, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #384 [ref=1x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(K)|X(Z)                     , 508, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #385 [ref=1x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(K)|X(Z)                     , 509, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #386 [ref=1x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(K)|X(Z)                     , 510, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #387 [ref=1x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(K)|X(Z)                     , 511, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #388 [ref=1x]
+  { F(Vec)|F(Vex)                                     , 0                             , 353, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #389 [ref=12x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(K)|X(Z)                     , 191, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(RO)}, // #390 [ref=8x]
+  { F(Evex)|F(Vec)                                    , 0                             , 512, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #391 [ref=4x]
+  { F(Evex)|F(Vec)                                    , X(K)|X(Z)                     , 254, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #392 [ref=6x]
+  { F(Evex)|F(Vec)                                    , X(K)|X(Z)                     , 257, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #393 [ref=9x]
+  { F(Evex)|F(Vec)                                    , X(K)|X(Z)                     , 260, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #394 [ref=3x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(K)|X(Z)                     , 212, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #395 [ref=4x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(K)|X(Z)                     , 263, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #396 [ref=2x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(K)|X(Z)                     , 206, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #397 [ref=6x]
+  { F(Vec)|F(Vex)                                     , 0                             , 159, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #398 [ref=1x]
+  { F(Evex)|F(Vec)                                    , X(B32)|X(K)|X(Z)              , 221, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #399 [ref=3x]
+  { F(Evex)|F(Vec)                                    , X(B64)|X(K)|X(Z)              , 221, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #400 [ref=3x]
+  { F(Vec)|F(Vex)                                     , 0                             , 375, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #401 [ref=4x]
+  { F(Evex)|F(Vec)|F(Vsib)                            , X(K)                          , 266, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #402 [ref=2x]
+  { F(Evex)|F(Vec)|F(Vsib)                            , X(K)                          , 377, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #403 [ref=2x]
+  { F(Evex)|F(Vec)|F(Vsib)                            , X(K)                          , 379, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #404 [ref=2x]
+  { F(Evex)|F(Vec)|F(Vsib)                            , X(K)                          , 269, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #405 [ref=2x]
+  { F(Vec)|F(Vex)                                     , 0                             , 381, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #406 [ref=8x]
+  { F(Evex)|F(Vec)                                    , X(K)                          , 272, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #407 [ref=5x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(B32)|X(K)|X(Z)              , 221, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #408 [ref=1x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(K)|X(Z)                     , 221, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #409 [ref=2x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(B32)|X(K)|X(Z)              , 91 , 6 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #410 [ref=3x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , 0                             , 221, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #411 [ref=2x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(B64)|X(K)|X(Z)              , 91 , 6 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #412 [ref=2x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(K)|X(Z)                     , 91 , 6 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #413 [ref=3x]
+  { F(Evex)|F(Vec)                                    , X(B64)|X(K)|X(Z)              , 97 , 6 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #414 [ref=1x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(K)|X(Z)                     , 191, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #415 [ref=6x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(B32)|X(K)|X(Z)              , 191, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #416 [ref=2x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(B64)|X(K)|X(Z)              , 191, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #417 [ref=2x]
+  { F(Evex)|F(Vec)                                    , X(B32)|X(K)                   , 272, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #418 [ref=2x]
+  { F(Evex)|F(Vec)                                    , X(B64)|X(K)                   , 272, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #419 [ref=2x]
+  { F(Evex)|F(Vec)                                    , X(K)|X(Z)                     , 475, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #420 [ref=2x]
+  { F(Evex)|F(Vec)                                    , X(K)|X(Z)                     , 477, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #421 [ref=2x]
+  { F(Evex)|F(Vec)                                    , X(B16)|X(K)|X(Z)              , 209, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #422 [ref=2x]
+  { F(Evex)|F(Vec)                                    , X(K)|X(Z)                     , 476, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #423 [ref=2x]
+  { F(Vec)|F(Vex)                                     , 0                             , 477, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #424 [ref=2x]
+  { F(Evex)|F(Vec)                                    , X(K)|X(Z)                     , 491, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #425 [ref=1x]
+  { F(Evex)|F(Vec)                                    , X(K)|X(Z)                     , 492, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #426 [ref=1x]
+  { F(EvexTransformable)|F(Vec)|F(Vex)                , 0                             , 221, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #427 [ref=2x]
+  { F(EvexTransformable)|F(Vec)|F(Vex)                , 0                             , 491, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #428 [ref=1x]
+  { F(EvexTransformable)|F(Vec)|F(Vex)                , 0                             , 492, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #429 [ref=1x]
+  { F(Evex)|F(Vec)                                    , X(B64)|X(ER)|X(K)|X(SAE)|X(Z) , 191, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #430 [ref=1x]
+  { F(Evex)|F(Vec)                                    , X(B32)|X(K)|X(Z)              , 195, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #431 [ref=2x]
+  { F(Evex)|F(Vec)                                    , X(B64)|X(K)|X(Z)              , 195, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #432 [ref=2x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(B64)|X(K)|X(Z)              , 194, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #433 [ref=1x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(B32)|X(K)|X(Z)              , 194, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #434 [ref=1x]
+  { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex)               , X(B64)|X(ER)|X(K)|X(SAE)|X(Z) , 209, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #435 [ref=1x]
+  { F(Vec)|F(Vex)                                     , 0                             , 108, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #436 [ref=2x]
+  { 0                                                 , 0                             , 23 , 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #437 [ref=2x]
+  { 0                                                 , 0                             , 61 , 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #438 [ref=2x]
+  { F(Lock)|F(XAcquire)|F(XRelease)                   , 0                             , 58 , 4 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #439 [ref=1x]
+  { 0                                                 , 0                             , 513, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #440 [ref=1x]
+  { F(Lock)|F(XAcquire)                               , 0                             , 58 , 8 , CONTROL_FLOW(Regular), SAME_REG_HINT(RO)}, // #441 [ref=1x]
+  { 0                                                 , 0                             , 514, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #442 [ref=6x]
+  { 0                                                 , 0                             , 515, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}  // #443 [ref=6x]
+};
+#undef SAME_REG_HINT
+#undef CONTROL_FLOW
+#undef X
+#undef F
+// ----------------------------------------------------------------------------
+// ${InstCommonTable:End}
+
+// x86::InstDB - AdditionalInfoTable
+// =================================
+
+// ${AdditionalInfoTable:Begin}
+// ------------------- Automatically generated, do not edit -------------------
+#define EXT(VAL) uint32_t(CpuFeatures::X86::k##VAL) // Shorthand: EXT(SSE2) -> uint32_t(CpuFeatures::X86::kSSE2); undefined again right after the table.
+const InstDB::AdditionalInfo InstDB::_additionalInfoTable[] = { // 184 records (#0..#183). Field 3 lists CPU features via EXT(); field 2 presumably indexes _rwFlagsInfoTable (values stay in 0..28) and field 1 presumably _instFlagsTable (0 or 1) - confirm against the AdditionalInfo declaration.
+  { 0, 0, { 0 } }, // #0 [ref=148x]
+  { 0, 1, { 0 } }, // #1 [ref=32x]
+  { 0, 2, { 0 } }, // #2 [ref=2x]
+  { 0, 3, { EXT(ADX) } }, // #3 [ref=1x]
+  { 0, 0, { EXT(SSE2) } }, // #4 [ref=60x]
+  { 0, 0, { EXT(SSE) } }, // #5 [ref=41x]
+  { 0, 0, { EXT(SSE3) } }, // #6 [ref=12x]
+  { 0, 4, { EXT(ADX) } }, // #7 [ref=1x]
+  { 0, 0, { EXT(AESNI) } }, // #8 [ref=6x]
+  { 0, 1, { EXT(BMI) } }, // #9 [ref=6x]
+  { 0, 5, { 0 } }, // #10 [ref=5x]
+  { 0, 0, { EXT(TBM) } }, // #11 [ref=9x]
+  { 0, 0, { EXT(SSE4_1) } }, // #12 [ref=47x]
+  { 0, 0, { EXT(MPX) } }, // #13 [ref=7x]
+  { 0, 6, { 0 } }, // #14 [ref=4x]
+  { 0, 1, { EXT(BMI2) } }, // #15 [ref=1x]
+  { 0, 7, { EXT(SMAP) } }, // #16 [ref=2x]
+  { 0, 8, { 0 } }, // #17 [ref=2x]
+  { 0, 9, { 0 } }, // #18 [ref=2x]
+  { 0, 0, { EXT(CLDEMOTE) } }, // #19 [ref=1x]
+  { 0, 0, { EXT(CLFLUSH) } }, // #20 [ref=1x]
+  { 0, 0, { EXT(CLFLUSHOPT) } }, // #21 [ref=1x]
+  { 0, 0, { EXT(SVM) } }, // #22 [ref=6x]
+  { 0, 10, { 0 } }, // #23 [ref=2x]
+  { 0, 1, { EXT(CET_SS) } }, // #24 [ref=3x]
+  { 0, 0, { EXT(UINTR) } }, // #25 [ref=4x]
+  { 0, 0, { EXT(CLWB) } }, // #26 [ref=1x]
+  { 0, 0, { EXT(CLZERO) } }, // #27 [ref=1x]
+  { 0, 3, { 0 } }, // #28 [ref=1x]
+  { 0, 11, { EXT(CMOV) } }, // #29 [ref=6x]
+  { 0, 12, { EXT(CMOV) } }, // #30 [ref=8x]
+  { 0, 13, { EXT(CMOV) } }, // #31 [ref=6x]
+  { 0, 14, { EXT(CMOV) } }, // #32 [ref=4x]
+  { 0, 15, { EXT(CMOV) } }, // #33 [ref=4x]
+  { 0, 16, { EXT(CMOV) } }, // #34 [ref=2x]
+  { 0, 17, { EXT(CMOV) } }, // #35 [ref=6x]
+  { 0, 18, { EXT(CMOV) } }, // #36 [ref=2x]
+  { 0, 19, { 0 } }, // #37 [ref=2x]
+  { 0, 1, { EXT(I486) } }, // #38 [ref=2x]
+  { 0, 5, { EXT(CMPXCHG16B) } }, // #39 [ref=1x]
+  { 0, 5, { EXT(CMPXCHG8B) } }, // #40 [ref=1x]
+  { 0, 1, { EXT(SSE2) } }, // #41 [ref=2x]
+  { 0, 1, { EXT(SSE) } }, // #42 [ref=2x]
+  { 0, 0, { EXT(I486) } }, // #43 [ref=4x]
+  { 0, 0, { EXT(SSE4_2) } }, // #44 [ref=2x]
+  { 0, 20, { 0 } }, // #45 [ref=2x]
+  { 0, 0, { EXT(MMX) } }, // #46 [ref=1x]
+  { 0, 0, { EXT(CET_IBT) } }, // #47 [ref=2x]
+  { 0, 0, { EXT(ENQCMD) } }, // #48 [ref=2x]
+  { 0, 0, { EXT(SSE4A) } }, // #49 [ref=4x]
+  { 0, 21, { 0 } }, // #50 [ref=4x]
+  { 0, 0, { EXT(3DNOW) } }, // #51 [ref=21x]
+  { 0, 0, { EXT(FXSR) } }, // #52 [ref=4x]
+  { 0, 0, { EXT(SMX) } }, // #53 [ref=1x]
+  { 0, 0, { EXT(GFNI) } }, // #54 [ref=3x]
+  { 0, 0, { EXT(HRESET) } }, // #55 [ref=1x]
+  { 0, 0, { EXT(CET_SS) } }, // #56 [ref=9x]
+  { 0, 16, { 0 } }, // #57 [ref=5x]
+  { 0, 0, { EXT(VMX) } }, // #58 [ref=12x]
+  { 0, 11, { 0 } }, // #59 [ref=8x]
+  { 0, 12, { 0 } }, // #60 [ref=12x]
+  { 0, 13, { 0 } }, // #61 [ref=10x]
+  { 0, 14, { 0 } }, // #62 [ref=8x]
+  { 0, 15, { 0 } }, // #63 [ref=8x]
+  { 0, 17, { 0 } }, // #64 [ref=8x]
+  { 0, 18, { 0 } }, // #65 [ref=4x]
+  { 0, 0, { EXT(AVX512_DQ) } }, // #66 [ref=22x]
+  { 0, 0, { EXT(AVX512_BW) } }, // #67 [ref=20x]
+  { 0, 0, { EXT(AVX512_F) } }, // #68 [ref=36x]
+  { 1, 0, { EXT(AVX512_DQ) } }, // #69 [ref=1x]
+  { 1, 0, { EXT(AVX512_BW) } }, // #70 [ref=2x]
+  { 1, 0, { EXT(AVX512_F) } }, // #71 [ref=1x]
+  { 0, 1, { EXT(AVX512_DQ) } }, // #72 [ref=3x]
+  { 0, 1, { EXT(AVX512_BW) } }, // #73 [ref=4x]
+  { 0, 1, { EXT(AVX512_F) } }, // #74 [ref=1x]
+  { 0, 22, { EXT(LAHFSAHF) } }, // #75 [ref=1x]
+  { 0, 0, { EXT(AMX_TILE) } }, // #76 [ref=7x]
+  { 0, 0, { EXT(LWP) } }, // #77 [ref=4x]
+  { 0, 23, { 0 } }, // #78 [ref=3x]
+  { 0, 1, { EXT(LZCNT) } }, // #79 [ref=1x]
+  { 0, 0, { EXT(MMX2) } }, // #80 [ref=8x]
+  { 0, 1, { EXT(MCOMMIT) } }, // #81 [ref=1x]
+  { 0, 0, { EXT(MONITOR) } }, // #82 [ref=2x]
+  { 0, 0, { EXT(MONITORX) } }, // #83 [ref=2x]
+  { 1, 0, { 0 } }, // #84 [ref=1x]
+  { 1, 0, { EXT(SSE2) } }, // #85 [ref=5x]
+  { 1, 0, { EXT(SSE) } }, // #86 [ref=3x]
+  { 0, 0, { EXT(MOVBE) } }, // #87 [ref=1x]
+  { 0, 0, { EXT(MMX), EXT(SSE2) } }, // #88 [ref=45x]
+  { 0, 0, { EXT(MOVDIR64B) } }, // #89 [ref=1x]
+  { 0, 0, { EXT(MOVDIRI) } }, // #90 [ref=1x]
+  { 1, 0, { EXT(MMX), EXT(SSE2) } }, // #91 [ref=1x]
+  { 0, 0, { EXT(BMI2) } }, // #92 [ref=7x]
+  { 0, 0, { EXT(SSSE3) } }, // #93 [ref=15x]
+  { 0, 0, { EXT(MMX2), EXT(SSE2) } }, // #94 [ref=10x]
+  { 0, 0, { EXT(PCLMULQDQ) } }, // #95 [ref=1x]
+  { 0, 1, { EXT(SSE4_2) } }, // #96 [ref=4x]
+  { 0, 0, { EXT(PCONFIG) } }, // #97 [ref=1x]
+  { 0, 0, { EXT(MMX2), EXT(SSE2), EXT(SSE4_1) } }, // #98 [ref=1x]
+  { 0, 0, { EXT(3DNOW2) } }, // #99 [ref=5x]
+  { 0, 0, { EXT(GEODE) } }, // #100 [ref=2x]
+  { 0, 1, { EXT(POPCNT) } }, // #101 [ref=1x]
+  { 0, 24, { 0 } }, // #102 [ref=3x]
+  { 0, 1, { EXT(PREFETCHW) } }, // #103 [ref=1x]
+  { 0, 1, { EXT(PREFETCHWT1) } }, // #104 [ref=1x]
+  { 0, 20, { EXT(SNP) } }, // #105 [ref=3x]
+  { 0, 1, { EXT(SSE4_1) } }, // #106 [ref=1x]
+  { 0, 0, { EXT(PTWRITE) } }, // #107 [ref=1x]
+  { 0, 25, { 0 } }, // #108 [ref=3x]
+  { 0, 1, { EXT(SNP) } }, // #109 [ref=1x]
+  { 0, 26, { 0 } }, // #110 [ref=2x]
+  { 0, 0, { EXT(FSGSBASE) } }, // #111 [ref=4x]
+  { 0, 0, { EXT(MSR) } }, // #112 [ref=2x]
+  { 0, 0, { EXT(RDPID) } }, // #113 [ref=1x]
+  { 0, 0, { EXT(OSPKE) } }, // #114 [ref=1x]
+  { 0, 0, { EXT(RDPRU) } }, // #115 [ref=1x]
+  { 0, 1, { EXT(RDRAND) } }, // #116 [ref=1x]
+  { 0, 1, { EXT(RDSEED) } }, // #117 [ref=1x]
+  { 0, 0, { EXT(RDTSC) } }, // #118 [ref=1x]
+  { 0, 0, { EXT(RDTSCP) } }, // #119 [ref=1x]
+  { 0, 27, { 0 } }, // #120 [ref=2x]
+  { 0, 28, { EXT(LAHFSAHF) } }, // #121 [ref=1x]
+  { 0, 0, { EXT(SERIALIZE) } }, // #122 [ref=1x]
+  { 0, 0, { EXT(SHA) } }, // #123 [ref=7x]
+  { 0, 0, { EXT(SKINIT) } }, // #124 [ref=2x]
+  { 0, 0, { EXT(AMX_BF16) } }, // #125 [ref=1x]
+  { 0, 0, { EXT(AMX_INT8) } }, // #126 [ref=4x]
+  { 0, 1, { EXT(UINTR) } }, // #127 [ref=1x]
+  { 0, 1, { EXT(WAITPKG) } }, // #128 [ref=2x]
+  { 0, 0, { EXT(WAITPKG) } }, // #129 [ref=1x]
+  { 0, 0, { EXT(AVX512_4FMAPS) } }, // #130 [ref=4x]
+  { 0, 0, { EXT(AVX), EXT(AVX512_F), EXT(AVX512_VL) } }, // #131 [ref=42x]
+  { 0, 0, { EXT(AVX512_FP16), EXT(AVX512_VL) } }, // #132 [ref=63x]
+  { 0, 0, { EXT(AVX), EXT(AVX512_F) } }, // #133 [ref=29x]
+  { 0, 0, { EXT(AVX512_FP16) } }, // #134 [ref=43x]
+  { 0, 0, { EXT(AVX) } }, // #135 [ref=35x]
+  { 0, 0, { EXT(AESNI), EXT(AVX), EXT(AVX512_F), EXT(AVX512_VL), EXT(VAES) } }, // #136 [ref=4x]
+  { 0, 0, { EXT(AESNI), EXT(AVX) } }, // #137 [ref=2x]
+  { 0, 0, { EXT(AVX512_F), EXT(AVX512_VL) } }, // #138 [ref=108x]
+  { 0, 0, { EXT(AVX), EXT(AVX512_DQ), EXT(AVX512_VL) } }, // #139 [ref=8x]
+  { 0, 0, { EXT(AVX512_DQ), EXT(AVX512_VL) } }, // #140 [ref=30x]
+  { 0, 0, { EXT(AVX2) } }, // #141 [ref=7x]
+  { 0, 0, { EXT(AVX), EXT(AVX2), EXT(AVX512_F), EXT(AVX512_VL) } }, // #142 [ref=39x]
+  { 0, 1, { EXT(AVX), EXT(AVX512_F) } }, // #143 [ref=4x]
+  { 0, 0, { EXT(AVX512_BF16), EXT(AVX512_VL) } }, // #144 [ref=3x]
+  { 0, 0, { EXT(AVX512_F), EXT(AVX512_VL), EXT(F16C) } }, // #145 [ref=2x]
+  { 0, 0, { EXT(AVX512_BW), EXT(AVX512_VL) } }, // #146 [ref=24x]
+  { 0, 0, { EXT(AVX512_ERI) } }, // #147 [ref=10x]
+  { 0, 0, { EXT(AVX512_F), EXT(AVX512_VL), EXT(FMA) } }, // #148 [ref=36x]
+  { 0, 0, { EXT(AVX512_F), EXT(FMA) } }, // #149 [ref=24x]
+  { 0, 0, { EXT(FMA4) } }, // #150 [ref=20x]
+  { 0, 0, { EXT(XOP) } }, // #151 [ref=55x]
+  { 0, 0, { EXT(AVX2), EXT(AVX512_F), EXT(AVX512_VL) } }, // #152 [ref=19x]
+  { 0, 0, { EXT(AVX512_PFI) } }, // #153 [ref=16x]
+  { 0, 0, { EXT(AVX), EXT(AVX512_F), EXT(AVX512_VL), EXT(GFNI) } }, // #154 [ref=3x]
+  { 1, 0, { EXT(AVX), EXT(AVX512_F), EXT(AVX512_VL) } }, // #155 [ref=4x]
+  { 1, 0, { EXT(AVX) } }, // #156 [ref=2x]
+  { 1, 0, { EXT(AVX512_F), EXT(AVX512_VL) } }, // #157 [ref=4x]
+  { 1, 0, { EXT(AVX512_BW), EXT(AVX512_VL) } }, // #158 [ref=2x]
+  { 1, 0, { EXT(AVX), EXT(AVX512_F) } }, // #159 [ref=3x]
+  { 0, 0, { EXT(AVX), EXT(AVX2) } }, // #160 [ref=17x]
+  { 0, 0, { EXT(AVX512_VP2INTERSECT) } }, // #161 [ref=2x]
+  { 0, 0, { EXT(AVX512_4VNNIW) } }, // #162 [ref=2x]
+  { 0, 0, { EXT(AVX), EXT(AVX2), EXT(AVX512_BW), EXT(AVX512_VL) } }, // #163 [ref=54x]
+  { 0, 0, { EXT(AVX2), EXT(AVX512_BW), EXT(AVX512_VL) } }, // #164 [ref=2x]
+  { 0, 0, { EXT(AVX512_CDI), EXT(AVX512_VL) } }, // #165 [ref=6x]
+  { 0, 0, { EXT(AVX), EXT(AVX512_F), EXT(AVX512_VL), EXT(PCLMULQDQ), EXT(VPCLMULQDQ) } }, // #166 [ref=1x]
+  { 0, 1, { EXT(AVX) } }, // #167 [ref=7x]
+  { 0, 0, { EXT(AVX512_VBMI2), EXT(AVX512_VL) } }, // #168 [ref=16x]
+  { 0, 0, { EXT(AVX512_VL), EXT(AVX512_VNNI), EXT(AVX_VNNI) } }, // #169 [ref=4x]
+  { 0, 0, { EXT(AVX512_VBMI), EXT(AVX512_VL) } }, // #170 [ref=4x]
+  { 0, 0, { EXT(AVX), EXT(AVX512_BW) } }, // #171 [ref=4x]
+  { 0, 0, { EXT(AVX), EXT(AVX512_DQ) } }, // #172 [ref=4x]
+  { 0, 0, { EXT(AVX512_IFMA), EXT(AVX512_VL) } }, // #173 [ref=2x]
+  { 0, 0, { EXT(AVX512_BITALG), EXT(AVX512_VL) } }, // #174 [ref=3x]
+  { 0, 0, { EXT(AVX512_VL), EXT(AVX512_VPOPCNTDQ) } }, // #175 [ref=2x]
+  { 0, 0, { EXT(WBNOINVD) } }, // #176 [ref=1x]
+  { 0, 0, { EXT(RTM) } }, // #177 [ref=3x]
+  { 0, 0, { EXT(XSAVE) } }, // #178 [ref=6x]
+  { 0, 0, { EXT(TSXLDTRK) } }, // #179 [ref=2x]
+  { 0, 0, { EXT(XSAVES) } }, // #180 [ref=4x]
+  { 0, 0, { EXT(XSAVEC) } }, // #181 [ref=2x]
+  { 0, 0, { EXT(XSAVEOPT) } }, // #182 [ref=2x]
+  { 0, 1, { EXT(TSX) } }  // #183 [ref=1x]
+};
+#undef EXT
+
+#define FLAG(VAL) uint32_t(CpuRWFlags::kX86_##VAL) // Shorthand: FLAG(CF) -> uint32_t(CpuRWFlags::kX86_CF); #undef'd after this table because FLAG is redefined for the next one.
+const InstDB::RWFlagsInfoTable InstDB::_rwFlagsInfoTable[] = { // 29 records (#0..#28), two CPU-flag masks each; presumably {flags read, flags written} - confirm against the RWFlagsInfoTable declaration.
+  { 0, 0 }, // #0 [ref=1429x]
+  { 0, FLAG(AF) | FLAG(CF) | FLAG(OF) | FLAG(PF) | FLAG(SF) | FLAG(ZF) }, // #1 [ref=84x]
+  { FLAG(CF), FLAG(AF) | FLAG(CF) | FLAG(OF) | FLAG(PF) | FLAG(SF) | FLAG(ZF) }, // #2 [ref=2x]
+  { FLAG(CF), FLAG(CF) }, // #3 [ref=2x]
+  { FLAG(OF), FLAG(OF) }, // #4 [ref=1x]
+  { 0, FLAG(ZF) }, // #5 [ref=7x]
+  { 0, FLAG(AF) | FLAG(CF) | FLAG(OF) | FLAG(PF) | FLAG(SF) }, // #6 [ref=4x]
+  { 0, FLAG(AC) }, // #7 [ref=2x]
+  { 0, FLAG(CF) }, // #8 [ref=2x]
+  { 0, FLAG(DF) }, // #9 [ref=2x]
+  { 0, FLAG(IF) }, // #10 [ref=2x]
+  { FLAG(CF) | FLAG(ZF), 0 }, // #11 [ref=14x]
+  { FLAG(CF), 0 }, // #12 [ref=20x]
+  { FLAG(ZF), 0 }, // #13 [ref=16x]
+  { FLAG(OF) | FLAG(SF) | FLAG(ZF), 0 }, // #14 [ref=12x]
+  { FLAG(OF) | FLAG(SF), 0 }, // #15 [ref=12x]
+  { FLAG(OF), 0 }, // #16 [ref=7x]
+  { FLAG(PF), 0 }, // #17 [ref=14x]
+  { FLAG(SF), 0 }, // #18 [ref=6x]
+  { FLAG(DF), FLAG(AF) | FLAG(CF) | FLAG(OF) | FLAG(PF) | FLAG(SF) | FLAG(ZF) }, // #19 [ref=2x]
+  { 0, FLAG(AF) | FLAG(OF) | FLAG(PF) | FLAG(SF) | FLAG(ZF) }, // #20 [ref=5x]
+  { 0, FLAG(CF) | FLAG(PF) | FLAG(ZF) }, // #21 [ref=4x]
+  { FLAG(AF) | FLAG(CF) | FLAG(PF) | FLAG(SF) | FLAG(ZF), 0 }, // #22 [ref=1x]
+  { FLAG(DF), 0 }, // #23 [ref=3x]
+  { 0, FLAG(AF) | FLAG(CF) | FLAG(DF) | FLAG(IF) | FLAG(OF) | FLAG(PF) | FLAG(SF) | FLAG(ZF) }, // #24 [ref=3x]
+  { FLAG(AF) | FLAG(CF) | FLAG(DF) | FLAG(IF) | FLAG(OF) | FLAG(PF) | FLAG(SF) | FLAG(ZF), 0 }, // #25 [ref=3x]
+  { FLAG(CF) | FLAG(OF), FLAG(CF) | FLAG(OF) }, // #26 [ref=2x]
+  { 0, FLAG(CF) | FLAG(OF) }, // #27 [ref=2x]
+  { 0, FLAG(AF) | FLAG(CF) | FLAG(PF) | FLAG(SF) | FLAG(ZF) }  // #28 [ref=1x]
+};
+#undef FLAG
+
+#define FLAG(VAL) uint32_t(InstRWFlags::k##VAL) // FLAG defined afresh for this table: here it names InstRWFlags::k* constants, not CpuRWFlags bits.
+const InstRWFlags InstDB::_instFlagsTable[] = { // Two records only: #0 = InstRWFlags::kNone, #1 = InstRWFlags::kMovOp.
+  InstRWFlags(FLAG(None)), // #0 [ref=1634x]
+  InstRWFlags(FLAG(MovOp))  // #1 [ref=29x]
+};
+#undef FLAG
+// ----------------------------------------------------------------------------
+// ${AdditionalInfoTable:End}
+
+// Inst - NameData
+// ===============
+
+#ifndef ASMJIT_NO_TEXT
+// ${NameData:Begin}
+// ------------------- Automatically generated, do not edit -------------------
+const char InstDB::_nameData[] =
+  "\0" "aaa\0" "aad\0" "aam\0" "aas\0" "adc\0" "adcx\0" "adox\0" "arpl\0" "bextr\0" "blcfill\0" "blci\0" "blcic\0"
+  "blcmsk\0" "blcs\0" "blsfill\0" "blsi\0" "blsic\0" "blsmsk\0" "blsr\0" "bndcl\0" "bndcn\0" "bndcu\0" "bndldx\0"
+  "bndmk\0" "bndmov\0" "bndstx\0" "bound\0" "bsf\0" "bsr\0" "bswap\0" "bt\0" "btc\0" "btr\0" "bts\0" "bzhi\0" "cbw\0"
+  "cdq\0" "cdqe\0" "clac\0" "clc\0" "cld\0" "cldemote\0" "clflush\0" "clflushopt\0" "clgi\0" "cli\0" "clrssbsy\0"
+  "clts\0" "clui\0" "clwb\0" "clzero\0" "cmc\0" "cmova\0" "cmovae\0" "cmovc\0" "cmovg\0" "cmovge\0" "cmovl\0"
+  "cmovle\0" "cmovna\0" "cmovnae\0" "cmovnc\0" "cmovng\0" "cmovnge\0" "cmovnl\0" "cmovnle\0" "cmovno\0" "cmovnp\0"
+  "cmovns\0" "cmovnz\0" "cmovo\0" "cmovp\0" "cmovpe\0" "cmovpo\0" "cmovs\0" "cmovz\0" "cmp\0" "cmps\0" "cmpxchg\0"
+  "cmpxchg16b\0" "cmpxchg8b\0" "cpuid\0" "cqo\0" "crc32\0" "cvtpd2pi\0" "cvtpi2pd\0" "cvtpi2ps\0" "cvtps2pi\0"
+  "cvttpd2pi\0" "cvttps2pi\0" "cwd\0" "cwde\0" "daa\0" "das\0" "endbr32\0" "endbr64\0" "enqcmd\0" "enqcmds\0" "f2xm1\0"
+  "fabs\0" "faddp\0" "fbld\0" "fbstp\0" "fchs\0" "fclex\0" "fcmovb\0" "fcmovbe\0" "fcmove\0" "fcmovnb\0" "fcmovnbe\0"
+  "fcmovne\0" "fcmovnu\0" "fcmovu\0" "fcom\0" "fcomi\0" "fcomip\0" "fcomp\0" "fcompp\0" "fcos\0" "fdecstp\0" "fdiv\0"
+  "fdivp\0" "fdivr\0" "fdivrp\0" "femms\0" "ffree\0" "fiadd\0" "ficom\0" "ficomp\0" "fidiv\0" "fidivr\0" "fild\0"
+  "fimul\0" "fincstp\0" "finit\0" "fist\0" "fistp\0" "fisttp\0" "fisub\0" "fisubr\0" "fld\0" "fld1\0" "fldcw\0"
+  "fldenv\0" "fldl2e\0" "fldl2t\0" "fldlg2\0" "fldln2\0" "fldpi\0" "fldz\0" "fmulp\0" "fnclex\0" "fninit\0" "fnop\0"
+  "fnsave\0" "fnstcw\0" "fnstenv\0" "fnstsw\0" "fpatan\0" "fprem\0" "fprem1\0" "fptan\0" "frndint\0" "frstor\0"
+  "fsave\0" "fscale\0" "fsin\0" "fsincos\0" "fsqrt\0" "fst\0" "fstcw\0" "fstenv\0" "fstp\0" "fstsw\0" "fsubp\0"
+  "fsubrp\0" "ftst\0" "fucom\0" "fucomi\0" "fucomip\0" "fucomp\0" "fucompp\0" "fwait\0" "fxam\0" "fxch\0" "fxrstor\0"
+  "fxrstor64\0" "fxsave\0" "fxsave64\0" "fxtract\0" "fyl2x\0" "fyl2xp1\0" "getsec\0" "hlt\0" "hreset\0" "inc\0"
+  "incsspd\0" "incsspq\0" "insertq\0" "int3\0" "into\0" "invept\0" "invlpg\0" "invlpga\0" "invpcid\0" "invvpid\0"
+  "iretd\0" "iretq\0" "ja\0" "jae\0" "jb\0" "jbe\0" "jc\0" "je\0" "jecxz\0" "jg\0" "jge\0" "jl\0" "jle\0" "jna\0"
+  "jnae\0" "jnb\0" "jnbe\0" "jnc\0" "jne\0" "jng\0" "jnge\0" "jnl\0" "jnle\0" "jno\0" "jnp\0" "jns\0" "jnz\0" "jo\0"
+  "jp\0" "jpe\0" "jpo\0" "js\0" "jz\0" "kaddb\0" "kaddd\0" "kaddq\0" "kaddw\0" "kandb\0" "kandd\0" "kandnb\0"
+  "kandnd\0" "kandnq\0" "kandnw\0" "kandq\0" "kandw\0" "kmovb\0" "kmovw\0" "knotb\0" "knotd\0" "knotq\0" "knotw\0"
+  "korb\0" "kord\0" "korq\0" "kortestb\0" "kortestd\0" "kortestq\0" "kortestw\0" "korw\0" "kshiftlb\0" "kshiftld\0"
+  "kshiftlq\0" "kshiftlw\0" "kshiftrb\0" "kshiftrd\0" "kshiftrq\0" "kshiftrw\0" "ktestb\0" "ktestd\0" "ktestq\0"
+  "ktestw\0" "kunpckbw\0" "kunpckdq\0" "kunpckwd\0" "kxnorb\0" "kxnord\0" "kxnorq\0" "kxnorw\0" "kxorb\0" "kxord\0"
+  "kxorq\0" "kxorw\0" "lahf\0" "lar\0" "lcall\0" "lds\0" "ldtilecfg\0" "lea\0" "leave\0" "les\0" "lfence\0" "lfs\0"
+  "lgdt\0" "lgs\0" "lidt\0" "ljmp\0" "lldt\0" "llwpcb\0" "lmsw\0" "lods\0" "loop\0" "loope\0" "loopne\0" "lsl\0"
+  "ltr\0" "lwpins\0" "lwpval\0" "lzcnt\0" "mcommit\0" "mfence\0" "monitorx\0" "movabs\0" "movdir64b\0" "movdiri\0"
+  "movdq2q\0" "movnti\0" "movntq\0" "movntsd\0" "movntss\0" "movq2dq\0" "movsx\0" "movsxd\0" "movzx\0" "mulx\0"
+  "mwaitx\0" "neg\0" "not\0" "out\0" "outs\0" "pavgusb\0" "pconfig\0" "pdep\0" "pext\0" "pf2id\0" "pf2iw\0" "pfacc\0"
+  "pfadd\0" "pfcmpeq\0" "pfcmpge\0" "pfcmpgt\0" "pfmax\0" "pfmin\0" "pfmul\0" "pfnacc\0" "pfpnacc\0" "pfrcp\0"
+  "pfrcpit1\0" "pfrcpit2\0" "pfrcpv\0" "pfrsqit1\0" "pfrsqrt\0" "pfrsqrtv\0" "pfsub\0" "pfsubr\0" "pi2fd\0" "pi2fw\0"
+  "pmulhrw\0" "pop\0" "popa\0" "popad\0" "popcnt\0" "popf\0" "popfd\0" "popfq\0" "prefetch\0" "prefetchnta\0"
+  "prefetcht0\0" "prefetcht1\0" "prefetcht2\0" "prefetchw\0" "prefetchwt1\0" "pshufw\0" "psmash\0" "pswapd\0"
+  "ptwrite\0" "push\0" "pusha\0" "pushad\0" "pushf\0" "pushfd\0" "pushfq\0" "pvalidate\0" "rcl\0" "rcr\0" "rdfsbase\0"
+  "rdgsbase\0" "rdmsr\0" "rdpid\0" "rdpkru\0" "rdpmc\0" "rdpru\0" "rdrand\0" "rdseed\0" "rdsspd\0" "rdsspq\0" "rdtsc\0"
+  "rdtscp\0" "retf\0" "rmpadjust\0" "rmpupdate\0" "rol\0" "ror\0" "rorx\0" "rsm\0" "rstorssp\0" "sahf\0" "sal\0"
+  "sar\0" "sarx\0" "saveprevssp\0" "sbb\0" "scas\0" "senduipi\0" "serialize\0" "seta\0" "setae\0" "setb\0" "setbe\0"
+  "setc\0" "sete\0" "setg\0" "setge\0" "setl\0" "setle\0" "setna\0" "setnae\0" "setnb\0" "setnbe\0" "setnc\0" "setne\0"
+  "setng\0" "setnge\0" "setnl\0" "setnle\0" "setno\0" "setnp\0" "setns\0" "setnz\0" "seto\0" "setp\0" "setpe\0"
+  "setpo\0" "sets\0" "setssbsy\0" "setz\0" "sfence\0" "sgdt\0" "sha1msg1\0" "sha1msg2\0" "sha1nexte\0" "sha1rnds4\0"
+  "sha256msg1\0" "sha256msg2\0" "sha256rnds2\0" "shl\0" "shlx\0" "shr\0" "shrd\0" "shrx\0" "sidt\0" "skinit\0" "sldt\0"
+  "slwpcb\0" "smsw\0" "stac\0" "stc\0" "stgi\0" "sti\0" "stos\0" "str\0" "sttilecfg\0" "swapgs\0" "syscall\0"
+  "sysenter\0" "sysexit\0" "sysexitq\0" "sysret\0" "sysretq\0" "t1mskc\0" "tdpbf16ps\0" "tdpbssd\0" "tdpbsud\0"
+  "tdpbusd\0" "tdpbuud\0" "testui\0" "tileloadd\0" "tileloaddt1\0" "tilerelease\0" "tilestored\0" "tilezero\0"
+  "tpause\0" "tzcnt\0" "tzmsk\0" "ud0\0" "ud1\0" "ud2\0" "uiret\0" "umonitor\0" "umwait\0" "v4fmaddps\0" "v4fmaddss\0"
+  "v4fnmaddps\0" "v4fnmaddss\0" "vaddpd\0" "vaddph\0" "vaddps\0" "vaddsd\0" "vaddsh\0" "vaddss\0" "vaddsubpd\0"
+  "vaddsubps\0" "vaesdec\0" "vaesdeclast\0" "vaesenc\0" "vaesenclast\0" "vaesimc\0" "vaeskeygenassist\0" "valignd\0"
+  "valignq\0" "vandnpd\0" "vandnps\0" "vandpd\0" "vandps\0" "vblendmpd\0" "vblendmps\0" "vblendpd\0" "vblendps\0"
+  "vblendvpd\0" "vblendvps\0" "vbroadcastf128\0" "vbroadcastf32x2\0" "vbroadcastf32x4\0" "vbroadcastf32x8\0"
+  "vbroadcastf64x2\0" "vbroadcastf64x4\0" "vbroadcasti128\0" "vbroadcasti32x2\0" "vbroadcasti32x4\0"
+  "vbroadcasti32x8\0" "vbroadcasti64x2\0" "vbroadcasti64x4\0" "vbroadcastsd\0" "vbroadcastss\0" "vcmppd\0" "vcmpph\0"
+  "vcmpps\0" "vcmpsd\0" "vcmpsh\0" "vcmpss\0" "vcomisd\0" "vcomish\0" "vcomiss\0" "vcompresspd\0" "vcompressps\0"
+  "vcvtdq2pd\0" "vcvtdq2ph\0" "vcvtdq2ps\0" "vcvtne2ps2bf16\0" "vcvtneps2bf16\0" "vcvtpd2dq\0" "vcvtpd2ph\0"
+  "vcvtpd2ps\0" "vcvtpd2qq\0" "vcvtpd2udq\0" "vcvtpd2uqq\0" "vcvtph2dq\0" "vcvtph2pd\0" "vcvtph2ps\0" "vcvtph2psx\0"
+  "vcvtph2qq\0" "vcvtph2udq\0" "vcvtph2uqq\0" "vcvtph2uw\0" "vcvtph2w\0" "vcvtps2dq\0" "vcvtps2pd\0" "vcvtps2ph\0"
+  "vcvtps2phx\0" "vcvtps2qq\0" "vcvtps2udq\0" "vcvtps2uqq\0" "vcvtqq2pd\0" "vcvtqq2ph\0" "vcvtqq2ps\0" "vcvtsd2sh\0"
+  "vcvtsd2si\0" "vcvtsd2ss\0" "vcvtsd2usi\0" "vcvtsh2sd\0" "vcvtsh2si\0" "vcvtsh2ss\0" "vcvtsh2usi\0" "vcvtsi2sd\0"
+  "vcvtsi2sh\0" "vcvtsi2ss\0" "vcvtss2sd\0" "vcvtss2sh\0" "vcvtss2si\0" "vcvtss2usi\0" "vcvttpd2dq\0" "vcvttpd2qq\0"
+  "vcvttpd2udq\0" "vcvttpd2uqq\0" "vcvttph2dq\0" "vcvttph2qq\0" "vcvttph2udq\0" "vcvttph2uqq\0" "vcvttph2uw\0"
+  "vcvttph2w\0" "vcvttps2dq\0" "vcvttps2qq\0" "vcvttps2udq\0" "vcvttps2uqq\0" "vcvttsd2si\0" "vcvttsd2usi\0"
+  "vcvttsh2si\0" "vcvttsh2usi\0" "vcvttss2si\0" "vcvttss2usi\0" "vcvtudq2pd\0" "vcvtudq2ph\0" "vcvtudq2ps\0"
+  "vcvtuqq2pd\0" "vcvtuqq2ph\0" "vcvtuqq2ps\0" "vcvtusi2sd\0" "vcvtusi2sh\0" "vcvtusi2ss\0" "vcvtuw2ph\0" "vcvtw2ph\0"
+  "vdbpsadbw\0" "vdivpd\0" "vdivph\0" "vdivps\0" "vdivsd\0" "vdivsh\0" "vdivss\0" "vdpbf16ps\0" "vdppd\0" "vdpps\0"
+  "verr\0" "verw\0" "vexp2pd\0" "vexp2ps\0" "vexpandpd\0" "vexpandps\0" "vextractf128\0" "vextractf32x4\0"
+  "vextractf32x8\0" "vextractf64x2\0" "vextractf64x4\0" "vextracti128\0" "vextracti32x4\0" "vextracti32x8\0"
+  "vextracti64x2\0" "vextracti64x4\0" "vextractps\0" "vfcmaddcph\0" "vfcmaddcsh\0" "vfcmulcph\0" "vfcmulcsh\0"
+  "vfixupimmpd\0" "vfixupimmps\0" "vfixupimmsd\0" "vfixupimmss\0" "vfmadd132pd\0" "vfmadd132ph\0" "vfmadd132ps\0"
+  "vfmadd132sd\0" "vfmadd132sh\0" "vfmadd132ss\0" "vfmadd213pd\0" "vfmadd213ph\0" "vfmadd213ps\0" "vfmadd213sd\0"
+  "vfmadd213sh\0" "vfmadd213ss\0" "vfmadd231pd\0" "vfmadd231ph\0" "vfmadd231ps\0" "vfmadd231sd\0" "vfmadd231sh\0"
+  "vfmadd231ss\0" "vfmaddcph\0" "vfmaddcsh\0" "vfmaddpd\0" "vfmaddps\0" "vfmaddsd\0" "vfmaddss\0" "vfmaddsub132pd\0"
+  "vfmaddsub132ph\0" "vfmaddsub132ps\0" "vfmaddsub213pd\0" "vfmaddsub213ph\0" "vfmaddsub213ps\0" "vfmaddsub231pd\0"
+  "vfmaddsub231ph\0" "vfmaddsub231ps\0" "vfmaddsubpd\0" "vfmaddsubps\0" "vfmsub132pd\0" "vfmsub132ph\0" "vfmsub132ps\0"
+  "vfmsub132sd\0" "vfmsub132sh\0" "vfmsub132ss\0" "vfmsub213pd\0" "vfmsub213ph\0" "vfmsub213ps\0" "vfmsub213sd\0"
+  "vfmsub213sh\0" "vfmsub213ss\0" "vfmsub231pd\0" "vfmsub231ph\0" "vfmsub231ps\0" "vfmsub231sd\0" "vfmsub231sh\0"
+  "vfmsub231ss\0" "vfmsubadd132pd\0" "vfmsubadd132ph\0" "vfmsubadd132ps\0" "vfmsubadd213pd\0" "vfmsubadd213ph\0"
+  "vfmsubadd213ps\0" "vfmsubadd231pd\0" "vfmsubadd231ph\0" "vfmsubadd231ps\0" "vfmsubaddpd\0" "vfmsubaddps\0"
+  "vfmsubpd\0" "vfmsubps\0" "vfmsubsd\0" "vfmsubss\0" "vfmulcph\0" "vfmulcsh\0" "vfnmadd132pd\0" "vfnmadd132ph\0"
+  "vfnmadd132ps\0" "vfnmadd132sd\0" "vfnmadd132sh\0" "vfnmadd132ss\0" "vfnmadd213pd\0" "vfnmadd213ph\0"
+  "vfnmadd213ps\0" "vfnmadd213sd\0" "vfnmadd213sh\0" "vfnmadd213ss\0" "vfnmadd231pd\0" "vfnmadd231ph\0"
+  "vfnmadd231ps\0" "vfnmadd231sd\0" "vfnmadd231sh\0" "vfnmadd231ss\0" "vfnmaddpd\0" "vfnmaddps\0" "vfnmaddsd\0"
+  "vfnmaddss\0" "vfnmsub132pd\0" "vfnmsub132ph\0" "vfnmsub132ps\0" "vfnmsub132sd\0" "vfnmsub132sh\0" "vfnmsub132ss\0"
+  "vfnmsub213pd\0" "vfnmsub213ph\0" "vfnmsub213ps\0" "vfnmsub213sd\0" "vfnmsub213sh\0" "vfnmsub213ss\0"
+  "vfnmsub231pd\0" "vfnmsub231ph\0" "vfnmsub231ps\0" "vfnmsub231sd\0" "vfnmsub231sh\0" "vfnmsub231ss\0" "vfnmsubpd\0"
+  "vfnmsubps\0" "vfnmsubsd\0" "vfnmsubss\0" "vfpclasspd\0" "vfpclassph\0" "vfpclassps\0" "vfpclasssd\0" "vfpclasssh\0"
+  "vfpclassss\0" "vfrczpd\0" "vfrczps\0" "vfrczsd\0" "vfrczss\0" "vgatherdpd\0" "vgatherdps\0" "vgatherpf0dpd\0"
+  "vgatherpf0dps\0" "vgatherpf0qpd\0" "vgatherpf0qps\0" "vgatherpf1dpd\0" "vgatherpf1dps\0" "vgatherpf1qpd\0"
+  "vgatherpf1qps\0" "vgatherqpd\0" "vgatherqps\0" "vgetexppd\0" "vgetexpph\0" "vgetexpps\0" "vgetexpsd\0" "vgetexpsh\0"
+  "vgetexpss\0" "vgetmantpd\0" "vgetmantph\0" "vgetmantps\0" "vgetmantsd\0" "vgetmantsh\0" "vgetmantss\0"
+  "vgf2p8affineinvqb\0" "vgf2p8affineqb\0" "vgf2p8mulb\0" "vhaddpd\0" "vhaddps\0" "vhsubpd\0" "vhsubps\0"
+  "vinsertf128\0" "vinsertf32x4\0" "vinsertf32x8\0" "vinsertf64x2\0" "vinsertf64x4\0" "vinserti128\0" "vinserti32x4\0"
+  "vinserti32x8\0" "vinserti64x2\0" "vinserti64x4\0" "vinsertps\0" "vlddqu\0" "vldmxcsr\0" "vmaskmovdqu\0"
+  "vmaskmovpd\0" "vmaskmovps\0" "vmaxpd\0" "vmaxph\0" "vmaxps\0" "vmaxsd\0" "vmaxsh\0" "vmaxss\0" "vmcall\0"
+  "vmclear\0" "vmfunc\0" "vminpd\0" "vminph\0" "vminps\0" "vminsd\0" "vminsh\0" "vminss\0" "vmlaunch\0" "vmload\0"
+  "vmmcall\0" "vmovapd\0" "vmovaps\0" "vmovd\0" "vmovddup\0" "vmovdqa\0" "vmovdqa32\0" "vmovdqa64\0" "vmovdqu\0"
+  "vmovdqu16\0" "vmovdqu32\0" "vmovdqu64\0" "vmovdqu8\0" "vmovhlps\0" "vmovhpd\0" "vmovhps\0" "vmovlhps\0" "vmovlpd\0"
+  "vmovlps\0" "vmovmskpd\0" "vmovmskps\0" "vmovntdq\0" "vmovntdqa\0" "vmovntpd\0" "vmovntps\0" "vmovq\0" "vmovsd\0"
+  "vmovsh\0" "vmovshdup\0" "vmovsldup\0" "vmovss\0" "vmovupd\0" "vmovups\0" "vmovw\0" "vmpsadbw\0" "vmptrld\0"
+  "vmptrst\0" "vmread\0" "vmresume\0" "vmrun\0" "vmsave\0" "vmulpd\0" "vmulph\0" "vmulps\0" "vmulsd\0" "vmulsh\0"
+  "vmulss\0" "vmwrite\0" "vmxon\0" "vorpd\0" "vorps\0" "vp2intersectd\0" "vp2intersectq\0" "vp4dpwssd\0" "vp4dpwssds\0"
+  "vpabsb\0" "vpabsd\0" "vpabsq\0" "vpabsw\0" "vpackssdw\0" "vpacksswb\0" "vpackusdw\0" "vpackuswb\0" "vpaddb\0"
+  "vpaddd\0" "vpaddq\0" "vpaddsb\0" "vpaddsw\0" "vpaddusb\0" "vpaddusw\0" "vpaddw\0" "vpalignr\0" "vpand\0" "vpandd\0"
+  "vpandn\0" "vpandnd\0" "vpandnq\0" "vpandq\0" "vpavgb\0" "vpavgw\0" "vpblendd\0" "vpblendmb\0" "vpblendmd\0"
+  "vpblendmq\0" "vpblendmw\0" "vpblendvb\0" "vpblendw\0" "vpbroadcastb\0" "vpbroadcastd\0" "vpbroadcastmb2q\0"
+  "vpbroadcastmw2d\0" "vpbroadcastq\0" "vpbroadcastw\0" "vpclmulqdq\0" "vpcmov\0" "vpcmpb\0" "vpcmpd\0" "vpcmpeqb\0"
+  "vpcmpeqd\0" "vpcmpeqq\0" "vpcmpeqw\0" "vpcmpestri\0" "vpcmpestrm\0" "vpcmpgtb\0" "vpcmpgtd\0" "vpcmpgtq\0"
+  "vpcmpgtw\0" "vpcmpistri\0" "vpcmpistrm\0" "vpcmpq\0" "vpcmpub\0" "vpcmpud\0" "vpcmpuq\0" "vpcmpuw\0" "vpcmpw\0"
+  "vpcomb\0" "vpcomd\0" "vpcompressb\0" "vpcompressd\0" "vpcompressq\0" "vpcompressw\0" "vpcomq\0" "vpcomub\0"
+  "vpcomud\0" "vpcomuq\0" "vpcomuw\0" "vpcomw\0" "vpconflictd\0" "vpconflictq\0" "vpdpbusd\0" "vpdpbusds\0"
+  "vpdpwssd\0" "vpdpwssds\0" "vperm2f128\0" "vperm2i128\0" "vpermb\0" "vpermd\0" "vpermi2b\0" "vpermi2d\0"
+  "vpermi2pd\0" "vpermi2ps\0" "vpermi2q\0" "vpermi2w\0" "vpermil2pd\0" "vpermil2ps\0" "vpermilpd\0" "vpermilps\0"
+  "vpermpd\0" "vpermps\0" "vpermq\0" "vpermt2b\0" "vpermt2d\0" "vpermt2pd\0" "vpermt2ps\0" "vpermt2q\0" "vpermt2w\0"
+  "vpermw\0" "vpexpandb\0" "vpexpandd\0" "vpexpandq\0" "vpexpandw\0" "vpextrb\0" "vpextrd\0" "vpextrq\0" "vpextrw\0"
+  "vpgatherdd\0" "vpgatherdq\0" "vpgatherqd\0" "vpgatherqq\0" "vphaddbd\0" "vphaddbq\0" "vphaddbw\0" "vphaddd\0"
+  "vphadddq\0" "vphaddsw\0" "vphaddubd\0" "vphaddubq\0" "vphaddubw\0" "vphaddudq\0" "vphadduwd\0" "vphadduwq\0"
+  "vphaddw\0" "vphaddwd\0" "vphaddwq\0" "vphminposuw\0" "vphsubbw\0" "vphsubd\0" "vphsubdq\0" "vphsubsw\0" "vphsubw\0"
+  "vphsubwd\0" "vpinsrb\0" "vpinsrd\0" "vpinsrq\0" "vpinsrw\0" "vplzcntd\0" "vplzcntq\0" "vpmacsdd\0" "vpmacsdqh\0"
+  "vpmacsdql\0" "vpmacssdd\0" "vpmacssdqh\0" "vpmacssdql\0" "vpmacsswd\0" "vpmacssww\0" "vpmacswd\0" "vpmacsww\0"
+  "vpmadcsswd\0" "vpmadcswd\0" "vpmadd52huq\0" "vpmadd52luq\0" "vpmaddubsw\0" "vpmaddwd\0" "vpmaskmovd\0"
+  "vpmaskmovq\0" "vpmaxsb\0" "vpmaxsd\0" "vpmaxsq\0" "vpmaxsw\0" "vpmaxub\0" "vpmaxud\0" "vpmaxuq\0" "vpmaxuw\0"
+  "vpminsb\0" "vpminsd\0" "vpminsq\0" "vpminsw\0" "vpminub\0" "vpminud\0" "vpminuq\0" "vpminuw\0" "vpmovb2m\0"
+  "vpmovd2m\0" "vpmovdb\0" "vpmovdw\0" "vpmovm2b\0" "vpmovm2d\0" "vpmovm2q\0" "vpmovm2w\0" "vpmovmskb\0" "vpmovq2m\0"
+  "vpmovqb\0" "vpmovqd\0" "vpmovqw\0" "vpmovsdb\0" "vpmovsdw\0" "vpmovsqb\0" "vpmovsqd\0" "vpmovsqw\0" "vpmovswb\0"
+  "vpmovsxbd\0" "vpmovsxbq\0" "vpmovsxbw\0" "vpmovsxdq\0" "vpmovsxwd\0" "vpmovsxwq\0" "vpmovusdb\0" "vpmovusdw\0"
+  "vpmovusqb\0" "vpmovusqd\0" "vpmovusqw\0" "vpmovuswb\0" "vpmovw2m\0" "vpmovwb\0" "vpmovzxbd\0" "vpmovzxbq\0"
+  "vpmovzxbw\0" "vpmovzxdq\0" "vpmovzxwd\0" "vpmovzxwq\0" "vpmuldq\0" "vpmulhrsw\0" "vpmulhuw\0" "vpmulhw\0"
+  "vpmulld\0" "vpmullq\0" "vpmullw\0" "vpmultishiftqb\0" "vpmuludq\0" "vpopcntb\0" "vpopcntd\0" "vpopcntq\0"
+  "vpopcntw\0" "vpor\0" "vpord\0" "vporq\0" "vpperm\0" "vprold\0" "vprolq\0" "vprolvd\0" "vprolvq\0" "vprord\0"
+  "vprorq\0" "vprorvd\0" "vprorvq\0" "vprotb\0" "vprotd\0" "vprotq\0" "vprotw\0" "vpsadbw\0" "vpscatterdd\0"
+  "vpscatterdq\0" "vpscatterqd\0" "vpscatterqq\0" "vpshab\0" "vpshad\0" "vpshaq\0" "vpshaw\0" "vpshlb\0" "vpshld\0"
+  "vpshldd\0" "vpshldq\0" "vpshldvd\0" "vpshldvq\0" "vpshldvw\0" "vpshldw\0" "vpshlq\0" "vpshlw\0" "vpshrdd\0"
+  "vpshrdq\0" "vpshrdvd\0" "vpshrdvq\0" "vpshrdvw\0" "vpshrdw\0" "vpshufb\0" "vpshufbitqmb\0" "vpshufd\0" "vpshufhw\0"
+  "vpshuflw\0" "vpsignb\0" "vpsignd\0" "vpsignw\0" "vpslld\0" "vpslldq\0" "vpsllq\0" "vpsllvd\0" "vpsllvq\0"
+  "vpsllvw\0" "vpsllw\0" "vpsrad\0" "vpsraq\0" "vpsravd\0" "vpsravq\0" "vpsravw\0" "vpsraw\0" "vpsrld\0" "vpsrldq\0"
+  "vpsrlq\0" "vpsrlvd\0" "vpsrlvq\0" "vpsrlvw\0" "vpsrlw\0" "vpsubb\0" "vpsubd\0" "vpsubq\0" "vpsubsb\0" "vpsubsw\0"
+  "vpsubusb\0" "vpsubusw\0" "vpsubw\0" "vpternlogd\0" "vpternlogq\0" "vptest\0" "vptestmb\0" "vptestmd\0" "vptestmq\0"
+  "vptestmw\0" "vptestnmb\0" "vptestnmd\0" "vptestnmq\0" "vptestnmw\0" "vpunpckhbw\0" "vpunpckhdq\0" "vpunpckhqdq\0"
+  "vpunpckhwd\0" "vpunpcklbw\0" "vpunpckldq\0" "vpunpcklqdq\0" "vpunpcklwd\0" "vpxor\0" "vpxord\0" "vpxorq\0"
+  "vrangepd\0" "vrangeps\0" "vrangesd\0" "vrangess\0" "vrcp14pd\0" "vrcp14ps\0" "vrcp14sd\0" "vrcp14ss\0" "vrcp28pd\0"
+  "vrcp28ps\0" "vrcp28sd\0" "vrcp28ss\0" "vrcpph\0" "vrcpps\0" "vrcpsh\0" "vrcpss\0" "vreducepd\0" "vreduceph\0"
+  "vreduceps\0" "vreducesd\0" "vreducesh\0" "vreducess\0" "vrndscalepd\0" "vrndscaleph\0" "vrndscaleps\0"
+  "vrndscalesd\0" "vrndscalesh\0" "vrndscaless\0" "vroundpd\0" "vroundps\0" "vroundsd\0" "vroundss\0" "vrsqrt14pd\0"
+  "vrsqrt14ps\0" "vrsqrt14sd\0" "vrsqrt14ss\0" "vrsqrt28pd\0" "vrsqrt28ps\0" "vrsqrt28sd\0" "vrsqrt28ss\0" "vrsqrtph\0"
+  "vrsqrtps\0" "vrsqrtsh\0" "vrsqrtss\0" "vscalefpd\0" "vscalefph\0" "vscalefps\0" "vscalefsd\0" "vscalefsh\0"
+  "vscalefss\0" "vscatterdpd\0" "vscatterdps\0" "vscatterpf0dpd\0" "vscatterpf0dps\0" "vscatterpf0qpd\0"
+  "vscatterpf0qps\0" "vscatterpf1dpd\0" "vscatterpf1dps\0" "vscatterpf1qpd\0" "vscatterpf1qps\0" "vscatterqpd\0"
+  "vscatterqps\0" "vshuff32x4\0" "vshuff64x2\0" "vshufi32x4\0" "vshufi64x2\0" "vshufpd\0" "vshufps\0" "vsqrtpd\0"
+  "vsqrtph\0" "vsqrtps\0" "vsqrtsd\0" "vsqrtsh\0" "vsqrtss\0" "vstmxcsr\0" "vsubpd\0" "vsubph\0" "vsubps\0" "vsubsd\0"
+  "vsubsh\0" "vsubss\0" "vtestpd\0" "vtestps\0" "vucomisd\0" "vucomish\0" "vucomiss\0" "vunpckhpd\0" "vunpckhps\0"
+  "vunpcklpd\0" "vunpcklps\0" "vxorpd\0" "vxorps\0" "vzeroall\0" "vzeroupper\0" "wbinvd\0" "wbnoinvd\0" "wrfsbase\0"
+  "wrgsbase\0" "wrmsr\0" "wrssd\0" "wrssq\0" "wrussd\0" "wrussq\0" "xabort\0" "xadd\0" "xbegin\0" "xend\0" "xgetbv\0"
+  "xlatb\0" "xresldtrk\0" "xrstors\0" "xrstors64\0" "xsavec\0" "xsavec64\0" "xsaveopt\0" "xsaveopt64\0" "xsaves\0"
+  "xsaves64\0" "xsetbv\0" "xsusldtrk\0" "xtest";
+
+const InstDB::InstNameIndex InstDB::instNameIndex[26] = { // 26 ranges written as { first id, last id + 1 }; slot i looks keyed by initial letter 'a' + i (confirm against the lookup code)
+  { Inst::kIdAaa          , Inst::kIdArpl          + 1 }, // [0]  'a'
+  { Inst::kIdBextr        , Inst::kIdBzhi          + 1 }, // [1]  'b'
+  { Inst::kIdCall         , Inst::kIdCwde          + 1 }, // [2]  'c'
+  { Inst::kIdDaa          , Inst::kIdDpps          + 1 }, // [3]  'd'
+  { Inst::kIdEmms         , Inst::kIdExtrq         + 1 }, // [4]  'e'
+  { Inst::kIdF2xm1        , Inst::kIdFyl2xp1       + 1 }, // [5]  'f'
+  { Inst::kIdGetsec       , Inst::kIdGf2p8mulb     + 1 }, // [6]  'g'
+  { Inst::kIdHaddpd       , Inst::kIdHsubps        + 1 }, // [7]  'h'
+  { Inst::kIdIdiv         , Inst::kIdIretq         + 1 }, // [8]  'i'
+  { Inst::kIdJa           , Inst::kIdJz            + 1 }, // [9]  'j'
+  { Inst::kIdKaddb        , Inst::kIdKxorw         + 1 }, // [10] 'k'
+  { Inst::kIdLahf         , Inst::kIdLzcnt         + 1 }, // [11] 'l'
+  { Inst::kIdMaskmovdqu   , Inst::kIdMwaitx        + 1 }, // [12] 'm'
+  { Inst::kIdNeg          , Inst::kIdNot           + 1 }, // [13] 'n'
+  { Inst::kIdOr           , Inst::kIdOuts          + 1 }, // [14] 'o'
+  { Inst::kIdPabsb        , Inst::kIdPxor          + 1 }, // [15] 'p'
+  { Inst::kIdNone         , Inst::kIdNone          + 1 }, // [16] 'q' - kIdNone placeholder; presumably no mnemonic starts with this letter
+  { Inst::kIdRcl          , Inst::kIdRstorssp      + 1 }, // [17] 'r'
+  { Inst::kIdSahf         , Inst::kIdSysretq       + 1 }, // [18] 's'
+  { Inst::kIdT1mskc       , Inst::kIdTzmsk         + 1 }, // [19] 't'
+  { Inst::kIdUcomisd      , Inst::kIdUnpcklps      + 1 }, // [20] 'u'
+  { Inst::kIdV4fmaddps    , Inst::kIdVzeroupper    + 1 }, // [21] 'v'
+  { Inst::kIdWbinvd       , Inst::kIdWrussq        + 1 }, // [22] 'w'
+  { Inst::kIdXabort       , Inst::kIdXtest         + 1 }, // [23] 'x'
+  { Inst::kIdNone         , Inst::kIdNone          + 1 }, // [24] 'y' - kIdNone placeholder; presumably no mnemonic starts with this letter
+  { Inst::kIdNone         , Inst::kIdNone          + 1 }  // [25] 'z' - kIdNone placeholder; presumably no mnemonic starts with this letter
+};
+// ----------------------------------------------------------------------------
+// ${NameData:End}
+#endif // !ASMJIT_NO_TEXT
+
+// x86::InstDB - InstSignature & OpSignature
+// =========================================
+
+#ifndef ASMJIT_NO_VALIDATION
+// ${InstSignatureTable:Begin}
+// ------------------- Automatically generated, do not edit -------------------
+#define ROW(count, x86, x64, implicit, o0, o1, o2, o3, o4, o5)       \
+  { count, uint8_t(x86 ? uint8_t(InstDB::Mode::kX86) : uint8_t(0)) | \
+                  (x64 ? uint8_t(InstDB::Mode::kX64) : uint8_t(0)) , \
+    implicit, /* apparently the count of implicit <..> operands */   \
+    0,        /* constant 0 in every row built by this macro */      \
+    { o0, o1, o2, o3, o4, o5 } /* six operand values, in order */    \
+  } // one InstSignature initializer: operand count, OR of the kX86/kX64 mode bits selected by x86/x64, implicit, 0, operand list
+const InstDB::InstSignature InstDB::_instSignatureTable[] = {
+  ROW(2, 1, 1, 0, 1  , 2  , 0  , 0  , 0  , 0  ), // #0   {r8lo|r8hi|m8|mem, r8lo|r8hi}
+  ROW(2, 1, 1, 0, 3  , 4  , 0  , 0  , 0  , 0  ), //      {r16|m16|mem|sreg, r16}
+  ROW(2, 1, 1, 0, 5  , 6  , 0  , 0  , 0  , 0  ), //      {r32|m32|mem|sreg, r32}
+  ROW(2, 0, 1, 0, 7  , 8  , 0  , 0  , 0  , 0  ), //      {r64|m64|mem|sreg|creg|dreg, r64}
+  ROW(2, 1, 1, 0, 9  , 10 , 0  , 0  , 0  , 0  ), //      {r8lo|r8hi|m8, i8|u8}
+  ROW(2, 1, 1, 0, 11 , 12 , 0  , 0  , 0  , 0  ), //      {r16|m16, i16|u16}
+  ROW(2, 1, 1, 0, 13 , 14 , 0  , 0  , 0  , 0  ), //      {r32|m32, i32|u32}
+  ROW(2, 0, 1, 0, 15 , 16 , 0  , 0  , 0  , 0  ), //      {r64|m64|mem, i32}
+  ROW(2, 0, 1, 0, 8  , 17 , 0  , 0  , 0  , 0  ), //      {r64, i64|u64|m64|mem|sreg|creg|dreg}
+  ROW(2, 1, 1, 0, 2  , 18 , 0  , 0  , 0  , 0  ), //      {r8lo|r8hi, m8|mem}
+  ROW(2, 1, 1, 0, 4  , 19 , 0  , 0  , 0  , 0  ), //      {r16, m16|mem|sreg}
+  ROW(2, 1, 1, 0, 6  , 20 , 0  , 0  , 0  , 0  ), //      {r32, m32|mem|sreg}
+  ROW(2, 1, 1, 0, 21 , 22 , 0  , 0  , 0  , 0  ), //      {m16|mem, sreg}
+  ROW(2, 1, 1, 0, 22 , 21 , 0  , 0  , 0  , 0  ), //      {sreg, m16|mem}
+  ROW(2, 1, 0, 0, 6  , 23 , 0  , 0  , 0  , 0  ), //      {r32, creg|dreg}
+  ROW(2, 1, 0, 0, 23 , 6  , 0  , 0  , 0  , 0  ), //      {creg|dreg, r32}
+  ROW(2, 1, 1, 0, 9  , 10 , 0  , 0  , 0  , 0  ), // #16  {r8lo|r8hi|m8, i8|u8}
+  ROW(2, 1, 1, 0, 11 , 12 , 0  , 0  , 0  , 0  ), //      {r16|m16, i16|u16}
+  ROW(2, 1, 1, 0, 13 , 14 , 0  , 0  , 0  , 0  ), //      {r32|m32, i32|u32}
+  ROW(2, 0, 1, 0, 15 , 24 , 0  , 0  , 0  , 0  ), //      {r64|m64|mem, i32|r64}
+  ROW(2, 1, 1, 0, 25 , 26 , 0  , 0  , 0  , 0  ), //      {r16|m16|r32|m32|r64|m64|mem, i8}
+  ROW(2, 1, 1, 0, 1  , 2  , 0  , 0  , 0  , 0  ), //      {r8lo|r8hi|m8|mem, r8lo|r8hi}
+  ROW(2, 1, 1, 0, 27 , 4  , 0  , 0  , 0  , 0  ), //      {r16|m16|mem, r16}
+  ROW(2, 1, 1, 0, 28 , 6  , 0  , 0  , 0  , 0  ), // #23  {r32|m32|mem, r32}
+  ROW(2, 1, 1, 0, 2  , 18 , 0  , 0  , 0  , 0  ), //      {r8lo|r8hi, m8|mem}
+  ROW(2, 1, 1, 0, 4  , 21 , 0  , 0  , 0  , 0  ), //      {r16, m16|mem}
+  ROW(2, 1, 1, 0, 6  , 29 , 0  , 0  , 0  , 0  ), //      {r32, m32|mem}
+  ROW(2, 0, 1, 0, 8  , 30 , 0  , 0  , 0  , 0  ), //      {r64, m64|mem}
+  ROW(2, 1, 1, 0, 31 , 10 , 0  , 0  , 0  , 0  ), // #28  {r8lo|r8hi|m8|r16|m16|r32|m32|r64|m64|mem, i8|u8}
+  ROW(2, 1, 1, 0, 11 , 12 , 0  , 0  , 0  , 0  ), //      {r16|m16, i16|u16}
+  ROW(2, 1, 1, 0, 13 , 14 , 0  , 0  , 0  , 0  ), //      {r32|m32, i32|u32}
+  ROW(2, 0, 1, 0, 8  , 32 , 0  , 0  , 0  , 0  ), //      {r64, u32|i32|r64|m64|mem}
+  ROW(2, 0, 1, 0, 30 , 24 , 0  , 0  , 0  , 0  ), //      {m64|mem, i32|r64}
+  ROW(2, 1, 1, 0, 1  , 2  , 0  , 0  , 0  , 0  ), //      {r8lo|r8hi|m8|mem, r8lo|r8hi}
+  ROW(2, 1, 1, 0, 27 , 4  , 0  , 0  , 0  , 0  ), //      {r16|m16|mem, r16}
+  ROW(2, 1, 1, 0, 28 , 6  , 0  , 0  , 0  , 0  ), //      {r32|m32|mem, r32}
+  ROW(2, 1, 1, 0, 2  , 18 , 0  , 0  , 0  , 0  ), //      {r8lo|r8hi, m8|mem}
+  ROW(2, 1, 1, 0, 4  , 21 , 0  , 0  , 0  , 0  ), //      {r16, m16|mem}
+  ROW(2, 1, 1, 0, 6  , 29 , 0  , 0  , 0  , 0  ), //      {r32, m32|mem}
+  ROW(2, 1, 1, 1, 33 , 1  , 0  , 0  , 0  , 0  ), // #39  {<ax>, r8lo|r8hi|m8|mem}
+  ROW(3, 1, 1, 2, 34 , 33 , 27 , 0  , 0  , 0  ), //      {<dx>, <ax>, r16|m16|mem}
+  ROW(3, 1, 1, 2, 35 , 36 , 28 , 0  , 0  , 0  ), //      {<edx>, <eax>, r32|m32|mem}
+  ROW(3, 0, 1, 2, 37 , 38 , 15 , 0  , 0  , 0  ), //      {<rdx>, <rax>, r64|m64|mem}
+  ROW(2, 1, 1, 0, 4  , 39 , 0  , 0  , 0  , 0  ), //      {r16, r16|m16|mem|i8|i16}
+  ROW(2, 1, 1, 0, 6  , 40 , 0  , 0  , 0  , 0  ), //      {r32, r32|m32|mem|i8|i32}
+  ROW(2, 0, 1, 0, 8  , 41 , 0  , 0  , 0  , 0  ), //      {r64, r64|m64|mem|i8|i32}
+  ROW(3, 1, 1, 0, 4  , 27 , 42 , 0  , 0  , 0  ), //      {r16, r16|m16|mem, i8|i16|u16}
+  ROW(3, 1, 1, 0, 6  , 28 , 43 , 0  , 0  , 0  ), //      {r32, r32|m32|mem, i8|i32|u32}
+  ROW(3, 0, 1, 0, 8  , 15 , 44 , 0  , 0  , 0  ), //      {r64, r64|m64|mem, i8|i32}
+  ROW(2, 0, 1, 0, 8  , 45 , 0  , 0  , 0  , 0  ), // #49  {r64, i64|u64}
+  ROW(2, 0, 1, 0, 46 , 18 , 0  , 0  , 0  , 0  ), //      {al, m8|mem}
+  ROW(2, 0, 1, 0, 47 , 21 , 0  , 0  , 0  , 0  ), //      {ax, m16|mem}
+  ROW(2, 0, 1, 0, 48 , 29 , 0  , 0  , 0  , 0  ), //      {eax, m32|mem}
+  ROW(2, 0, 1, 0, 49 , 30 , 0  , 0  , 0  , 0  ), //      {rax, m64|mem}
+  ROW(2, 0, 1, 0, 18 , 46 , 0  , 0  , 0  , 0  ), //      {m8|mem, al}
+  ROW(2, 0, 1, 0, 21 , 47 , 0  , 0  , 0  , 0  ), //      {m16|mem, ax}
+  ROW(2, 0, 1, 0, 29 , 48 , 0  , 0  , 0  , 0  ), //      {m32|mem, eax}
+  ROW(2, 0, 1, 0, 30 , 49 , 0  , 0  , 0  , 0  ), //      {m64|mem, rax}
+  ROW(2, 1, 1, 0, 1  , 2  , 0  , 0  , 0  , 0  ), // #58  {r8lo|r8hi|m8|mem, r8lo|r8hi}
+  ROW(2, 1, 1, 0, 27 , 4  , 0  , 0  , 0  , 0  ), //      {r16|m16|mem, r16}
+  ROW(2, 1, 1, 0, 28 , 6  , 0  , 0  , 0  , 0  ), //      {r32|m32|mem, r32}
+  ROW(2, 0, 1, 0, 15 , 8  , 0  , 0  , 0  , 0  ), // #61  {r64|m64|mem, r64}
+  ROW(2, 1, 1, 0, 2  , 18 , 0  , 0  , 0  , 0  ), //      {r8lo|r8hi, m8|mem}
+  ROW(2, 1, 1, 0, 4  , 21 , 0  , 0  , 0  , 0  ), //      {r16, m16|mem}
+  ROW(2, 1, 1, 0, 6  , 29 , 0  , 0  , 0  , 0  ), //      {r32, m32|mem}
+  ROW(2, 0, 1, 0, 8  , 30 , 0  , 0  , 0  , 0  ), //      {r64, m64|mem}
+  ROW(2, 1, 1, 0, 9  , 10 , 0  , 0  , 0  , 0  ), // #66  {r8lo|r8hi|m8, i8|u8}
+  ROW(2, 1, 1, 0, 11 , 12 , 0  , 0  , 0  , 0  ), //      {r16|m16, i16|u16}
+  ROW(2, 1, 1, 0, 13 , 14 , 0  , 0  , 0  , 0  ), //      {r32|m32, i32|u32}
+  ROW(2, 0, 1, 0, 15 , 24 , 0  , 0  , 0  , 0  ), //      {r64|m64|mem, i32|r64}
+  ROW(2, 1, 1, 0, 1  , 2  , 0  , 0  , 0  , 0  ), //      {r8lo|r8hi|m8|mem, r8lo|r8hi}
+  ROW(2, 1, 1, 0, 27 , 4  , 0  , 0  , 0  , 0  ), //      {r16|m16|mem, r16}
+  ROW(2, 1, 1, 0, 28 , 6  , 0  , 0  , 0  , 0  ), //      {r32|m32|mem, r32}
+  ROW(2, 1, 1, 0, 4  , 21 , 0  , 0  , 0  , 0  ), // #73  {r16, m16|mem}
+  ROW(2, 1, 1, 0, 6  , 29 , 0  , 0  , 0  , 0  ), //      {r32, m32|mem}
+  ROW(2, 0, 1, 0, 8  , 30 , 0  , 0  , 0  , 0  ), //      {r64, m64|mem}
+  ROW(2, 1, 1, 0, 21 , 4  , 0  , 0  , 0  , 0  ), //      {m16|mem, r16}
+  ROW(2, 1, 1, 0, 29 , 6  , 0  , 0  , 0  , 0  ), // #77  {m32|mem, r32}
+  ROW(2, 0, 1, 0, 30 , 8  , 0  , 0  , 0  , 0  ), //      {m64|mem, r64}
+  ROW(2, 1, 1, 0, 50 , 51 , 0  , 0  , 0  , 0  ), // #79  {xmm, xmm|m128|mem}
+  ROW(2, 1, 1, 0, 52 , 50 , 0  , 0  , 0  , 0  ), // #80  {m128|mem, xmm}
+  ROW(2, 1, 1, 0, 53 , 54 , 0  , 0  , 0  , 0  ), //      {ymm, ymm|m256|mem}
+  ROW(2, 1, 1, 0, 55 , 53 , 0  , 0  , 0  , 0  ), //      {m256|mem, ymm}
+  ROW(2, 1, 1, 0, 56 , 57 , 0  , 0  , 0  , 0  ), // #83  {zmm, zmm|m512|mem}
+  ROW(2, 1, 1, 0, 58 , 56 , 0  , 0  , 0  , 0  ), //      {m512|mem, zmm}
+  ROW(3, 1, 1, 0, 50 , 50 , 59 , 0  , 0  , 0  ), // #85  {xmm, xmm, xmm|m128|mem|i8|u8}
+  ROW(3, 1, 1, 0, 50 , 52 , 10 , 0  , 0  , 0  ), //      {xmm, m128|mem, i8|u8}
+  ROW(3, 1, 1, 0, 53 , 53 , 60 , 0  , 0  , 0  ), //      {ymm, ymm, ymm|m256|mem|i8|u8}
+  ROW(3, 1, 1, 0, 53 , 55 , 10 , 0  , 0  , 0  ), //      {ymm, m256|mem, i8|u8}
+  ROW(3, 1, 1, 0, 56 , 56 , 61 , 0  , 0  , 0  ), //      {zmm, zmm, zmm|m512|mem|i8|u8}
+  ROW(3, 1, 1, 0, 56 , 58 , 10 , 0  , 0  , 0  ), //      {zmm, m512|mem, i8|u8}
+  ROW(3, 1, 1, 0, 50 , 50 , 59 , 0  , 0  , 0  ), // #91  {xmm, xmm, i8|u8|xmm|m128|mem}
+  ROW(3, 1, 1, 0, 53 , 53 , 59 , 0  , 0  , 0  ), //      {ymm, ymm, i8|u8|xmm|m128|mem}
+  ROW(3, 1, 1, 0, 50 , 52 , 10 , 0  , 0  , 0  ), //      {xmm, m128|mem, i8|u8}
+  ROW(3, 1, 1, 0, 53 , 55 , 10 , 0  , 0  , 0  ), //      {ymm, m256|mem, i8|u8}
+  ROW(3, 1, 1, 0, 56 , 56 , 59 , 0  , 0  , 0  ), //      {zmm, zmm, xmm|m128|mem|i8|u8}
+  ROW(3, 1, 1, 0, 56 , 58 , 10 , 0  , 0  , 0  ), //      {zmm, m512|mem, i8|u8}
+  ROW(3, 1, 1, 0, 50 , 50 , 59 , 0  , 0  , 0  ), // #97  {xmm, xmm, xmm|m128|mem|i8|u8}
+  ROW(3, 1, 1, 0, 50 , 52 , 10 , 0  , 0  , 0  ), //      {xmm, m128|mem, i8|u8}
+  ROW(3, 1, 1, 0, 53 , 53 , 59 , 0  , 0  , 0  ), //      {ymm, ymm, xmm|m128|mem|i8|u8}
+  ROW(3, 1, 1, 0, 53 , 55 , 10 , 0  , 0  , 0  ), //      {ymm, m256|mem, i8|u8}
+  ROW(3, 1, 1, 0, 56 , 56 , 59 , 0  , 0  , 0  ), //      {zmm, zmm, xmm|m128|mem|i8|u8}
+  ROW(3, 1, 1, 0, 56 , 58 , 10 , 0  , 0  , 0  ), //      {zmm, m512|mem, i8|u8}
+  ROW(2, 1, 1, 0, 62 , 63 , 0  , 0  , 0  , 0  ), // #103 {mm, mm|m64|mem|r64}
+  ROW(2, 1, 1, 0, 15 , 64 , 0  , 0  , 0  , 0  ), //      {m64|mem|r64, mm|xmm}
+  ROW(2, 0, 1, 0, 50 , 15 , 0  , 0  , 0  , 0  ), //      {xmm, r64|m64|mem}
+  ROW(2, 1, 1, 0, 50 , 65 , 0  , 0  , 0  , 0  ), // #106 {xmm, xmm|m64|mem}
+  ROW(2, 1, 1, 0, 30 , 50 , 0  , 0  , 0  , 0  ), // #107 {m64|mem, xmm}
+  ROW(0, 1, 1, 0, 0  , 0  , 0  , 0  , 0  , 0  ), // #108 {}
+  ROW(1, 1, 1, 0, 66 , 0  , 0  , 0  , 0  , 0  ), //      {r16|m16|r32|m32|r64|m64}
+  ROW(2, 1, 1, 0, 27 , 4  , 0  , 0  , 0  , 0  ), //      {r16|m16|mem, r16}
+  ROW(2, 1, 1, 0, 28 , 6  , 0  , 0  , 0  , 0  ), //      {r32|m32|mem, r32}
+  ROW(2, 1, 1, 0, 15 , 8  , 0  , 0  , 0  , 0  ), //      {r64|m64|mem, r64}
+  ROW(3, 1, 1, 0, 50 , 67 , 50 , 0  , 0  , 0  ), // #113 {xmm, vm32x, xmm}
+  ROW(3, 1, 1, 0, 53 , 68 , 53 , 0  , 0  , 0  ), //      {ymm, vm32y, ymm}
+  ROW(2, 1, 1, 0, 50 , 67 , 0  , 0  , 0  , 0  ), //      {xmm, vm32x}
+  ROW(2, 1, 1, 0, 53 , 68 , 0  , 0  , 0  , 0  ), //      {ymm, vm32y}
+  ROW(2, 1, 1, 0, 56 , 69 , 0  , 0  , 0  , 0  ), //      {zmm, vm32z}
+  ROW(3, 1, 1, 0, 50 , 70 , 50 , 0  , 0  , 0  ), // #118 {xmm, vm64x, xmm}
+  ROW(3, 1, 1, 0, 53 , 71 , 53 , 0  , 0  , 0  ), //      {ymm, vm64y, ymm}
+  ROW(2, 1, 1, 0, 50 , 70 , 0  , 0  , 0  , 0  ), //      {xmm, vm64x}
+  ROW(2, 1, 1, 0, 53 , 71 , 0  , 0  , 0  , 0  ), //      {ymm, vm64y}
+  ROW(2, 1, 1, 0, 56 , 72 , 0  , 0  , 0  , 0  ), //      {zmm, vm64z}
+  ROW(2, 1, 1, 0, 25 , 10 , 0  , 0  , 0  , 0  ), // #123 {r16|m16|r32|m32|r64|m64|mem, i8|u8}
+  ROW(2, 1, 1, 0, 27 , 4  , 0  , 0  , 0  , 0  ), //      {r16|m16|mem, r16}
+  ROW(2, 1, 1, 0, 28 , 6  , 0  , 0  , 0  , 0  ), //      {r32|m32|mem, r32}
+  ROW(2, 0, 1, 0, 15 , 8  , 0  , 0  , 0  , 0  ), //      {r64|m64|mem, r64}
+  ROW(2, 1, 1, 2, 73 , 74 , 0  , 0  , 0  , 0  ), // #127 {<ds:[m8|memBase|zsi]>, <es:[m8|memBase|zdi]>}
+  ROW(2, 1, 1, 2, 75 , 76 , 0  , 0  , 0  , 0  ), //      {<ds:[m16|memBase|zsi]>, <es:[m16|memBase|zdi]>}
+  ROW(2, 1, 1, 2, 77 , 78 , 0  , 0  , 0  , 0  ), //      {<ds:[m32|memBase|zsi]>, <es:[m32|memBase|zdi]>}
+  ROW(2, 0, 1, 2, 79 , 80 , 0  , 0  , 0  , 0  ), //      {<ds:[m64|memBase|zsi]>, <es:[m64|memBase|zdi]>}
+  ROW(3, 1, 1, 1, 1  , 2  , 81 , 0  , 0  , 0  ), // #131 {r8lo|r8hi|m8|mem, r8lo|r8hi, <al>}
+  ROW(3, 1, 1, 1, 27 , 4  , 33 , 0  , 0  , 0  ), //      {r16|m16|mem, r16, <ax>}
+  ROW(3, 1, 1, 1, 28 , 6  , 36 , 0  , 0  , 0  ), //      {r32|m32|mem, r32, <eax>}
+  ROW(3, 0, 1, 1, 15 , 8  , 38 , 0  , 0  , 0  ), //      {r64|m64|mem, r64, <rax>}
+  ROW(2, 1, 1, 2, 81 , 82 , 0  , 0  , 0  , 0  ), // #135 {<al>, <ds:[m8|memBase|zsi|mem]>}
+  ROW(2, 1, 1, 2, 33 , 83 , 0  , 0  , 0  , 0  ), //      {<ax>, <ds:[m16|memBase|zsi|mem]>}
+  ROW(2, 1, 1, 2, 36 , 84 , 0  , 0  , 0  , 0  ), //      {<eax>, <ds:[m32|memBase|zsi|mem]>}
+  ROW(2, 0, 1, 2, 38 , 85 , 0  , 0  , 0  , 0  ), //      {<rax>, <ds:[m64|memBase|zsi|mem]>}
+  ROW(2, 1, 1, 2, 74 , 73 , 0  , 0  , 0  , 0  ), // #139 {<es:[m8|memBase|zdi]>, <ds:[m8|memBase|zsi]>}
+  ROW(2, 1, 1, 2, 76 , 75 , 0  , 0  , 0  , 0  ), //      {<es:[m16|memBase|zdi]>, <ds:[m16|memBase|zsi]>}
+  ROW(2, 1, 1, 2, 78 , 77 , 0  , 0  , 0  , 0  ), //      {<es:[m32|memBase|zdi]>, <ds:[m32|memBase|zsi]>}
+  ROW(2, 0, 1, 2, 80 , 79 , 0  , 0  , 0  , 0  ), //      {<es:[m64|memBase|zdi]>, <ds:[m64|memBase|zsi]>}
+  ROW(1, 1, 1, 0, 86 , 0  , 0  , 0  , 0  , 0  ), // #143 {r16|m16|r64|m64}
+  ROW(1, 1, 0, 0, 13 , 0  , 0  , 0  , 0  , 0  ), //      {r32|m32}
+  ROW(1, 1, 0, 0, 87 , 0  , 0  , 0  , 0  , 0  ), //      {ds|es|ss}
+  ROW(1, 1, 1, 0, 88 , 0  , 0  , 0  , 0  , 0  ), //      {fs|gs}
+  ROW(1, 1, 1, 0, 89 , 0  , 0  , 0  , 0  , 0  ), // #147 {r16|m16|r64|m64|i8|i16|i32}
+  ROW(1, 1, 0, 0, 90 , 0  , 0  , 0  , 0  , 0  ), //      {r32|m32|i32|u32}
+  ROW(1, 1, 0, 0, 91 , 0  , 0  , 0  , 0  , 0  ), //      {cs|ss|ds|es}
+  ROW(1, 1, 1, 0, 88 , 0  , 0  , 0  , 0  , 0  ), //      {fs|gs}
+  ROW(2, 1, 1, 2, 81 , 92 , 0  , 0  , 0  , 0  ), // #151 {<al>, <es:[m8|memBase|zdi|mem]>}
+  ROW(2, 1, 1, 2, 33 , 93 , 0  , 0  , 0  , 0  ), //      {<ax>, <es:[m16|memBase|zdi|mem]>}
+  ROW(2, 1, 1, 2, 36 , 94 , 0  , 0  , 0  , 0  ), //      {<eax>, <es:[m32|memBase|zdi|mem]>}
+  ROW(2, 0, 1, 2, 38 , 95 , 0  , 0  , 0  , 0  ), //      {<rax>, <es:[m64|memBase|zdi|mem]>}
+  ROW(2, 1, 1, 2, 92 , 81 , 0  , 0  , 0  , 0  ), // #155 {<es:[m8|memBase|zdi|mem]>, <al>}
+  ROW(2, 1, 1, 2, 93 , 33 , 0  , 0  , 0  , 0  ), //      {<es:[m16|memBase|zdi|mem]>, <ax>}
+  ROW(2, 1, 1, 2, 94 , 36 , 0  , 0  , 0  , 0  ), //      {<es:[m32|memBase|zdi|mem]>, <eax>}
+  ROW(2, 0, 1, 2, 95 , 38 , 0  , 0  , 0  , 0  ), //      {<es:[m64|memBase|zdi|mem]>, <rax>}
+  ROW(4, 1, 1, 0, 50 , 50 , 50 , 51 , 0  , 0  ), // #159 {xmm, xmm, xmm, xmm|m128|mem}
+  ROW(4, 1, 1, 0, 50 , 50 , 52 , 50 , 0  , 0  ), //      {xmm, xmm, m128|mem, xmm}
+  ROW(4, 1, 1, 0, 53 , 53 , 53 , 54 , 0  , 0  ), //      {ymm, ymm, ymm, ymm|m256|mem}
+  ROW(4, 1, 1, 0, 53 , 53 , 55 , 53 , 0  , 0  ), //      {ymm, ymm, m256|mem, ymm}
+  ROW(3, 1, 1, 0, 50 , 67 , 50 , 0  , 0  , 0  ), // #163 {xmm, vm32x, xmm}
+  ROW(3, 1, 1, 0, 53 , 67 , 53 , 0  , 0  , 0  ), //      {ymm, vm32x, ymm}
+  ROW(2, 1, 1, 0, 96 , 67 , 0  , 0  , 0  , 0  ), //      {xmm|ymm, vm32x}
+  ROW(2, 1, 1, 0, 56 , 68 , 0  , 0  , 0  , 0  ), //      {zmm, vm32y}
+  ROW(3, 1, 1, 0, 52 , 50 , 50 , 0  , 0  , 0  ), // #167 {m128|mem, xmm, xmm}
+  ROW(3, 1, 1, 0, 55 , 53 , 53 , 0  , 0  , 0  ), //      {m256|mem, ymm, ymm}
+  ROW(3, 1, 1, 0, 50 , 50 , 52 , 0  , 0  , 0  ), //      {xmm, xmm, m128|mem}
+  ROW(3, 1, 1, 0, 53 , 53 , 55 , 0  , 0  , 0  ), //      {ymm, ymm, m256|mem}
+  ROW(5, 1, 1, 0, 50 , 50 , 51 , 50 , 97 , 0  ), // #171 {xmm, xmm, xmm|m128|mem, xmm, i4|u4}
+  ROW(5, 1, 1, 0, 50 , 50 , 50 , 52 , 97 , 0  ), //      {xmm, xmm, xmm, m128|mem, i4|u4}
+  ROW(5, 1, 1, 0, 53 , 53 , 54 , 53 , 97 , 0  ), //      {ymm, ymm, ymm|m256|mem, ymm, i4|u4}
+  ROW(5, 1, 1, 0, 53 , 53 , 53 , 55 , 97 , 0  ), //      {ymm, ymm, ymm, m256|mem, i4|u4}
+  ROW(3, 1, 1, 0, 53 , 54 , 10 , 0  , 0  , 0  ), // #175 {ymm, ymm|m256|mem, i8|u8}
+  ROW(3, 1, 1, 0, 53 , 53 , 54 , 0  , 0  , 0  ), //      {ymm, ymm, ymm|m256|mem}
+  ROW(3, 1, 1, 0, 56 , 56 , 61 , 0  , 0  , 0  ), //      {zmm, zmm, zmm|m512|mem|i8|u8}
+  ROW(3, 1, 1, 0, 56 , 58 , 10 , 0  , 0  , 0  ), //      {zmm, m512|mem, i8|u8}
+  ROW(2, 1, 1, 0, 4  , 27 , 0  , 0  , 0  , 0  ), // #179 {r16, r16|m16|mem}
+  ROW(2, 1, 1, 0, 6  , 28 , 0  , 0  , 0  , 0  ), // #180 {r32, r32|m32|mem}
+  ROW(2, 0, 1, 0, 8  , 15 , 0  , 0  , 0  , 0  ), //      {r64, r64|m64|mem}
+  ROW(1, 1, 1, 0, 98 , 0  , 0  , 0  , 0  , 0  ), // #182 {m32|m64}
+  ROW(2, 1, 1, 0, 99 , 100, 0  , 0  , 0  , 0  ), //      {st0, st}
+  ROW(2, 1, 1, 0, 100, 99 , 0  , 0  , 0  , 0  ), //      {st, st0}
+  ROW(2, 1, 1, 0, 4  , 29 , 0  , 0  , 0  , 0  ), // #185 {r16, m32|mem}
+  ROW(2, 1, 1, 0, 6  , 101, 0  , 0  , 0  , 0  ), //      {r32, m48|mem}
+  ROW(2, 0, 1, 0, 8  , 102, 0  , 0  , 0  , 0  ), //      {r64, m80|mem}
+  ROW(3, 1, 1, 0, 27 , 4  , 103, 0  , 0  , 0  ), // #188 {r16|m16|mem, r16, cl|i8|u8}
+  ROW(3, 1, 1, 0, 28 , 6  , 103, 0  , 0  , 0  ), //      {r32|m32|mem, r32, cl|i8|u8}
+  ROW(3, 0, 1, 0, 15 , 8  , 103, 0  , 0  , 0  ), //      {r64|m64|mem, r64, cl|i8|u8}
+  ROW(3, 1, 1, 0, 50 , 50 , 51 , 0  , 0  , 0  ), // #191 {xmm, xmm, xmm|m128|mem}
+  ROW(3, 1, 1, 0, 53 , 53 , 54 , 0  , 0  , 0  ), // #192 {ymm, ymm, ymm|m256|mem}
+  ROW(3, 1, 1, 0, 56 , 56 , 57 , 0  , 0  , 0  ), //      {zmm, zmm, zmm|m512|mem}
+  ROW(4, 1, 1, 0, 50 , 50 , 51 , 10 , 0  , 0  ), // #194 {xmm, xmm, xmm|m128|mem, i8|u8}
+  ROW(4, 1, 1, 0, 53 , 53 , 54 , 10 , 0  , 0  ), // #195 {ymm, ymm, ymm|m256|mem, i8|u8}
+  ROW(4, 1, 1, 0, 56 , 56 , 57 , 10 , 0  , 0  ), //      {zmm, zmm, zmm|m512|mem, i8|u8}
+  ROW(4, 1, 1, 0, 104, 50 , 51 , 10 , 0  , 0  ), // #197 {xmm|k, xmm, xmm|m128|mem, i8|u8}
+  ROW(4, 1, 1, 0, 105, 53 , 54 , 10 , 0  , 0  ), //      {ymm|k, ymm, ymm|m256|mem, i8|u8}
+  ROW(4, 1, 1, 0, 106, 56 , 57 , 10 , 0  , 0  ), //      {k, zmm, zmm|m512|mem, i8|u8}
+  ROW(4, 1, 1, 0, 106, 50 , 51 , 10 , 0  , 0  ), // #200 {k, xmm, xmm|m128|mem, i8|u8}
+  ROW(4, 1, 1, 0, 106, 53 , 54 , 10 , 0  , 0  ), //      {k, ymm, ymm|m256|mem, i8|u8}
+  ROW(4, 1, 1, 0, 106, 56 , 57 , 10 , 0  , 0  ), //      {k, zmm, zmm|m512|mem, i8|u8}
+  ROW(2, 1, 1, 0, 51 , 50 , 0  , 0  , 0  , 0  ), // #203 {xmm|m128|mem, xmm}
+  ROW(2, 1, 1, 0, 54 , 53 , 0  , 0  , 0  , 0  ), //      {ymm|m256|mem, ymm}
+  ROW(2, 1, 1, 0, 57 , 56 , 0  , 0  , 0  , 0  ), //      {zmm|m512|mem, zmm}
+  ROW(2, 1, 1, 0, 50 , 65 , 0  , 0  , 0  , 0  ), // #206 {xmm, xmm|m64|mem}
+  ROW(2, 1, 1, 0, 53 , 51 , 0  , 0  , 0  , 0  ), //      {ymm, xmm|m128|mem}
+  ROW(2, 1, 1, 0, 56 , 54 , 0  , 0  , 0  , 0  ), //      {zmm, ymm|m256|mem}
+  ROW(2, 1, 1, 0, 50 , 51 , 0  , 0  , 0  , 0  ), // #209 {xmm, xmm|m128|mem}
+  ROW(2, 1, 1, 0, 53 , 54 , 0  , 0  , 0  , 0  ), //      {ymm, ymm|m256|mem}
+  ROW(2, 1, 1, 0, 56 , 57 , 0  , 0  , 0  , 0  ), //      {zmm, zmm|m512|mem}
+  ROW(2, 1, 1, 0, 50 , 107, 0  , 0  , 0  , 0  ), // #212 {xmm, xmm|m32|mem}
+  ROW(2, 1, 1, 0, 53 , 65 , 0  , 0  , 0  , 0  ), //      {ymm, xmm|m64|mem}
+  ROW(2, 1, 1, 0, 56 , 51 , 0  , 0  , 0  , 0  ), //      {zmm, xmm|m128|mem}
+  ROW(3, 1, 1, 0, 65 , 50 , 10 , 0  , 0  , 0  ), // #215 {xmm|m64|mem, xmm, i8|u8}
+  ROW(3, 1, 1, 0, 51 , 53 , 10 , 0  , 0  , 0  ), // #216 {xmm|m128|mem, ymm, i8|u8}
+  ROW(3, 1, 1, 0, 54 , 56 , 10 , 0  , 0  , 0  ), // #217 {ymm|m256|mem, zmm, i8|u8}
+  ROW(3, 1, 1, 0, 50 , 108, 50 , 0  , 0  , 0  ), // #218 {xmm, vm64x|vm64y, xmm}
+  ROW(2, 1, 1, 0, 50 , 108, 0  , 0  , 0  , 0  ), //      {xmm, vm64x|vm64y}
+  ROW(2, 1, 1, 0, 53 , 72 , 0  , 0  , 0  , 0  ), //      {ymm, vm64z}
+  ROW(3, 1, 1, 0, 50 , 51 , 10 , 0  , 0  , 0  ), // #221 {xmm, xmm|m128|mem, i8|u8}
+  ROW(3, 1, 1, 0, 53 , 54 , 10 , 0  , 0  , 0  ), //      {ymm, ymm|m256|mem, i8|u8}
+  ROW(3, 1, 1, 0, 56 , 57 , 10 , 0  , 0  , 0  ), //      {zmm, zmm|m512|mem, i8|u8}
+  ROW(2, 1, 1, 0, 50 , 65 , 0  , 0  , 0  , 0  ), // #224 {xmm, xmm|m64|mem}
+  ROW(2, 1, 1, 0, 53 , 54 , 0  , 0  , 0  , 0  ), //      {ymm, ymm|m256|mem}
+  ROW(2, 1, 1, 0, 56 , 57 , 0  , 0  , 0  , 0  ), //      {zmm, zmm|m512|mem}
+  ROW(2, 1, 1, 0, 52 , 50 , 0  , 0  , 0  , 0  ), // #227 {m128|mem, xmm}
+  ROW(2, 1, 1, 0, 55 , 53 , 0  , 0  , 0  , 0  ), //      {m256|mem, ymm}
+  ROW(2, 1, 1, 0, 58 , 56 , 0  , 0  , 0  , 0  ), //      {m512|mem, zmm}
+  ROW(2, 1, 1, 0, 50 , 52 , 0  , 0  , 0  , 0  ), // #230 {xmm, m128|mem}
+  ROW(2, 1, 1, 0, 53 , 55 , 0  , 0  , 0  , 0  ), //      {ymm, m256|mem}
+  ROW(2, 1, 1, 0, 56 , 58 , 0  , 0  , 0  , 0  ), //      {zmm, m512|mem}
+  ROW(2, 0, 1, 0, 15 , 50 , 0  , 0  , 0  , 0  ), // #233 {r64|m64|mem, xmm}
+  ROW(2, 1, 1, 0, 50 , 109, 0  , 0  , 0  , 0  ), //      {xmm, xmm|m64|mem|r64}
+  ROW(2, 1, 1, 0, 30 , 50 , 0  , 0  , 0  , 0  ), //      {m64|mem, xmm}
+  ROW(2, 1, 1, 0, 30 , 50 , 0  , 0  , 0  , 0  ), // #236 {m64|mem, xmm}
+  ROW(2, 1, 1, 0, 50 , 30 , 0  , 0  , 0  , 0  ), //      {xmm, m64|mem}
+  ROW(3, 1, 1, 0, 50 , 50 , 50 , 0  , 0  , 0  ), // #238 {xmm, xmm, xmm}
+  ROW(2, 1, 1, 0, 21 , 50 , 0  , 0  , 0  , 0  ), // #239 {m16|mem, xmm}
+  ROW(2, 1, 1, 0, 50 , 21 , 0  , 0  , 0  , 0  ), //      {xmm, m16|mem}
+  ROW(3, 1, 1, 0, 50 , 50 , 50 , 0  , 0  , 0  ), //      {xmm, xmm, xmm}
+  ROW(2, 1, 1, 0, 29 , 50 , 0  , 0  , 0  , 0  ), // #242 {m32|mem, xmm}
+  ROW(2, 1, 1, 0, 50 , 29 , 0  , 0  , 0  , 0  ), //      {xmm, m32|mem}
+  ROW(3, 1, 1, 0, 50 , 50 , 50 , 0  , 0  , 0  ), //      {xmm, xmm, xmm}
+  ROW(4, 1, 1, 0, 106, 106, 50 , 51 , 0  , 0  ), // #245 {k, k, xmm, xmm|m128|mem}
+  ROW(4, 1, 1, 0, 106, 106, 53 , 54 , 0  , 0  ), //      {k, k, ymm, ymm|m256|mem}
+  ROW(4, 1, 1, 0, 106, 106, 56 , 57 , 0  , 0  ), //      {k, k, zmm, zmm|m512|mem}
+  ROW(2, 1, 1, 0, 96 , 109, 0  , 0  , 0  , 0  ), // #248 {xmm|ymm, xmm|m64|mem|r64}
+  ROW(2, 0, 1, 0, 56 , 8  , 0  , 0  , 0  , 0  ), //      {zmm, r64}
+  ROW(2, 1, 1, 0, 56 , 65 , 0  , 0  , 0  , 0  ), //      {zmm, xmm|m64|mem}
+  ROW(3, 1, 1, 0, 104, 50 , 51 , 0  , 0  , 0  ), // #251 {xmm|k, xmm, xmm|m128|mem}
+  ROW(3, 1, 1, 0, 105, 53 , 54 , 0  , 0  , 0  ), //      {ymm|k, ymm, ymm|m256|mem}
+  ROW(3, 1, 1, 0, 106, 56 , 57 , 0  , 0  , 0  ), //      {k, zmm, zmm|m512|mem}
+  ROW(2, 1, 1, 0, 107, 50 , 0  , 0  , 0  , 0  ), // #254 {xmm|m32|mem, xmm}
+  ROW(2, 1, 1, 0, 65 , 53 , 0  , 0  , 0  , 0  ), //      {xmm|m64|mem, ymm}
+  ROW(2, 1, 1, 0, 51 , 56 , 0  , 0  , 0  , 0  ), //      {xmm|m128|mem, zmm}
+  ROW(2, 1, 1, 0, 65 , 50 , 0  , 0  , 0  , 0  ), // #257 {xmm|m64|mem, xmm}
+  ROW(2, 1, 1, 0, 51 , 53 , 0  , 0  , 0  , 0  ), //      {xmm|m128|mem, ymm}
+  ROW(2, 1, 1, 0, 54 , 56 , 0  , 0  , 0  , 0  ), //      {ymm|m256|mem, zmm}
+  ROW(2, 1, 1, 0, 110, 50 , 0  , 0  , 0  , 0  ), // #260 {xmm|m16|mem, xmm}
+  ROW(2, 1, 1, 0, 107, 53 , 0  , 0  , 0  , 0  ), //      {xmm|m32|mem, ymm}
+  ROW(2, 1, 1, 0, 65 , 56 , 0  , 0  , 0  , 0  ), //      {xmm|m64|mem, zmm}
+  ROW(2, 1, 1, 0, 50 , 110, 0  , 0  , 0  , 0  ), // #263 {xmm, xmm|m16|mem}
+  ROW(2, 1, 1, 0, 53 , 107, 0  , 0  , 0  , 0  ), //      {ymm, xmm|m32|mem}
+  ROW(2, 1, 1, 0, 56 , 65 , 0  , 0  , 0  , 0  ), //      {zmm, xmm|m64|mem}
+  ROW(2, 1, 1, 0, 67 , 50 , 0  , 0  , 0  , 0  ), // #266 {vm32x, xmm}
+  ROW(2, 1, 1, 0, 68 , 53 , 0  , 0  , 0  , 0  ), //      {vm32y, ymm}
+  ROW(2, 1, 1, 0, 69 , 56 , 0  , 0  , 0  , 0  ), //      {vm32z, zmm}
+  ROW(2, 1, 1, 0, 70 , 50 , 0  , 0  , 0  , 0  ), // #269 {vm64x, xmm}
+  ROW(2, 1, 1, 0, 71 , 53 , 0  , 0  , 0  , 0  ), //      {vm64y, ymm}
+  ROW(2, 1, 1, 0, 72 , 56 , 0  , 0  , 0  , 0  ), //      {vm64z, zmm}
+  ROW(3, 1, 1, 0, 106, 50 , 51 , 0  , 0  , 0  ), // #272 {k, xmm, xmm|m128|mem}
+  ROW(3, 1, 1, 0, 106, 53 , 54 , 0  , 0  , 0  ), //      {k, ymm, ymm|m256|mem}
+  ROW(3, 1, 1, 0, 106, 56 , 57 , 0  , 0  , 0  ), //      {k, zmm, zmm|m512|mem}
+  ROW(3, 1, 1, 0, 6  , 6  , 28 , 0  , 0  , 0  ), // #275 {r32, r32, r32|m32|mem}
+  ROW(3, 0, 1, 0, 8  , 8  , 15 , 0  , 0  , 0  ), //      {r64, r64, r64|m64|mem}
+  ROW(3, 1, 1, 0, 6  , 28 , 6  , 0  , 0  , 0  ), // #277 {r32, r32|m32|mem, r32}
+  ROW(3, 0, 1, 0, 8  , 15 , 8  , 0  , 0  , 0  ), //      {r64, r64|m64|mem, r64}
+  ROW(2, 1, 0, 0, 111, 28 , 0  , 0  , 0  , 0  ), // #279 {bnd, r32|m32|mem}
+  ROW(2, 0, 1, 0, 111, 15 , 0  , 0  , 0  , 0  ), //      {bnd, r64|m64|mem}
+  ROW(2, 1, 1, 0, 111, 112, 0  , 0  , 0  , 0  ), // #281 {bnd, bnd|mem}
+  ROW(2, 1, 1, 0, 113, 111, 0  , 0  , 0  , 0  ), //      {mem, bnd}
+  ROW(2, 1, 0, 0, 4  , 29 , 0  , 0  , 0  , 0  ), // #283 {r16, m32|mem}
+  ROW(2, 1, 0, 0, 6  , 30 , 0  , 0  , 0  , 0  ), //      {r32, m64|mem}
+  ROW(1, 1, 0, 0, 114, 0  , 0  , 0  , 0  , 0  ), // #285 {rel16|r16|m16|r32|m32}
+  ROW(1, 1, 1, 0, 115, 0  , 0  , 0  , 0  , 0  ), //      {rel32|r64|m64|mem}
+  ROW(2, 1, 1, 0, 6  , 116, 0  , 0  , 0  , 0  ), // #287 {r32, r8lo|r8hi|m8|r16|m16|r32|m32}
+  ROW(2, 0, 1, 0, 8  , 117, 0  , 0  , 0  , 0  ), //      {r64, r8lo|r8hi|m8|r64|m64}
+  ROW(1, 1, 0, 0, 118, 0  , 0  , 0  , 0  , 0  ), // #289 {r16|r32}
+  ROW(1, 1, 1, 0, 31 , 0  , 0  , 0  , 0  , 0  ), // #290 {r8lo|r8hi|m8|r16|m16|r32|m32|r64|m64|mem}
+  ROW(2, 1, 0, 0, 119, 58 , 0  , 0  , 0  , 0  ), // #291 {es:[mem|m512|memBase], m512|mem}
+  ROW(2, 0, 1, 0, 119, 58 , 0  , 0  , 0  , 0  ), //      {es:[mem|m512|memBase], m512|mem}
+  ROW(3, 1, 1, 0, 50 , 10 , 10 , 0  , 0  , 0  ), // #293 {xmm, i8|u8, i8|u8}
+  ROW(2, 1, 1, 0, 50 , 50 , 0  , 0  , 0  , 0  ), // #294 {xmm, xmm}
+  ROW(0, 1, 1, 0, 0  , 0  , 0  , 0  , 0  , 0  ), // #295 {}
+  ROW(1, 1, 1, 0, 100, 0  , 0  , 0  , 0  , 0  ), // #296 {st}
+  ROW(0, 1, 1, 0, 0  , 0  , 0  , 0  , 0  , 0  ), // #297 {}
+  ROW(1, 1, 1, 0, 120, 0  , 0  , 0  , 0  , 0  ), // #298 {m32|m64|st}
+  ROW(2, 1, 1, 0, 50 , 50 , 0  , 0  , 0  , 0  ), // #299 {xmm, xmm}
+  ROW(4, 1, 1, 0, 50 , 50 , 10 , 10 , 0  , 0  ), //      {xmm, xmm, i8|u8, i8|u8}
+  ROW(2, 1, 0, 0, 6  , 52 , 0  , 0  , 0  , 0  ), // #301 {r32, m128|mem}
+  ROW(2, 0, 1, 0, 8  , 52 , 0  , 0  , 0  , 0  ), //      {r64, m128|mem}
+  ROW(2, 1, 0, 2, 36 , 121, 0  , 0  , 0  , 0  ), // #303 {<eax>, <ecx>}
+  ROW(2, 0, 1, 2, 122, 121, 0  , 0  , 0  , 0  ), //      {<eax|rax>, <ecx>}
+  ROW(1, 1, 1, 0, 123, 0  , 0  , 0  , 0  , 0  ), // #305 {rel8|rel32}
+  ROW(1, 1, 0, 0, 124, 0  , 0  , 0  , 0  , 0  ), //      {rel16}
+  ROW(2, 1, 0, 1, 125, 126, 0  , 0  , 0  , 0  ), // #307 {<cx|ecx>, rel8}
+  ROW(2, 0, 1, 1, 127, 126, 0  , 0  , 0  , 0  ), //      {<ecx|rcx>, rel8}
+  ROW(1, 1, 1, 0, 128, 0  , 0  , 0  , 0  , 0  ), // #309 {rel8|rel32|r64|m64|mem}
+  ROW(1, 1, 0, 0, 129, 0  , 0  , 0  , 0  , 0  ), //      {rel16|r32|m32|mem}
+  ROW(2, 1, 1, 0, 106, 130, 0  , 0  , 0  , 0  ), // #311 {k, k|m8|mem|r32}
+  ROW(2, 1, 1, 0, 131, 106, 0  , 0  , 0  , 0  ), //      {m8|mem|r32, k}
+  ROW(2, 1, 1, 0, 106, 132, 0  , 0  , 0  , 0  ), // #313 {k, k|m32|mem|r32}
+  ROW(2, 1, 1, 0, 28 , 106, 0  , 0  , 0  , 0  ), //      {m32|mem|r32, k}
+  ROW(2, 1, 1, 0, 106, 133, 0  , 0  , 0  , 0  ), // #315 {k, k|m64|mem|r64}
+  ROW(2, 1, 1, 0, 15 , 106, 0  , 0  , 0  , 0  ), //      {m64|mem|r64, k}
+  ROW(2, 1, 1, 0, 106, 134, 0  , 0  , 0  , 0  ), // #317 {k, k|m16|mem|r32}
+  ROW(2, 1, 1, 0, 135, 106, 0  , 0  , 0  , 0  ), //      {m16|mem|r32, k}
+  ROW(2, 1, 1, 0, 4  , 27 , 0  , 0  , 0  , 0  ), // #319 {r16, r16|m16|mem}
+  ROW(2, 1, 1, 0, 6  , 135, 0  , 0  , 0  , 0  ), //      {r32, r32|m16|mem}
+  ROW(2, 1, 0, 0, 136, 137, 0  , 0  , 0  , 0  ), // #321 {i16, i16|i32}
+  ROW(1, 1, 1, 0, 138, 0  , 0  , 0  , 0  , 0  ), //      {m32|m48|m80|mem}
+  ROW(2, 1, 0, 0, 4  , 29 , 0  , 0  , 0  , 0  ), // #323 {r16, m32|mem}
+  ROW(2, 1, 0, 0, 6  , 101, 0  , 0  , 0  , 0  ), //      {r32, m48|mem}
+  ROW(2, 1, 1, 0, 4  , 27 , 0  , 0  , 0  , 0  ), // #325 {r16, r16|m16|mem}
+  ROW(2, 1, 1, 0, 139, 135, 0  , 0  , 0  , 0  ), //      {r32|r64, r32|m16|mem}
+  ROW(2, 1, 1, 0, 64 , 28 , 0  , 0  , 0  , 0  ), // #327 {mm|xmm, r32|m32|mem}
+  ROW(2, 1, 1, 0, 28 , 64 , 0  , 0  , 0  , 0  ), //      {r32|m32|mem, mm|xmm}
+  ROW(2, 1, 1, 0, 50 , 107, 0  , 0  , 0  , 0  ), // #329 {xmm, xmm|m32|mem}
+  ROW(2, 1, 1, 0, 29 , 50 , 0  , 0  , 0  , 0  ), //      {m32|mem, xmm}
+  ROW(2, 1, 1, 0, 4  , 9  , 0  , 0  , 0  , 0  ), // #331 {r16, r8lo|r8hi|m8}
+  ROW(2, 1, 1, 0, 139, 140, 0  , 0  , 0  , 0  ), //      {r32|r64, r8lo|r8hi|m8|r16|m16}
+  ROW(2, 0, 1, 0, 4  , 27 , 0  , 0  , 0  , 0  ), // #333 {r16, r16|m16|mem}
+  ROW(2, 0, 1, 0, 139, 28 , 0  , 0  , 0  , 0  ), //      {r32|r64, r32|m32|mem}
+  ROW(4, 1, 1, 1, 6  , 6  , 28 , 35 , 0  , 0  ), // #335 {r32, r32, r32|m32|mem, <edx>}
+  ROW(4, 0, 1, 1, 8  , 8  , 15 , 37 , 0  , 0  ), //      {r64, r64, r64|m64|mem, <rdx>}
+  ROW(2, 1, 1, 0, 62 , 141, 0  , 0  , 0  , 0  ), // #337 {mm, mm|m64|mem}
+  ROW(2, 1, 1, 0, 50 , 51 , 0  , 0  , 0  , 0  ), //      {xmm, xmm|m128|mem}
+  ROW(3, 1, 1, 0, 62 , 141, 10 , 0  , 0  , 0  ), // #339 {mm, mm|m64|mem, i8|u8}
+  ROW(3, 1, 1, 0, 50 , 51 , 10 , 0  , 0  , 0  ), //      {xmm, xmm|m128|mem, i8|u8}
+  ROW(3, 1, 1, 0, 6  , 64 , 10 , 0  , 0  , 0  ), // #341 {r32, mm|xmm, i8|u8}
+  ROW(3, 1, 1, 0, 21 , 50 , 10 , 0  , 0  , 0  ), //      {m16|mem, xmm, i8|u8}
+  ROW(2, 1, 1, 0, 62 , 142, 0  , 0  , 0  , 0  ), // #343 {mm, i8|u8|mm|m64|mem}
+  ROW(2, 1, 1, 0, 50 , 59 , 0  , 0  , 0  , 0  ), //      {xmm, i8|u8|xmm|m128|mem}
+  ROW(2, 1, 1, 0, 62 , 143, 0  , 0  , 0  , 0  ), // #345 {mm, mm|m32|mem}
+  ROW(2, 1, 1, 0, 50 , 51 , 0  , 0  , 0  , 0  ), //      {xmm, xmm|m128|mem}
+  ROW(1, 1, 0, 0, 6  , 0  , 0  , 0  , 0  , 0  ), // #347 {r32}
+  ROW(1, 0, 1, 0, 8  , 0  , 0  , 0  , 0  , 0  ), // #348 {r64}
+  ROW(0, 1, 1, 0, 0  , 0  , 0  , 0  , 0  , 0  ), // #349 {}
+  ROW(1, 1, 1, 0, 144, 0  , 0  , 0  , 0  , 0  ), //      {u16}
+  ROW(3, 1, 1, 0, 6  , 28 , 10 , 0  , 0  , 0  ), // #351 {r32, r32|m32|mem, i8|u8}
+  ROW(3, 0, 1, 0, 8  , 15 , 10 , 0  , 0  , 0  ), //      {r64, r64|m64|mem, i8|u8}
+  ROW(4, 1, 1, 0, 50 , 50 , 51 , 50 , 0  , 0  ), // #353 {xmm, xmm, xmm|m128|mem, xmm}
+  ROW(4, 1, 1, 0, 53 , 53 , 54 , 53 , 0  , 0  ), //      {ymm, ymm, ymm|m256|mem, ymm}
+  ROW(2, 1, 1, 0, 50 , 145, 0  , 0  , 0  , 0  ), // #355 {xmm, xmm|m128|ymm|m256}
+  ROW(2, 1, 1, 0, 53 , 57 , 0  , 0  , 0  , 0  ), //      {ymm, zmm|m512|mem}
+  ROW(4, 1, 1, 0, 50 , 50 , 50 , 65 , 0  , 0  ), // #357 {xmm, xmm, xmm, xmm|m64|mem}
+  ROW(4, 1, 1, 0, 50 , 50 , 30 , 50 , 0  , 0  ), //      {xmm, xmm, m64|mem, xmm}
+  ROW(4, 1, 1, 0, 50 , 50 , 50 , 107, 0  , 0  ), // #359 {xmm, xmm, xmm, xmm|m32|mem}
+  ROW(4, 1, 1, 0, 50 , 50 , 29 , 50 , 0  , 0  ), //      {xmm, xmm, m32|mem, xmm}
+  ROW(4, 1, 1, 0, 53 , 53 , 51 , 10 , 0  , 0  ), // #361 {ymm, ymm, xmm|m128|mem, i8|u8}
+  ROW(4, 1, 1, 0, 56 , 56 , 51 , 10 , 0  , 0  ), //      {zmm, zmm, xmm|m128|mem, i8|u8}
+  ROW(1, 1, 0, 1, 36 , 0  , 0  , 0  , 0  , 0  ), // #363 {<eax>}
+  ROW(1, 0, 1, 1, 38 , 0  , 0  , 0  , 0  , 0  ), // #364 {<rax>}
+  ROW(2, 1, 1, 0, 28 , 50 , 0  , 0  , 0  , 0  ), // #365 {r32|m32|mem, xmm}
+  ROW(2, 1, 1, 0, 50 , 28 , 0  , 0  , 0  , 0  ), //      {xmm, r32|m32|mem}
+  ROW(2, 1, 1, 0, 30 , 50 , 0  , 0  , 0  , 0  ), // #367 {m64|mem, xmm}
+  ROW(3, 1, 1, 0, 50 , 50 , 30 , 0  , 0  , 0  ), //      {xmm, xmm, m64|mem}
+  ROW(2, 1, 1, 0, 135, 50 , 0  , 0  , 0  , 0  ), // #369 {r32|m16|mem, xmm}
+  ROW(2, 1, 1, 0, 50 , 135, 0  , 0  , 0  , 0  ), //      {xmm, r32|m16|mem}
+  ROW(2, 1, 0, 0, 28 , 6  , 0  , 0  , 0  , 0  ), // #371 {r32|m32|mem, r32}
+  ROW(2, 0, 1, 0, 15 , 8  , 0  , 0  , 0  , 0  ), //      {r64|m64|mem, r64}
+  ROW(2, 1, 0, 0, 6  , 28 , 0  , 0  , 0  , 0  ), // #373 {r32, r32|m32|mem}
+  ROW(2, 0, 1, 0, 8  , 15 , 0  , 0  , 0  , 0  ), //      {r64, r64|m64|mem}
+  ROW(3, 1, 1, 0, 50 , 50 , 59 , 0  , 0  , 0  ), // #375 {xmm, xmm, xmm|m128|mem|i8|u8}
+  ROW(3, 1, 1, 0, 50 , 52 , 146, 0  , 0  , 0  ), //      {xmm, m128|mem, i8|u8|xmm}
+  ROW(2, 1, 1, 0, 67 , 96 , 0  , 0  , 0  , 0  ), // #377 {vm32x, xmm|ymm}
+  ROW(2, 1, 1, 0, 68 , 56 , 0  , 0  , 0  , 0  ), //      {vm32y, zmm}
+  ROW(2, 1, 1, 0, 108, 50 , 0  , 0  , 0  , 0  ), // #379 {vm64x|vm64y, xmm}
+  ROW(2, 1, 1, 0, 72 , 53 , 0  , 0  , 0  , 0  ), //      {vm64z, ymm}
+  ROW(3, 1, 1, 0, 50 , 50 , 51 , 0  , 0  , 0  ), // #381 {xmm, xmm, xmm|m128|mem}
+  ROW(3, 1, 1, 0, 50 , 52 , 50 , 0  , 0  , 0  ), //      {xmm, m128|mem, xmm}
+  ROW(1, 1, 0, 1, 33 , 0  , 0  , 0  , 0  , 0  ), // #383 {<ax>}
+  ROW(2, 1, 0, 1, 33 , 10 , 0  , 0  , 0  , 0  ), // #384 {<ax>, i8|u8}
+  ROW(2, 1, 0, 0, 27 , 4  , 0  , 0  , 0  , 0  ), // #385 {r16|m16|mem, r16}
+  ROW(3, 1, 1, 1, 50 , 51 , 147, 0  , 0  , 0  ), // #386 {xmm, xmm|m128|mem, <xmm0>}
+  ROW(2, 1, 1, 0, 111, 148, 0  , 0  , 0  , 0  ), // #387 {bnd, mib}
+  ROW(2, 1, 1, 0, 111, 113, 0  , 0  , 0  , 0  ), // #388 {bnd, mem}
+  ROW(2, 1, 1, 0, 148, 111, 0  , 0  , 0  , 0  ), // #389 {mib, bnd}
+  ROW(1, 1, 1, 0, 149, 0  , 0  , 0  , 0  , 0  ), // #390 {r16|r32|r64}
+  ROW(1, 1, 1, 1, 33 , 0  , 0  , 0  , 0  , 0  ), // #391 {<ax>}
+  ROW(2, 1, 1, 2, 35 , 36 , 0  , 0  , 0  , 0  ), // #392 {<edx>, <eax>}
+  ROW(1, 1, 1, 0, 150, 0  , 0  , 0  , 0  , 0  ), // #393 {mem|m8|m16|m32|m48|m64|m80|m128|m256|m512|m1024}
+  ROW(1, 1, 1, 0, 30 , 0  , 0  , 0  , 0  , 0  ), // #394 {m64|mem}
+  ROW(0, 0, 1, 0, 0  , 0  , 0  , 0  , 0  , 0  ), // #395 {}
+  ROW(1, 1, 1, 1, 151, 0  , 0  , 0  , 0  , 0  ), // #396 {<ds:[mem|m512|memBase|zax]>}
+  ROW(3, 1, 1, 0, 50 , 65 , 10 , 0  , 0  , 0  ), // #397 {xmm, xmm|m64|mem, i8|u8}
+  ROW(3, 1, 1, 0, 50 , 107, 10 , 0  , 0  , 0  ), // #398 {xmm, xmm|m32|mem, i8|u8}
+  ROW(5, 0, 1, 4, 52 , 37 , 38 , 152, 153, 0  ), // #399 {m128|mem, <rdx>, <rax>, <rcx>, <rbx>}
+  ROW(5, 1, 1, 4, 30 , 35 , 36 , 121, 154, 0  ), // #400 {m64|mem, <edx>, <eax>, <ecx>, <ebx>}
+  ROW(4, 1, 1, 4, 36 , 154, 121, 35 , 0  , 0  ), // #401 {<eax>, <ebx>, <ecx>, <edx>}
+  ROW(2, 0, 1, 2, 37 , 38 , 0  , 0  , 0  , 0  ), // #402 {<rdx>, <rax>}
+  ROW(2, 1, 1, 0, 62 , 51 , 0  , 0  , 0  , 0  ), // #403 {mm, xmm|m128|mem}
+  ROW(2, 1, 1, 0, 50 , 141, 0  , 0  , 0  , 0  ), // #404 {xmm, mm|m64|mem}
+  ROW(2, 1, 1, 0, 62 , 65 , 0  , 0  , 0  , 0  ), // #405 {mm, xmm|m64|mem}
+  ROW(2, 1, 1, 0, 139, 65 , 0  , 0  , 0  , 0  ), // #406 {r32|r64, xmm|m64|mem}
+  ROW(2, 1, 1, 0, 50 , 155, 0  , 0  , 0  , 0  ), // #407 {xmm, r32|m32|mem|r64|m64}
+  ROW(2, 1, 1, 0, 139, 107, 0  , 0  , 0  , 0  ), // #408 {r32|r64, xmm|m32|mem}
+  ROW(2, 1, 1, 2, 34 , 33 , 0  , 0  , 0  , 0  ), // #409 {<dx>, <ax>}
+  ROW(1, 1, 1, 1, 36 , 0  , 0  , 0  , 0  , 0  ), // #410 {<eax>}
+  ROW(2, 1, 1, 0, 12 , 10 , 0  , 0  , 0  , 0  ), // #411 {i16|u16, i8|u8}
+  ROW(3, 1, 1, 0, 28 , 50 , 10 , 0  , 0  , 0  ), // #412 {r32|m32|mem, xmm, i8|u8}
+  ROW(1, 1, 1, 0, 102, 0  , 0  , 0  , 0  , 0  ), // #413 {m80|mem}
+  ROW(1, 1, 1, 0, 156, 0  , 0  , 0  , 0  , 0  ), // #414 {m16|m32}
+  ROW(1, 1, 1, 0, 157, 0  , 0  , 0  , 0  , 0  ), // #415 {m16|m32|m64}
+  ROW(1, 1, 1, 0, 158, 0  , 0  , 0  , 0  , 0  ), // #416 {m32|m64|m80|st}
+  ROW(1, 1, 1, 0, 21 , 0  , 0  , 0  , 0  , 0  ), // #417 {m16|mem}
+  ROW(1, 1, 1, 0, 113, 0  , 0  , 0  , 0  , 0  ), // #418 {mem}
+  ROW(1, 1, 1, 0, 159, 0  , 0  , 0  , 0  , 0  ), // #419 {ax|m16|mem}
+  ROW(1, 0, 1, 0, 113, 0  , 0  , 0  , 0  , 0  ), // #420 {mem}
+  ROW(2, 1, 1, 1, 10 , 36 , 0  , 0  , 0  , 0  ), // #421 {i8|u8, <eax>}
+  ROW(2, 1, 1, 0, 160, 161, 0  , 0  , 0  , 0  ), // #422 {al|ax|eax, i8|u8|dx}
+  ROW(1, 1, 1, 0, 6  , 0  , 0  , 0  , 0  , 0  ), // #423 {r32}
+  ROW(2, 1, 1, 0, 162, 163, 0  , 0  , 0  , 0  ), // #424 {es:[m8|memBase|zdi|m16|m32], dx}
+  ROW(1, 1, 1, 0, 10 , 0  , 0  , 0  , 0  , 0  ), // #425 {i8|u8}
+  ROW(0, 1, 0, 0, 0  , 0  , 0  , 0  , 0  , 0  ), // #426 {}
+  ROW(3, 1, 1, 0, 106, 106, 106, 0  , 0  , 0  ), // #427 {k, k, k}
+  ROW(2, 1, 1, 0, 106, 106, 0  , 0  , 0  , 0  ), // #428 {k, k}
+  ROW(3, 1, 1, 0, 106, 106, 10 , 0  , 0  , 0  ), // #429 {k, k, i8|u8}
+  ROW(1, 1, 1, 1, 164, 0  , 0  , 0  , 0  , 0  ), // #430 {<ah>}
+  ROW(1, 1, 1, 0, 29 , 0  , 0  , 0  , 0  , 0  ), // #431 {m32|mem}
+  ROW(1, 0, 1, 0, 58 , 0  , 0  , 0  , 0  , 0  ), // #432 {m512|mem}
+  ROW(2, 1, 1, 0, 149, 150, 0  , 0  , 0  , 0  ), // #433 {r16|r32|r64, mem|m8|m16|m32|m48|m64|m80|m128|m256|m512|m1024}
+  ROW(1, 1, 1, 0, 27 , 0  , 0  , 0  , 0  , 0  ), // #434 {r16|m16|mem}
+  ROW(1, 1, 1, 0, 139, 0  , 0  , 0  , 0  , 0  ), // #435 {r32|r64}
+  ROW(3, 1, 1, 0, 139, 28 , 14 , 0  , 0  , 0  ), // #436 {r32|r64, r32|m32|mem, i32|u32}
+  ROW(3, 1, 1, 1, 50 , 50 , 165, 0  , 0  , 0  ), // #437 {xmm, xmm, <ds:[mem|m128|memBase|zdi]>}
+  ROW(3, 1, 1, 1, 62 , 62 , 166, 0  , 0  , 0  ), // #438 {mm, mm, <ds:[mem|m64|memBase|zdi]>}
+  ROW(3, 1, 1, 3, 167, 121, 35 , 0  , 0  , 0  ), // #439 {<ds:[mem|memBase|zax]>, <ecx>, <edx>}
+  ROW(2, 1, 1, 0, 119, 58 , 0  , 0  , 0  , 0  ), // #440 {es:[mem|m512|memBase], m512|mem}
+  ROW(2, 1, 1, 0, 62 , 50 , 0  , 0  , 0  , 0  ), // #441 {mm, xmm}
+  ROW(2, 1, 1, 0, 6  , 50 , 0  , 0  , 0  , 0  ), // #442 {r32, xmm}
+  ROW(2, 1, 1, 0, 30 , 62 , 0  , 0  , 0  , 0  ), // #443 {m64|mem, mm}
+  ROW(2, 1, 1, 0, 50 , 62 , 0  , 0  , 0  , 0  ), // #444 {xmm, mm}
+  ROW(2, 1, 1, 2, 36 , 121, 0  , 0  , 0  , 0  ), // #445 {<eax>, <ecx>}
+  ROW(3, 1, 1, 3, 36 , 121, 154, 0  , 0  , 0  ), // #446 {<eax>, <ecx>, <ebx>}
+  ROW(2, 1, 1, 0, 168, 160, 0  , 0  , 0  , 0  ), // #447 {u8|dx, al|ax|eax}
+  ROW(2, 1, 1, 0, 163, 169, 0  , 0  , 0  , 0  ), // #448 {dx, ds:[m8|memBase|zsi|m16|m32]}
+  ROW(6, 1, 1, 3, 50 , 51 , 10 , 121, 36 , 35 ), // #449 {xmm, xmm|m128|mem, i8|u8, <ecx>, <eax>, <edx>}
+  ROW(6, 1, 1, 3, 50 , 51 , 10 , 147, 36 , 35 ), // #450 {xmm, xmm|m128|mem, i8|u8, <xmm0>, <eax>, <edx>}
+  ROW(4, 1, 1, 1, 50 , 51 , 10 , 121, 0  , 0  ), // #451 {xmm, xmm|m128|mem, i8|u8, <ecx>}
+  ROW(4, 1, 1, 1, 50 , 51 , 10 , 147, 0  , 0  ), // #452 {xmm, xmm|m128|mem, i8|u8, <xmm0>}
+  ROW(3, 1, 1, 0, 131, 50 , 10 , 0  , 0  , 0  ), // #453 {r32|m8|mem, xmm, i8|u8}
+  ROW(3, 0, 1, 0, 15 , 50 , 10 , 0  , 0  , 0  ), // #454 {r64|m64|mem, xmm, i8|u8}
+  ROW(3, 1, 1, 0, 50 , 131, 10 , 0  , 0  , 0  ), // #455 {xmm, r32|m8|mem, i8|u8}
+  ROW(3, 1, 1, 0, 50 , 28 , 10 , 0  , 0  , 0  ), // #456 {xmm, r32|m32|mem, i8|u8}
+  ROW(3, 0, 1, 0, 50 , 15 , 10 , 0  , 0  , 0  ), // #457 {xmm, r64|m64|mem, i8|u8}
+  ROW(3, 1, 1, 0, 64 , 135, 10 , 0  , 0  , 0  ), // #458 {mm|xmm, r32|m16|mem, i8|u8}
+  ROW(2, 1, 1, 0, 6  , 64 , 0  , 0  , 0  , 0  ), // #459 {r32, mm|xmm}
+  ROW(2, 1, 1, 0, 50 , 10 , 0  , 0  , 0  , 0  ), // #460 {xmm, i8|u8}
+  ROW(1, 1, 1, 0, 155, 0  , 0  , 0  , 0  , 0  ), // #461 {r32|m32|mem|r64|m64}
+  ROW(2, 1, 1, 0, 31 , 103, 0  , 0  , 0  , 0  ), // #462 {r8lo|r8hi|m8|r16|m16|r32|m32|r64|m64|mem, cl|i8|u8}
+  ROW(1, 0, 1, 0, 139, 0  , 0  , 0  , 0  , 0  ), // #463 {r32|r64}
+  ROW(3, 1, 1, 3, 35 , 36 , 121, 0  , 0  , 0  ), // #464 {<edx>, <eax>, <ecx>}
+  ROW(1, 1, 1, 0, 1  , 0  , 0  , 0  , 0  , 0  ), // #465 {r8lo|r8hi|m8|mem}
+  ROW(1, 1, 1, 0, 170, 0  , 0  , 0  , 0  , 0  ), // #466 {r16|m16|mem|r32|r64}
+  ROW(3, 0, 1, 0, 171, 171, 171, 0  , 0  , 0  ), // #467 {tmm, tmm, tmm}
+  ROW(2, 0, 1, 0, 171, 172, 0  , 0  , 0  , 0  ), // #468 {tmm, tmem}
+  ROW(2, 0, 1, 0, 172, 171, 0  , 0  , 0  , 0  ), // #469 {tmem, tmm}
+  ROW(1, 0, 1, 0, 171, 0  , 0  , 0  , 0  , 0  ), // #470 {tmm}
+  ROW(3, 1, 1, 2, 6  , 35 , 36 , 0  , 0  , 0  ), // #471 {r32, <edx>, <eax>}
+  ROW(1, 1, 1, 0, 173, 0  , 0  , 0  , 0  , 0  ), // #472 {ds:[mem|memBase]}
+  ROW(6, 1, 1, 0, 56 , 56 , 56 , 56 , 56 , 52 ), // #473 {zmm, zmm, zmm, zmm, zmm, m128|mem}
+  ROW(6, 1, 1, 0, 50 , 50 , 50 , 50 , 50 , 52 ), // #474 {xmm, xmm, xmm, xmm, xmm, m128|mem}
+  ROW(3, 1, 1, 0, 50 , 50 , 65 , 0  , 0  , 0  ), // #475 {xmm, xmm, xmm|m64|mem}
+  ROW(3, 1, 1, 0, 50 , 50 , 110, 0  , 0  , 0  ), // #476 {xmm, xmm, xmm|m16|mem}
+  ROW(3, 1, 1, 0, 50 , 50 , 107, 0  , 0  , 0  ), // #477 {xmm, xmm, xmm|m32|mem}
+  ROW(2, 1, 1, 0, 53 , 52 , 0  , 0  , 0  , 0  ), // #478 {ymm, m128|mem}
+  ROW(2, 1, 1, 0, 174, 65 , 0  , 0  , 0  , 0  ), // #479 {ymm|zmm, xmm|m64|mem}
+  ROW(2, 1, 1, 0, 174, 52 , 0  , 0  , 0  , 0  ), // #480 {ymm|zmm, m128|mem}
+  ROW(2, 1, 1, 0, 56 , 55 , 0  , 0  , 0  , 0  ), // #481 {zmm, m256|mem}
+  ROW(2, 1, 1, 0, 175, 65 , 0  , 0  , 0  , 0  ), // #482 {xmm|ymm|zmm, xmm|m64|mem}
+  ROW(2, 1, 1, 0, 175, 107, 0  , 0  , 0  , 0  ), // #483 {xmm|ymm|zmm, m32|mem|xmm}
+  ROW(4, 1, 1, 0, 104, 50 , 65 , 10 , 0  , 0  ), // #484 {xmm|k, xmm, xmm|m64|mem, i8|u8}
+  ROW(4, 1, 1, 0, 106, 50 , 110, 10 , 0  , 0  ), // #485 {k, xmm, xmm|m16|mem, i8|u8}
+  ROW(4, 1, 1, 0, 104, 50 , 107, 10 , 0  , 0  ), // #486 {xmm|k, xmm, xmm|m32|mem, i8|u8}
+  ROW(2, 1, 1, 0, 50 , 176, 0  , 0  , 0  , 0  ), // #487 {xmm, xmm|m128|ymm|m256|zmm|m512}
+  ROW(2, 1, 1, 0, 139, 110, 0  , 0  , 0  , 0  ), // #488 {r32|r64, xmm|m16|mem}
+  ROW(3, 1, 1, 0, 50 , 50 , 155, 0  , 0  , 0  ), // #489 {xmm, xmm, r32|m32|mem|r64|m64}
+  ROW(3, 1, 1, 0, 51 , 174, 10 , 0  , 0  , 0  ), // #490 {xmm|m128|mem, ymm|zmm, i8|u8}
+  ROW(4, 1, 1, 0, 50 , 50 , 65 , 10 , 0  , 0  ), // #491 {xmm, xmm, xmm|m64|mem, i8|u8}
+  ROW(4, 1, 1, 0, 50 , 50 , 107, 10 , 0  , 0  ), // #492 {xmm, xmm, xmm|m32|mem, i8|u8}
+  ROW(3, 1, 1, 0, 106, 176, 10 , 0  , 0  , 0  ), // #493 {k, xmm|m128|ymm|m256|zmm|m512, i8|u8}
+  ROW(3, 1, 1, 0, 106, 65 , 10 , 0  , 0  , 0  ), // #494 {k, xmm|m64|mem, i8|u8}
+  ROW(3, 1, 1, 0, 106, 110, 10 , 0  , 0  , 0  ), // #495 {k, xmm|m16|mem, i8|u8}
+  ROW(3, 1, 1, 0, 106, 107, 10 , 0  , 0  , 0  ), // #496 {k, xmm|m32|mem, i8|u8}
+  ROW(1, 1, 1, 0, 68 , 0  , 0  , 0  , 0  , 0  ), // #497 {vm32y}
+  ROW(1, 1, 1, 0, 69 , 0  , 0  , 0  , 0  , 0  ), // #498 {vm32z}
+  ROW(1, 1, 1, 0, 72 , 0  , 0  , 0  , 0  , 0  ), // #499 {vm64z}
+  ROW(4, 1, 1, 0, 50 , 50 , 110, 10 , 0  , 0  ), // #500 {xmm, xmm, xmm|m16|mem, i8|u8}
+  ROW(4, 1, 1, 0, 56 , 56 , 54 , 10 , 0  , 0  ), // #501 {zmm, zmm, ymm|m256|mem, i8|u8}
+  ROW(2, 1, 1, 0, 6  , 96 , 0  , 0  , 0  , 0  ), // #502 {r32, xmm|ymm}
+  ROW(2, 1, 1, 0, 175, 177, 0  , 0  , 0  , 0  ), // #503 {xmm|ymm|zmm, xmm|m8|mem|r32}
+  ROW(2, 1, 1, 0, 175, 178, 0  , 0  , 0  , 0  ), // #504 {xmm|ymm|zmm, xmm|m32|mem|r32}
+  ROW(2, 1, 1, 0, 175, 106, 0  , 0  , 0  , 0  ), // #505 {xmm|ymm|zmm, k}
+  ROW(2, 1, 1, 0, 175, 179, 0  , 0  , 0  , 0  ), // #506 {xmm|ymm|zmm, xmm|m16|mem|r32}
+  ROW(3, 1, 1, 0, 135, 50 , 10 , 0  , 0  , 0  ), // #507 {r32|m16|mem, xmm, i8|u8}
+  ROW(4, 1, 1, 0, 50 , 50 , 131, 10 , 0  , 0  ), // #508 {xmm, xmm, r32|m8|mem, i8|u8}
+  ROW(4, 1, 1, 0, 50 , 50 , 28 , 10 , 0  , 0  ), // #509 {xmm, xmm, r32|m32|mem, i8|u8}
+  ROW(4, 0, 1, 0, 50 , 50 , 15 , 10 , 0  , 0  ), // #510 {xmm, xmm, r64|m64|mem, i8|u8}
+  ROW(4, 1, 1, 0, 50 , 50 , 135, 10 , 0  , 0  ), // #511 {xmm, xmm, r32|m16|mem, i8|u8}
+  ROW(2, 1, 1, 0, 106, 175, 0  , 0  , 0  , 0  ), // #512 {k, xmm|ymm|zmm}
+  ROW(1, 1, 1, 0, 124, 0  , 0  , 0  , 0  , 0  ), // #513 {rel16|rel32}
+  ROW(3, 1, 1, 2, 113, 35 , 36 , 0  , 0  , 0  ), // #514 {mem, <edx>, <eax>}
+  ROW(3, 0, 1, 2, 113, 35 , 36 , 0  , 0  , 0  )  // #515 {mem, <edx>, <eax>}
+};
+#undef ROW
+
+#define ROW(opFlags, regId) { opFlags, uint8_t(regId) }
+#define F(VAL) uint64_t(InstDB::OpFlags::k##VAL)
+const InstDB::OpSignature InstDB::_opSignatureTable[] = {
+  ROW(0, 0xFF),
+  ROW(F(RegGpbLo) | F(RegGpbHi) | F(MemUnspecified) | F(Mem8), 0x00),
+  ROW(F(RegGpbLo) | F(RegGpbHi), 0x00),
+  ROW(F(RegGpw) | F(RegSReg) | F(MemUnspecified) | F(Mem16), 0x00),
+  ROW(F(RegGpw), 0x00),
+  ROW(F(RegGpd) | F(RegSReg) | F(MemUnspecified) | F(Mem32), 0x00),
+  ROW(F(RegGpd), 0x00),
+  ROW(F(RegGpq) | F(RegSReg) | F(RegCReg) | F(RegDReg) | F(MemUnspecified) | F(Mem64), 0x00),
+  ROW(F(RegGpq), 0x00),
+  ROW(F(RegGpbLo) | F(RegGpbHi) | F(Mem8), 0x00),
+  ROW(F(ImmI8) | F(ImmU8), 0x00),
+  ROW(F(RegGpw) | F(Mem16), 0x00),
+  ROW(F(ImmI16) | F(ImmU16), 0x00),
+  ROW(F(RegGpd) | F(Mem32), 0x00),
+  ROW(F(ImmI32) | F(ImmU32), 0x00),
+  ROW(F(RegGpq) | F(MemUnspecified) | F(Mem64), 0x00),
+  ROW(F(ImmI32), 0x00),
+  ROW(F(RegSReg) | F(RegCReg) | F(RegDReg) | F(MemUnspecified) | F(Mem64) | F(ImmI64) | F(ImmU64), 0x00),
+  ROW(F(MemUnspecified) | F(Mem8), 0x00),
+  ROW(F(RegSReg) | F(MemUnspecified) | F(Mem16), 0x00),
+  ROW(F(RegSReg) | F(MemUnspecified) | F(Mem32), 0x00),
+  ROW(F(MemUnspecified) | F(Mem16), 0x00),
+  ROW(F(RegSReg), 0x00),
+  ROW(F(RegCReg) | F(RegDReg), 0x00),
+  ROW(F(RegGpq) | F(ImmI32), 0x00),
+  ROW(F(RegGpw) | F(RegGpd) | F(RegGpq) | F(MemUnspecified) | F(Mem16) | F(Mem32) | F(Mem64), 0x00),
+  ROW(F(ImmI8), 0x00),
+  ROW(F(RegGpw) | F(MemUnspecified) | F(Mem16), 0x00),
+  ROW(F(RegGpd) | F(MemUnspecified) | F(Mem32), 0x00),
+  ROW(F(MemUnspecified) | F(Mem32), 0x00),
+  ROW(F(MemUnspecified) | F(Mem64), 0x00),
+  ROW(F(RegGpbLo) | F(RegGpbHi) | F(RegGpw) | F(RegGpd) | F(RegGpq) | F(MemUnspecified) | F(Mem8) | F(Mem16) | F(Mem32) | F(Mem64), 0x00),
+  ROW(F(RegGpq) | F(MemUnspecified) | F(Mem64) | F(ImmI32) | F(ImmU32), 0x00),
+  ROW(F(RegGpw) | F(FlagImplicit), 0x01),
+  ROW(F(RegGpw) | F(FlagImplicit), 0x04),
+  ROW(F(RegGpd) | F(FlagImplicit), 0x04),
+  ROW(F(RegGpd) | F(FlagImplicit), 0x01),
+  ROW(F(RegGpq) | F(FlagImplicit), 0x04),
+  ROW(F(RegGpq) | F(FlagImplicit), 0x01),
+  ROW(F(RegGpw) | F(MemUnspecified) | F(Mem16) | F(ImmI8) | F(ImmI16), 0x00),
+  ROW(F(RegGpd) | F(MemUnspecified) | F(Mem32) | F(ImmI8) | F(ImmI32), 0x00),
+  ROW(F(RegGpq) | F(MemUnspecified) | F(Mem64) | F(ImmI8) | F(ImmI32), 0x00),
+  ROW(F(ImmI8) | F(ImmI16) | F(ImmU16), 0x00),
+  ROW(F(ImmI8) | F(ImmI32) | F(ImmU32), 0x00),
+  ROW(F(ImmI8) | F(ImmI32), 0x00),
+  ROW(F(ImmI64) | F(ImmU64), 0x00),
+  ROW(F(RegGpbLo), 0x01),
+  ROW(F(RegGpw), 0x01),
+  ROW(F(RegGpd), 0x01),
+  ROW(F(RegGpq), 0x01),
+  ROW(F(RegXmm), 0x00),
+  ROW(F(RegXmm) | F(MemUnspecified) | F(Mem128), 0x00),
+  ROW(F(MemUnspecified) | F(Mem128), 0x00),
+  ROW(F(RegYmm), 0x00),
+  ROW(F(RegYmm) | F(MemUnspecified) | F(Mem256), 0x00),
+  ROW(F(MemUnspecified) | F(Mem256), 0x00),
+  ROW(F(RegZmm), 0x00),
+  ROW(F(RegZmm) | F(MemUnspecified) | F(Mem512), 0x00),
+  ROW(F(MemUnspecified) | F(Mem512), 0x00),
+  ROW(F(RegXmm) | F(MemUnspecified) | F(Mem128) | F(ImmI8) | F(ImmU8), 0x00),
+  ROW(F(RegYmm) | F(MemUnspecified) | F(Mem256) | F(ImmI8) | F(ImmU8), 0x00),
+  ROW(F(RegZmm) | F(MemUnspecified) | F(Mem512) | F(ImmI8) | F(ImmU8), 0x00),
+  ROW(F(RegMm), 0x00),
+  ROW(F(RegGpq) | F(RegMm) | F(MemUnspecified) | F(Mem64), 0x00),
+  ROW(F(RegXmm) | F(RegMm), 0x00),
+  ROW(F(RegXmm) | F(MemUnspecified) | F(Mem64), 0x00),
+  ROW(F(RegGpw) | F(RegGpd) | F(RegGpq) | F(Mem16) | F(Mem32) | F(Mem64), 0x00),
+  ROW(F(Vm32x), 0x00),
+  ROW(F(Vm32y), 0x00),
+  ROW(F(Vm32z), 0x00),
+  ROW(F(Vm64x), 0x00),
+  ROW(F(Vm64y), 0x00),
+  ROW(F(Vm64z), 0x00),
+  ROW(F(Mem8) | F(FlagMemBase) | F(FlagMemDs) | F(FlagImplicit), 0x40),
+  ROW(F(Mem8) | F(FlagMemBase) | F(FlagMemEs) | F(FlagImplicit), 0x80),
+  ROW(F(Mem16) | F(FlagMemBase) | F(FlagMemDs) | F(FlagImplicit), 0x40),
+  ROW(F(Mem16) | F(FlagMemBase) | F(FlagMemEs) | F(FlagImplicit), 0x80),
+  ROW(F(Mem32) | F(FlagMemBase) | F(FlagMemDs) | F(FlagImplicit), 0x40),
+  ROW(F(Mem32) | F(FlagMemBase) | F(FlagMemEs) | F(FlagImplicit), 0x80),
+  ROW(F(Mem64) | F(FlagMemBase) | F(FlagMemDs) | F(FlagImplicit), 0x40),
+  ROW(F(Mem64) | F(FlagMemBase) | F(FlagMemEs) | F(FlagImplicit), 0x80),
+  ROW(F(RegGpbLo) | F(FlagImplicit), 0x01),
+  ROW(F(MemUnspecified) | F(Mem8) | F(FlagMemBase) | F(FlagMemDs) | F(FlagImplicit), 0x40),
+  ROW(F(MemUnspecified) | F(Mem16) | F(FlagMemBase) | F(FlagMemDs) | F(FlagImplicit), 0x40),
+  ROW(F(MemUnspecified) | F(Mem32) | F(FlagMemBase) | F(FlagMemDs) | F(FlagImplicit), 0x40),
+  ROW(F(MemUnspecified) | F(Mem64) | F(FlagMemBase) | F(FlagMemDs) | F(FlagImplicit), 0x40),
+  ROW(F(RegGpw) | F(RegGpq) | F(Mem16) | F(Mem64), 0x00),
+  ROW(F(RegSReg), 0x1A),
+  ROW(F(RegSReg), 0x60),
+  ROW(F(RegGpw) | F(RegGpq) | F(Mem16) | F(Mem64) | F(ImmI8) | F(ImmI16) | F(ImmI32), 0x00),
+  ROW(F(RegGpd) | F(Mem32) | F(ImmI32) | F(ImmU32), 0x00),
+  ROW(F(RegSReg), 0x1E),
+  ROW(F(MemUnspecified) | F(Mem8) | F(FlagMemBase) | F(FlagMemEs) | F(FlagImplicit), 0x80),
+  ROW(F(MemUnspecified) | F(Mem16) | F(FlagMemBase) | F(FlagMemEs) | F(FlagImplicit), 0x80),
+  ROW(F(MemUnspecified) | F(Mem32) | F(FlagMemBase) | F(FlagMemEs) | F(FlagImplicit), 0x80),
+  ROW(F(MemUnspecified) | F(Mem64) | F(FlagMemBase) | F(FlagMemEs) | F(FlagImplicit), 0x80),
+  ROW(F(RegXmm) | F(RegYmm), 0x00),
+  ROW(F(ImmI4) | F(ImmU4), 0x00),
+  ROW(F(Mem32) | F(Mem64), 0x00),
+  ROW(F(RegSt), 0x01),
+  ROW(F(RegSt), 0x00),
+  ROW(F(MemUnspecified) | F(Mem48), 0x00),
+  ROW(F(MemUnspecified) | F(Mem80), 0x00),
+  ROW(F(RegGpbLo) | F(ImmI8) | F(ImmU8), 0x02),
+  ROW(F(RegXmm) | F(RegKReg), 0x00),
+  ROW(F(RegYmm) | F(RegKReg), 0x00),
+  ROW(F(RegKReg), 0x00),
+  ROW(F(RegXmm) | F(MemUnspecified) | F(Mem32), 0x00),
+  ROW(F(Vm64x) | F(Vm64y), 0x00),
+  ROW(F(RegGpq) | F(RegXmm) | F(MemUnspecified) | F(Mem64), 0x00),
+  ROW(F(RegXmm) | F(MemUnspecified) | F(Mem16), 0x00),
+  ROW(F(RegBnd), 0x00),
+  ROW(F(RegBnd) | F(MemUnspecified), 0x00),
+  ROW(F(MemUnspecified), 0x00),
+  ROW(F(RegGpw) | F(RegGpd) | F(Mem16) | F(Mem32) | F(ImmI32) | F(ImmI64) | F(Rel32), 0x00),
+  ROW(F(RegGpq) | F(MemUnspecified) | F(Mem64) | F(ImmI32) | F(ImmI64) | F(Rel32), 0x00),
+  ROW(F(RegGpbLo) | F(RegGpbHi) | F(RegGpw) | F(RegGpd) | F(Mem8) | F(Mem16) | F(Mem32), 0x00),
+  ROW(F(RegGpbLo) | F(RegGpbHi) | F(RegGpq) | F(Mem8) | F(Mem64), 0x00),
+  ROW(F(RegGpw) | F(RegGpd), 0x00),
+  ROW(F(MemUnspecified) | F(Mem512) | F(FlagMemBase) | F(FlagMemEs), 0x00),
+  ROW(F(RegSt) | F(Mem32) | F(Mem64), 0x00),
+  ROW(F(RegGpd) | F(FlagImplicit), 0x02),
+  ROW(F(RegGpd) | F(RegGpq) | F(FlagImplicit), 0x01),
+  ROW(F(ImmI32) | F(ImmI64) | F(Rel8) | F(Rel32), 0x00),
+  ROW(F(ImmI32) | F(ImmI64) | F(Rel32), 0x00),
+  ROW(F(RegGpw) | F(RegGpd) | F(FlagImplicit), 0x02),
+  ROW(F(ImmI32) | F(ImmI64) | F(Rel8), 0x00),
+  ROW(F(RegGpd) | F(RegGpq) | F(FlagImplicit), 0x02),
+  ROW(F(RegGpq) | F(MemUnspecified) | F(Mem64) | F(ImmI32) | F(ImmI64) | F(Rel8) | F(Rel32), 0x00),
+  ROW(F(RegGpd) | F(MemUnspecified) | F(Mem32) | F(ImmI32) | F(ImmI64) | F(Rel32), 0x00),
+  ROW(F(RegGpd) | F(RegKReg) | F(MemUnspecified) | F(Mem8), 0x00),
+  ROW(F(RegGpd) | F(MemUnspecified) | F(Mem8), 0x00),
+  ROW(F(RegGpd) | F(RegKReg) | F(MemUnspecified) | F(Mem32), 0x00),
+  ROW(F(RegGpq) | F(RegKReg) | F(MemUnspecified) | F(Mem64), 0x00),
+  ROW(F(RegGpd) | F(RegKReg) | F(MemUnspecified) | F(Mem16), 0x00),
+  ROW(F(RegGpd) | F(MemUnspecified) | F(Mem16), 0x00),
+  ROW(F(ImmI16), 0x00),
+  ROW(F(ImmI16) | F(ImmI32), 0x00),
+  ROW(F(MemUnspecified) | F(Mem32) | F(Mem48) | F(Mem80), 0x00),
+  ROW(F(RegGpd) | F(RegGpq), 0x00),
+  ROW(F(RegGpbLo) | F(RegGpbHi) | F(RegGpw) | F(Mem8) | F(Mem16), 0x00),
+  ROW(F(RegMm) | F(MemUnspecified) | F(Mem64), 0x00),
+  ROW(F(RegMm) | F(MemUnspecified) | F(Mem64) | F(ImmI8) | F(ImmU8), 0x00),
+  ROW(F(RegMm) | F(MemUnspecified) | F(Mem32), 0x00),
+  ROW(F(ImmU16), 0x00),
+  ROW(F(RegXmm) | F(RegYmm) | F(Mem128) | F(Mem256), 0x00),
+  ROW(F(RegXmm) | F(ImmI8) | F(ImmU8), 0x00),
+  ROW(F(RegXmm) | F(FlagImplicit), 0x01),
+  ROW(F(MemUnspecified) | F(FlagMib), 0x00),
+  ROW(F(RegGpw) | F(RegGpd) | F(RegGpq), 0x00),
+  ROW(F(MemUnspecified) | F(Mem8) | F(Mem16) | F(Mem32) | F(Mem48) | F(Mem64) | F(Mem80) | F(Mem128) | F(Mem256) | F(Mem512) | F(Mem1024), 0x00),
+  ROW(F(MemUnspecified) | F(Mem512) | F(FlagMemBase) | F(FlagMemDs) | F(FlagImplicit), 0x01),
+  ROW(F(RegGpq) | F(FlagImplicit), 0x02),
+  ROW(F(RegGpq) | F(FlagImplicit), 0x08),
+  ROW(F(RegGpd) | F(FlagImplicit), 0x08),
+  ROW(F(RegGpd) | F(RegGpq) | F(MemUnspecified) | F(Mem32) | F(Mem64), 0x00),
+  ROW(F(Mem16) | F(Mem32), 0x00),
+  ROW(F(Mem16) | F(Mem32) | F(Mem64), 0x00),
+  ROW(F(RegSt) | F(Mem32) | F(Mem64) | F(Mem80), 0x00),
+  ROW(F(RegGpw) | F(MemUnspecified) | F(Mem16), 0x01),
+  ROW(F(RegGpbLo) | F(RegGpw) | F(RegGpd), 0x01),
+  ROW(F(RegGpw) | F(ImmI8) | F(ImmU8), 0x04),
+  ROW(F(Mem8) | F(Mem16) | F(Mem32) | F(FlagMemBase) | F(FlagMemEs), 0x80),
+  ROW(F(RegGpw), 0x04),
+  ROW(F(RegGpbHi) | F(FlagImplicit), 0x01),
+  ROW(F(MemUnspecified) | F(Mem128) | F(FlagMemBase) | F(FlagMemDs) | F(FlagImplicit), 0x80),
+  ROW(F(MemUnspecified) | F(Mem64) | F(FlagMemBase) | F(FlagMemDs) | F(FlagImplicit), 0x80),
+  ROW(F(MemUnspecified) | F(FlagMemBase) | F(FlagMemDs) | F(FlagImplicit), 0x01),
+  ROW(F(RegGpw) | F(ImmU8), 0x04),
+  ROW(F(Mem8) | F(Mem16) | F(Mem32) | F(FlagMemBase) | F(FlagMemDs), 0x40),
+  ROW(F(RegGpw) | F(RegGpd) | F(RegGpq) | F(MemUnspecified) | F(Mem16), 0x00),
+  ROW(F(RegTmm), 0x00),
+  ROW(F(MemUnspecified) | F(FlagTMem), 0x00),
+  ROW(F(MemUnspecified) | F(FlagMemBase) | F(FlagMemDs), 0x00),
+  ROW(F(RegYmm) | F(RegZmm), 0x00),
+  ROW(F(RegXmm) | F(RegYmm) | F(RegZmm), 0x00),
+  ROW(F(RegXmm) | F(RegYmm) | F(RegZmm) | F(Mem128) | F(Mem256) | F(Mem512), 0x00),
+  ROW(F(RegGpd) | F(RegXmm) | F(MemUnspecified) | F(Mem8), 0x00),
+  ROW(F(RegGpd) | F(RegXmm) | F(MemUnspecified) | F(Mem32), 0x00),
+  ROW(F(RegGpd) | F(RegXmm) | F(MemUnspecified) | F(Mem16), 0x00)
+};
+#undef F
+#undef ROW
+// ----------------------------------------------------------------------------
+// ${InstSignatureTable:End}
+#endif // !ASMJIT_NO_VALIDATION
+
+// x86::InstInternal - QueryRWInfo
+// ===============================
+
+// ${InstRWInfoTable:Begin}
+// ------------------- Automatically generated, do not edit -------------------
+const uint8_t InstDB::rwInfoIndexA[Inst::_kIdCount] = {
+  0, 0, 1, 1, 0, 2, 3, 2, 4, 4, 5, 6, 4, 4, 3, 4, 4, 4, 4, 7, 0, 2, 0, 4, 4, 4,
+  4, 8, 0, 9, 9, 9, 9, 9, 0, 0, 0, 0, 9, 9, 9, 9, 9, 10, 10, 10, 11, 11, 12, 13,
+  14, 9, 9, 0, 15, 16, 16, 16, 0, 0, 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+  3, 3, 3, 3, 3, 3, 3, 18, 0, 0, 19, 0, 0, 0, 0, 0, 20, 21, 0, 22, 23, 24, 7, 25,
+  25, 25, 24, 26, 7, 24, 27, 28, 29, 30, 31, 32, 33, 25, 25, 7, 27, 28, 33, 34,
+  0, 0, 0, 0, 35, 4, 4, 5, 6, 0, 0, 0, 0, 0, 36, 36, 0, 0, 37, 0, 0, 38, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 38, 0, 38, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 38, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 38, 0, 38,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 0, 39, 4,
+  4, 35, 40, 41, 0, 0, 0, 42, 0, 37, 0, 0, 0, 0, 43, 0, 44, 43, 43, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 45, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 46, 47, 48, 49, 50, 51,
+  52, 53, 0, 0, 0, 54, 55, 56, 57, 0, 0, 0, 0, 0, 0, 0, 0, 0, 54, 55, 56, 57, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 0, 59, 0, 60, 0, 61, 0, 60, 0, 60, 0, 60,
+  0, 0, 0, 0, 0, 62, 63, 63, 63, 58, 60, 0, 0, 0, 9, 0, 0, 4, 4, 5, 6, 0, 0, 4,
+  4, 5, 6, 0, 0, 64, 65, 66, 66, 67, 47, 24, 36, 67, 52, 66, 66, 68, 69, 69, 70,
+  71, 71, 72, 72, 59, 59, 67, 59, 59, 71, 71, 73, 48, 52, 74, 75, 7, 7, 76, 77,
+  9, 66, 66, 77, 0, 35, 4, 4, 5, 6, 0, 78, 0, 0, 79, 0, 2, 4, 4, 80, 81, 9, 9,
+  9, 3, 3, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 0, 3, 82, 3, 0, 0, 0, 3, 3,
+  4, 3, 0, 0, 3, 3, 4, 3, 0, 0, 0, 0, 0, 0, 0, 0, 83, 27, 27, 82, 82, 82, 82, 82,
+  82, 82, 82, 82, 82, 27, 82, 82, 82, 27, 27, 82, 82, 82, 3, 3, 3, 84, 3, 3, 3,
+  27, 27, 0, 0, 0, 0, 3, 3, 4, 4, 3, 3, 4, 4, 4, 4, 3, 3, 4, 4, 85, 86, 87, 24,
+  24, 24, 86, 86, 87, 24, 24, 24, 86, 4, 3, 82, 3, 3, 4, 3, 3, 0, 0, 0, 9, 0,
+  0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 3, 3, 3, 3, 88, 3, 3, 0, 3, 3,
+  3, 88, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 27, 89, 0, 3, 3, 4, 3, 90, 90, 4, 90, 0,
+  0, 0, 0, 0, 0, 0, 3, 91, 7, 92, 91, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 93, 0, 0,
+  0, 0, 0, 91, 91, 0, 0, 0, 0, 0, 0, 7, 92, 0, 0, 91, 91, 0, 0, 2, 94, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 4, 4, 4, 0, 4, 4, 0, 91, 0, 0, 91, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 7, 7, 26, 92, 0, 0, 0, 0, 0, 0, 95, 0, 0, 0, 2, 4, 4, 5, 6, 0, 0, 0, 0, 0,
+  0, 0, 9, 0, 0, 0, 0, 0, 15, 0, 96, 96, 0, 97, 0, 0, 9, 9, 20, 21, 98, 98, 0,
+  0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 99, 28, 100, 101, 100, 101, 99, 28, 100, 101,
+  100, 101, 102, 103, 0, 0, 0, 0, 0, 0, 20, 104, 21, 105, 105, 106, 77, 9, 0, 77,
+  107, 108, 107, 9, 107, 9, 109, 110, 106, 109, 110, 109, 110, 9, 9, 9, 106,
+  0, 77, 106, 9, 106, 9, 108, 107, 0, 28, 0, 28, 0, 111, 0, 111, 0, 0, 0, 0, 0,
+  33, 33, 107, 9, 107, 9, 109, 110, 109, 110, 9, 9, 9, 106, 9, 106, 28, 28, 111,
+  111, 33, 33, 106, 77, 9, 9, 108, 107, 0, 0, 0, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 112, 112, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 27, 113, 60, 60,
+  0, 0, 0, 0, 0, 0, 0, 0, 60, 114, 9, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 115, 115, 47, 116, 115, 115, 115, 115, 115,
+  115, 115, 115, 0, 117, 117, 0, 71, 71, 118, 119, 67, 67, 67, 67, 120, 71, 121,
+  9, 9, 73, 115, 115, 49, 0, 0, 0, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 0,
+  0, 0, 0, 0, 0, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 33, 124, 124, 28, 125, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 105, 105, 105, 105, 0,
+  0, 0, 0, 0, 0, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 0, 0, 0, 0, 60, 60, 114, 60, 7, 7, 7,
+  0, 7, 0, 7, 7, 7, 7, 7, 7, 0, 7, 7, 84, 7, 0, 7, 0, 0, 7, 0, 0, 0, 0, 9, 9, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 126, 126, 127, 128, 124, 124, 124, 124, 85, 126, 129, 128,
+  127, 127, 128, 129, 128, 127, 128, 130, 131, 106, 106, 106, 130, 127, 128,
+  129, 128, 127, 128, 126, 128, 130, 131, 106, 106, 106, 130, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 67, 67,
+  132, 67, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 0, 0, 112, 112, 0, 0, 9, 9, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 0, 0, 112, 112, 0, 0, 9, 9, 0, 0, 0,
+  0, 0, 0, 0, 0, 67, 67, 0, 0, 0, 0, 0, 0, 0, 0, 67, 132, 0, 0, 0, 0, 0, 0, 9,
+  9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 122, 20, 104, 21, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 133, 134, 133, 134, 0, 135, 0, 136, 0, 0, 0, 2, 4, 4, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+const uint8_t InstDB::rwInfoIndexB[Inst::_kIdCount] = {
+  0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 3, 0, 0, 0,
+  0, 0, 4, 0, 0, 0, 0, 0, 5, 5, 6, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 7, 0, 0, 0, 0, 4, 8, 1, 0, 9, 0, 0, 0, 10, 10, 10, 0, 0, 11, 0, 0, 10, 12,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 5, 5, 0, 13, 14, 15, 16, 17, 0, 0, 18, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 1, 1, 20, 21, 0, 0,
+  0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 22, 23, 0, 0, 24, 25, 26, 27, 0, 0, 25, 25, 25,
+  25, 25, 25, 25, 25, 28, 29, 29, 28, 0, 0, 0, 24, 25, 24, 25, 0, 25, 24, 24, 24,
+  24, 24, 24, 24, 0, 0, 30, 30, 30, 24, 24, 28, 0, 31, 10, 0, 0, 0, 0, 0, 0, 24,
+  25, 0, 0, 0, 32, 33, 32, 34, 0, 0, 0, 0, 0, 10, 32, 0, 0, 0, 0, 35, 33, 32,
+  35, 34, 24, 25, 24, 25, 0, 29, 29, 29, 29, 0, 0, 0, 25, 10, 10, 32, 32, 0, 0,
+  0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0, 21, 36, 0, 20, 37, 38, 0, 39, 40, 0, 0, 0, 0,
+  0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41, 42, 43, 44, 41, 42, 41, 42, 43,
+  44, 43, 44, 0, 0, 0, 0, 0, 0, 0, 0, 41, 42, 43, 0, 0, 0, 0, 44, 45, 46, 47,
+  48, 45, 46, 47, 48, 0, 0, 0, 0, 49, 50, 51, 41, 42, 43, 44, 41, 42, 43, 44, 52,
+  0, 24, 0, 53, 0, 54, 0, 0, 0, 0, 0, 10, 0, 10, 24, 55, 56, 55, 0, 0, 0, 0,
+  0, 0, 55, 57, 57, 0, 58, 59, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60, 60, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 61, 0, 0, 61, 0, 0, 0, 0, 0, 5, 62, 0, 0, 0, 0, 63, 0, 64, 20, 65, 20, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 66, 0, 0, 0, 0, 0, 0,
+  6, 5, 5, 0, 0, 0, 0, 67, 68, 0, 0, 0, 0, 69, 70, 0, 3, 3, 71, 22, 72, 73, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 74, 39, 75, 76, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 77, 0, 0, 0, 0, 0, 0, 0, 10,
+  10, 10, 10, 10, 10, 10, 0, 0, 2, 2, 2, 78, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 65, 0, 0, 0, 0, 0, 0, 0, 0, 79, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 80, 80, 81, 80, 81, 81, 81, 80, 80, 82, 83, 0, 84,
+  0, 0, 0, 0, 0, 0, 85, 2, 2, 86, 87, 0, 0, 0, 11, 88, 0, 0, 4, 0, 0, 0, 89, 0,
+  90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90,
+  90, 90, 90, 90, 90, 90, 90, 90, 90, 0, 90, 0, 32, 0, 0, 0, 5, 0, 0, 6, 0, 91,
+  4, 0, 91, 4, 5, 5, 32, 19, 92, 80, 92, 0, 0, 0, 0, 0, 0, 0, 0, 0, 93, 0, 92,
+  94, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95, 95, 95, 95, 95, 0, 0, 0, 0,
+  0, 0, 96, 97, 0, 0, 0, 0, 0, 0, 0, 0, 56, 97, 0, 0, 0, 0, 98, 99, 98, 99, 3,
+  3, 3, 100, 101, 102, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 103,
+  103, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 104, 3, 105, 106, 107, 0,
+  0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  108, 0, 0, 0, 0, 0, 0, 0, 109, 0, 110, 0, 111, 0, 111, 0, 112, 113, 114, 115,
+  116, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 112, 113, 114, 0, 0, 3, 3, 3, 3, 100, 111, 102, 3, 117, 3, 55, 55,
+  0, 0, 0, 0, 118, 119, 120, 119, 120, 118, 119, 120, 119, 120, 22, 121, 122, 121,
+  122, 121, 121, 123, 124, 121, 121, 121, 125, 126, 127, 121, 121, 121, 125,
+  126, 127, 121, 121, 121, 125, 126, 127, 121, 122, 128, 128, 129, 130, 121, 121,
+  121, 121, 121, 121, 121, 121, 121, 128, 128, 121, 121, 121, 125, 131, 127, 121,
+  121, 121, 125, 131, 127, 121, 121, 121, 125, 131, 127, 121, 121, 121, 121,
+  121, 121, 121, 121, 121, 128, 128, 128, 128, 129, 130, 121, 122, 121, 121, 121,
+  125, 126, 127, 121, 121, 121, 125, 126, 127, 121, 121, 121, 125, 126, 127,
+  128, 128, 129, 130, 121, 121, 121, 125, 131, 127, 121, 121, 121, 125, 131, 127,
+  121, 121, 121, 132, 131, 133, 128, 128, 129, 130, 134, 134, 134, 78, 135, 136,
+  0, 0, 0, 0, 137, 138, 10, 10, 10, 10, 10, 10, 10, 10, 138, 139, 0, 0, 0, 140,
+  141, 142, 85, 85, 85, 140, 141, 142, 3, 3, 3, 3, 3, 3, 3, 143, 144, 145, 144,
+  145, 143, 144, 145, 144, 145, 102, 0, 53, 58, 146, 146, 3, 3, 3, 100, 101,
+  102, 0, 147, 0, 3, 3, 3, 100, 101, 102, 0, 148, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 149, 150, 150, 151, 152, 152, 0, 0, 0, 0, 0, 0, 0, 153, 154, 0, 0, 155,
+  0, 0, 0, 3, 11, 147, 0, 0, 156, 148, 3, 3, 3, 100, 101, 102, 0, 11, 3, 3, 157,
+  157, 158, 158, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 103, 3, 0, 0, 0, 0, 0, 0, 3, 128, 104, 104, 3,
+  3, 3, 3, 67, 68, 3, 3, 3, 3, 69, 70, 104, 104, 104, 104, 104, 104, 117, 117, 0,
+  0, 0, 0, 117, 117, 117, 117, 117, 117, 0, 0, 121, 121, 121, 121, 159, 159, 3,
+  3, 3, 121, 3, 3, 121, 121, 128, 128, 160, 160, 160, 3, 160, 3, 121, 121, 121,
+  121, 121, 3, 0, 0, 0, 0, 71, 22, 72, 161, 138, 137, 139, 138, 0, 0, 0, 3, 0,
+  3, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 3, 0, 3, 3, 0, 162, 102, 100, 101, 0, 0,
+  163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 121, 121, 3, 3, 146,
+  146, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 3, 3, 3, 164, 85, 85,
+  3, 3, 85, 85, 3, 3, 165, 165, 165, 165, 3, 0, 0, 0, 0, 165, 165, 165, 165, 165,
+  165, 3, 3, 121, 121, 121, 3, 165, 165, 3, 3, 121, 121, 121, 3, 3, 104, 85, 85,
+  85, 3, 3, 3, 166, 167, 166, 3, 3, 3, 168, 166, 169, 3, 3, 3, 168, 166, 167,
+  166, 3, 3, 3, 168, 3, 3, 3, 3, 3, 3, 3, 3, 121, 121, 0, 104, 104, 104, 104, 104,
+  104, 104, 104, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 140, 142, 0, 0, 140,
+  142, 0, 0, 140, 142, 0, 0, 141, 142, 85, 85, 85, 140, 141, 142, 85, 85, 85, 140,
+  141, 142, 85, 85, 140, 142, 0, 0, 140, 142, 0, 0, 140, 142, 0, 0, 141, 142,
+  3, 3, 3, 100, 101, 102, 0, 0, 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 3, 3, 3,
+  3, 3, 3, 0, 0, 0, 140, 141, 142, 93, 3, 3, 3, 100, 101, 102, 0, 0, 0, 0, 0, 3,
+  3, 3, 3, 3, 3, 0, 0, 0, 0, 56, 56, 170, 0, 0, 0, 0, 0, 0, 0, 0, 0, 81, 0, 0,
+  0, 0, 0, 171, 171, 171, 171, 172, 172, 172, 172, 172, 172, 172, 172, 170, 0,
+  0
+};
+
+const InstDB::RWInfo InstDB::rwInfoA[] = {
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 0 , 0 , 0 , 0 , 0 , 0  } }, // #0 [ref=1007x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 1 , 0 , 0 , 0 , 0 , 0  } }, // #1 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 1 , { 2 , 3 , 0 , 0 , 0 , 0  } }, // #2 [ref=7x]
+  { InstDB::RWInfo::kCategoryGeneric   , 2 , { 2 , 3 , 0 , 0 , 0 , 0  } }, // #3 [ref=96x]
+  { InstDB::RWInfo::kCategoryGeneric   , 3 , { 4 , 5 , 0 , 0 , 0 , 0  } }, // #4 [ref=55x]
+  { InstDB::RWInfo::kCategoryGeneric   , 4 , { 6 , 7 , 0 , 0 , 0 , 0  } }, // #5 [ref=6x]
+  { InstDB::RWInfo::kCategoryGeneric   , 5 , { 8 , 9 , 0 , 0 , 0 , 0  } }, // #6 [ref=6x]
+  { InstDB::RWInfo::kCategoryGeneric   , 3 , { 10, 5 , 0 , 0 , 0 , 0  } }, // #7 [ref=26x]
+  { InstDB::RWInfo::kCategoryGeneric   , 7 , { 12, 13, 0 , 0 , 0 , 0  } }, // #8 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 2 , { 11, 3 , 0 , 0 , 0 , 0  } }, // #9 [ref=75x]
+  { InstDB::RWInfo::kCategoryGeneric   , 2 , { 5 , 3 , 0 , 0 , 0 , 0  } }, // #10 [ref=3x]
+  { InstDB::RWInfo::kCategoryGeneric   , 8 , { 10, 3 , 0 , 0 , 0 , 0  } }, // #11 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 9 , { 10, 5 , 0 , 0 , 0 , 0  } }, // #12 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 8 , { 15, 5 , 0 , 0 , 0 , 0  } }, // #13 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 3 , 3 , 0 , 0 , 0 , 0  } }, // #14 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 10, { 3 , 3 , 0 , 0 , 0 , 0  } }, // #15 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 10, { 2 , 3 , 0 , 0 , 0 , 0  } }, // #16 [ref=3x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 16, 17, 0 , 0 , 0 , 0  } }, // #17 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 1 , { 3 , 3 , 0 , 0 , 0 , 0  } }, // #18 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 20, 21, 0 , 0 , 0 , 0  } }, // #19 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 4 , { 7 , 7 , 0 , 0 , 0 , 0  } }, // #20 [ref=4x]
+  { InstDB::RWInfo::kCategoryGeneric   , 5 , { 9 , 9 , 0 , 0 , 0 , 0  } }, // #21 [ref=4x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 33, 34, 0 , 0 , 0 , 0  } }, // #22 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 14, { 2 , 3 , 0 , 0 , 0 , 0  } }, // #23 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 4 , { 10, 7 , 0 , 0 , 0 , 0  } }, // #24 [ref=10x]
+  { InstDB::RWInfo::kCategoryGeneric   , 3 , { 35, 5 , 0 , 0 , 0 , 0  } }, // #25 [ref=5x]
+  { InstDB::RWInfo::kCategoryGeneric   , 4 , { 36, 7 , 0 , 0 , 0 , 0  } }, // #26 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 4 , { 35, 7 , 0 , 0 , 0 , 0  } }, // #27 [ref=11x]
+  { InstDB::RWInfo::kCategoryGeneric   , 4 , { 11, 7 , 0 , 0 , 0 , 0  } }, // #28 [ref=9x]
+  { InstDB::RWInfo::kCategoryGeneric   , 4 , { 37, 7 , 0 , 0 , 0 , 0  } }, // #29 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 14, { 36, 3 , 0 , 0 , 0 , 0  } }, // #30 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 14, { 37, 3 , 0 , 0 , 0 , 0  } }, // #31 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 5 , { 36, 9 , 0 , 0 , 0 , 0  } }, // #32 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 5 , { 11, 9 , 0 , 0 , 0 , 0  } }, // #33 [ref=7x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 38, 39, 0 , 0 , 0 , 0  } }, // #34 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 15, { 1 , 40, 0 , 0 , 0 , 0  } }, // #35 [ref=3x]
+  { InstDB::RWInfo::kCategoryGeneric   , 16, { 11, 43, 0 , 0 , 0 , 0  } }, // #36 [ref=3x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 4 , 5 , 0 , 0 , 0 , 0  } }, // #37 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 45, 46, 0 , 0 , 0 , 0  } }, // #38 [ref=6x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 0 , 50, 0 , 0 , 0 , 0  } }, // #39 [ref=1x]
+  { InstDB::RWInfo::kCategoryImul      , 2 , { 0 , 0 , 0 , 0 , 0 , 0  } }, // #40 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 51, 52, 0 , 0 , 0 , 0  } }, // #41 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 54, 52, 0 , 0 , 0 , 0  } }, // #42 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 13, { 3 , 5 , 0 , 0 , 0 , 0  } }, // #43 [ref=3x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 22, 29, 0 , 0 , 0 , 0  } }, // #44 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 55, 0 , 0 , 0 , 0 , 0  } }, // #45 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 23, { 56, 40, 0 , 0 , 0 , 0  } }, // #46 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 24, { 44, 9 , 0 , 0 , 0 , 0  } }, // #47 [ref=3x]
+  { InstDB::RWInfo::kCategoryGeneric   , 25, { 35, 7 , 0 , 0 , 0 , 0  } }, // #48 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 26, { 48, 13, 0 , 0 , 0 , 0  } }, // #49 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 56, 40, 0 , 0 , 0 , 0  } }, // #50 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 44, 9 , 0 , 0 , 0 , 0  } }, // #51 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 35, 7 , 0 , 0 , 0 , 0  } }, // #52 [ref=3x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 48, 13, 0 , 0 , 0 , 0  } }, // #53 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 40, 40, 0 , 0 , 0 , 0  } }, // #54 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 9 , 9 , 0 , 0 , 0 , 0  } }, // #55 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 7 , 7 , 0 , 0 , 0 , 0  } }, // #56 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 13, 13, 0 , 0 , 0 , 0  } }, // #57 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 27, { 11, 3 , 0 , 0 , 0 , 0  } }, // #58 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 13, { 10, 5 , 0 , 0 , 0 , 0  } }, // #59 [ref=5x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 2 , 3 , 0 , 0 , 0 , 0  } }, // #60 [ref=11x]
+  { InstDB::RWInfo::kCategoryGeneric   , 8 , { 11, 3 , 0 , 0 , 0 , 0  } }, // #61 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 51, 20, 0 , 0 , 0 , 0  } }, // #62 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 58, 0 , 0 , 0 , 0 , 0  } }, // #63 [ref=3x]
+  { InstDB::RWInfo::kCategoryMov       , 29, { 0 , 0 , 0 , 0 , 0 , 0  } }, // #64 [ref=1x]
+  { InstDB::RWInfo::kCategoryMovabs    , 0 , { 0 , 0 , 0 , 0 , 0 , 0  } }, // #65 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 30, { 10, 5 , 0 , 0 , 0 , 0  } }, // #66 [ref=6x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 11, 3 , 0 , 0 , 0 , 0  } }, // #67 [ref=14x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 36, 61, 0 , 0 , 0 , 0  } }, // #68 [ref=1x]
+  { InstDB::RWInfo::kCategoryMovh64    , 12, { 0 , 0 , 0 , 0 , 0 , 0  } }, // #69 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 62, 7 , 0 , 0 , 0 , 0  } }, // #70 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 12, { 35, 7 , 0 , 0 , 0 , 0  } }, // #71 [ref=7x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 56, 5 , 0 , 0 , 0 , 0  } }, // #72 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 28, { 44, 9 , 0 , 0 , 0 , 0  } }, // #73 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 63, 20, 0 , 0 , 0 , 0  } }, // #74 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 31, { 35, 7 , 0 , 0 , 0 , 0  } }, // #75 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 33, { 44, 9 , 0 , 0 , 0 , 0  } }, // #76 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 14, { 11, 3 , 0 , 0 , 0 , 0  } }, // #77 [ref=6x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 17, 29, 0 , 0 , 0 , 0  } }, // #78 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 11, { 3 , 3 , 0 , 0 , 0 , 0  } }, // #79 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 52, 22, 0 , 0 , 0 , 0  } }, // #80 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 52, 66, 0 , 0 , 0 , 0  } }, // #81 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 4 , { 26, 7 , 0 , 0 , 0 , 0  } }, // #82 [ref=18x]
+  { InstDB::RWInfo::kCategoryGeneric   , 36, { 0 , 0 , 0 , 0 , 0 , 0  } }, // #83 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 3 , { 69, 5 , 0 , 0 , 0 , 0  } }, // #84 [ref=2x]
+  { InstDB::RWInfo::kCategoryVmov1_8   , 0 , { 0 , 0 , 0 , 0 , 0 , 0  } }, // #85 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 5 , { 10, 9 , 0 , 0 , 0 , 0  } }, // #86 [ref=4x]
+  { InstDB::RWInfo::kCategoryGeneric   , 27, { 10, 13, 0 , 0 , 0 , 0  } }, // #87 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 4 , 0 , 0 , 0 , 0 , 0  } }, // #88 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 3 , { 5 , 5 , 0 , 0 , 0 , 0  } }, // #89 [ref=1x]
+  { InstDB::RWInfo::kCategoryPunpcklxx , 38, { 0 , 0 , 0 , 0 , 0 , 0  } }, // #90 [ref=3x]
+  { InstDB::RWInfo::kCategoryGeneric   , 10, { 2 , 71, 0 , 0 , 0 , 0  } }, // #91 [ref=8x]
+  { InstDB::RWInfo::kCategoryGeneric   , 5 , { 37, 9 , 0 , 0 , 0 , 0  } }, // #92 [ref=3x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 16, 50, 0 , 0 , 0 , 0  } }, // #93 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 22, 21, 0 , 0 , 0 , 0  } }, // #94 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 63, 22, 0 , 0 , 0 , 0  } }, // #95 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 8 , { 74, 3 , 0 , 0 , 0 , 0  } }, // #96 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 8 , { 11, 43, 0 , 0 , 0 , 0  } }, // #97 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 5 , { 53, 9 , 0 , 0 , 0 , 0  } }, // #98 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 13, { 80, 5 , 0 , 0 , 0 , 0  } }, // #99 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 13, { 11, 5 , 0 , 0 , 0 , 0  } }, // #100 [ref=4x]
+  { InstDB::RWInfo::kCategoryGeneric   , 43, { 74, 81, 0 , 0 , 0 , 0  } }, // #101 [ref=4x]
+  { InstDB::RWInfo::kCategoryGeneric   , 44, { 11, 7 , 0 , 0 , 0 , 0  } }, // #102 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 45, { 11, 9 , 0 , 0 , 0 , 0  } }, // #103 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 27, { 13, 13, 0 , 0 , 0 , 0  } }, // #104 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 11, { 11, 3 , 0 , 0 , 0 , 0  } }, // #105 [ref=7x]
+  { InstDB::RWInfo::kCategoryVmov2_1   , 46, { 0 , 0 , 0 , 0 , 0 , 0  } }, // #106 [ref=14x]
+  { InstDB::RWInfo::kCategoryVmov1_2   , 14, { 0 , 0 , 0 , 0 , 0 , 0  } }, // #107 [ref=7x]
+  { InstDB::RWInfo::kCategoryGeneric   , 14, { 10, 3 , 0 , 0 , 0 , 0  } }, // #108 [ref=3x]
+  { InstDB::RWInfo::kCategoryGeneric   , 46, { 11, 3 , 0 , 0 , 0 , 0  } }, // #109 [ref=5x]
+  { InstDB::RWInfo::kCategoryGeneric   , 47, { 11, 5 , 0 , 0 , 0 , 0  } }, // #110 [ref=5x]
+  { InstDB::RWInfo::kCategoryGeneric   , 27, { 11, 5 , 0 , 0 , 0 , 0  } }, // #111 [ref=4x]
+  { InstDB::RWInfo::kCategoryGeneric   , 51, { 74, 43, 0 , 0 , 0 , 0  } }, // #112 [ref=6x]
+  { InstDB::RWInfo::kCategoryGeneric   , 5 , { 44, 9 , 0 , 0 , 0 , 0  } }, // #113 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 18, { 2 , 3 , 0 , 0 , 0 , 0  } }, // #114 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 58, { 11, 3 , 0 , 0 , 0 , 0  } }, // #115 [ref=12x]
+  { InstDB::RWInfo::kCategoryVmovddup  , 38, { 0 , 0 , 0 , 0 , 0 , 0  } }, // #116 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 12, { 35, 61, 0 , 0 , 0 , 0  } }, // #117 [ref=2x]
+  { InstDB::RWInfo::kCategoryVmovmskpd , 0 , { 0 , 0 , 0 , 0 , 0 , 0  } }, // #118 [ref=1x]
+  { InstDB::RWInfo::kCategoryVmovmskps , 0 , { 0 , 0 , 0 , 0 , 0 , 0  } }, // #119 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 59, { 35, 7 , 0 , 0 , 0 , 0  } }, // #120 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 21, { 48, 13, 0 , 0 , 0 , 0  } }, // #121 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 2 , { 3 , 3 , 0 , 0 , 0 , 0  } }, // #122 [ref=4x]
+  { InstDB::RWInfo::kCategoryGeneric   , 15, { 11, 40, 0 , 0 , 0 , 0  } }, // #123 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 11, 7 , 0 , 0 , 0 , 0  } }, // #124 [ref=6x]
+  { InstDB::RWInfo::kCategoryGeneric   , 27, { 11, 13, 0 , 0 , 0 , 0  } }, // #125 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 35, 3 , 0 , 0 , 0 , 0  } }, // #126 [ref=4x]
+  { InstDB::RWInfo::kCategoryVmov1_4   , 62, { 0 , 0 , 0 , 0 , 0 , 0  } }, // #127 [ref=6x]
+  { InstDB::RWInfo::kCategoryVmov1_2   , 48, { 0 , 0 , 0 , 0 , 0 , 0  } }, // #128 [ref=9x]
+  { InstDB::RWInfo::kCategoryVmov1_8   , 63, { 0 , 0 , 0 , 0 , 0 , 0  } }, // #129 [ref=3x]
+  { InstDB::RWInfo::kCategoryVmov4_1   , 47, { 0 , 0 , 0 , 0 , 0 , 0  } }, // #130 [ref=4x]
+  { InstDB::RWInfo::kCategoryVmov8_1   , 64, { 0 , 0 , 0 , 0 , 0 , 0  } }, // #131 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 18, { 11, 3 , 0 , 0 , 0 , 0  } }, // #132 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 17, { 44, 9 , 0 , 0 , 0 , 0  } }, // #133 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 35, { 35, 7 , 0 , 0 , 0 , 0  } }, // #134 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 11, { 2 , 2 , 0 , 0 , 0 , 0  } }, // #135 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 58, { 2 , 2 , 0 , 0 , 0 , 0  } }  // #136 [ref=1x]
+};
+
+const InstDB::RWInfo InstDB::rwInfoB[] = {
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 0 , 0 , 0 , 0 , 0 , 0  } }, // #0 [ref=773x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 1 , 0 , 0 , 0 , 0 , 0  } }, // #1 [ref=5x]
+  { InstDB::RWInfo::kCategoryGeneric   , 3 , { 10, 5 , 0 , 0 , 0 , 0  } }, // #2 [ref=7x]
+  { InstDB::RWInfo::kCategoryGeneric   , 6 , { 11, 3 , 3 , 0 , 0 , 0  } }, // #3 [ref=193x]
+  { InstDB::RWInfo::kCategoryGeneric   , 2 , { 11, 3 , 3 , 0 , 0 , 0  } }, // #4 [ref=5x]
+  { InstDB::RWInfo::kCategoryGeneric   , 3 , { 4 , 5 , 0 , 0 , 0 , 0  } }, // #5 [ref=14x]
+  { InstDB::RWInfo::kCategoryGeneric   , 3 , { 4 , 5 , 14, 0 , 0 , 0  } }, // #6 [ref=4x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 2 , 0 , 0 , 0 , 0 , 0  } }, // #7 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 11, { 3 , 0 , 0 , 0 , 0 , 0  } }, // #8 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 18, 0 , 0 , 0 , 0 , 0  } }, // #9 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 8 , { 3 , 0 , 0 , 0 , 0 , 0  } }, // #10 [ref=34x]
+  { InstDB::RWInfo::kCategoryGeneric   , 12, { 7 , 0 , 0 , 0 , 0 , 0  } }, // #11 [ref=4x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 19, 0 , 0 , 0 , 0 , 0  } }, // #12 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 4 , { 6 , 7 , 0 , 0 , 0 , 0  } }, // #13 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 5 , { 8 , 9 , 0 , 0 , 0 , 0  } }, // #14 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 11, { 2 , 3 , 22, 0 , 0 , 0  } }, // #15 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 13, { 4 , 23, 18, 24, 25, 0  } }, // #16 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 12, { 26, 27, 28, 29, 30, 0  } }, // #17 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 28, 31, 32, 16, 0 , 0  } }, // #18 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 28, 0 , 0 , 0 , 0 , 0  } }, // #19 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 10, { 2 , 0 , 0 , 0 , 0 , 0  } }, // #20 [ref=4x]
+  { InstDB::RWInfo::kCategoryGeneric   , 6 , { 41, 42, 3 , 0 , 0 , 0  } }, // #21 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 17, { 44, 5 , 0 , 0 , 0 , 0  } }, // #22 [ref=4x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 4 , 0 , 0 , 0 , 0 , 0  } }, // #23 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 18, { 3 , 0 , 0 , 0 , 0 , 0  } }, // #24 [ref=17x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 45, 0 , 0 , 0 , 0 , 0  } }, // #25 [ref=16x]
+  { InstDB::RWInfo::kCategoryGeneric   , 19, { 46, 0 , 0 , 0 , 0 , 0  } }, // #26 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 19, { 47, 0 , 0 , 0 , 0 , 0  } }, // #27 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 20, { 3 , 0 , 0 , 0 , 0 , 0  } }, // #28 [ref=3x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 46, 0 , 0 , 0 , 0 , 0  } }, // #29 [ref=6x]
+  { InstDB::RWInfo::kCategoryGeneric   , 18, { 11, 0 , 0 , 0 , 0 , 0  } }, // #30 [ref=3x]
+  { InstDB::RWInfo::kCategoryGeneric   , 21, { 13, 0 , 0 , 0 , 0 , 0  } }, // #31 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 8 , { 11, 0 , 0 , 0 , 0 , 0  } }, // #32 [ref=8x]
+  { InstDB::RWInfo::kCategoryGeneric   , 21, { 48, 0 , 0 , 0 , 0 , 0  } }, // #33 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 7 , { 49, 0 , 0 , 0 , 0 , 0  } }, // #34 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 20, { 11, 0 , 0 , 0 , 0 , 0  } }, // #35 [ref=2x]
+  { InstDB::RWInfo::kCategoryImul      , 22, { 0 , 0 , 0 , 0 , 0 , 0  } }, // #36 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 53, 0 , 0 , 0 , 0 , 0  } }, // #37 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 26, 0 , 0 , 0 , 0 , 0  } }, // #38 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 5 , { 4 , 9 , 0 , 0 , 0 , 0  } }, // #39 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 4 , 5 , 0 , 0 , 0 , 0  } }, // #40 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 56, 40, 40, 0 , 0 , 0  } }, // #41 [ref=6x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 44, 9 , 9 , 0 , 0 , 0  } }, // #42 [ref=6x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 35, 7 , 7 , 0 , 0 , 0  } }, // #43 [ref=6x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 48, 13, 13, 0 , 0 , 0  } }, // #44 [ref=6x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 56, 40, 0 , 0 , 0 , 0  } }, // #45 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 44, 9 , 0 , 0 , 0 , 0  } }, // #46 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 35, 7 , 0 , 0 , 0 , 0  } }, // #47 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 48, 13, 0 , 0 , 0 , 0  } }, // #48 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 48, 40, 40, 0 , 0 , 0  } }, // #49 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 35, 9 , 9 , 0 , 0 , 0  } }, // #50 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 44, 13, 13, 0 , 0 , 0  } }, // #51 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 57, 0 , 0 , 0 , 0 , 0  } }, // #52 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 28, { 9 , 0 , 0 , 0 , 0 , 0  } }, // #53 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 16, { 43, 0 , 0 , 0 , 0 , 0  } }, // #54 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 7 , { 13, 0 , 0 , 0 , 0 , 0  } }, // #55 [ref=5x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 3 , 0 , 0 , 0 , 0 , 0  } }, // #56 [ref=4x]
+  { InstDB::RWInfo::kCategoryGeneric   , 5 , { 3 , 9 , 0 , 0 , 0 , 0  } }, // #57 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 5 , 5 , 59, 0 , 0 , 0  } }, // #58 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 7 , 7 , 59, 0 , 0 , 0  } }, // #59 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 19, 29, 60, 0 , 0 , 0  } }, // #60 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 32, { 0 , 0 , 0 , 0 , 0 , 0  } }, // #61 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 6 , { 64, 42, 3 , 0 , 0 , 0  } }, // #62 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 6 , { 11, 11, 3 , 65, 0 , 0  } }, // #63 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 17, 29, 30, 0 , 0 , 0  } }, // #64 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 10, { 3 , 0 , 0 , 0 , 0 , 0  } }, // #65 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 2 , { 2 , 3 , 0 , 0 , 0 , 0  } }, // #66 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 3 , { 5 , 5 , 0 , 67, 17, 60 } }, // #67 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 3 , { 5 , 5 , 0 , 68, 17, 60 } }, // #68 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 3 , { 5 , 5 , 0 , 67, 0 , 0  } }, // #69 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 3 , { 5 , 5 , 0 , 68, 0 , 0  } }, // #70 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 34, { 56, 5 , 0 , 0 , 0 , 0  } }, // #71 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 35, { 35, 5 , 0 , 0 , 0 , 0  } }, // #72 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 37, { 48, 3 , 0 , 0 , 0 , 0  } }, // #73 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 15, { 4 , 40, 0 , 0 , 0 , 0  } }, // #74 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 4 , { 4 , 7 , 0 , 0 , 0 , 0  } }, // #75 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 27, { 2 , 13, 0 , 0 , 0 , 0  } }, // #76 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 10, { 70, 0 , 0 , 0 , 0 , 0  } }, // #77 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 4 , { 35, 7 , 0 , 0 , 0 , 0  } }, // #78 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 10, { 65, 0 , 0 , 0 , 0 , 0  } }, // #79 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 11, 0 , 0 , 0 , 0 , 0  } }, // #80 [ref=6x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 16, 50, 29, 0 , 0 , 0  } }, // #81 [ref=5x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 44, 0 , 0 , 0 , 0 , 0  } }, // #82 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 35, 0 , 0 , 0 , 0 , 0  } }, // #83 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 16, 50, 67, 0 , 0 , 0  } }, // #84 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 2 , { 11, 3 , 0 , 0 , 0 , 0  } }, // #85 [ref=19x]
+  { InstDB::RWInfo::kCategoryGeneric   , 4 , { 36, 7 , 0 , 0 , 0 , 0  } }, // #86 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 5 , { 37, 9 , 0 , 0 , 0 , 0  } }, // #87 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 72, 0 , 0 , 0 , 0 , 0  } }, // #88 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 7 , 0 , 0 , 0 , 0 , 0  } }, // #89 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 34, { 73, 0 , 0 , 0 , 0 , 0  } }, // #90 [ref=30x]
+  { InstDB::RWInfo::kCategoryGeneric   , 11, { 2 , 3 , 71, 0 , 0 , 0  } }, // #91 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 39, { 11, 0 , 0 , 0 , 0 , 0  } }, // #92 [ref=3x]
+  { InstDB::RWInfo::kCategoryGeneric   , 28, { 44, 0 , 0 , 0 , 0 , 0  } }, // #93 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 16, { 74, 0 , 0 , 0 , 0 , 0  } }, // #94 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 75, 43, 43, 0 , 0 , 0  } }, // #95 [ref=5x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 74, 0 , 0 , 0 , 0 , 0  } }, // #96 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 9 , 60, 17, 0 , 0 , 0  } }, // #97 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 13, { 75, 76, 77, 77, 77, 5  } }, // #98 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 13, { 4 , 78, 79, 79, 79, 5  } }, // #99 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 40, { 10, 5 , 7 , 0 , 0 , 0  } }, // #100 [ref=8x]
+  { InstDB::RWInfo::kCategoryGeneric   , 41, { 10, 5 , 13, 0 , 0 , 0  } }, // #101 [ref=7x]
+  { InstDB::RWInfo::kCategoryGeneric   , 42, { 10, 5 , 9 , 0 , 0 , 0  } }, // #102 [ref=9x]
+  { InstDB::RWInfo::kCategoryGeneric   , 6 , { 11, 3 , 3 , 3 , 0 , 0  } }, // #103 [ref=3x]
+  { InstDB::RWInfo::kCategoryGeneric   , 6 , { 35, 3 , 3 , 0 , 0 , 0  } }, // #104 [ref=18x]
+  { InstDB::RWInfo::kCategoryGeneric   , 40, { 11, 5 , 7 , 0 , 0 , 0  } }, // #105 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 41, { 35, 13, 13, 0 , 0 , 0  } }, // #106 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 42, { 11, 5 , 9 , 0 , 0 , 0  } }, // #107 [ref=1x]
+  { InstDB::RWInfo::kCategoryVmov1_2   , 48, { 0 , 0 , 0 , 0 , 0 , 0  } }, // #108 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 40, { 10, 5 , 5 , 0 , 0 , 0  } }, // #109 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 40, { 10, 82, 7 , 0 , 0 , 0  } }, // #110 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 41, { 10, 5 , 5 , 0 , 0 , 0  } }, // #111 [ref=3x]
+  { InstDB::RWInfo::kCategoryGeneric   , 49, { 10, 61, 3 , 0 , 0 , 0  } }, // #112 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 49, { 10, 3 , 3 , 0 , 0 , 0  } }, // #113 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 49, { 10, 82, 3 , 0 , 0 , 0  } }, // #114 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 42, { 10, 61, 9 , 0 , 0 , 0  } }, // #115 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 42, { 10, 5 , 5 , 0 , 0 , 0  } }, // #116 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 50, { 10, 5 , 5 , 0 , 0 , 0  } }, // #117 [ref=9x]
+  { InstDB::RWInfo::kCategoryGeneric   , 52, { 10, 81, 0 , 0 , 0 , 0  } }, // #118 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 52, { 10, 3 , 0 , 0 , 0 , 0  } }, // #119 [ref=4x]
+  { InstDB::RWInfo::kCategoryGeneric   , 53, { 80, 43, 0 , 0 , 0 , 0  } }, // #120 [ref=4x]
+  { InstDB::RWInfo::kCategoryGeneric   , 6 , { 2 , 3 , 3 , 0 , 0 , 0  } }, // #121 [ref=82x]
+  { InstDB::RWInfo::kCategoryGeneric   , 42, { 4 , 5 , 5 , 0 , 0 , 0  } }, // #122 [ref=4x]
+  { InstDB::RWInfo::kCategoryGeneric   , 40, { 4 , 61, 7 , 0 , 0 , 0  } }, // #123 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 42, { 4 , 82, 9 , 0 , 0 , 0  } }, // #124 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 40, { 6 , 7 , 7 , 0 , 0 , 0  } }, // #125 [ref=11x]
+  { InstDB::RWInfo::kCategoryGeneric   , 41, { 4 , 5 , 5 , 0 , 0 , 0  } }, // #126 [ref=6x]
+  { InstDB::RWInfo::kCategoryGeneric   , 42, { 8 , 9 , 9 , 0 , 0 , 0  } }, // #127 [ref=11x]
+  { InstDB::RWInfo::kCategoryGeneric   , 54, { 11, 3 , 3 , 3 , 0 , 0  } }, // #128 [ref=15x]
+  { InstDB::RWInfo::kCategoryGeneric   , 55, { 35, 7 , 7 , 7 , 0 , 0  } }, // #129 [ref=4x]
+  { InstDB::RWInfo::kCategoryGeneric   , 56, { 44, 9 , 9 , 9 , 0 , 0  } }, // #130 [ref=4x]
+  { InstDB::RWInfo::kCategoryGeneric   , 41, { 4 , 5 , 13, 0 , 0 , 0  } }, // #131 [ref=6x]
+  { InstDB::RWInfo::kCategoryGeneric   , 40, { 26, 7 , 7 , 0 , 0 , 0  } }, // #132 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 42, { 53, 9 , 9 , 0 , 0 , 0  } }, // #133 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 14, { 35, 3 , 0 , 0 , 0 , 0  } }, // #134 [ref=3x]
+  { InstDB::RWInfo::kCategoryGeneric   , 27, { 35, 13, 0 , 0 , 0 , 0  } }, // #135 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 5 , { 35, 9 , 0 , 0 , 0 , 0  } }, // #136 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 8 , { 2 , 3 , 2 , 0 , 0 , 0  } }, // #137 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 2 , 3 , 2 , 0 , 0 , 0  } }, // #138 [ref=4x]
+  { InstDB::RWInfo::kCategoryGeneric   , 18, { 4 , 3 , 4 , 0 , 0 , 0  } }, // #139 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 40, { 10, 61, 7 , 0 , 0 , 0  } }, // #140 [ref=11x]
+  { InstDB::RWInfo::kCategoryGeneric   , 41, { 10, 83, 13, 0 , 0 , 0  } }, // #141 [ref=7x]
+  { InstDB::RWInfo::kCategoryGeneric   , 42, { 10, 82, 9 , 0 , 0 , 0  } }, // #142 [ref=13x]
+  { InstDB::RWInfo::kCategoryGeneric   , 50, { 80, 81, 5 , 0 , 0 , 0  } }, // #143 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 50, { 11, 3 , 5 , 0 , 0 , 0  } }, // #144 [ref=4x]
+  { InstDB::RWInfo::kCategoryGeneric   , 57, { 74, 43, 81, 0 , 0 , 0  } }, // #145 [ref=4x]
+  { InstDB::RWInfo::kCategoryVmaskmov  , 0 , { 0 , 0 , 0 , 0 , 0 , 0  } }, // #146 [ref=4x]
+  { InstDB::RWInfo::kCategoryGeneric   , 12, { 35, 0 , 0 , 0 , 0 , 0  } }, // #147 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 22, 0 , 0 , 0 , 0 , 0  } }, // #148 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 10, 61, 61, 0 , 0 , 0  } }, // #149 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 12, { 10, 7 , 7 , 0 , 0 , 0  } }, // #150 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 10, 7 , 7 , 0 , 0 , 0  } }, // #151 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 12, { 10, 61, 7 , 0 , 0 , 0  } }, // #152 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 10, 61, 7 , 0 , 0 , 0  } }, // #153 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 10, 83, 13, 0 , 0 , 0  } }, // #154 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 10, 82, 9 , 0 , 0 , 0  } }, // #155 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 84, 0 , 0 , 0 , 0 , 0  } }, // #156 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 60, { 85, 86, 3 , 3 , 0 , 0  } }, // #157 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 13, { 74, 76, 77, 77, 77, 5  } }, // #158 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 57, { 80, 81, 81, 0 , 0 , 0  } }, // #159 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 22, { 11, 3 , 3 , 0 , 0 , 0  } }, // #160 [ref=4x]
+  { InstDB::RWInfo::kCategoryGeneric   , 7 , { 48, 5 , 0 , 0 , 0 , 0  } }, // #161 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 61, { 10, 5 , 40, 0 , 0 , 0  } }, // #162 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 50, { 10, 5 , 5 , 5 , 0 , 0  } }, // #163 [ref=12x]
+  { InstDB::RWInfo::kCategoryGeneric   , 65, { 10, 5 , 5 , 5 , 0 , 0  } }, // #164 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 66, { 10, 5 , 5 , 0 , 0 , 0  } }, // #165 [ref=12x]
+  { InstDB::RWInfo::kCategoryGeneric   , 67, { 11, 3 , 5 , 0 , 0 , 0  } }, // #166 [ref=5x]
+  { InstDB::RWInfo::kCategoryGeneric   , 68, { 11, 3 , 0 , 0 , 0 , 0  } }, // #167 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 69, { 11, 3 , 5 , 0 , 0 , 0  } }, // #168 [ref=3x]
+  { InstDB::RWInfo::kCategoryGeneric   , 22, { 11, 3 , 5 , 0 , 0 , 0  } }, // #169 [ref=1x]
+  { InstDB::RWInfo::kCategoryGeneric   , 0 , { 60, 17, 29, 0 , 0 , 0  } }, // #170 [ref=2x]
+  { InstDB::RWInfo::kCategoryGeneric   , 8 , { 3 , 60, 17, 0 , 0 , 0  } }, // #171 [ref=4x]
+  { InstDB::RWInfo::kCategoryGeneric   , 8 , { 11, 60, 17, 0 , 0 , 0  } }  // #172 [ref=8x]
+};
+
+const InstDB::RWInfoOp InstDB::rwInfoOp[] = { // Appears to be generated (cf. the InstRWInfoTable end marker after these tables); '#N' is the row index, '[ref=Kx]' presumably how often the row is referenced - TODO confirm.
+  { 0x0000000000000000u, 0x0000000000000000u, 0xFF, 0, { 0 }, OpRWFlags::kNone }, // #0 [ref=16519x]
+  { 0x0000000000000003u, 0x0000000000000003u, 0x00, 0, { 0 }, OpRWFlags::kRW | OpRWFlags::kRegPhysId }, // #1 [ref=10x]
+  { 0x0000000000000000u, 0x0000000000000000u, 0xFF, 0, { 0 }, OpRWFlags::kRW | OpRWFlags::kZExt }, // #2 [ref=236x]
+  { 0x0000000000000000u, 0x0000000000000000u, 0xFF, 0, { 0 }, OpRWFlags::kRead }, // #3 [ref=1077x]
+  { 0x000000000000FFFFu, 0x000000000000FFFFu, 0xFF, 0, { 0 }, OpRWFlags::kRW | OpRWFlags::kZExt }, // #4 [ref=108x]
+  { 0x000000000000FFFFu, 0x0000000000000000u, 0xFF, 0, { 0 }, OpRWFlags::kRead }, // #5 [ref=348x]
+  { 0x00000000000000FFu, 0x00000000000000FFu, 0xFF, 0, { 0 }, OpRWFlags::kRW }, // #6 [ref=18x]
+  { 0x00000000000000FFu, 0x0000000000000000u, 0xFF, 0, { 0 }, OpRWFlags::kRead }, // #7 [ref=186x]
+  { 0x000000000000000Fu, 0x000000000000000Fu, 0xFF, 0, { 0 }, OpRWFlags::kRW }, // #8 [ref=18x]
+  { 0x000000000000000Fu, 0x0000000000000000u, 0xFF, 0, { 0 }, OpRWFlags::kRead }, // #9 [ref=135x]
+  { 0x0000000000000000u, 0x000000000000FFFFu, 0xFF, 0, { 0 }, OpRWFlags::kWrite | OpRWFlags::kZExt }, // #10 [ref=184x]
+  { 0x0000000000000000u, 0x0000000000000000u, 0xFF, 0, { 0 }, OpRWFlags::kWrite | OpRWFlags::kZExt }, // #11 [ref=455x]
+  { 0x0000000000000003u, 0x0000000000000003u, 0xFF, 0, { 0 }, OpRWFlags::kRW }, // #12 [ref=1x]
+  { 0x0000000000000003u, 0x0000000000000000u, 0xFF, 0, { 0 }, OpRWFlags::kRead }, // #13 [ref=63x]
+  { 0x000000000000FFFFu, 0x0000000000000000u, 0x00, 0, { 0 }, OpRWFlags::kRead | OpRWFlags::kRegPhysId }, // #14 [ref=4x]
+  { 0x0000000000000000u, 0x0000000000000000u, 0xFF, 0, { 0 }, OpRWFlags::kWrite | OpRWFlags::kZExt | OpRWFlags::kMemBaseWrite | OpRWFlags::kMemIndexWrite }, // #15 [ref=1x]
+  { 0x0000000000000000u, 0x000000000000000Fu, 0x02, 0, { 0 }, OpRWFlags::kWrite | OpRWFlags::kZExt | OpRWFlags::kRegPhysId }, // #16 [ref=9x]
+  { 0x000000000000000Fu, 0x0000000000000000u, 0x00, 0, { 0 }, OpRWFlags::kRead | OpRWFlags::kRegPhysId }, // #17 [ref=23x]
+  { 0x00000000000000FFu, 0x00000000000000FFu, 0x00, 0, { 0 }, OpRWFlags::kRW | OpRWFlags::kZExt | OpRWFlags::kRegPhysId }, // #18 [ref=2x]
+  { 0x0000000000000000u, 0x0000000000000000u, 0x00, 0, { 0 }, OpRWFlags::kRead | OpRWFlags::kMemPhysId }, // #19 [ref=3x]
+  { 0x0000000000000000u, 0x0000000000000000u, 0x06, 0, { 0 }, OpRWFlags::kRead | OpRWFlags::kMemBaseRW | OpRWFlags::kMemBasePostModify | OpRWFlags::kMemPhysId }, // #20 [ref=3x]
+  { 0x0000000000000000u, 0x0000000000000000u, 0x07, 0, { 0 }, OpRWFlags::kRead | OpRWFlags::kMemBaseRW | OpRWFlags::kMemBasePostModify | OpRWFlags::kMemPhysId }, // #21 [ref=2x]
+  { 0x0000000000000000u, 0x0000000000000000u, 0x00, 0, { 0 }, OpRWFlags::kRead | OpRWFlags::kRegPhysId }, // #22 [ref=7x]
+  { 0x00000000000000FFu, 0x00000000000000FFu, 0x02, 0, { 0 }, OpRWFlags::kRW | OpRWFlags::kZExt | OpRWFlags::kRegPhysId }, // #23 [ref=1x]
+  { 0x00000000000000FFu, 0x0000000000000000u, 0x01, 0, { 0 }, OpRWFlags::kRead | OpRWFlags::kRegPhysId }, // #24 [ref=1x]
+  { 0x00000000000000FFu, 0x0000000000000000u, 0x03, 0, { 0 }, OpRWFlags::kRead | OpRWFlags::kRegPhysId }, // #25 [ref=1x]
+  { 0x00000000000000FFu, 0x00000000000000FFu, 0xFF, 0, { 0 }, OpRWFlags::kRW | OpRWFlags::kZExt }, // #26 [ref=21x]
+  { 0x000000000000000Fu, 0x000000000000000Fu, 0x02, 0, { 0 }, OpRWFlags::kRW | OpRWFlags::kZExt | OpRWFlags::kRegPhysId }, // #27 [ref=1x]
+  { 0x000000000000000Fu, 0x000000000000000Fu, 0x00, 0, { 0 }, OpRWFlags::kRW | OpRWFlags::kZExt | OpRWFlags::kRegPhysId }, // #28 [ref=4x]
+  { 0x000000000000000Fu, 0x0000000000000000u, 0x01, 0, { 0 }, OpRWFlags::kRead | OpRWFlags::kRegPhysId }, // #29 [ref=13x]
+  { 0x000000000000000Fu, 0x0000000000000000u, 0x03, 0, { 0 }, OpRWFlags::kRead | OpRWFlags::kRegPhysId }, // #30 [ref=2x]
+  { 0x0000000000000000u, 0x000000000000000Fu, 0x03, 0, { 0 }, OpRWFlags::kWrite | OpRWFlags::kZExt | OpRWFlags::kRegPhysId }, // #31 [ref=1x]
+  { 0x000000000000000Fu, 0x000000000000000Fu, 0x01, 0, { 0 }, OpRWFlags::kRW | OpRWFlags::kZExt | OpRWFlags::kRegPhysId }, // #32 [ref=1x]
+  { 0x0000000000000000u, 0x00000000000000FFu, 0x02, 0, { 0 }, OpRWFlags::kWrite | OpRWFlags::kZExt | OpRWFlags::kRegPhysId }, // #33 [ref=1x]
+  { 0x00000000000000FFu, 0x0000000000000000u, 0x00, 0, { 0 }, OpRWFlags::kRead | OpRWFlags::kRegPhysId }, // #34 [ref=1x]
+  { 0x0000000000000000u, 0x00000000000000FFu, 0xFF, 0, { 0 }, OpRWFlags::kWrite | OpRWFlags::kZExt }, // #35 [ref=82x]
+  { 0x0000000000000000u, 0x00000000000000FFu, 0xFF, 0, { 0 }, OpRWFlags::kWrite }, // #36 [ref=6x]
+  { 0x0000000000000000u, 0x000000000000000Fu, 0xFF, 0, { 0 }, OpRWFlags::kWrite }, // #37 [ref=6x]
+  { 0x0000000000000000u, 0x0000000000000003u, 0x02, 0, { 0 }, OpRWFlags::kWrite | OpRWFlags::kRegPhysId }, // #38 [ref=1x]
+  { 0x0000000000000003u, 0x0000000000000000u, 0x00, 0, { 0 }, OpRWFlags::kRead | OpRWFlags::kRegPhysId }, // #39 [ref=1x]
+  { 0x0000000000000001u, 0x0000000000000000u, 0xFF, 0, { 0 }, OpRWFlags::kRead }, // #40 [ref=28x]
+  { 0x0000000000000000u, 0x0000000000000000u, 0x02, 0, { 0 }, OpRWFlags::kRW | OpRWFlags::kRegPhysId | OpRWFlags::kZExt }, // #41 [ref=2x]
+  { 0x0000000000000000u, 0x0000000000000000u, 0x00, 0, { 0 }, OpRWFlags::kRW | OpRWFlags::kRegPhysId | OpRWFlags::kZExt }, // #42 [ref=3x]
+  { 0xFFFFFFFFFFFFFFFFu, 0x0000000000000000u, 0xFF, 0, { 0 }, OpRWFlags::kRead }, // #43 [ref=29x]
+  { 0x0000000000000000u, 0x000000000000000Fu, 0xFF, 0, { 0 }, OpRWFlags::kWrite | OpRWFlags::kZExt }, // #44 [ref=30x]
+  { 0x00000000000003FFu, 0x00000000000003FFu, 0xFF, 0, { 0 }, OpRWFlags::kRW | OpRWFlags::kZExt }, // #45 [ref=22x]
+  { 0x00000000000003FFu, 0x0000000000000000u, 0xFF, 0, { 0 }, OpRWFlags::kRead }, // #46 [ref=13x]
+  { 0x0000000000000000u, 0x00000000000003FFu, 0xFF, 0, { 0 }, OpRWFlags::kWrite | OpRWFlags::kZExt }, // #47 [ref=1x]
+  { 0x0000000000000000u, 0x0000000000000003u, 0xFF, 0, { 0 }, OpRWFlags::kWrite | OpRWFlags::kZExt }, // #48 [ref=17x]
+  { 0x0000000000000000u, 0x0000000000000003u, 0x00, 0, { 0 }, OpRWFlags::kWrite | OpRWFlags::kRegPhysId | OpRWFlags::kZExt }, // #49 [ref=2x]
+  { 0x0000000000000000u, 0x000000000000000Fu, 0x00, 0, { 0 }, OpRWFlags::kWrite | OpRWFlags::kZExt | OpRWFlags::kRegPhysId }, // #50 [ref=8x]
+  { 0x0000000000000000u, 0x0000000000000000u, 0x00, 0, { 0 }, OpRWFlags::kWrite | OpRWFlags::kRegPhysId | OpRWFlags::kZExt }, // #51 [ref=2x]
+  { 0x0000000000000003u, 0x0000000000000000u, 0x02, 0, { 0 }, OpRWFlags::kRead | OpRWFlags::kRegPhysId }, // #52 [ref=4x]
+  { 0x000000000000000Fu, 0x000000000000000Fu, 0xFF, 0, { 0 }, OpRWFlags::kRW | OpRWFlags::kZExt }, // #53 [ref=4x]
+  { 0x0000000000000000u, 0x0000000000000000u, 0x07, 0, { 0 }, OpRWFlags::kWrite | OpRWFlags::kZExt | OpRWFlags::kMemPhysId }, // #54 [ref=1x]
+  { 0x0000000000000000u, 0x0000000000000000u, 0x01, 0, { 0 }, OpRWFlags::kRead | OpRWFlags::kRegPhysId }, // #55 [ref=1x]
+  { 0x0000000000000000u, 0x0000000000000001u, 0xFF, 0, { 0 }, OpRWFlags::kWrite | OpRWFlags::kZExt }, // #56 [ref=14x]
+  { 0x0000000000000000u, 0x0000000000000001u, 0x00, 0, { 0 }, OpRWFlags::kWrite | OpRWFlags::kRegPhysId }, // #57 [ref=1x]
+  { 0x0000000000000000u, 0x0000000000000000u, 0x01, 0, { 0 }, OpRWFlags::kRW | OpRWFlags::kRegPhysId | OpRWFlags::kZExt }, // #58 [ref=3x]
+  { 0x0000000000000000u, 0x0000000000000000u, 0x07, 0, { 0 }, OpRWFlags::kRW | OpRWFlags::kZExt | OpRWFlags::kMemPhysId }, // #59 [ref=3x]
+  { 0x000000000000000Fu, 0x0000000000000000u, 0x02, 0, { 0 }, OpRWFlags::kRead | OpRWFlags::kRegPhysId }, // #60 [ref=22x]
+  { 0x000000000000FF00u, 0x0000000000000000u, 0xFF, 0, { 0 }, OpRWFlags::kRead }, // #61 [ref=23x]
+  { 0x0000000000000000u, 0x000000000000FF00u, 0xFF, 0, { 0 }, OpRWFlags::kWrite }, // #62 [ref=1x]
+  { 0x0000000000000000u, 0x0000000000000000u, 0x07, 0, { 0 }, OpRWFlags::kWrite | OpRWFlags::kZExt | OpRWFlags::kMemBaseRW | OpRWFlags::kMemBasePostModify | OpRWFlags::kMemPhysId }, // #63 [ref=2x]
+  { 0x0000000000000000u, 0x0000000000000000u, 0x02, 0, { 0 }, OpRWFlags::kWrite | OpRWFlags::kRegPhysId | OpRWFlags::kZExt }, // #64 [ref=1x]
+  { 0x0000000000000000u, 0x0000000000000000u, 0x02, 0, { 0 }, OpRWFlags::kRead | OpRWFlags::kRegPhysId }, // #65 [ref=2x]
+  { 0x0000000000000000u, 0x0000000000000000u, 0x06, 0, { 0 }, OpRWFlags::kRead | OpRWFlags::kMemPhysId }, // #66 [ref=1x]
+  { 0x0000000000000000u, 0x000000000000000Fu, 0x01, 0, { 0 }, OpRWFlags::kWrite | OpRWFlags::kZExt | OpRWFlags::kRegPhysId }, // #67 [ref=5x]
+  { 0x0000000000000000u, 0x000000000000FFFFu, 0x00, 0, { 0 }, OpRWFlags::kWrite | OpRWFlags::kZExt | OpRWFlags::kRegPhysId }, // #68 [ref=4x]
+  { 0x0000000000000000u, 0x0000000000000007u, 0xFF, 0, { 0 }, OpRWFlags::kWrite | OpRWFlags::kZExt }, // #69 [ref=2x]
+  { 0x0000000000000000u, 0x0000000000000000u, 0x04, 0, { 0 }, OpRWFlags::kWrite | OpRWFlags::kZExt | OpRWFlags::kRegPhysId }, // #70 [ref=1x]
+  { 0x0000000000000001u, 0x0000000000000000u, 0x01, 0, { 0 }, OpRWFlags::kRead | OpRWFlags::kRegPhysId }, // #71 [ref=10x]
+  { 0x0000000000000001u, 0x0000000000000000u, 0x00, 0, { 0 }, OpRWFlags::kRead | OpRWFlags::kRegPhysId }, // #72 [ref=1x]
+  { 0x0000000000000000u, 0x0000000000000001u, 0xFF, 0, { 0 }, OpRWFlags::kWrite }, // #73 [ref=30x]
+  { 0x0000000000000000u, 0xFFFFFFFFFFFFFFFFu, 0xFF, 0, { 0 }, OpRWFlags::kWrite | OpRWFlags::kZExt }, // #74 [ref=20x]
+  { 0xFFFFFFFFFFFFFFFFu, 0xFFFFFFFFFFFFFFFFu, 0xFF, 0, { 0 }, OpRWFlags::kRW | OpRWFlags::kZExt }, // #75 [ref=7x]
+  { 0xFFFFFFFFFFFFFFFFu, 0x0000000000000000u, 0xFF, 4, { 0 }, OpRWFlags::kRead }, // #76 [ref=4x]
+  { 0xFFFFFFFFFFFFFFFFu, 0x0000000000000000u, 0xFF, 0, { 0 }, OpRWFlags::kRead | OpRWFlags::kConsecutive }, // #77 [ref=12x]
+  { 0x000000000000FFFFu, 0x0000000000000000u, 0xFF, 4, { 0 }, OpRWFlags::kRead }, // #78 [ref=2x]
+  { 0x000000000000FFFFu, 0x0000000000000000u, 0xFF, 0, { 0 }, OpRWFlags::kRead | OpRWFlags::kConsecutive }, // #79 [ref=6x]
+  { 0x0000000000000000u, 0x00000000FFFFFFFFu, 0xFF, 0, { 0 }, OpRWFlags::kWrite | OpRWFlags::kZExt }, // #80 [ref=10x]
+  { 0x00000000FFFFFFFFu, 0x0000000000000000u, 0xFF, 0, { 0 }, OpRWFlags::kRead }, // #81 [ref=16x]
+  { 0x000000000000FFF0u, 0x0000000000000000u, 0xFF, 0, { 0 }, OpRWFlags::kRead }, // #82 [ref=18x]
+  { 0x000000000000FFFCu, 0x0000000000000000u, 0xFF, 0, { 0 }, OpRWFlags::kRead }, // #83 [ref=8x]
+  { 0x0000000000000000u, 0x0000000000000000u, 0x00, 0, { 0 }, OpRWFlags::kRW | OpRWFlags::kZExt | OpRWFlags::kRegPhysId }, // #84 [ref=1x]
+  { 0x0000000000000000u, 0x00000000000000FFu, 0xFF, 2, { 0 }, OpRWFlags::kWrite | OpRWFlags::kZExt }, // #85 [ref=2x]
+  { 0x0000000000000000u, 0x0000000000000000u, 0xFF, 0, { 0 }, OpRWFlags::kWrite | OpRWFlags::kZExt | OpRWFlags::kConsecutive }  // #86 [ref=2x]
+};
+
+const InstDB::RWInfoRm InstDB::rwInfoRm[] = { // Appears to be generated (cf. the InstRWInfoTable end marker below); '#N' is the row index, '[ref=Kx]' presumably how often the row is referenced - TODO confirm.
+  { InstDB::RWInfoRm::kCategoryNone      , 0x00, 0 , 0, 0 }, // #0 [ref=1997x]
+  { InstDB::RWInfoRm::kCategoryConsistent, 0x03, 0 , InstDB::RWInfoRm::kFlagAmbiguous, 0 }, // #1 [ref=8x]
+  { InstDB::RWInfoRm::kCategoryConsistent, 0x02, 0 , 0, 0 }, // #2 [ref=204x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x02, 16, 0, 0 }, // #3 [ref=122x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x02, 8 , 0, 0 }, // #4 [ref=66x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x02, 4 , 0, 0 }, // #5 [ref=35x]
+  { InstDB::RWInfoRm::kCategoryConsistent, 0x04, 0 , 0, 0 }, // #6 [ref=300x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x01, 2 , 0, 0 }, // #7 [ref=9x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x00, 0 , 0, 0 }, // #8 [ref=63x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x03, 0 , 0, 0 }, // #9 [ref=1x]
+  { InstDB::RWInfoRm::kCategoryConsistent, 0x01, 0 , InstDB::RWInfoRm::kFlagAmbiguous, 0 }, // #10 [ref=21x]
+  { InstDB::RWInfoRm::kCategoryConsistent, 0x01, 0 , 0, 0 }, // #11 [ref=14x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x00, 8 , 0, 0 }, // #12 [ref=22x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x00, 16, 0, 0 }, // #13 [ref=21x]
+  { InstDB::RWInfoRm::kCategoryConsistent, 0x02, 0 , InstDB::RWInfoRm::kFlagAmbiguous, 0 }, // #14 [ref=22x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x02, 1 , 0, 0 }, // #15 [ref=5x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x00, 64, 0, 0 }, // #16 [ref=5x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x01, 4 , 0, 0 }, // #17 [ref=6x]
+  { InstDB::RWInfoRm::kCategoryNone      , 0x00, 0 , InstDB::RWInfoRm::kFlagAmbiguous, 0 }, // #18 [ref=26x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x00, 10, 0, 0 }, // #19 [ref=2x]
+  { InstDB::RWInfoRm::kCategoryNone      , 0x01, 0 , InstDB::RWInfoRm::kFlagAmbiguous, 0 }, // #20 [ref=5x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x00, 2 , 0, 0 }, // #21 [ref=4x]
+  { InstDB::RWInfoRm::kCategoryConsistent, 0x06, 0 , 0, 0 }, // #22 [ref=6x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x03, 1 , 0, 0 }, // #23 [ref=1x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x03, 4 , 0, 0 }, // #24 [ref=3x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x03, 8 , 0, 0 }, // #25 [ref=2x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x03, 2 , 0, 0 }, // #26 [ref=2x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x02, 2 , 0, 0 }, // #27 [ref=13x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x00, 4 , 0, 0 }, // #28 [ref=6x]
+  { InstDB::RWInfoRm::kCategoryNone      , 0x03, 0 , InstDB::RWInfoRm::kFlagAmbiguous, 0 }, // #29 [ref=1x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x03, 16, 0, 0 }, // #30 [ref=6x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x03, 8 , InstDB::RWInfoRm::kFlagMovssMovsd, 0 }, // #31 [ref=1x]
+  { InstDB::RWInfoRm::kCategoryNone      , 0x00, 0 , InstDB::RWInfoRm::kFlagMovssMovsd, 0 }, // #32 [ref=2x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x03, 4 , InstDB::RWInfoRm::kFlagMovssMovsd, 0 }, // #33 [ref=1x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x01, 1 , 0, 0 }, // #34 [ref=32x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x01, 8 , 0, 0 }, // #35 [ref=4x]
+  { InstDB::RWInfoRm::kCategoryNone      , 0x00, 0 , InstDB::RWInfoRm::kFlagPextrw, 0 }, // #36 [ref=1x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x01, 2 , InstDB::RWInfoRm::kFlagPextrw, uint32_t(CpuFeatures::X86::kSSE4_1) }, // #37 [ref=1x]
+  { InstDB::RWInfoRm::kCategoryNone      , 0x02, 0 , 0, 0 }, // #38 [ref=4x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x01, 2 , InstDB::RWInfoRm::kFlagAmbiguous, 0 }, // #39 [ref=3x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x04, 8 , 0, 0 }, // #40 [ref=35x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x04, 2 , 0, 0 }, // #41 [ref=30x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x04, 4 , 0, 0 }, // #42 [ref=42x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x00, 32, 0, 0 }, // #43 [ref=4x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x02, 8 , InstDB::RWInfoRm::kFlagAmbiguous, 0 }, // #44 [ref=1x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x02, 4 , InstDB::RWInfoRm::kFlagAmbiguous, 0 }, // #45 [ref=1x]
+  { InstDB::RWInfoRm::kCategoryHalf      , 0x02, 0 , 0, 0 }, // #46 [ref=19x]
+  { InstDB::RWInfoRm::kCategoryQuarter   , 0x02, 0 , 0, 0 }, // #47 [ref=9x]
+  { InstDB::RWInfoRm::kCategoryHalf      , 0x01, 0 , 0, 0 }, // #48 [ref=10x]
+  { InstDB::RWInfoRm::kCategoryConsistent, 0x04, 0 , InstDB::RWInfoRm::kFlagAmbiguous, 0 }, // #49 [ref=6x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x04, 16, 0, 0 }, // #50 [ref=27x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x02, 64, 0, 0 }, // #51 [ref=6x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x01, 16, 0, 0 }, // #52 [ref=6x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x01, 32, 0, 0 }, // #53 [ref=4x]
+  { InstDB::RWInfoRm::kCategoryConsistent, 0x0C, 0 , 0, 0 }, // #54 [ref=15x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x0C, 8 , 0, 0 }, // #55 [ref=4x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x0C, 4 , 0, 0 }, // #56 [ref=4x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x04, 32, 0, 0 }, // #57 [ref=6x]
+  { InstDB::RWInfoRm::kCategoryConsistent, 0x03, 0 , 0, 0 }, // #58 [ref=13x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x03, 8 , InstDB::RWInfoRm::kFlagAmbiguous, 0 }, // #59 [ref=1x]
+  { InstDB::RWInfoRm::kCategoryConsistent, 0x08, 0 , 0, 0 }, // #60 [ref=2x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x04, 1 , 0, 0 }, // #61 [ref=1x]
+  { InstDB::RWInfoRm::kCategoryQuarter   , 0x01, 0 , 0, 0 }, // #62 [ref=6x]
+  { InstDB::RWInfoRm::kCategoryEighth    , 0x01, 0 , 0, 0 }, // #63 [ref=3x]
+  { InstDB::RWInfoRm::kCategoryEighth    , 0x02, 0 , 0, 0 }, // #64 [ref=2x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x0C, 16, 0, 0 }, // #65 [ref=1x]
+  { InstDB::RWInfoRm::kCategoryFixed     , 0x06, 16, 0, 0 }, // #66 [ref=12x]
+  { InstDB::RWInfoRm::kCategoryConsistent, 0x06, 0 , InstDB::RWInfoRm::kFlagFeatureIfRMI, uint32_t(CpuFeatures::X86::kAVX512_F) }, // #67 [ref=5x]
+  { InstDB::RWInfoRm::kCategoryConsistent, 0x02, 0 , InstDB::RWInfoRm::kFlagFeatureIfRMI, uint32_t(CpuFeatures::X86::kAVX512_BW) }, // #68 [ref=2x]
+  { InstDB::RWInfoRm::kCategoryConsistent, 0x06, 0 , InstDB::RWInfoRm::kFlagFeatureIfRMI, uint32_t(CpuFeatures::X86::kAVX512_BW) }  // #69 [ref=3x]
+};
+// ----------------------------------------------------------------------------
+// ${InstRWInfoTable:End}
+
+// x86::InstDB - Tests
+// ===================
+
+#if defined(ASMJIT_TEST)
+UNIT(x86_inst_db) {
+  INFO("Checking validity of Inst enums");
+
+  // The REX and EVEX instruction options are expected at these exact bit positions.
+  EXPECT(0x40000000u == uint32_t(InstOptions::kX86_Rex ), "REX prefix must be at 0x40000000");
+  EXPECT(0x00001000u == uint32_t(InstOptions::kX86_Evex), "EVEX prefix must be at 0x00001000");
+
+  // These bits may be merged into one REX prefix, so each option bit must equal its Opcode counterpart.
+  EXPECT(uint32_t(Opcode::kB) == uint32_t(InstOptions::kX86_OpCodeB), "Opcode::kB must match InstOptions::kX86_OpCodeB");
+  EXPECT(uint32_t(Opcode::kX) == uint32_t(InstOptions::kX86_OpCodeX), "Opcode::kX must match InstOptions::kX86_OpCodeX");
+  EXPECT(uint32_t(Opcode::kR) == uint32_t(InstOptions::kX86_OpCodeR), "Opcode::kR must match InstOptions::kX86_OpCodeR");
+  EXPECT(uint32_t(Opcode::kW) == uint32_t(InstOptions::kX86_OpCodeW), "Opcode::kW must match InstOptions::kX86_OpCodeW");
+
+  uint32_t rexWithRB = 0x40 | (Opcode::kR >> Opcode::kREX_Shift) | (Opcode::kB >> Opcode::kREX_Shift);
+  uint32_t rexWithRW = 0x40 | (Opcode::kR >> Opcode::kREX_Shift) | (Opcode::kW >> Opcode::kREX_Shift);
+
+  EXPECT(0x45 == rexWithRB, "Opcode::kR|B must form a valid REX prefix (0x45) if combined with 0x40");
+  EXPECT(0x4C == rexWithRW, "Opcode::kR|W must form a valid REX prefix (0x4C) if combined with 0x40");
+}
+#endif
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // !ASMJIT_NO_X86
diff --git a/lib/lepton/asmjit/x86/x86instdb.h b/lib/lepton/asmjit/x86/x86instdb.h
new file mode 100644
index 0000000000..87a286c282
--- /dev/null
+++ b/lib/lepton/asmjit/x86/x86instdb.h
@@ -0,0 +1,563 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_X86_X86INSTDB_H_INCLUDED
+#define ASMJIT_X86_X86INSTDB_H_INCLUDED
+
+#include "../x86/x86globals.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(x86)
+
+//! \addtogroup asmjit_x86
+//! \{
+
+//! Instruction database (X86).
+namespace InstDB {
+
+//! Describes which operation mode is supported by an instruction.
+enum class Mode : uint8_t {
+  //! Invalid mode.
+  kNone = 0x00u,
+  //! X86 mode supported.
+  kX86 = 0x01u,
+  //! X64 mode supported.
+  kX64 = 0x02u,
+  //! Both X86 and X64 modes supported (`kX86 | kX64`).
+  kAny = 0x03u
+};
+ASMJIT_DEFINE_ENUM_FLAGS(Mode)
+
+//! Converts architecture to operation mode, see \ref Mode (\ref Mode::kNone if `arch` is neither X86 nor X64).
+static constexpr Mode modeFromArch(Arch arch) noexcept {
+  return arch == Arch::kX86 ? Mode::kX86 :
+         arch == Arch::kX64 ? Mode::kX64 : Mode::kNone;
+}
+
+//! Operand signature flags used by \ref OpSignature.
+enum class OpFlags : uint64_t {
+  //! No operand flags.
+  kNone = 0u,
+
+  kRegGpbLo        = 0x0000000000000001u, //!< Operand can be low 8-bit GPB register.
+  kRegGpbHi        = 0x0000000000000002u, //!< Operand can be high 8-bit GPB register.
+  kRegGpw          = 0x0000000000000004u, //!< Operand can be 16-bit GPW register.
+  kRegGpd          = 0x0000000000000008u, //!< Operand can be 32-bit GPD register.
+  kRegGpq          = 0x0000000000000010u, //!< Operand can be 64-bit GPQ register.
+  kRegXmm          = 0x0000000000000020u, //!< Operand can be 128-bit XMM register.
+  kRegYmm          = 0x0000000000000040u, //!< Operand can be 256-bit YMM register.
+  kRegZmm          = 0x0000000000000080u, //!< Operand can be 512-bit ZMM register.
+  kRegMm           = 0x0000000000000100u, //!< Operand can be 64-bit MM register.
+  kRegKReg         = 0x0000000000000200u, //!< Operand can be 64-bit K register.
+  kRegSReg         = 0x0000000000000400u, //!< Operand can be SReg (segment register).
+  kRegCReg         = 0x0000000000000800u, //!< Operand can be CReg (control register).
+  kRegDReg         = 0x0000000000001000u, //!< Operand can be DReg (debug register).
+  kRegSt           = 0x0000000000002000u, //!< Operand can be 80-bit ST register (X87).
+  kRegBnd          = 0x0000000000004000u, //!< Operand can be 128-bit BND register.
+  kRegTmm          = 0x0000000000008000u, //!< Operand can be 0..8192-bit TMM register.
+  kRegMask         = 0x000000000000FFFFu, //!< Mask of all possible register types.
+
+  kMemUnspecified  = 0x0000000000040000u, //!< Operand can be a scalar memory pointer without size.
+  kMem8            = 0x0000000000080000u, //!< Operand can be an 8-bit memory pointer.
+  kMem16           = 0x0000000000100000u, //!< Operand can be a 16-bit memory pointer.
+  kMem32           = 0x0000000000200000u, //!< Operand can be a 32-bit memory pointer.
+  kMem48           = 0x0000000000400000u, //!< Operand can be a 48-bit memory pointer (FAR pointers only).
+  kMem64           = 0x0000000000800000u, //!< Operand can be a 64-bit memory pointer.
+  kMem80           = 0x0000000001000000u, //!< Operand can be an 80-bit memory pointer.
+  kMem128          = 0x0000000002000000u, //!< Operand can be a 128-bit memory pointer.
+  kMem256          = 0x0000000004000000u, //!< Operand can be a 256-bit memory pointer.
+  kMem512          = 0x0000000008000000u, //!< Operand can be a 512-bit memory pointer.
+  kMem1024         = 0x0000000010000000u, //!< Operand can be a 1024-bit memory pointer.
+  kMemMask         = 0x000000001FFC0000u, //!< Mask of all possible scalar memory types.
+
+  kVm32x           = 0x0000000040000000u, //!< Operand can be a vm32x (vector) pointer.
+  kVm32y           = 0x0000000080000000u, //!< Operand can be a vm32y (vector) pointer.
+  kVm32z           = 0x0000000100000000u, //!< Operand can be a vm32z (vector) pointer.
+  kVm64x           = 0x0000000200000000u, //!< Operand can be a vm64x (vector) pointer.
+  kVm64y           = 0x0000000400000000u, //!< Operand can be a vm64y (vector) pointer.
+  kVm64z           = 0x0000000800000000u, //!< Operand can be a vm64z (vector) pointer.
+  kVmMask          = 0x0000000FC0000000u, //!< Mask of all possible vector memory types.
+
+  kImmI4           = 0x0000001000000000u, //!< Operand can be signed 4-bit immediate.
+  kImmU4           = 0x0000002000000000u, //!< Operand can be unsigned 4-bit immediate.
+  kImmI8           = 0x0000004000000000u, //!< Operand can be signed 8-bit immediate.
+  kImmU8           = 0x0000008000000000u, //!< Operand can be unsigned 8-bit immediate.
+  kImmI16          = 0x0000010000000000u, //!< Operand can be signed 16-bit immediate.
+  kImmU16          = 0x0000020000000000u, //!< Operand can be unsigned 16-bit immediate.
+  kImmI32          = 0x0000040000000000u, //!< Operand can be signed 32-bit immediate.
+  kImmU32          = 0x0000080000000000u, //!< Operand can be unsigned 32-bit immediate.
+  kImmI64          = 0x0000100000000000u, //!< Operand can be signed 64-bit immediate.
+  kImmU64          = 0x0000200000000000u, //!< Operand can be unsigned 64-bit immediate.
+  kImmMask         = 0x00003FF000000000u, //!< Mask of all immediate types.
+
+  kRel8            = 0x0000400000000000u, //!< Operand can be relative 8-bit  displacement.
+  kRel32           = 0x0000800000000000u, //!< Operand can be relative 32-bit displacement.
+  kRelMask         = 0x0000C00000000000u, //!< Mask of all relative displacement types.
+
+  kFlagMemBase     = 0x0001000000000000u, //!< Flag: Only memory base is allowed (no index, no offset).
+  kFlagMemDs       = 0x0002000000000000u, //!< Flag: Implicit memory operand's DS segment.
+  kFlagMemEs       = 0x0004000000000000u, //!< Flag: Implicit memory operand's ES segment.
+
+  kFlagMib         = 0x0008000000000000u, //!< Flag: Operand is MIB (base+index) pointer.
+  kFlagTMem        = 0x0010000000000000u, //!< Flag: Operand is TMEM (sib_mem), AMX memory pointer.
+
+  kFlagImplicit    = 0x0080000000000000u, //!< Flag: Operand is implicit.
+  kFlagMask        = 0x009F000000000000u, //!< Mask of all flags.
+
+  //! Mask of all register, scalar memory, vector memory, immediate, and relative displacement types (no `kFlag*` bits).
+  kOpMask          = kRegMask | kMemMask | kVmMask | kImmMask | kRelMask
+};
+ASMJIT_DEFINE_ENUM_FLAGS(OpFlags)
+
+//! Operand signature.
+//!
+//! Contains all possible operand combinations and memory size information in `_flags` (see \ref OpFlags), and a
+//! physical register mask in `_regMask`.
+struct OpSignature {
+  //! \name Members
+  //! \{
+
+  uint64_t _flags : 56;   //!< Operand flags, see \ref OpFlags.
+  uint64_t _regMask : 8;  //!< Physical register mask, see \ref regMask().
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns operand signature flags.
+  inline OpFlags flags() const noexcept { return (OpFlags)_flags; }
+
+  //! Tests whether at least one bit of the given `flag` (which may be a mask of multiple flags) is set.
+  inline bool hasFlag(OpFlags flag) const noexcept { return (_flags & uint64_t(flag)) != 0; }
+
+  //! Tests whether this signature contains at least one register operand of any type.
+  inline bool hasReg() const noexcept { return hasFlag(OpFlags::kRegMask); }
+  //! Tests whether this signature contains at least one scalar memory operand of any type.
+  inline bool hasMem() const noexcept { return hasFlag(OpFlags::kMemMask); }
+  //! Tests whether this signature contains at least one vector memory operand of any type.
+  inline bool hasVm() const noexcept { return hasFlag(OpFlags::kVmMask); }
+  //! Tests whether this signature contains at least one immediate operand of any type.
+  inline bool hasImm() const noexcept { return hasFlag(OpFlags::kImmMask); }
+  //! Tests whether this signature contains at least one relative displacement operand of any type.
+  inline bool hasRel() const noexcept { return hasFlag(OpFlags::kRelMask); }
+
+  //! Tests whether the operand is implicit.
+  inline bool isImplicit() const noexcept { return hasFlag(OpFlags::kFlagImplicit); }
+
+  //! Returns a physical register mask.
+  inline RegMask regMask() const noexcept { return _regMask; }
+
+  //! \}
+};
+
+ASMJIT_VARAPI const OpSignature _opSignatureTable[];
+
+//! Instruction signature.
+//!
+//! Contains a sequence of operands' combinations and other metadata that defines a single instruction. This data is
+//! used by the instruction validator.
+struct InstSignature {
+  //! \name Members
+  //! \{
+
+  //! Count of operands in `_opSignatureIndexes` (0..6).
+  uint8_t _opCount : 3;
+  //! Architecture modes supported (X86 / X64), see \ref Mode.
+  uint8_t _mode : 2;
+  //! Number of implicit operands.
+  uint8_t _implicitOpCount : 3;
+  //! Reserved for future use.
+  uint8_t _reserved;
+  //! Indexes to `OpSignature` table.
+  uint8_t _opSignatureIndexes[Globals::kMaxOpCount];
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns instruction operation mode.
+  inline Mode mode() const noexcept { return (Mode)_mode; }
+  //! Tests whether the instruction supports the given operating mode (any bit of `mode` matches).
+  inline bool supportsMode(Mode mode) const noexcept { return (uint8_t(_mode) & uint8_t(mode)) != 0; }
+
+  //! Returns the number of operands of this signature.
+  inline uint32_t opCount() const noexcept { return _opCount; }
+  //! Returns the number of implicit operands this signature has.
+  inline uint32_t implicitOpCount() const noexcept { return _implicitOpCount; }
+  //! Tests whether this instruction signature has at least one implicit operand.
+  inline bool hasImplicitOperands() const noexcept { return _implicitOpCount != 0; }
+
+  //! Returns indexes to \ref _opSignatureTable for each operand of the instruction.
+  //!
+  //! \note The returned array always provides indexes for all operands (see \ref Globals::kMaxOpCount) even if the
+  //! instruction provides fewer operands. Undefined operands always have an index of zero.
+  inline const uint8_t* opSignatureIndexes() const noexcept { return _opSignatureIndexes; }
+
+  //! Returns index to \ref _opSignatureTable, corresponding to the requested operand `index` of the instruction.
+  inline uint8_t opSignatureIndex(size_t index) const noexcept {
+    ASMJIT_ASSERT(index < Globals::kMaxOpCount);
+    return _opSignatureIndexes[index];
+  }
+
+  //! Returns \ref OpSignature corresponding to the requested operand `index` of the instruction.
+  inline const OpSignature& opSignature(size_t index) const noexcept {
+    ASMJIT_ASSERT(index < Globals::kMaxOpCount);
+    return _opSignatureTable[_opSignatureIndexes[index]];
+  }
+
+  //! \}
+};
+
+ASMJIT_VARAPI const InstSignature _instSignatureTable[];
+
+//! Instruction flags.
+//!
+//! Details about instruction encoding, operation, features, and some limitations.
+enum class InstFlags : uint32_t {
+  //! No flags.
+  kNone = 0x00000000u,
+
+  // Instruction Family
+  // ------------------
+  //
+  // Instruction family information.
+
+  //! Instruction that accesses FPU registers.
+  kFpu = 0x00000100u,
+  //! Instruction that accesses MMX registers (including 3DNOW and GEODE) and EMMS.
+  kMmx = 0x00000200u,
+  //! Instruction that accesses XMM registers (SSE, AVX, AVX512).
+  kVec = 0x00000400u,
+
+  // FPU Flags
+  // ---------
+  //
+  // Used to tell the encoder which memory operand sizes are encodable.
+
+  //! FPU instruction can address `word_ptr` (shares its value with `kFpuM80`).
+  kFpuM16 = 0x00000800u,
+  //! FPU instruction can address `dword_ptr`.
+  kFpuM32 = 0x00001000u,
+  //! FPU instruction can address `qword_ptr`.
+  kFpuM64 = 0x00002000u,
+  //! FPU instruction can address `tword_ptr` (shares its value with `kFpuM16`).
+  kFpuM80 = 0x00000800u,
+
+  // Prefixes and Encoding Flags
+  // ---------------------------
+  //
+  // These describe optional X86 prefixes that can be used to change the instruction's operation.
+
+  //! Instruction can be prefixed with the REP (REPE) or REPNE prefix.
+  kRep = 0x00004000u,
+  //! Rep prefix is accepted, but it has no effect other than being emitted with the instruction (as an extra byte).
+  kRepIgnored = 0x00008000u,
+  //! Instruction can be prefixed with the LOCK prefix.
+  kLock = 0x00010000u,
+  //! Instruction can be prefixed with the XACQUIRE prefix.
+  kXAcquire = 0x00020000u,
+  //! Instruction can be prefixed with the XRELEASE prefix.
+  kXRelease = 0x00040000u,
+  //! Instruction uses MIB (BNDLDX|BNDSTX) to encode two registers.
+  kMib = 0x00080000u,
+  //! Instruction uses VSIB instead of legacy SIB.
+  kVsib = 0x00100000u,
+  //! Instruction uses TSIB (or SIB_MEM) encoding (MODRM followed by SIB).
+  kTsib = 0x00200000u,
+
+  // If both `kVex` and `kEvex` flags are specified it means that the instruction can be encoded by either
+  // VEX or EVEX prefix. In that case AsmJit checks global options and also instruction options to decide
+  // whether to emit VEX or EVEX prefix.
+
+  //! Instruction can be encoded by VEX|XOP (AVX|AVX2|BMI|XOP|...).
+  kVex = 0x00400000u,
+  //! Instruction can be encoded by EVEX (AVX512).
+  kEvex = 0x00800000u,
+  //! EVEX encoding is preferred over VEX encoding (AVX512_VNNI vs AVX_VNNI).
+  kPreferEvex = 0x01000000u,
+  //! EVEX and VEX signatures are compatible.
+  kEvexCompat = 0x02000000u,
+  //! EVEX instruction requires K register in the first operand (compare instructions).
+  kEvexKReg = 0x04000000u,
+  //! EVEX instruction requires two operands and K register as a selector (gather instructions).
+  kEvexTwoOp = 0x08000000u,
+  //! VEX instruction that can be transformed to a compatible EVEX instruction.
+  kEvexTransformable = 0x10000000u,
+
+  // Other Flags
+  // -----------
+
+  //! Instruction uses consecutive registers.
+  //!
+  //! Used by V4FMADDPS, V4FMADDSS, V4FNMADDPS, V4FNMADDSS, VP4DPWSSD, VP4DPWSSDS, VP2INTERSECTD, and VP2INTERSECTQ
+  //! instructions.
+  kConsecutiveRegs = 0x20000000u
+};
+ASMJIT_DEFINE_ENUM_FLAGS(InstFlags)
+
+//! AVX-512 flags.
+enum class Avx512Flags : uint32_t {
+  //! No AVX-512 flags.
+  kNone = 0,
+
+  //! Internally used in tables, has no meaning (same value as `kNone`).
+  k_ = 0x00000000u,
+  //! Supports masking {k1..k7}.
+  kK = 0x00000001u,
+  //! Supports zeroing {z}, must be used together with `kK`.
+  kZ = 0x00000002u,
+  //! Supports 'embedded-rounding' {er} with implicit {sae}.
+  kER = 0x00000004u,
+  //! Supports 'suppress-all-exceptions' {sae}.
+  kSAE = 0x00000008u,
+  //! Supports 16-bit broadcast 'b16'.
+  kB16 = 0x00000010u,
+  //! Supports 32-bit broadcast 'b32'.
+  kB32 = 0x00000020u,
+  //! Supports 64-bit broadcast 'b64'.
+  kB64 = 0x00000040u,
+  //! Operates on a vector of consecutive registers (AVX512_4FMAPS and AVX512_4VNNIW).
+  kT4X = 0x00000080u,
+
+  //! Implicit zeroing if {k} masking is used. Using {z} is not valid in this case as it's implicit.
+  kImplicitZ = 0x00000100,
+};
+ASMJIT_DEFINE_ENUM_FLAGS(Avx512Flags)
+
+//! Instruction common information.
+//!
+//! Aggregated information shared across one or more instructions.
+struct CommonInfo {
+  //! Instruction flags, see \ref InstFlags.
+  uint32_t _flags;
+  //! AVX-512 flags, see \ref Avx512Flags.
+  uint32_t _avx512Flags : 11;
+  //! First `InstSignature` entry in the database.
+  uint32_t _iSignatureIndex : 11;
+  //! Number of relevant `InstSignature` entries.
+  uint32_t _iSignatureCount : 5;
+  //! Instruction control flow category, see \ref InstControlFlow.
+  uint32_t _controlFlow : 3;
+  //! Specifies what happens if all source operands share the same register, see \ref InstSameRegHint.
+  uint32_t _sameRegHint : 2;
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns instruction flags.
+  inline InstFlags flags() const noexcept { return (InstFlags)_flags; }
+  //! Tests whether the instruction has a `flag`.
+  inline bool hasFlag(InstFlags flag) const noexcept { return Support::test(_flags, flag); }
+
+  //! Returns instruction AVX-512 flags.
+  inline Avx512Flags avx512Flags() const noexcept { return (Avx512Flags)_avx512Flags; }
+  //! Tests whether the instruction has an AVX-512 `flag`.
+  inline bool hasAvx512Flag(Avx512Flags flag) const noexcept { return Support::test(_avx512Flags, flag); }
+
+  //! Tests whether the instruction is FPU instruction.
+  inline bool isFpu() const noexcept { return hasFlag(InstFlags::kFpu); }
+  //! Tests whether the instruction is MMX/3DNOW instruction that accesses MMX registers (includes EMMS and FEMMS).
+  inline bool isMmx() const noexcept { return hasFlag(InstFlags::kMmx); }
+  //! Tests whether the instruction is SSE|AVX|AVX512 instruction that accesses XMM|YMM|ZMM registers.
+  inline bool isVec() const noexcept { return hasFlag(InstFlags::kVec); }
+  //! Tests whether the instruction is SSE+ (SSE4.2, AES, SHA included) instruction that accesses XMM registers.
+  inline bool isSse() const noexcept { return (flags() & (InstFlags::kVec | InstFlags::kVex | InstFlags::kEvex)) == InstFlags::kVec; }
+  //! Tests whether the instruction is AVX+ (FMA included) instruction that accesses XMM|YMM|ZMM registers.
+  inline bool isAvx() const noexcept { return isVec() && isVexOrEvex(); }
+
+  //! Tests whether the instruction can be prefixed with LOCK prefix.
+  inline bool hasLockPrefix() const noexcept { return hasFlag(InstFlags::kLock); }
+  //! Tests whether the instruction can be prefixed with REP (REPE|REPZ) prefix.
+  inline bool hasRepPrefix() const noexcept { return hasFlag(InstFlags::kRep); }
+  //! Tests whether the instruction can be prefixed with XACQUIRE prefix.
+  inline bool hasXAcquirePrefix() const noexcept { return hasFlag(InstFlags::kXAcquire); }
+  //! Tests whether the instruction can be prefixed with XRELEASE prefix.
+  inline bool hasXReleasePrefix() const noexcept { return hasFlag(InstFlags::kXRelease); }
+
+  //! Tests whether the rep prefix is supported by the instruction, but ignored (has no effect).
+  inline bool isRepIgnored() const noexcept { return hasFlag(InstFlags::kRepIgnored); }
+  //! Tests whether the instruction uses MIB.
+  inline bool isMibOp() const noexcept { return hasFlag(InstFlags::kMib); }
+  //! Tests whether the instruction uses VSIB.
+  inline bool isVsibOp() const noexcept { return hasFlag(InstFlags::kVsib); }
+  //! Tests whether the instruction uses TSIB (AMX, instruction requires MOD+SIB).
+  inline bool isTsibOp() const noexcept { return hasFlag(InstFlags::kTsib); }
+  //! Tests whether the instruction uses VEX (can be set together with EVEX if both are encodable).
+  inline bool isVex() const noexcept { return hasFlag(InstFlags::kVex); }
+  //! Tests whether the instruction uses EVEX (can be set together with VEX if both are encodable).
+  inline bool isEvex() const noexcept { return hasFlag(InstFlags::kEvex); }
+  //! Tests whether the instruction uses either VEX or EVEX (or both, if both are encodable).
+  inline bool isVexOrEvex() const noexcept { return hasFlag(InstFlags::kVex | InstFlags::kEvex); }
+
+  //! Tests whether the instruction should prefer EVEX prefix instead of VEX prefix.
+  inline bool preferEvex() const noexcept { return hasFlag(InstFlags::kPreferEvex); }
+
+  inline bool isEvexCompatible() const noexcept { return hasFlag(InstFlags::kEvexCompat); }
+  inline bool isEvexKRegOnly() const noexcept { return hasFlag(InstFlags::kEvexKReg); }
+  inline bool isEvexTwoOpOnly() const noexcept { return hasFlag(InstFlags::kEvexTwoOp); }
+  inline bool isEvexTransformable() const noexcept { return hasFlag(InstFlags::kEvexTransformable); }
+
+  //! Tests whether the instruction supports AVX512 masking {k}.
+  inline bool hasAvx512K() const noexcept { return hasAvx512Flag(Avx512Flags::kK); }
+  //! Tests whether the instruction supports AVX512 zeroing {k}{z}.
+  inline bool hasAvx512Z() const noexcept { return hasAvx512Flag(Avx512Flags::kZ); }
+  //! Tests whether the instruction supports AVX512 embedded-rounding {er}.
+  inline bool hasAvx512ER() const noexcept { return hasAvx512Flag(Avx512Flags::kER); }
+  //! Tests whether the instruction supports AVX512 suppress-all-exceptions {sae}.
+  inline bool hasAvx512SAE() const noexcept { return hasAvx512Flag(Avx512Flags::kSAE); }
+  //! Tests whether the instruction supports AVX512 broadcast (16-bit, 32-bit, or 64-bit).
+  inline bool hasAvx512B() const noexcept { return hasAvx512Flag(Avx512Flags::kB16 | Avx512Flags::kB32 | Avx512Flags::kB64); }
+  //! Tests whether the instruction supports AVX512 broadcast (16-bit).
+  inline bool hasAvx512B16() const noexcept { return hasAvx512Flag(Avx512Flags::kB16); }
+  //! Tests whether the instruction supports AVX512 broadcast (32-bit).
+  inline bool hasAvx512B32() const noexcept { return hasAvx512Flag(Avx512Flags::kB32); }
+  //! Tests whether the instruction supports AVX512 broadcast (64-bit).
+  inline bool hasAvx512B64() const noexcept { return hasAvx512Flag(Avx512Flags::kB64); }
+
+  //! Returns the size of the broadcast in bytes - either 2, 4, or 8, or 0 if broadcast is not supported.
+  inline uint32_t broadcastSize() const noexcept {
+    constexpr uint32_t kShift = Support::ConstCTZ<uint32_t(Avx512Flags::kB16)>::value;
+    return (uint32_t(_avx512Flags) & uint32_t(Avx512Flags::kB16 | Avx512Flags::kB32 | Avx512Flags::kB64)) >> (kShift - 1); // 0x10/0x20/0x40 -> 2/4/8.
+  }
+
+  inline uint32_t signatureIndex() const noexcept { return _iSignatureIndex; }
+  inline uint32_t signatureCount() const noexcept { return _iSignatureCount; }
+
+  inline const InstSignature* signatureData() const noexcept { return _instSignatureTable + _iSignatureIndex; }
+  inline const InstSignature* signatureEnd() const noexcept { return _instSignatureTable + _iSignatureIndex + _iSignatureCount; }
+
+  //! Returns a control flow category of the instruction.
+  inline InstControlFlow controlFlow() const noexcept { return (InstControlFlow)_controlFlow; }
+
+  //! Returns a hint that can be used when both inputs are the same register.
+  inline InstSameRegHint sameRegHint() const noexcept { return (InstSameRegHint)_sameRegHint; }
+
+  //! \}
+};
+
+ASMJIT_VARAPI const CommonInfo _commonInfoTable[];
+
+//! Instruction information.
+struct InstInfo {
+  //! Index to \ref _nameData.
+  uint32_t _nameDataIndex : 14;
+  //! Index to \ref _commonInfoTable.
+  uint32_t _commonInfoIndex : 10;
+  //! Index to \ref _additionalInfoTable.
+  uint32_t _additionalInfoIndex : 8;
+
+  //! Instruction encoding (internal encoding identifier used by \ref Assembler).
+  uint8_t _encoding;
+  //! Main opcode value (0..255).
+  uint8_t _mainOpcodeValue;
+  //! Index to \ref _mainOpcodeTable that is combined with \ref _mainOpcodeValue to form the final opcode.
+  uint8_t _mainOpcodeIndex;
+  //! Index to \ref _altOpcodeTable that contains a full alternative opcode.
+  uint8_t _altOpcodeIndex;
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns common information, see \ref CommonInfo.
+  inline const CommonInfo& commonInfo() const noexcept { return _commonInfoTable[_commonInfoIndex]; }
+
+  //! Returns instruction flags, see \ref InstFlags.
+  inline InstFlags flags() const noexcept { return commonInfo().flags(); }
+  //! Tests whether the instruction has flag `flag`, see \ref InstFlags.
+  inline bool hasFlag(InstFlags flag) const noexcept { return commonInfo().hasFlag(flag); }
+
+  //! Returns instruction AVX-512 flags, see \ref Avx512Flags.
+  inline Avx512Flags avx512Flags() const noexcept { return commonInfo().avx512Flags(); }
+  //! Tests whether the instruction has an AVX-512 `flag`, see \ref Avx512Flags.
+  inline bool hasAvx512Flag(Avx512Flags flag) const noexcept { return commonInfo().hasAvx512Flag(flag); }
+
+  //! Tests whether the instruction is FPU instruction.
+  inline bool isFpu() const noexcept { return commonInfo().isFpu(); }
+  //! Tests whether the instruction is MMX/3DNOW instruction that accesses MMX registers (includes EMMS and FEMMS).
+  inline bool isMmx() const noexcept { return commonInfo().isMmx(); }
+  //! Tests whether the instruction is SSE|AVX|AVX512 instruction that accesses XMM|YMM|ZMM registers.
+  inline bool isVec() const noexcept { return commonInfo().isVec(); }
+  //! Tests whether the instruction is SSE+ (SSE4.2, AES, SHA included) instruction that accesses XMM registers.
+  inline bool isSse() const noexcept { return commonInfo().isSse(); }
+  //! Tests whether the instruction is AVX+ (FMA included) instruction that accesses XMM|YMM|ZMM registers.
+  inline bool isAvx() const noexcept { return commonInfo().isAvx(); }
+
+  //! Tests whether the instruction can be prefixed with LOCK prefix.
+  inline bool hasLockPrefix() const noexcept { return commonInfo().hasLockPrefix(); }
+  //! Tests whether the instruction can be prefixed with REP (REPE|REPZ) prefix.
+  inline bool hasRepPrefix() const noexcept { return commonInfo().hasRepPrefix(); }
+  //! Tests whether the instruction can be prefixed with XACQUIRE prefix.
+  inline bool hasXAcquirePrefix() const noexcept { return commonInfo().hasXAcquirePrefix(); }
+  //! Tests whether the instruction can be prefixed with XRELEASE prefix.
+  inline bool hasXReleasePrefix() const noexcept { return commonInfo().hasXReleasePrefix(); }
+
+  //! Tests whether the rep prefix is supported by the instruction, but ignored (has no effect).
+  inline bool isRepIgnored() const noexcept { return commonInfo().isRepIgnored(); }
+  //! Tests whether the instruction uses MIB.
+  inline bool isMibOp() const noexcept { return hasFlag(InstFlags::kMib); }
+  //! Tests whether the instruction uses VSIB.
+  inline bool isVsibOp() const noexcept { return hasFlag(InstFlags::kVsib); }
+  //! Tests whether the instruction uses VEX (can be set together with EVEX if both are encodable).
+  inline bool isVex() const noexcept { return hasFlag(InstFlags::kVex); }
+  //! Tests whether the instruction uses EVEX (can be set together with VEX if both are encodable).
+  inline bool isEvex() const noexcept { return hasFlag(InstFlags::kEvex); }
+  //! Tests whether the instruction uses either VEX or EVEX (or both, if both are encodable).
+  inline bool isVexOrEvex() const noexcept { return hasFlag(InstFlags::kVex | InstFlags::kEvex); }
+
+  inline bool isEvexCompatible() const noexcept { return hasFlag(InstFlags::kEvexCompat); }
+  inline bool isEvexKRegOnly() const noexcept { return hasFlag(InstFlags::kEvexKReg); }
+  inline bool isEvexTwoOpOnly() const noexcept { return hasFlag(InstFlags::kEvexTwoOp); }
+  inline bool isEvexTransformable() const noexcept { return hasFlag(InstFlags::kEvexTransformable); }
+
+  //! Tests whether the instruction supports AVX512 masking {k}.
+  inline bool hasAvx512K() const noexcept { return hasAvx512Flag(Avx512Flags::kK); }
+  //! Tests whether the instruction supports AVX512 zeroing {k}{z}.
+  inline bool hasAvx512Z() const noexcept { return hasAvx512Flag(Avx512Flags::kZ); }
+  //! Tests whether the instruction supports AVX512 embedded-rounding {er}.
+  inline bool hasAvx512ER() const noexcept { return hasAvx512Flag(Avx512Flags::kER); }
+  //! Tests whether the instruction supports AVX512 suppress-all-exceptions {sae}.
+  inline bool hasAvx512SAE() const noexcept { return hasAvx512Flag(Avx512Flags::kSAE); }
+  //! Tests whether the instruction supports AVX512 broadcast (16-bit, 32-bit, or 64-bit).
+  inline bool hasAvx512B() const noexcept { return hasAvx512Flag(Avx512Flags::kB16 | Avx512Flags::kB32 | Avx512Flags::kB64); }
+  //! Tests whether the instruction supports AVX512 broadcast (16-bit).
+  inline bool hasAvx512B16() const noexcept { return hasAvx512Flag(Avx512Flags::kB16); }
+  //! Tests whether the instruction supports AVX512 broadcast (32-bit).
+  inline bool hasAvx512B32() const noexcept { return hasAvx512Flag(Avx512Flags::kB32); }
+  //! Tests whether the instruction supports AVX512 broadcast (64-bit).
+  inline bool hasAvx512B64() const noexcept { return hasAvx512Flag(Avx512Flags::kB64); }
+
+  //! Returns a control flow category of the instruction.
+  inline InstControlFlow controlFlow() const noexcept { return commonInfo().controlFlow(); }
+  //! Returns a hint that can be used when both inputs are the same register.
+  inline InstSameRegHint sameRegHint() const noexcept { return commonInfo().sameRegHint(); }
+
+  inline uint32_t signatureIndex() const noexcept { return commonInfo().signatureIndex(); }
+  inline uint32_t signatureCount() const noexcept { return commonInfo().signatureCount(); }
+
+  inline const InstSignature* signatureData() const noexcept { return commonInfo().signatureData(); }
+  inline const InstSignature* signatureEnd() const noexcept { return commonInfo().signatureEnd(); }
+
+  //! \}
+};
+
+ASMJIT_VARAPI const InstInfo _instInfoTable[];
+
+static inline const InstInfo& infoById(InstId instId) noexcept {
+  ASMJIT_ASSERT(Inst::isDefinedId(instId)); // `instId` is used below as a direct index into `_instInfoTable`.
+  return _instInfoTable[instId];
+}
+
+//! \cond INTERNAL
+static_assert(sizeof(OpSignature) == 8, "InstDB::OpSignature must be 8 bytes long");
+//! \endcond
+
+} // {InstDB}
+
+//! \}
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // ASMJIT_X86_X86INSTDB_H_INCLUDED
diff --git a/lib/lepton/asmjit/x86/x86instdb_p.h b/lib/lepton/asmjit/x86/x86instdb_p.h
new file mode 100644
index 0000000000..b8e12e16b4
--- /dev/null
+++ b/lib/lepton/asmjit/x86/x86instdb_p.h
@@ -0,0 +1,311 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_X86_X86INSTDB_P_H_INCLUDED
+#define ASMJIT_X86_X86INSTDB_P_H_INCLUDED
+
+#include "../x86/x86instdb.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(x86)
+
+//! \cond INTERNAL
+//! \addtogroup asmjit_x86
+//! \{
+
+namespace InstDB {
+
+//! Instruction encoding (X86).
+//!
+//! This is a specific identifier that is used by AsmJit to describe the way each instruction is encoded. Some
+//! encodings are special only for a single instruction as X86 instruction set contains a lot of legacy encodings,
+//! and some encodings describe a group of instructions that share common traits, like MMX, SSE, AVX, AVX512
+//! instructions, etc...
+enum EncodingId : uint32_t {
+  kEncodingNone = 0,                     //!< Never used.
+  kEncodingX86Op,                        //!< X86 [OP].
+  kEncodingX86Op_Mod11RM,                //!< X86 [OP] (opcode with ModRM byte where MOD must be 11b).
+  kEncodingX86Op_Mod11RM_I8,             //!< X86 [OP] (opcode with ModRM byte + 8-bit immediate).
+  kEncodingX86Op_xAddr,                  //!< X86 [OP] (implicit address in the first register operand).
+  kEncodingX86Op_xAX,                    //!< X86 [OP] (implicit or explicit '?AX' form).
+  kEncodingX86Op_xDX_xAX,                //!< X86 [OP] (implicit or explicit '?DX, ?AX' form).
+  kEncodingX86Op_MemZAX,                 //!< X86 [OP] (implicit or explicit '[EAX|RAX]' form).
+  kEncodingX86I_xAX,                     //!< X86 [I] (implicit or explicit '?AX' form).
+  kEncodingX86M,                         //!< X86 [M] (handles 2|4|8-bytes size).
+  kEncodingX86M_NoMemSize,               //!< X86 [M] (handles 2|4|8-bytes size, but doesn't consider memory size).
+  kEncodingX86M_NoSize,                  //!< X86 [M] (doesn't handle any size).
+  kEncodingX86M_GPB,                     //!< X86 [M] (handles single-byte size).
+  kEncodingX86M_GPB_MulDiv,              //!< X86 [M] (like GPB, handles implicit|explicit MUL|DIV|IDIV).
+  kEncodingX86M_Only,                    //!< X86 [M] (restricted to memory operand of any size).
+  kEncodingX86M_Only_EDX_EAX,            //!< X86 [M] (memory operand only, followed by implicit <edx> and <eax>).
+  kEncodingX86M_Nop,                     //!< X86 [M] (special case of NOP instruction).
+  kEncodingX86R_Native,                  //!< X86 [R] (register must be either 32-bit or 64-bit depending on arch).
+  kEncodingX86R_FromM,                   //!< X86 [R] - which specifies memory address.
+  kEncodingX86R32_EDX_EAX,               //!< X86 [R32] followed by implicit EDX and EAX.
+  kEncodingX86Rm,                        //!< X86 [RM] (doesn't handle single-byte size).
+  kEncodingX86Rm_Raw66H,                 //!< X86 [RM] (used by LZCNT, POPCNT, and TZCNT).
+  kEncodingX86Rm_NoSize,                 //!< X86 [RM] (doesn't add REX.W prefix if 64-bit reg is used).
+  kEncodingX86Mr,                        //!< X86 [MR] (doesn't handle single-byte size).
+  kEncodingX86Mr_NoSize,                 //!< X86 [MR] (doesn't handle any size).
+  kEncodingX86Arith,                     //!< X86 adc, add, and, cmp, or, sbb, sub, xor.
+  kEncodingX86Bswap,                     //!< X86 bswap.
+  kEncodingX86Bt,                        //!< X86 bt, btc, btr, bts.
+  kEncodingX86Call,                      //!< X86 call.
+  kEncodingX86Cmpxchg,                   //!< X86 [MR] cmpxchg.
+  kEncodingX86Cmpxchg8b_16b,             //!< X86 [MR] cmpxchg8b, cmpxchg16b.
+  kEncodingX86Crc,                       //!< X86 crc32.
+  kEncodingX86Enter,                     //!< X86 enter.
+  kEncodingX86Imul,                      //!< X86 imul.
+  kEncodingX86In,                        //!< X86 in.
+  kEncodingX86Ins,                       //!< X86 ins[b|w|d].
+  kEncodingX86IncDec,                    //!< X86 inc, dec.
+  kEncodingX86Int,                       //!< X86 int (interrupt).
+  kEncodingX86Jcc,                       //!< X86 jcc.
+  kEncodingX86JecxzLoop,                 //!< X86 jcxz, jecxz, jrcxz, loop, loope, loopne.
+  kEncodingX86Jmp,                       //!< X86 jmp.
+  kEncodingX86JmpRel,                    //!< X86 xbegin.
+  kEncodingX86LcallLjmp,                 //!< X86 lcall/ljmp.
+  kEncodingX86Lea,                       //!< X86 lea.
+  kEncodingX86Mov,                       //!< X86 mov (all possible cases).
+  kEncodingX86Movabs,                    //!< X86 movabs.
+  kEncodingX86MovsxMovzx,                //!< X86 movsx, movzx.
+  kEncodingX86MovntiMovdiri,             //!< X86 movnti/movdiri.
+  kEncodingX86EnqcmdMovdir64b,           //!< X86 enqcmd/enqcmds/movdir64b.
+  kEncodingX86Out,                       //!< X86 out.
+  kEncodingX86Outs,                      //!< X86 outs[b|w|d].
+  kEncodingX86Push,                      //!< X86 push.
+  kEncodingX86Pop,                       //!< X86 pop.
+  kEncodingX86Ret,                       //!< X86 ret.
+  kEncodingX86Rot,                       //!< X86 rcl, rcr, rol, ror, sal, sar, shl, shr.
+  kEncodingX86Set,                       //!< X86 setcc.
+  kEncodingX86ShldShrd,                  //!< X86 shld, shrd.
+  kEncodingX86StrRm,                     //!< X86 lods.
+  kEncodingX86StrMr,                     //!< X86 scas, stos.
+  kEncodingX86StrMm,                     //!< X86 cmps, movs.
+  kEncodingX86Test,                      //!< X86 test.
+  kEncodingX86Xadd,                      //!< X86 xadd.
+  kEncodingX86Xchg,                      //!< X86 xchg.
+  kEncodingX86Fence,                     //!< X86 lfence, mfence, sfence.
+  kEncodingX86Bndmov,                    //!< X86 [RM|MR] (used by BNDMOV).
+  kEncodingFpuOp,                        //!< FPU [OP].
+  kEncodingFpuArith,                     //!< FPU fadd, fdiv, fdivr, fmul, fsub, fsubr.
+  kEncodingFpuCom,                       //!< FPU fcom, fcomp.
+  kEncodingFpuFldFst,                    //!< FPU fld, fst, fstp.
+  kEncodingFpuM,                         //!< FPU fiadd, ficom, ficomp, fidiv, fidivr, fild, fimul, fist, fistp, fisttp, fisub, fisubr.
+  kEncodingFpuR,                         //!< FPU fcmov, fcomi, fcomip, ffree, fucom, fucomi, fucomip, fucomp, fxch.
+  kEncodingFpuRDef,                      //!< FPU faddp, fdivp, fdivrp, fmulp, fsubp, fsubrp.
+  kEncodingFpuStsw,                      //!< FPU fnstsw, fstsw.
+  kEncodingExtRm,                        //!< EXT [RM].
+  kEncodingExtRm_XMM0,                   //!< EXT [RM<XMM0>].
+  kEncodingExtRm_ZDI,                    //!< EXT [RM<ZDI>].
+  kEncodingExtRm_P,                      //!< EXT [RM] (propagates 66H if the instruction uses XMM register).
+  kEncodingExtRm_Wx,                     //!< EXT [RM] (propagates REX.W if GPQ is used or the second operand is GPQ/QWORD_PTR).
+  kEncodingExtRm_Wx_GpqOnly,             //!< EXT [RM] (propagates REX.W if the first operand is GPQ register).
+  kEncodingExtRmRi,                      //!< EXT [RM|RI].
+  kEncodingExtRmRi_P,                    //!< EXT [RM|RI] (propagates 66H if the instruction uses XMM register).
+  kEncodingExtRmi,                       //!< EXT [RMI].
+  kEncodingExtRmi_P,                     //!< EXT [RMI] (propagates 66H if the instruction uses XMM register).
+  kEncodingExtPextrw,                    //!< EXT pextrw.
+  kEncodingExtExtract,                   //!< EXT pextrb, pextrd, pextrq, extractps.
+  kEncodingExtMov,                       //!< EXT mov?? - #1:[MM|XMM, MM|XMM|Mem] #2:[MM|XMM|Mem, MM|XMM].
+  kEncodingExtMovbe,                     //!< EXT movbe.
+  kEncodingExtMovd,                      //!< EXT movd.
+  kEncodingExtMovq,                      //!< EXT movq.
+  kEncodingExtExtrq,                     //!< EXT extrq (SSE4A).
+  kEncodingExtInsertq,                   //!< EXT insertq (SSE4A).
+  kEncodingExt3dNow,                     //!< EXT [RMI] (3DNOW specific).
+  kEncodingVexOp,                        //!< VEX [OP].
+  kEncodingVexOpMod,                     //!< VEX [OP] with MODR/M.
+  kEncodingVexKmov,                      //!< VEX [RM|MR] (used by kmov[b|w|d|q]).
+  kEncodingVexR_Wx,                      //!< VEX|EVEX [R] (propagates VEX.W if GPQ used).
+  kEncodingVexM,                         //!< VEX|EVEX [M].
+  kEncodingVexM_VM,                      //!< VEX|EVEX [M] (propagates VEX|EVEX.L, VSIB support).
+  kEncodingVexMr_Lx,                     //!< VEX|EVEX [MR] (propagates VEX|EVEX.L if YMM used).
+  kEncodingVexMr_VM,                     //!< VEX|EVEX [MR] (VSIB support).
+  kEncodingVexMri,                       //!< VEX|EVEX [MRI].
+  kEncodingVexMri_Lx,                    //!< VEX|EVEX [MRI] (propagates VEX|EVEX.L if YMM used).
+  kEncodingVexMri_Vpextrw,               //!< VEX|EVEX [MRI] (special case required by VPEXTRW instruction).
+  kEncodingVexRm,                        //!< VEX|EVEX [RM].
+  kEncodingVexRm_ZDI,                    //!< VEX|EVEX [RM<ZDI>].
+  kEncodingVexRm_Wx,                     //!< VEX|EVEX [RM] (propagates VEX|EVEX.W if GPQ used).
+  kEncodingVexRm_Lx,                     //!< VEX|EVEX [RM] (propagates VEX|EVEX.L if YMM used).
+  kEncodingVexRm_Lx_Narrow,              //!< VEX|EVEX [RM] (the destination vector size is narrowed).
+  kEncodingVexRm_Lx_Bcst,                //!< VEX|EVEX [RM] (can handle broadcast r32/r64).
+  kEncodingVexRm_VM,                     //!< VEX|EVEX [RM] (propagates VEX|EVEX.L, VSIB support).
+  kEncodingVexRm_T1_4X,                  //!<     EVEX [RM] (used by NN instructions that use RM-T1_4X encoding).
+  kEncodingVexRmi,                       //!< VEX|EVEX [RMI].
+  kEncodingVexRmi_Wx,                    //!< VEX|EVEX [RMI] (propagates VEX|EVEX.W if GPQ used).
+  kEncodingVexRmi_Lx,                    //!< VEX|EVEX [RMI] (propagates VEX|EVEX.L if YMM used).
+  kEncodingVexRvm,                       //!< VEX|EVEX [RVM].
+  kEncodingVexRvm_Wx,                    //!< VEX|EVEX [RVM] (propagates VEX|EVEX.W if GPQ used).
+  kEncodingVexRvm_ZDX_Wx,                //!< VEX|EVEX [RVM<ZDX>] (propagates VEX|EVEX.W if GPQ used).
+  kEncodingVexRvm_Lx,                    //!< VEX|EVEX [RVM] (propagates VEX|EVEX.L if YMM used).
+  kEncodingVexRvm_Lx_KEvex,              //!< VEX|EVEX [RVM] (forces EVEX prefix if K register is used on destination).
+  kEncodingVexRvm_Lx_2xK,                //!< VEX|EVEX [RVM] (vp2intersectd/vp2intersectq).
+  kEncodingVexRvmr,                      //!< VEX|EVEX [RVMR].
+  kEncodingVexRvmr_Lx,                   //!< VEX|EVEX [RVMR] (propagates VEX|EVEX.L if YMM used).
+  kEncodingVexRvmi,                      //!< VEX|EVEX [RVMI].
+  kEncodingVexRvmi_KEvex,                //!< VEX|EVEX [RVMI] (forces EVEX prefix if K register is used on destination).
+  kEncodingVexRvmi_Lx,                   //!< VEX|EVEX [RVMI] (propagates VEX|EVEX.L if YMM used).
+  kEncodingVexRvmi_Lx_KEvex,             //!< VEX|EVEX [RVMI] (forces EVEX prefix if K register is used on destination).
+  kEncodingVexRmv,                       //!< VEX|EVEX [RMV].
+  kEncodingVexRmv_Wx,                    //!< VEX|EVEX [RMV] (propagates VEX|EVEX.W if GPQ used).
+  kEncodingVexRmv_VM,                    //!< VEX|EVEX [RMV] (propagates VEX|EVEX.L, VSIB support).
+  kEncodingVexRmvRm_VM,                  //!< VEX|EVEX [RMV|RM] (propagates VEX|EVEX.L, VSIB support).
+  kEncodingVexRmvi,                      //!< VEX|EVEX [RMVI].
+  kEncodingVexRmMr,                      //!< VEX|EVEX [RM|MR].
+  kEncodingVexRmMr_Lx,                   //!< VEX|EVEX [RM|MR] (propagates VEX|EVEX.L if YMM used).
+  kEncodingVexRvmRmv,                    //!< VEX|EVEX [RVM|RMV].
+  kEncodingVexRvmRmi,                    //!< VEX|EVEX [RVM|RMI].
+  kEncodingVexRvmRmi_Lx,                 //!< VEX|EVEX [RVM|RMI] (propagates VEX|EVEX.L if YMM used).
+  kEncodingVexRvmRmvRmi,                 //!< VEX|EVEX [RVM|RMV|RMI].
+  kEncodingVexRvmMr,                     //!< VEX|EVEX [RVM|MR].
+  kEncodingVexRvmMvr,                    //!< VEX|EVEX [RVM|MVR].
+  kEncodingVexRvmMvr_Lx,                 //!< VEX|EVEX [RVM|MVR] (propagates VEX|EVEX.L if YMM used).
+  kEncodingVexRvmVmi,                    //!< VEX|EVEX [RVM|VMI].
+  kEncodingVexRvmVmi_Lx,                 //!< VEX|EVEX [RVM|VMI] (propagates VEX|EVEX.L if YMM used).
+  kEncodingVexRvmVmi_Lx_MEvex,           //!< VEX|EVEX [RVM|VMI] (propagates EVEX if the second operand is memory).
+  kEncodingVexVm,                        //!< VEX|EVEX [VM].
+  kEncodingVexVm_Wx,                     //!< VEX|EVEX [VM] (propagates VEX|EVEX.W if GPQ used).
+  kEncodingVexVmi,                       //!< VEX|EVEX [VMI].
+  kEncodingVexVmi_Lx,                    //!< VEX|EVEX [VMI] (propagates VEX|EVEX.L if YMM used).
+  kEncodingVexVmi4_Wx,                   //!< VEX|EVEX [VMI] (propagates VEX|EVEX.W if GPQ used, DWORD Immediate).
+  kEncodingVexVmi_Lx_MEvex,              //!< VEX|EVEX [VMI] (force EVEX prefix when the second operand is memory).
+  kEncodingVexRvrmRvmr,                  //!< VEX|EVEX [RVRM|RVMR].
+  kEncodingVexRvrmRvmr_Lx,               //!< VEX|EVEX [RVRM|RVMR] (propagates VEX|EVEX.L if YMM used).
+  kEncodingVexRvrmiRvmri_Lx,             //!< VEX|EVEX [RVRMI|RVMRI] (propagates VEX|EVEX.L if YMM used).
+  kEncodingVexMovdMovq,                  //!< VEX|EVEX vmovd, vmovq.
+  kEncodingVexMovssMovsd,                //!< VEX|EVEX vmovss, vmovsd.
+  kEncodingFma4,                         //!< FMA4 [R, R, R/M, R/M].
+  kEncodingFma4_Lx,                      //!< FMA4 [R, R, R/M, R/M] (propagates AVX.L if YMM used).
+  kEncodingAmxCfg,                       //!< AMX ldtilecfg/sttilecfg.
+  kEncodingAmxR,                         //!< AMX [R] - tilezero.
+  kEncodingAmxRm,                        //!< AMX tileloadd/tileloaddt1.
+  kEncodingAmxMr,                        //!< AMX tilestored.
+  kEncodingAmxRmv,                       //!< AMX instructions that use TMM registers.
+  kEncodingCount                         //!< Count of instruction encodings.
+};
+
+//! Additional information table, provides CPU extensions required to execute an instruction and RW flags.
+struct AdditionalInfo {
+  //! Index to `_instFlagsTable`.
+  uint8_t _instFlagsIndex;
+  //! Index to `_rwFlagsTable`.
+  uint8_t _rwFlagsIndex;
+  //! Features vector.
+  uint8_t _features[6];
+
+  inline const uint8_t* featuresBegin() const noexcept { return _features; }
+  inline const uint8_t* featuresEnd() const noexcept { return _features + ASMJIT_ARRAY_SIZE(_features); }
+};
+
+// ${NameLimits:Begin}
+// ------------------- Automatically generated, do not edit -------------------
+enum : uint32_t { kMaxNameSize = 17 };
+// ----------------------------------------------------------------------------
+// ${NameLimits:End}
+
+struct InstNameIndex {
+  uint16_t start;
+  uint16_t end;
+};
+
+struct RWInfo {
+  enum Category : uint8_t {
+    kCategoryGeneric,
+    kCategoryMov,
+    kCategoryMovabs,
+    kCategoryImul,
+    kCategoryMovh64,
+    kCategoryPunpcklxx,
+    kCategoryVmaskmov,
+    kCategoryVmovddup,
+    kCategoryVmovmskpd,
+    kCategoryVmovmskps,
+    kCategoryVmov1_2,
+    kCategoryVmov1_4,
+    kCategoryVmov1_8,
+    kCategoryVmov2_1,
+    kCategoryVmov4_1,
+    kCategoryVmov8_1
+  };
+
+  uint8_t category;
+  uint8_t rmInfo;
+  uint8_t opInfoIndex[6];
+};
+
+struct RWInfoOp {
+  uint64_t rByteMask;
+  uint64_t wByteMask;
+  uint8_t physId;
+  uint8_t consecutiveLeadCount;
+  uint8_t reserved[2];
+  OpRWFlags flags;
+};
+
+//! R/M information.
+//!
+//! This data is used to replace register operand by a memory operand reliably.
+struct RWInfoRm {
+  enum Category : uint8_t {
+    kCategoryNone = 0,
+    kCategoryFixed,
+    kCategoryConsistent,
+    kCategoryHalf,
+    kCategoryQuarter,
+    kCategoryEighth
+  };
+
+  enum Flags : uint8_t {
+    kFlagAmbiguous = 0x01,
+    //! Special semantics for PEXTRW - memory operand can only be used with SSE4.1 instruction and it's forbidden in MMX.
+    kFlagPextrw = 0x02,
+    //! Special semantics for MOVSS and MOVSD - doesn't zero extend the destination if the operation is a reg to reg move.
+    kFlagMovssMovsd = 0x04,
+    //! Special semantics for AVX shift instructions that do not provide reg/mem in AVX/AVX2 mode (AVX-512 is required).
+    kFlagFeatureIfRMI = 0x08
+  };
+
+  uint8_t category;
+  uint8_t rmOpsMask;
+  uint8_t fixedSize;
+  uint8_t flags;
+  uint8_t rmFeature;
+};
+
+struct RWFlagsInfoTable {
+  //! CPU/FPU flags read.
+  uint32_t readFlags;
+  //! CPU/FPU flags written or undefined.
+  uint32_t writeFlags;
+};
+
+extern const uint8_t rwInfoIndexA[Inst::_kIdCount];
+extern const uint8_t rwInfoIndexB[Inst::_kIdCount];
+extern const RWInfo rwInfoA[];
+extern const RWInfo rwInfoB[];
+extern const RWInfoOp rwInfoOp[];
+extern const RWInfoRm rwInfoRm[];
+extern const RWFlagsInfoTable _rwFlagsInfoTable[];
+extern const InstRWFlags _instFlagsTable[];
+
+extern const uint32_t _mainOpcodeTable[];
+extern const uint32_t _altOpcodeTable[];
+
+#ifndef ASMJIT_NO_TEXT
+extern const char _nameData[];
+extern const InstNameIndex instNameIndex[26];
+#endif // !ASMJIT_NO_TEXT
+
+extern const AdditionalInfo _additionalInfoTable[];
+
+} // {InstDB}
+
+//! \}
+//! \endcond
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // ASMJIT_X86_X86INSTDB_P_H_INCLUDED
diff --git a/lib/lepton/asmjit/x86/x86opcode_p.h b/lib/lepton/asmjit/x86/x86opcode_p.h
new file mode 100644
index 0000000000..94a76f0481
--- /dev/null
+++ b/lib/lepton/asmjit/x86/x86opcode_p.h
@@ -0,0 +1,436 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_X86_X86OPCODE_P_H_INCLUDED
+#define ASMJIT_X86_X86OPCODE_P_H_INCLUDED
+
+#include "../x86/x86globals.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(x86)
+
+//! \cond INTERNAL
+//! \addtogroup asmjit_x86
+//! \{
+
+//! Helper class to store and manipulate X86 opcodes.
+//!
+//! The first 8 least significant bits describe the opcode byte as defined in ISA manuals, all other bits
+//! describe other properties like prefixes, see `Opcode::Bits` for more information.
+struct Opcode {
+  uint32_t v;
+
+  //! Describes a meaning of all bits of AsmJit's 32-bit opcode value.
+  //!
+  //! This schema is AsmJit specific and has been designed to allow encoding of all X86 instructions available. X86,
+  //! MMX, and SSE+ instructions always use `MM` and `PP` fields, which are encoded to corresponding prefixes needed
+  //! by X86 or SIMD instructions. AVX+ instructions embed `MMMMM` and `PP` fields in a VEX prefix, and AVX-512
+  //! instructions embed `MM` and `PP` in EVEX prefix.
+  //!
+  //! The instruction opcode definition uses 1 or 2 bytes as an opcode value. 1 byte is needed by most of the
+  //! instructions, 2 bytes are only used by legacy X87-FPU instructions. This means that a second byte is free to
+  //! be used by instructions encoded by using VEX and/or EVEX prefix.
+  //!
+  //! The fields description:
+  //!
+  //! - `MM` field is used to encode prefixes needed by the instruction or as a part of VEX/EVEX prefix. Described as
+  //!   `mm` and `mmmmm` in instruction manuals.
+  //!
+  //!   NOTE: Since `MM` field is defined as `mmmmm` (5 bits), but only 2 least significant bits are used by VEX and
+  //!   EVEX prefixes, and additional 4th bit is used by XOP prefix, AsmJit uses the 3rd and 5th bit for its own
+  //!   purposes. These bits will probably never be used in future encodings as AVX512 uses only `000mm` from `mmmmm`.
+  //!
+  //! - `PP` field is used to encode prefixes needed by the instruction or as a part of VEX/EVEX prefix. Described as
+  //!   `pp` in instruction manuals.
+  //!
+  //! - `LL` field is used exclusively by AVX+ and AVX512+ instruction sets. It describes vector size, which is `L.128`
+  //!   for XMM register, `L.256` for YMM register, and `L.512` for ZMM register. The `LL` field is omitted in case
+  //!   that instruction supports multiple vector lengths, however, if the instruction requires specific `L` value it
+  //!   must be specified as a part of the opcode.
+  //!
+  //!   NOTE: `LL` having value `11` is not defined yet.
+  //!
+  //! - `W` field is the most complicated. It was added by 64-bit architecture to promote default operation width
+  //!   (instructions that perform 32-bit operation by default require to override the width to 64-bit explicitly).
+  //!   There is nothing wrong with this, however, some instructions introduced implicit `W` override, for example a
+  //!   `cdqe` instruction is basically a `cwde` instruction with overridden `W` (set to 1). There are some others
+  //!   in the base X86 instruction set. More recent instruction sets started using `W` field more often:
+  //!
+  //!   - AVX instructions started using `W` field as an extended opcode for FMA, GATHER, PERM, and other instructions.
+  //!     It also uses `W` field to override the default operation width in instructions like `vmovq`.
+  //!
+  //!   - AVX-512 instructions started using `W` field as an extended opcode for all new instructions. This wouldn't
+  //!     have been an issue if the `W` field of AVX-512 have matched AVX, but this is not always the case.
+  //!
+  //! - `O` field is an extended opcode field (3 bits) embedded in ModR/M BYTE.
+  //!
+  //! - `CDSHL` and `CDTT` fields describe 'compressed-displacement'. `CDSHL` is defined for each instruction that is
+  //!   AVX-512 encodable (EVEX) and contains a base N shift (base shift to perform the calculation). The `CDTT` field
+  //!   is derived from instruction specification and describes additional shift to calculate the final `CDSHL` that
+  //!   will be used in SIB byte.
+  //!
+  //! \note Don't reorder any fields here, the shifts and masks were defined carefully to make encoding of X86
+  //! instructions fast, especially to construct REX, VEX, and EVEX prefixes in the most efficient way. Changing
+  //! values defined by these enums may cause AsmJit to emit invalid binary representations of instructions passed to
+  //! `x86::Assembler::_emit`.
+  enum Bits : uint32_t {
+    // MM & VEX & EVEX & XOP
+    // ---------------------
+    //
+    // Two meanings:
+    //  * Part of a legacy opcode (prefixes emitted before the main opcode byte).
+    //  * `MMMMM` field in VEX|EVEX|XOP instruction.
+    //
+    // AVX reserves 5 bits for `MMMMM` field, however AVX instructions only use 2 bits and XOP 3 bits. AVX-512 shrinks
+    // `MMMMM` field into `MMM` so it's safe to use [4:3] bits of `MMMMM` field for internal payload.
+    //
+    // AsmJit divides MMMMM field into this layout:
+    //
+    // [2:0] - Used to describe 0F, 0F38 and 0F3A legacy prefix bytes and 3 bits of MMMMM field for XOP/AVX/AVX512.
+    // [3]   - Required by XOP instructions, so we use this bit also to indicate that this is a XOP opcode.
+    // [4]   - Used to force EVEX prefix - this bit is not used by any X86 instruction yet, so AsmJit uses it to
+    //         describe EVEX only instructions or sets its bit when user uses InstOptions::kX86_Evex to force EVEX.
+    kMM_Shift      = 8,
+    kMM_Mask       = 0x1Fu << kMM_Shift,
+    kMM_00         = 0x00u << kMM_Shift,
+    kMM_0F         = 0x01u << kMM_Shift,
+    kMM_0F38       = 0x02u << kMM_Shift,
+    kMM_0F3A       = 0x03u << kMM_Shift,   // Described also as XOP.M3 in AMD manuals.
+    kMM_0F01       = 0x04u << kMM_Shift,   // AsmJit way to describe 0F01 (never VEX/EVEX).
+
+    kMM_MAP5       = 0x05u << kMM_Shift,   // EVEX.MAP5.
+    kMM_MAP6       = 0x06u << kMM_Shift,   // EVEX.MAP6.
+
+    // `XOP` field is only used to force XOP prefix instead of VEX3 prefix. We know XOP encodings always use 0b1000
+    // bit of MM field and that no VEX and EVEX instruction use such bit yet, so we can use this bit to force XOP
+    // prefix to be emitted instead of VEX3 prefix. See `x86VEXPrefix` defined in `x86assembler.cpp`.
+    kMM_XOP08      = 0x08u << kMM_Shift,   // XOP.M8.
+    kMM_XOP09      = 0x09u << kMM_Shift,   // XOP.M9.
+    kMM_XOP0A      = 0x0Au << kMM_Shift,   // XOP.MA.
+
+    kMM_IsXOP_Shift= kMM_Shift + 3,
+    kMM_IsXOP      = kMM_XOP08,
+
+    // NOTE: Force VEX3 allows to force to emit VEX3 instead of VEX2 in some cases (similar to forcing REX prefix).
+    // Force EVEX will force emitting EVEX prefix instead of VEX2|VEX3. EVEX-only instructions will have ForceEvex
+    // always set; however, instructions that can be encoded by either VEX or EVEX prefix should not have ForceEvex
+    // set.
+    kMM_ForceEvex  = 0x10u << kMM_Shift,   // Force 4-BYTE EVEX prefix.
+
+    // FPU_2B - Second-Byte of the Opcode used by FPU
+    // ----------------------------------------------
+    //
+    // Second byte opcode. This BYTE is ONLY used by FPU instructions and collides with 3 bits from `MM` and 5 bits
+    // from 'CDSHL' and 'CDTT'. It's fine as FPU and AVX512 flags are never used at the same time.
+    kFPU_2B_Shift  = 10,
+    kFPU_2B_Mask   = 0xFF << kFPU_2B_Shift,
+
+    // CDSHL & CDTT
+    // ------------
+    //
+    // Compressed displacement bits.
+    //
+    // Each opcode defines the base size (N) shift:
+    //   [0]: BYTE  (1 byte).
+    //   [1]: WORD  (2 bytes).
+    //   [2]: DWORD (4 bytes - float/int32).
+    //   [3]: QWORD (8 bytes - double/int64).
+    //   [4]: OWORD (16 bytes - used by FV|FVM|M128).
+    //
+    // Which is then scaled by the instruction's TT (TupleType) into possible:
+    //   [5]: YWORD (32 bytes)
+    //   [6]: ZWORD (64 bytes)
+    //
+    // These bits are then adjusted before calling EmitModSib or EmitModVSib.
+    kCDSHL_Shift   = 13,
+    kCDSHL_Mask    = 0x7u << kCDSHL_Shift,
+
+    kCDSHL__       = 0x0u << kCDSHL_Shift, // Base element size not used.
+    kCDSHL_0       = 0x0u << kCDSHL_Shift, // N << 0.
+    kCDSHL_1       = 0x1u << kCDSHL_Shift, // N << 1.
+    kCDSHL_2       = 0x2u << kCDSHL_Shift, // N << 2.
+    kCDSHL_3       = 0x3u << kCDSHL_Shift, // N << 3.
+    kCDSHL_4       = 0x4u << kCDSHL_Shift, // N << 4.
+    kCDSHL_5       = 0x5u << kCDSHL_Shift, // N << 5.
+
+    // Compressed displacement tuple-type (specific to AsmJit).
+    //
+    // Since we store the base offset independently of CDTT we can simplify the number of 'TUPLE_TYPE' groups
+    // significantly and just handle special cases.
+    kCDTT_Shift    = 16,
+    kCDTT_Mask     = 0x3u << kCDTT_Shift,
+    kCDTT_None     = 0x0u << kCDTT_Shift,  // Does nothing.
+    kCDTT_ByLL     = 0x1u << kCDTT_Shift,  // Scales by LL (1x 2x 4x).
+    kCDTT_T1W      = 0x2u << kCDTT_Shift,  // Used to add 'W' to the shift.
+    kCDTT_DUP      = 0x3u << kCDTT_Shift,  // Special 'VMOVDDUP' case.
+
+    // Aliases that match names used in instruction manuals.
+    kCDTT__        = kCDTT_None,
+    kCDTT_FV       = kCDTT_ByLL,
+    kCDTT_HV       = kCDTT_ByLL,
+    kCDTT_QV       = kCDTT_ByLL,
+    kCDTT_FVM      = kCDTT_ByLL,
+    kCDTT_T1S      = kCDTT_None,
+    kCDTT_T1F      = kCDTT_None,
+    kCDTT_T1_4X    = kCDTT_None,
+    kCDTT_T4X      = kCDTT_None,           // Alias to have only 3 letters.
+    kCDTT_T2       = kCDTT_None,
+    kCDTT_T4       = kCDTT_None,
+    kCDTT_T8       = kCDTT_None,
+    kCDTT_HVM      = kCDTT_ByLL,
+    kCDTT_QVM      = kCDTT_ByLL,
+    kCDTT_OVM      = kCDTT_ByLL,
+    kCDTT_128      = kCDTT_None,
+
+    // `O` Field in ModR/M (??:xxx:???)
+    // --------------------------------
+
+    kModO_Shift    = 18,
+    kModO_Mask     = 0x7u << kModO_Shift,
+
+    kModO__        = 0x0u,
+    kModO_0        = 0x0u << kModO_Shift,
+    kModO_1        = 0x1u << kModO_Shift,
+    kModO_2        = 0x2u << kModO_Shift,
+    kModO_3        = 0x3u << kModO_Shift,
+    kModO_4        = 0x4u << kModO_Shift,
+    kModO_5        = 0x5u << kModO_Shift,
+    kModO_6        = 0x6u << kModO_Shift,
+    kModO_7        = 0x7u << kModO_Shift,
+
+    // `RM` Field in ModR/M (??:???:xxx)
+    // ---------------------------------
+    //
+    // Second data field used by ModR/M byte. This is only used by few instructions that use OPCODE+MOD/RM where both
+    // values in Mod/RM are part of the opcode.
+
+    kModRM_Shift    = 13,
+    kModRM_Mask     = 0x7u << kModRM_Shift,
+
+    kModRM__        = 0x0u,
+    kModRM_0        = 0x0u << kModRM_Shift,
+    kModRM_1        = 0x1u << kModRM_Shift,
+    kModRM_2        = 0x2u << kModRM_Shift,
+    kModRM_3        = 0x3u << kModRM_Shift,
+    kModRM_4        = 0x4u << kModRM_Shift,
+    kModRM_5        = 0x5u << kModRM_Shift,
+    kModRM_6        = 0x6u << kModRM_Shift,
+    kModRM_7        = 0x7u << kModRM_Shift,
+
+    // `PP` Field
+    // ----------
+    //
+    // These fields are stored deliberately right after each other as it makes it easier to construct VEX prefix from
+    // the opcode value stored in the instruction database.
+    //
+    // Two meanings:
+    //   * "PP" field in AVX/XOP/AVX-512 instruction.
+    //   * Mandatory Prefix in legacy encoding.
+    //
+    // AVX reserves 2 bits for `PP` field, but AsmJit extends the storage by 1 more bit that is used to emit 9B prefix
+    // for some X87-FPU instructions.
+
+    kPP_Shift      = 21,
+    kPP_VEXMask    = 0x03u << kPP_Shift,   // PP field mask used by VEX/EVEX.
+    kPP_FPUMask    = 0x07u << kPP_Shift,   // Mask used by EMIT_PP, also includes '0x9B'.
+    kPP_00         = 0x00u << kPP_Shift,
+    kPP_66         = 0x01u << kPP_Shift,
+    kPP_F3         = 0x02u << kPP_Shift,
+    kPP_F2         = 0x03u << kPP_Shift,
+
+    kPP_9B         = 0x07u << kPP_Shift,   // AsmJit specific to emit FPU's '9B' byte.
+
+    // REX|VEX|EVEX B|X|R|W Bits
+    // -------------------------
+    //
+    // NOTE: REX.[B|X|R] are never stored within the opcode itself, they are reserved by AsmJit and are added
+    // dynamically to the opcode to represent [REX|VEX|EVEX].[B|X|R] bits. REX.W can be stored in DB as it's sometimes
+    // part of the opcode itself.
+
+    // These must be binary compatible with instruction options.
+    kREX_Shift     = 24,
+    kREX_Mask      = 0x0Fu << kREX_Shift,
+    kB             = 0x01u << kREX_Shift,  // Never stored in DB, used by encoder.
+    kX             = 0x02u << kREX_Shift,  // Never stored in DB, used by encoder.
+    kR             = 0x04u << kREX_Shift,  // Never stored in DB, used by encoder.
+    kW             = 0x08u << kREX_Shift,
+    kW_Shift       = kREX_Shift + 3,
+
+    kW__           = 0u << kW_Shift,       // REX.W/VEX.W is unspecified.
+    kW_x           = 0u << kW_Shift,       // REX.W/VEX.W is based on instruction operands.
+    kW_I           = 0u << kW_Shift,       // REX.W/VEX.W is ignored (WIG).
+    kW_0           = 0u << kW_Shift,       // REX.W/VEX.W is 0 (W0).
+    kW_1           = 1u << kW_Shift,       // REX.W/VEX.W is 1 (W1).
+
+    // EVEX.W Field
+    // ------------
+    //
+    // `W` field used by EVEX instruction encoding.
+
+    kEvex_W_Shift  = 28,
+    kEvex_W_Mask   = 1u << kEvex_W_Shift,
+
+    kEvex_W__      = 0u << kEvex_W_Shift,  // EVEX.W is unspecified (not EVEX instruction).
+    kEvex_W_x      = 0u << kEvex_W_Shift,  // EVEX.W is based on instruction operands.
+    kEvex_W_I      = 0u << kEvex_W_Shift,  // EVEX.W is ignored (WIG).
+    kEvex_W_0      = 0u << kEvex_W_Shift,  // EVEX.W is 0 (W0).
+    kEvex_W_1      = 1u << kEvex_W_Shift,  // EVEX.W is 1 (W1).
+
+    // `L` or `LL` field in AVX/XOP/AVX-512
+    // ------------------------------------
+    //
+    // VEX/XOP prefix can only use the first bit `L.128` or `L.256`. EVEX prefix makes it possible to use also
+    // `L.512`. If the instruction set manual describes an instruction by `LIG` it means that the `L` field is ignored
+    // and AsmJit defaults to `0` in such case.
+    kLL_Shift      = 29,
+    kLL_Mask       = 0x3u << kLL_Shift,
+
+    kLL__          = 0x0u << kLL_Shift,    // LL is unspecified.
+    kLL_x          = 0x0u << kLL_Shift,    // LL is based on instruction operands.
+    kLL_I          = 0x0u << kLL_Shift,    // LL is ignored (LIG).
+    kLL_0          = 0x0u << kLL_Shift,    // LL is 0 (L.128).
+    kLL_1          = 0x1u << kLL_Shift,    // LL is 1 (L.256).
+    kLL_2          = 0x2u << kLL_Shift,    // LL is 2 (L.512).
+
+    // Opcode Combinations
+    // -------------------
+
+    k0      = 0,                           // '__' (no prefix, used internally).
+    k000000 = kPP_00 | kMM_00,             // '__' (no prefix, to be the same width as others).
+    k000F00 = kPP_00 | kMM_0F,             // '0F'
+    k000F01 = kPP_00 | kMM_0F01,           // '0F01'
+    k000F0F = kPP_00 | kMM_0F,             // '0F0F' - 3DNOW, equal to 0x0F, must have special encoding to take effect.
+    k000F38 = kPP_00 | kMM_0F38,           // 'NP.0F38'
+    k000F3A = kPP_00 | kMM_0F3A,           // 'NP.0F3A'
+    k00MAP5 = kPP_00 | kMM_MAP5,           // 'NP.MAP5'
+    k00MAP6 = kPP_00 | kMM_MAP6,           // 'NP.MAP6'
+    k660000 = kPP_66 | kMM_00,             // '66'
+    k660F00 = kPP_66 | kMM_0F,             // '66.0F'
+    k660F01 = kPP_66 | kMM_0F01,           // '66.0F01'
+    k660F38 = kPP_66 | kMM_0F38,           // '66.0F38'
+    k660F3A = kPP_66 | kMM_0F3A,           // '66.0F3A'
+    k66MAP5 = kPP_66 | kMM_MAP5,           // '66.MAP5'
+    k66MAP6 = kPP_66 | kMM_MAP6,           // '66.MAP6'
+    kF20000 = kPP_F2 | kMM_00,             // 'F2'
+    kF20F00 = kPP_F2 | kMM_0F,             // 'F2.0F'
+    kF20F01 = kPP_F2 | kMM_0F01,           // 'F2.0F01'
+    kF20F38 = kPP_F2 | kMM_0F38,           // 'F2.0F38'
+    kF20F3A = kPP_F2 | kMM_0F3A,           // 'F2.0F3A'
+    kF2MAP5 = kPP_F2 | kMM_MAP5,           // 'F2.MAP5'
+    kF2MAP6 = kPP_F2 | kMM_MAP6,           // 'F2.MAP6'
+    kF30000 = kPP_F3 | kMM_00,             // 'F3'
+    kF30F00 = kPP_F3 | kMM_0F,             // 'F3.0F'
+    kF30F01 = kPP_F3 | kMM_0F01,           // 'F3.0F01'
+    kF30F38 = kPP_F3 | kMM_0F38,           // 'F3.0F38'
+    kF30F3A = kPP_F3 | kMM_0F3A,           // 'F3.0F3A'
+    kF3MAP5 = kPP_F3 | kMM_MAP5,           // 'F3.MAP5'
+    kF3MAP6 = kPP_F3 | kMM_MAP6,           // 'F3.MAP6'
+    kFPU_00 = kPP_00 | kMM_00,             // '__' (FPU)
+    kFPU_9B = kPP_9B | kMM_00,             // '9B' (FPU)
+    kXOP_M8 = kPP_00 | kMM_XOP08,          // 'M8' (XOP)
+    kXOP_M9 = kPP_00 | kMM_XOP09,          // 'M9' (XOP)
+    kXOP_MA = kPP_00 | kMM_XOP0A           // 'MA' (XOP)
+  };
+
+  // Opcode Builder
+  // --------------
+
+  inline uint32_t get() const noexcept { return v; } // Returns the raw opcode value `v`.
+  // Flag tests: true when `v` shares at least one set bit with the given constant.
+  inline bool hasW() const noexcept { return (v & kW) != 0; }
+  inline bool has66h() const noexcept { return (v & kPP_66) != 0; }
+  // Unlike the OR-based helpers, add() forwards to operator+= (arithmetic addition).
+  inline Opcode& add(uint32_t x) noexcept { return operator+=(x); }
+
+  inline Opcode& add66h() noexcept { return operator|=(kPP_66); } // ORs in kPP_66 unconditionally.
+  template<typename T> // `exp` is converted to uint32_t and shifted by kPP_Shift - presumably 0 or 1, TODO confirm.
+  inline Opcode& add66hIf(T exp) noexcept { return operator|=(uint32_t(exp) << kPP_Shift); }
+  template<typename T> // Forwards `size == 2` to add66hIf().
+  inline Opcode& add66hBySize(T size) noexcept { return add66hIf(size == 2); }
+
+  inline Opcode& addW() noexcept { return operator|=(kW); } // ORs in kW unconditionally.
+  template<typename T> // kW equals `1u << kW_Shift`, so a true/1 `exp` ORs in exactly kW and a false/0 `exp` is a no-op.
+  inline Opcode& addWIf(T exp) noexcept { return operator|=(uint32_t(exp) << kW_Shift); }
+  template<typename T> // Forwards `size == 8` to addWIf().
+  inline Opcode& addWBySize(T size) noexcept { return addWIf(size == 8); }
+
+  template<typename T> // ORs in the table entry selected by `size & 0xF` (only #2 and #8 are non-zero).
+  inline Opcode& addPrefixBySize(T size) noexcept {
+    static const uint32_t mask[16] = {
+      0,          // #0
+      0,          // #1 -> nothing (already handled or not possible)
+      kPP_66,     // #2 -> 66H
+      0,          // #3
+      0,          // #4 -> nothing
+      0,          // #5
+      0,          // #6
+      0,          // #7
+      kW          // #8 -> REX.W
+    };
+    return operator|=(mask[size & 0xF]); // Entries #9..#15 have no initializer and are therefore zero.
+  }
+
+  template<typename T> // ORs in the table entry selected by `size & 0xF` (only #2, #4 and #8 are non-zero).
+  inline Opcode& addArithBySize(T size) noexcept {
+    static const uint32_t mask[16] = {
+      0,          // #0
+      0,          // #1 -> nothing
+      1 | kPP_66, // #2 -> NOT_BYTE_OP(1) and 66H
+      0,          // #3
+      1,          // #4 -> NOT_BYTE_OP(1)
+      0,          // #5
+      0,          // #6
+      0,          // #7
+      1 | kW      // #8 -> NOT_BYTE_OP(1) and REX.W
+    };
+    return operator|=(mask[size & 0xF]); // Entries #9..#15 have no initializer and are therefore zero.
+  }
+
+  inline Opcode& forceEvex() noexcept { return operator|=(kMM_ForceEvex); } // ORs in kMM_ForceEvex unconditionally.
+  template<typename T> // Shifts `exp` by ConstCTZ<kMM_ForceEvex> - presumably the bit index of kMM_ForceEvex, TODO confirm.
+  inline Opcode& forceEvexIf(T exp) noexcept { return operator|=(uint32_t(exp) << Support::ConstCTZ<uint32_t(kMM_ForceEvex)>::value); }
+
+  //! Extracts the 3-bit `O` field (R) from the opcode (specified as /0..7 in instruction manuals).
+  inline uint32_t extractModO() const noexcept {
+    return (v >> kModO_Shift) & 0x07;
+  }
+
+  //! Extracts the 3-bit `RM` field (RM) from the opcode (usually specified as another opcode value).
+  inline uint32_t extractModRM() const noexcept {
+    return (v >> kModRM_Shift) & 0x07;
+  }
+
+  //! Extracts the `REX` prefix: ORs the opcode with `options` and shifts the result right by `kREX_Shift`.
+  inline uint32_t extractRex(InstOptions options) const noexcept {
+    // kREX was designed in a way that when shifted there will be no bytes set except REX.[B|X|R|W].
+    // The returned value forms a real REX prefix byte. This case should be unit-tested as well.
+    return (v | uint32_t(options)) >> kREX_Shift;
+  }
+
+  inline uint32_t extractLLMMMMM(InstOptions options) const noexcept { // LL + MM fields combined with the EVEX option.
+    uint32_t llMmmmm = uint32_t(v & (kLL_Mask | kMM_Mask)); // Keep only the LL and MM fields of the opcode.
+    uint32_t vexEvex = uint32_t(options & InstOptions::kX86_Evex); // Keep only the kX86_Evex part of `options`.
+    return (llMmmmm | vexEvex) >> kMM_Shift; // Shift the combined value down by kMM_Shift.
+  }
+
+  inline Opcode& operator=(uint32_t x) noexcept { v = x; return *this; } // Mutating operators update `v` and return *this.
+  inline Opcode& operator+=(uint32_t x) noexcept { v += x; return *this; }
+  inline Opcode& operator-=(uint32_t x) noexcept { v -= x; return *this; }
+  inline Opcode& operator&=(uint32_t x) noexcept { v &= x; return *this; }
+  inline Opcode& operator|=(uint32_t x) noexcept { v |= x; return *this; }
+  inline Opcode& operator^=(uint32_t x) noexcept { v ^= x; return *this; }
+  // Non-mutating operators return the raw uint32_t result rather than an Opcode.
+  inline uint32_t operator&(uint32_t x) const noexcept { return v & x; }
+  inline uint32_t operator|(uint32_t x) const noexcept { return v | x; }
+  inline uint32_t operator^(uint32_t x) const noexcept { return v ^ x; }
+  inline uint32_t operator<<(uint32_t x) const noexcept { return v << x; }
+  inline uint32_t operator>>(uint32_t x) const noexcept { return v >> x; }
+};
+
+//! \}
+//! \endcond
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // ASMJIT_X86_X86OPCODE_P_H_INCLUDED
diff --git a/lib/lepton/asmjit/x86/x86operand.cpp b/lib/lepton/asmjit/x86/x86operand.cpp
new file mode 100644
index 0000000000..a47fec2b5b
--- /dev/null
+++ b/lib/lepton/asmjit/x86/x86operand.cpp
@@ -0,0 +1,231 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#if !defined(ASMJIT_NO_X86)
+
+#include "../core/misc_p.h"
+#include "../x86/x86operand.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(x86)
+
+// x86::Operand - Tests
+// ====================
+
+#if defined(ASMJIT_TEST)
+UNIT(x86_operand) {
+  Label L(1000); // Label with some ID.
+
+  INFO("Checking basic properties of built-in X86 registers");
+  EXPECT(gpb(Gp::kIdAx) == al);
+  EXPECT(gpb(Gp::kIdBx) == bl);
+  EXPECT(gpb(Gp::kIdCx) == cl);
+  EXPECT(gpb(Gp::kIdDx) == dl);
+
+  EXPECT(gpb_lo(Gp::kIdAx) == al);
+  EXPECT(gpb_lo(Gp::kIdBx) == bl);
+  EXPECT(gpb_lo(Gp::kIdCx) == cl);
+  EXPECT(gpb_lo(Gp::kIdDx) == dl);
+
+  EXPECT(gpb_hi(Gp::kIdAx) == ah);
+  EXPECT(gpb_hi(Gp::kIdBx) == bh);
+  EXPECT(gpb_hi(Gp::kIdCx) == ch);
+  EXPECT(gpb_hi(Gp::kIdDx) == dh);
+
+  EXPECT(gpw(Gp::kIdAx) == ax);
+  EXPECT(gpw(Gp::kIdBx) == bx);
+  EXPECT(gpw(Gp::kIdCx) == cx);
+  EXPECT(gpw(Gp::kIdDx) == dx);
+
+  EXPECT(gpd(Gp::kIdAx) == eax);
+  EXPECT(gpd(Gp::kIdBx) == ebx);
+  EXPECT(gpd(Gp::kIdCx) == ecx);
+  EXPECT(gpd(Gp::kIdDx) == edx);
+
+  EXPECT(gpq(Gp::kIdAx) == rax);
+  EXPECT(gpq(Gp::kIdBx) == rbx);
+  EXPECT(gpq(Gp::kIdCx) == rcx);
+  EXPECT(gpq(Gp::kIdDx) == rdx);
+
+  EXPECT(gpb(Gp::kIdAx) != dl);
+  EXPECT(gpw(Gp::kIdBx) != cx);
+  EXPECT(gpd(Gp::kIdCx) != ebx);
+  EXPECT(gpq(Gp::kIdDx) != rax);
+
+  INFO("Checking if x86::reg(...) matches built-in IDs");
+  EXPECT(gpb(5) == bpl); // Id 5 is Gp::kIdBp.
+  EXPECT(gpw(5) == bp);
+  EXPECT(gpd(5) == ebp);
+  EXPECT(gpq(5) == rbp);
+  EXPECT(st(5)  == st5);
+  EXPECT(mm(5)  == mm5);
+  EXPECT(k(5)   == k5);
+  EXPECT(cr(5)  == cr5);
+  EXPECT(dr(5)  == dr5);
+  EXPECT(xmm(5) == xmm5);
+  EXPECT(ymm(5) == ymm5);
+  EXPECT(zmm(5) == zmm5);
+
+  INFO("Checking x86::Gp register properties");
+  EXPECT(Gp().isReg() == true);
+  EXPECT(eax.isReg() == true);
+  EXPECT(eax.id() == 0);
+  EXPECT(eax.size() == 4);
+  EXPECT(eax.type() == RegType::kX86_Gpd);
+  EXPECT(eax.group() == RegGroup::kGp);
+
+  INFO("Checking x86::Xmm register properties");
+  EXPECT(Xmm().isReg() == true);
+  EXPECT(xmm4.isReg() == true);
+  EXPECT(xmm4.id() == 4);
+  EXPECT(xmm4.size() == 16);
+  EXPECT(xmm4.type() == RegType::kX86_Xmm);
+  EXPECT(xmm4.group() == RegGroup::kVec);
+  EXPECT(xmm4.isVec());
+
+  INFO("Checking x86::Ymm register properties");
+  EXPECT(Ymm().isReg() == true);
+  EXPECT(ymm5.isReg() == true);
+  EXPECT(ymm5.id() == 5);
+  EXPECT(ymm5.size() == 32);
+  EXPECT(ymm5.type() == RegType::kX86_Ymm);
+  EXPECT(ymm5.group() == RegGroup::kVec);
+  EXPECT(ymm5.isVec());
+
+  INFO("Checking x86::Zmm register properties");
+  EXPECT(Zmm().isReg() == true);
+  EXPECT(zmm6.isReg() == true);
+  EXPECT(zmm6.id() == 6);
+  EXPECT(zmm6.size() == 64);
+  EXPECT(zmm6.type() == RegType::kX86_Zmm);
+  EXPECT(zmm6.group() == RegGroup::kVec);
+  EXPECT(zmm6.isVec());
+
+  INFO("Checking x86::Vec register properties");
+  EXPECT(Vec().isReg() == true);
+  // Converts a VEC register to a type of the passed register, but keeps the ID.
+  EXPECT(xmm4.cloneAs(ymm10) == ymm4);
+  EXPECT(xmm4.cloneAs(zmm11) == zmm4);
+  EXPECT(ymm5.cloneAs(xmm12) == xmm5);
+  EXPECT(ymm5.cloneAs(zmm13) == zmm5);
+  EXPECT(zmm6.cloneAs(xmm14) == xmm6);
+  EXPECT(zmm6.cloneAs(ymm15) == ymm6);
+
+  EXPECT(xmm7.xmm() == xmm7);
+  EXPECT(xmm7.ymm() == ymm7);
+  EXPECT(xmm7.zmm() == zmm7);
+
+  EXPECT(ymm7.xmm() == xmm7);
+  EXPECT(ymm7.ymm() == ymm7);
+  EXPECT(ymm7.zmm() == zmm7);
+
+  EXPECT(zmm7.xmm() == xmm7);
+  EXPECT(zmm7.ymm() == ymm7);
+  EXPECT(zmm7.zmm() == zmm7);
+
+  INFO("Checking x86::Mm register properties");
+  EXPECT(Mm().isReg() == true);
+  EXPECT(mm2.isReg() == true);
+  EXPECT(mm2.id() == 2);
+  EXPECT(mm2.size() == 8);
+  EXPECT(mm2.type() == RegType::kX86_Mm);
+  EXPECT(mm2.group() == RegGroup::kX86_MM);
+
+  INFO("Checking x86::KReg register properties");
+  EXPECT(KReg().isReg() == true);
+  EXPECT(k3.isReg() == true);
+  EXPECT(k3.id() == 3);
+  EXPECT(k3.size() == 0); // KReg is declared with size 0 in its register traits.
+  EXPECT(k3.type() == RegType::kX86_KReg);
+  EXPECT(k3.group() == RegGroup::kX86_K);
+
+  INFO("Checking x86::St register properties");
+  EXPECT(St().isReg() == true);
+  EXPECT(st1.isReg() == true);
+  EXPECT(st1.id() == 1);
+  EXPECT(st1.size() == 10);
+  EXPECT(st1.type() == RegType::kX86_St);
+  EXPECT(st1.group() == RegGroup::kX86_St);
+
+  INFO("Checking if default constructed regs behave as expected");
+  EXPECT(Reg().isValid() == false);
+  EXPECT(Gp().isValid() == false);
+  EXPECT(Xmm().isValid() == false);
+  EXPECT(Ymm().isValid() == false);
+  EXPECT(Zmm().isValid() == false);
+  EXPECT(Mm().isValid() == false);
+  EXPECT(KReg().isValid() == false);
+  EXPECT(SReg().isValid() == false);
+  EXPECT(CReg().isValid() == false);
+  EXPECT(DReg().isValid() == false);
+  EXPECT(St().isValid() == false);
+  EXPECT(Bnd().isValid() == false);
+
+  INFO("Checking x86::Mem operand");
+  Mem m;
+  EXPECT(m == Mem(), "Two default constructed x86::Mem operands must be equal");
+
+  m = ptr(L); // Label-based operand: expected to have a base that is not a register.
+  EXPECT(m.hasBase() == true);
+  EXPECT(m.hasBaseReg() == false);
+  EXPECT(m.hasBaseLabel() == true);
+  EXPECT(m.hasOffset() == false);
+  EXPECT(m.isOffset64Bit() == false);
+  EXPECT(m.offset() == 0);
+  EXPECT(m.offsetLo32() == 0);
+
+  m = ptr(0x0123456789ABCDEFu); // Absolute 64-bit address only: no base and no index expected.
+  EXPECT(m.hasBase() == false);
+  EXPECT(m.hasBaseReg() == false);
+  EXPECT(m.hasIndex() == false);
+  EXPECT(m.hasIndexReg() == false);
+  EXPECT(m.hasOffset() == true);
+  EXPECT(m.isOffset64Bit() == true);
+  EXPECT(m.offset() == int64_t(0x0123456789ABCDEFu));
+  EXPECT(m.offsetLo32() == int32_t(0x89ABCDEFu));
+  m.addOffset(1);
+  EXPECT(m.offset() == int64_t(0x0123456789ABCDF0u));
+
+  m = ptr(0x0123456789ABCDEFu, rdi, 3); // Absolute address plus index register `rdi` with shift 3.
+  EXPECT(m.hasSegment() == false);
+  EXPECT(m.hasBase() == false);
+  EXPECT(m.hasBaseReg() == false);
+  EXPECT(m.hasIndex() == true);
+  EXPECT(m.hasIndexReg() == true);
+  EXPECT(m.indexType() == rdi.type());
+  EXPECT(m.indexId() == rdi.id());
+  EXPECT(m.shift() == 3);
+  EXPECT(m.hasOffset() == true);
+  EXPECT(m.isOffset64Bit() == true);
+  EXPECT(m.offset() == int64_t(0x0123456789ABCDEFu));
+  EXPECT(m.offsetLo32() == int32_t(0x89ABCDEFu));
+  m.resetIndex();
+  EXPECT(m.hasIndex() == false);
+  EXPECT(m.hasIndexReg() == false);
+
+  m = ptr(rax); // Plain register base: no index and no offset expected until setIndex() is called.
+  EXPECT(m.hasBase() == true);
+  EXPECT(m.hasBaseReg() == true);
+  EXPECT(m.baseType() == rax.type());
+  EXPECT(m.baseId() == rax.id());
+  EXPECT(m.hasIndex() == false);
+  EXPECT(m.hasIndexReg() == false);
+  EXPECT(m.indexType() == RegType::kNone);
+  EXPECT(m.indexId() == 0);
+  EXPECT(m.hasOffset() == false);
+  EXPECT(m.isOffset64Bit() == false);
+  EXPECT(m.offset() == 0);
+  EXPECT(m.offsetLo32() == 0);
+  m.setIndex(rsi);
+  EXPECT(m.hasIndex() == true);
+  EXPECT(m.hasIndexReg() == true);
+  EXPECT(m.indexType() == rsi.type());
+  EXPECT(m.indexId() == rsi.id());
+}
+#endif
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // !ASMJIT_NO_X86
diff --git a/lib/lepton/asmjit/x86/x86operand.h b/lib/lepton/asmjit/x86/x86operand.h
new file mode 100644
index 0000000000..037d4af4dd
--- /dev/null
+++ b/lib/lepton/asmjit/x86/x86operand.h
@@ -0,0 +1,1085 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_X86_X86OPERAND_H_INCLUDED
+#define ASMJIT_X86_X86OPERAND_H_INCLUDED
+
+#include "../core/archtraits.h"
+#include "../core/operand.h"
+#include "../core/type.h"
+#include "../x86/x86globals.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(x86)
+
+//! \addtogroup asmjit_x86
+//! \{
+
+class Reg;
+class Mem;
+
+class Gp;
+class Gpb;
+class GpbLo;
+class GpbHi;
+class Gpw;
+class Gpd;
+class Gpq;
+class Vec;
+class Xmm;
+class Ymm;
+class Zmm;
+class Mm;
+class KReg;
+class SReg;
+class CReg;
+class DReg;
+class St;
+class Bnd;
+class Tmm;
+class Rip;
+
+//! Register traits (X86).
+//!
+//! Register traits contain information about a particular register type. They're used by asmjit to set up register
+//! information on-the-fly and to populate tables that contain register information (this way it's possible to change
+//! register types and groups without having to reorder these tables).
+template<RegType kRegType>
+struct RegTraits : public BaseRegTraits {};
+
+//! \cond
+// <--------------------+-----+-------------------------+------------------------+---+---+------------------+
+//                      | Reg |        Reg-Type         |        Reg-Group       |Sz |Cnt|      TypeId      |
+// <--------------------+-----+-------------------------+------------------------+---+---+------------------+
+ASMJIT_DEFINE_REG_TRAITS(Rip  , RegType::kX86_Rip       , RegGroup::kX86_Rip     , 0 , 1 , TypeId::kVoid    );
+ASMJIT_DEFINE_REG_TRAITS(GpbLo, RegType::kX86_GpbLo     , RegGroup::kGp          , 1 , 16, TypeId::kInt8    );
+ASMJIT_DEFINE_REG_TRAITS(GpbHi, RegType::kX86_GpbHi     , RegGroup::kGp          , 1 , 4 , TypeId::kInt8    );
+ASMJIT_DEFINE_REG_TRAITS(Gpw  , RegType::kX86_Gpw       , RegGroup::kGp          , 2 , 16, TypeId::kInt16   );
+ASMJIT_DEFINE_REG_TRAITS(Gpd  , RegType::kX86_Gpd       , RegGroup::kGp          , 4 , 16, TypeId::kInt32   );
+ASMJIT_DEFINE_REG_TRAITS(Gpq  , RegType::kX86_Gpq       , RegGroup::kGp          , 8 , 16, TypeId::kInt64   );
+ASMJIT_DEFINE_REG_TRAITS(Xmm  , RegType::kX86_Xmm       , RegGroup::kVec         , 16, 32, TypeId::kInt32x4 );
+ASMJIT_DEFINE_REG_TRAITS(Ymm  , RegType::kX86_Ymm       , RegGroup::kVec         , 32, 32, TypeId::kInt32x8 );
+ASMJIT_DEFINE_REG_TRAITS(Zmm  , RegType::kX86_Zmm       , RegGroup::kVec         , 64, 32, TypeId::kInt32x16);
+ASMJIT_DEFINE_REG_TRAITS(KReg , RegType::kX86_KReg      , RegGroup::kX86_K       , 0 , 8 , TypeId::kVoid    );
+ASMJIT_DEFINE_REG_TRAITS(Mm   , RegType::kX86_Mm        , RegGroup::kX86_MM      , 8 , 8 , TypeId::kMmx64   );
+ASMJIT_DEFINE_REG_TRAITS(SReg , RegType::kX86_SReg      , RegGroup::kX86_SReg    , 2 , 7 , TypeId::kVoid    );
+ASMJIT_DEFINE_REG_TRAITS(CReg , RegType::kX86_CReg      , RegGroup::kX86_CReg    , 0 , 16, TypeId::kVoid    );
+ASMJIT_DEFINE_REG_TRAITS(DReg , RegType::kX86_DReg      , RegGroup::kX86_DReg    , 0 , 16, TypeId::kVoid    );
+ASMJIT_DEFINE_REG_TRAITS(St   , RegType::kX86_St        , RegGroup::kX86_St      , 10, 8 , TypeId::kFloat80 );
+ASMJIT_DEFINE_REG_TRAITS(Bnd  , RegType::kX86_Bnd       , RegGroup::kX86_Bnd     , 16, 4 , TypeId::kVoid    );
+ASMJIT_DEFINE_REG_TRAITS(Tmm  , RegType::kX86_Tmm       , RegGroup::kX86_Tmm     , 0 , 8 , TypeId::kVoid    );
+//! \endcond
+
+//! Register (X86).
+class Reg : public BaseReg {
+public:
+  ASMJIT_DEFINE_ABSTRACT_REG(Reg, BaseReg)
+
+  //! Tests whether the register is a GPB register (8-bit) - only checks that `size()` is 1.
+  inline constexpr bool isGpb() const noexcept { return size() == 1; }
+  //! Tests whether the register is a low GPB register (8-bit).
+  inline constexpr bool isGpbLo() const noexcept { return hasBaseSignature(RegTraits<RegType::kX86_GpbLo>::kSignature); }
+  //! Tests whether the register is a high GPB register (8-bit).
+  inline constexpr bool isGpbHi() const noexcept { return hasBaseSignature(RegTraits<RegType::kX86_GpbHi>::kSignature); }
+  //! Tests whether the register is a GPW register (16-bit).
+  inline constexpr bool isGpw() const noexcept { return hasBaseSignature(RegTraits<RegType::kX86_Gpw>::kSignature); }
+  //! Tests whether the register is a GPD register (32-bit).
+  inline constexpr bool isGpd() const noexcept { return hasBaseSignature(RegTraits<RegType::kX86_Gpd>::kSignature); }
+  //! Tests whether the register is a GPQ register (64-bit).
+  inline constexpr bool isGpq() const noexcept { return hasBaseSignature(RegTraits<RegType::kX86_Gpq>::kSignature); }
+  //! Tests whether the register is an XMM register (128-bit).
+  inline constexpr bool isXmm() const noexcept { return hasBaseSignature(RegTraits<RegType::kX86_Xmm>::kSignature); }
+  //! Tests whether the register is a YMM register (256-bit).
+  inline constexpr bool isYmm() const noexcept { return hasBaseSignature(RegTraits<RegType::kX86_Ymm>::kSignature); }
+  //! Tests whether the register is a ZMM register (512-bit).
+  inline constexpr bool isZmm() const noexcept { return hasBaseSignature(RegTraits<RegType::kX86_Zmm>::kSignature); }
+  //! Tests whether the register is an MMX register (64-bit).
+  inline constexpr bool isMm() const noexcept { return hasBaseSignature(RegTraits<RegType::kX86_Mm>::kSignature); }
+  //! Tests whether the register is a K register (64-bit).
+  inline constexpr bool isKReg() const noexcept { return hasBaseSignature(RegTraits<RegType::kX86_KReg>::kSignature); }
+  //! Tests whether the register is a segment register.
+  inline constexpr bool isSReg() const noexcept { return hasBaseSignature(RegTraits<RegType::kX86_SReg>::kSignature); }
+  //! Tests whether the register is a control register.
+  inline constexpr bool isCReg() const noexcept { return hasBaseSignature(RegTraits<RegType::kX86_CReg>::kSignature); }
+  //! Tests whether the register is a debug register.
+  inline constexpr bool isDReg() const noexcept { return hasBaseSignature(RegTraits<RegType::kX86_DReg>::kSignature); }
+  //! Tests whether the register is an FPU register (80-bit).
+  inline constexpr bool isSt() const noexcept { return hasBaseSignature(RegTraits<RegType::kX86_St>::kSignature); }
+  //! Tests whether the register is a bound register.
+  inline constexpr bool isBnd() const noexcept { return hasBaseSignature(RegTraits<RegType::kX86_Bnd>::kSignature); }
+  //! Tests whether the register is a TMM register.
+  inline constexpr bool isTmm() const noexcept { return hasBaseSignature(RegTraits<RegType::kX86_Tmm>::kSignature); }
+  //! Tests whether the register is RIP.
+  inline constexpr bool isRip() const noexcept { return hasBaseSignature(RegTraits<RegType::kX86_Rip>::kSignature); }
+  //! Sets the signature to `RegTraits<REG_TYPE>::kSignature` and the id to `rId`.
+  template<RegType REG_TYPE>
+  inline void setRegT(uint32_t rId) noexcept {
+    setSignature(OperandSignature{RegTraits<REG_TYPE>::kSignature});
+    setId(rId);
+  }
+  //! Sets the signature to the one returned by `signatureOf(type)` and the id to `id`.
+  inline void setTypeAndId(RegType type, uint32_t id) noexcept {
+    setSignature(signatureOf(type));
+    setId(id);
+  }
+  // Lookups by a runtime `RegType`, answered by `ArchTraits::byArch(Arch::kX86)`.
+  static inline RegGroup groupOf(RegType type) noexcept { return ArchTraits::byArch(Arch::kX86).regTypeToGroup(type); }
+  static inline TypeId typeIdOf(RegType type) noexcept { return ArchTraits::byArch(Arch::kX86).regTypeToTypeId(type); }
+  static inline OperandSignature signatureOf(RegType type) noexcept { return ArchTraits::byArch(Arch::kX86).regTypeToSignature(type); }
+  // Variants taking the register type as a template argument; they read `RegTraits<REG_TYPE>` directly.
+  template<RegType REG_TYPE>
+  static inline RegGroup groupOfT() noexcept { return RegGroup(RegTraits<REG_TYPE>::kGroup); }
+
+  template<RegType REG_TYPE>
+  static inline TypeId typeIdOfT() noexcept { return TypeId(RegTraits<REG_TYPE>::kTypeId); }
+
+  template<RegType REG_TYPE>
+  static inline OperandSignature signatureOfT() noexcept { return OperandSignature{RegTraits<REG_TYPE>::kSignature}; }
+  //! Returns the XMM signature if `typeId <= _kVec128End`, YMM if `typeId <= _kVec256End`, ZMM otherwise.
+  static inline OperandSignature signatureOfVecByType(TypeId typeId) noexcept {
+    return OperandSignature{typeId <= TypeId::_kVec128End ? uint32_t(RegTraits<RegType::kX86_Xmm>::kSignature) :
+                            typeId <= TypeId::_kVec256End ? uint32_t(RegTraits<RegType::kX86_Ymm>::kSignature) :
+                                                            uint32_t(RegTraits<RegType::kX86_Zmm>::kSignature)};
+  }
+  //! Returns the XMM signature if `size <= 16`, YMM if `size <= 32`, ZMM otherwise.
+  static inline OperandSignature signatureOfVecBySize(uint32_t size) noexcept {
+    return OperandSignature{size <= 16 ? uint32_t(RegTraits<RegType::kX86_Xmm>::kSignature) :
+                            size <= 32 ? uint32_t(RegTraits<RegType::kX86_Ymm>::kSignature) :
+                                         uint32_t(RegTraits<RegType::kX86_Zmm>::kSignature)};
+  }
+
+  //! Tests whether the `op` operand is either a low or high 8-bit GPB register.
+  static inline bool isGpb(const Operand_& op) noexcept {
+    // Check operand type, register group, and size. Not interested in register type.
+    return op.signature().subset(Signature::kOpTypeMask | Signature::kRegGroupMask | Signature::kSizeMask) ==
+           (Signature::fromOpType(OperandType::kReg) | Signature::fromRegGroup(RegGroup::kGp) | Signature::fromSize(1));
+  }
+  // The tests below view `op` through `op.as<Reg>()` and call the member test of the same name.
+  static inline bool isGpbLo(const Operand_& op) noexcept { return op.as<Reg>().isGpbLo(); }
+  static inline bool isGpbHi(const Operand_& op) noexcept { return op.as<Reg>().isGpbHi(); }
+  static inline bool isGpw(const Operand_& op) noexcept { return op.as<Reg>().isGpw(); }
+  static inline bool isGpd(const Operand_& op) noexcept { return op.as<Reg>().isGpd(); }
+  static inline bool isGpq(const Operand_& op) noexcept { return op.as<Reg>().isGpq(); }
+  static inline bool isXmm(const Operand_& op) noexcept { return op.as<Reg>().isXmm(); }
+  static inline bool isYmm(const Operand_& op) noexcept { return op.as<Reg>().isYmm(); }
+  static inline bool isZmm(const Operand_& op) noexcept { return op.as<Reg>().isZmm(); }
+  static inline bool isMm(const Operand_& op) noexcept { return op.as<Reg>().isMm(); }
+  static inline bool isKReg(const Operand_& op) noexcept { return op.as<Reg>().isKReg(); }
+  static inline bool isSReg(const Operand_& op) noexcept { return op.as<Reg>().isSReg(); }
+  static inline bool isCReg(const Operand_& op) noexcept { return op.as<Reg>().isCReg(); }
+  static inline bool isDReg(const Operand_& op) noexcept { return op.as<Reg>().isDReg(); }
+  static inline bool isSt(const Operand_& op) noexcept { return op.as<Reg>().isSt(); }
+  static inline bool isBnd(const Operand_& op) noexcept { return op.as<Reg>().isBnd(); }
+  static inline bool isTmm(const Operand_& op) noexcept { return op.as<Reg>().isTmm(); }
+  static inline bool isRip(const Operand_& op) noexcept { return op.as<Reg>().isRip(); }
+  // Same tests combined with an id match; bitwise `&` is used, so both operands are always evaluated.
+  static inline bool isGpb(const Operand_& op, uint32_t rId) noexcept { return isGpb(op) & (op.id() == rId); }
+  static inline bool isGpbLo(const Operand_& op, uint32_t rId) noexcept { return isGpbLo(op) & (op.id() == rId); }
+  static inline bool isGpbHi(const Operand_& op, uint32_t rId) noexcept { return isGpbHi(op) & (op.id() == rId); }
+  static inline bool isGpw(const Operand_& op, uint32_t rId) noexcept { return isGpw(op) & (op.id() == rId); }
+  static inline bool isGpd(const Operand_& op, uint32_t rId) noexcept { return isGpd(op) & (op.id() == rId); }
+  static inline bool isGpq(const Operand_& op, uint32_t rId) noexcept { return isGpq(op) & (op.id() == rId); }
+  static inline bool isXmm(const Operand_& op, uint32_t rId) noexcept { return isXmm(op) & (op.id() == rId); }
+  static inline bool isYmm(const Operand_& op, uint32_t rId) noexcept { return isYmm(op) & (op.id() == rId); }
+  static inline bool isZmm(const Operand_& op, uint32_t rId) noexcept { return isZmm(op) & (op.id() == rId); }
+  static inline bool isMm(const Operand_& op, uint32_t rId) noexcept { return isMm(op) & (op.id() == rId); }
+  static inline bool isKReg(const Operand_& op, uint32_t rId) noexcept { return isKReg(op) & (op.id() == rId); }
+  static inline bool isSReg(const Operand_& op, uint32_t rId) noexcept { return isSReg(op) & (op.id() == rId); }
+  static inline bool isCReg(const Operand_& op, uint32_t rId) noexcept { return isCReg(op) & (op.id() == rId); }
+  static inline bool isDReg(const Operand_& op, uint32_t rId) noexcept { return isDReg(op) & (op.id() == rId); }
+  static inline bool isSt(const Operand_& op, uint32_t rId) noexcept { return isSt(op) & (op.id() == rId); }
+  static inline bool isBnd(const Operand_& op, uint32_t rId) noexcept { return isBnd(op) & (op.id() == rId); }
+  static inline bool isTmm(const Operand_& op, uint32_t rId) noexcept { return isTmm(op) & (op.id() == rId); }
+  static inline bool isRip(const Operand_& op, uint32_t rId) noexcept { return isRip(op) & (op.id() == rId); }
+};
+
+//! General purpose register (X86).
+class Gp : public Reg {
+public:
+  ASMJIT_DEFINE_ABSTRACT_REG(Gp, Reg)
+
+  //! Physical id (X86).
+  //!
+  //! \note Register indexes have been reduced to only support general purpose registers. There is no need to
+  //! have enumerations with number suffix that expands to the exactly same value as the suffix value itself.
+  enum Id : uint32_t {
+    kIdAx  = 0,  //!< Physical id of AL|AH|AX|EAX|RAX registers.
+    kIdCx  = 1,  //!< Physical id of CL|CH|CX|ECX|RCX registers.
+    kIdDx  = 2,  //!< Physical id of DL|DH|DX|EDX|RDX registers.
+    kIdBx  = 3,  //!< Physical id of BL|BH|BX|EBX|RBX registers.
+    kIdSp  = 4,  //!< Physical id of SPL|SP|ESP|RSP registers.
+    kIdBp  = 5,  //!< Physical id of BPL|BP|EBP|RBP registers.
+    kIdSi  = 6,  //!< Physical id of SIL|SI|ESI|RSI registers.
+    kIdDi  = 7,  //!< Physical id of DIL|DI|EDI|RDI registers.
+    kIdR8  = 8,  //!< Physical id of R8B|R8W|R8D|R8 registers (64-bit only).
+    kIdR9  = 9,  //!< Physical id of R9B|R9W|R9D|R9 registers (64-bit only).
+    kIdR10 = 10, //!< Physical id of R10B|R10W|R10D|R10 registers (64-bit only).
+    kIdR11 = 11, //!< Physical id of R11B|R11W|R11D|R11 registers (64-bit only).
+    kIdR12 = 12, //!< Physical id of R12B|R12W|R12D|R12 registers (64-bit only).
+    kIdR13 = 13, //!< Physical id of R13B|R13W|R13D|R13 registers (64-bit only).
+    kIdR14 = 14, //!< Physical id of R14B|R14W|R14D|R14 registers (64-bit only).
+    kIdR15 = 15  //!< Physical id of R15B|R15W|R15D|R15 registers (64-bit only).
+  };
+
+  //! Casts this register to 8-bit (LO) part (same as `r8Lo()`).
+  inline GpbLo r8() const noexcept;
+  //! Casts this register to 8-bit (LO) part.
+  inline GpbLo r8Lo() const noexcept;
+  //! Casts this register to 8-bit (HI) part.
+  inline GpbHi r8Hi() const noexcept;
+  //! Casts this register to 16-bit.
+  inline Gpw r16() const noexcept;
+  //! Casts this register to 32-bit.
+  inline Gpd r32() const noexcept;
+  //! Casts this register to 64-bit.
+  inline Gpq r64() const noexcept;
+};
+
+//! Vector register (XMM|YMM|ZMM) (X86).
+class Vec : public Reg {
+  ASMJIT_DEFINE_ABSTRACT_REG(Vec, Reg)
+
+  //! Casts this register to XMM (clone).
+  inline Xmm xmm() const noexcept;
+  //! Casts this register to YMM.
+  inline Ymm ymm() const noexcept;
+  //! Casts this register to ZMM.
+  inline Zmm zmm() const noexcept;
+
+  //! Casts this register to a register that has half the size: ZMM becomes YMM, any other type becomes XMM.
+  inline Vec half() const noexcept {
+    return Vec(type() == RegType::kX86_Zmm ? signatureOfT<RegType::kX86_Ymm>() : signatureOfT<RegType::kX86_Xmm>(), id());
+  }
+};
+
+//! Segment register (X86).
+class SReg : public Reg {
+  ASMJIT_DEFINE_FINAL_REG(SReg, Reg, RegTraits<RegType::kX86_SReg>)
+
+  //! X86 segment id (`kIdNone` is 0, the real segments use ids 1 to 6).
+  enum Id : uint32_t {
+    //! No segment (default).
+    kIdNone = 0,
+    //! ES segment.
+    kIdEs = 1,
+    //! CS segment.
+    kIdCs = 2,
+    //! SS segment.
+    kIdSs = 3,
+    //! DS segment.
+    kIdDs = 4,
+    //! FS segment.
+    kIdFs = 5,
+    //! GS segment.
+    kIdGs = 6,
+
+    //! Count of X86 segment registers supported by AsmJit.
+    //!
+    //! \note X86 architecture has 6 segment registers - ES, CS, SS, DS, FS, GS. X64 architecture lowers them down to
+    //! just FS and GS. AsmJit supports 7 segment registers - all addressable in both X86 and X64 modes and one extra
+    //! called `SReg::kIdNone`, which is AsmJit specific and means that there is no segment register specified.
+    kIdCount = 7
+  };
+};
+
+//! GPB low or high register (X86).
+class Gpb : public Gp { ASMJIT_DEFINE_ABSTRACT_REG(Gpb, Gp) };
+//! GPB low register (X86).
+class GpbLo : public Gpb { ASMJIT_DEFINE_FINAL_REG(GpbLo, Gpb, RegTraits<RegType::kX86_GpbLo>) };
+//! GPB high register (X86).
+class GpbHi : public Gpb { ASMJIT_DEFINE_FINAL_REG(GpbHi, Gpb, RegTraits<RegType::kX86_GpbHi>) };
+//! GPW register (X86).
+class Gpw : public Gp { ASMJIT_DEFINE_FINAL_REG(Gpw, Gp, RegTraits<RegType::kX86_Gpw>) };
+//! GPD register (X86).
+class Gpd : public Gp { ASMJIT_DEFINE_FINAL_REG(Gpd, Gp, RegTraits<RegType::kX86_Gpd>) };
+//! GPQ register (X86_64).
+class Gpq : public Gp { ASMJIT_DEFINE_FINAL_REG(Gpq, Gp, RegTraits<RegType::kX86_Gpq>) };
+
+//! 128-bit XMM register (SSE+).
+class Xmm : public Vec {
+  ASMJIT_DEFINE_FINAL_REG(Xmm, Vec, RegTraits<RegType::kX86_Xmm>)
+  //! Returns XMM with the same id - `half()` of an XMM register is XMM again, the size is not reduced.
+  inline Xmm half() const noexcept { return Xmm(id()); }
+};
+
+//! 256-bit YMM register (AVX+).
+class Ymm : public Vec {
+  ASMJIT_DEFINE_FINAL_REG(Ymm, Vec, RegTraits<RegType::kX86_Ymm>)
+  //! Casts this register to a register that has half the size (XMM).
+  inline Xmm half() const noexcept { return Xmm(id()); }
+};
+
+//! 512-bit ZMM register (AVX512+).
+class Zmm : public Vec {
+  ASMJIT_DEFINE_FINAL_REG(Zmm, Vec, RegTraits<RegType::kX86_Zmm>)
+  //! Casts this register to a register that has half the size (YMM).
+  inline Ymm half() const noexcept { return Ymm(id()); }
+};
+
+//! 64-bit MMX register (MMX+).
+class Mm : public Reg { ASMJIT_DEFINE_FINAL_REG(Mm, Reg, RegTraits<RegType::kX86_Mm>) };
+//! 64-bit K register (AVX512+).
+class KReg : public Reg { ASMJIT_DEFINE_FINAL_REG(KReg, Reg, RegTraits<RegType::kX86_KReg>) };
+//! 32-bit or 64-bit control register (X86).
+class CReg : public Reg { ASMJIT_DEFINE_FINAL_REG(CReg, Reg, RegTraits<RegType::kX86_CReg>) };
+//! 32-bit or 64-bit debug register (X86).
+class DReg : public Reg { ASMJIT_DEFINE_FINAL_REG(DReg, Reg, RegTraits<RegType::kX86_DReg>) };
+//! 80-bit FPU register (X86).
+class St : public Reg { ASMJIT_DEFINE_FINAL_REG(St, Reg, RegTraits<RegType::kX86_St>) };
+//! 128-bit BND register (BND+).
+class Bnd : public Reg { ASMJIT_DEFINE_FINAL_REG(Bnd, Reg, RegTraits<RegType::kX86_Bnd>) };
+//! 8192-bit TMM register (AMX).
+class Tmm : public Reg { ASMJIT_DEFINE_FINAL_REG(Tmm, Reg, RegTraits<RegType::kX86_Tmm>) };
+//! RIP register (X86).
+class Rip : public Reg { ASMJIT_DEFINE_FINAL_REG(Rip, Reg, RegTraits<RegType::kX86_Rip>) };
+
+//! \cond
+inline GpbLo Gp::r8() const noexcept { return GpbLo(id()); }
+inline GpbLo Gp::r8Lo() const noexcept { return GpbLo(id()); }
+inline GpbHi Gp::r8Hi() const noexcept { return GpbHi(id()); }
+inline Gpw Gp::r16() const noexcept { return Gpw(id()); }
+inline Gpd Gp::r32() const noexcept { return Gpd(id()); }
+inline Gpq Gp::r64() const noexcept { return Gpq(id()); }
+inline Xmm Vec::xmm() const noexcept { return Xmm(id()); }
+inline Ymm Vec::ymm() const noexcept { return Ymm(id()); }
+inline Zmm Vec::zmm() const noexcept { return Zmm(id()); }
+//! \endcond
+
+//! \namespace asmjit::x86::regs
+//!
+//! Registers provided by X86 and X64 ISAs are in both `asmjit::x86` and
+//! `asmjit::x86::regs` namespaces so they can be included with a using directive.
+//! For example `using namespace asmjit::x86::regs` would include all registers,
+//! but not other X86-specific API, whereas `using namespace asmjit::x86` would
+//! include everything X86-specific.
+#ifndef _DOXYGEN
+namespace regs {
+#endif
+
+//! Creates an 8-bit low GPB register operand (same result as `gpb_lo()`).
+static constexpr GpbLo gpb(uint32_t rId) noexcept { return GpbLo(rId); }
+//! Creates an 8-bit low GPB register operand.
+static constexpr GpbLo gpb_lo(uint32_t rId) noexcept { return GpbLo(rId); }
+//! Creates an 8-bit high GPB register operand.
+static constexpr GpbHi gpb_hi(uint32_t rId) noexcept { return GpbHi(rId); }
+//! Creates a 16-bit GPW register operand.
+static constexpr Gpw gpw(uint32_t rId) noexcept { return Gpw(rId); }
+//! Creates a 32-bit GPD register operand.
+static constexpr Gpd gpd(uint32_t rId) noexcept { return Gpd(rId); }
+//! Creates a 64-bit GPQ register operand.
+static constexpr Gpq gpq(uint32_t rId) noexcept { return Gpq(rId); }
+//! Creates a 128-bit XMM register operand.
+static constexpr Xmm xmm(uint32_t rId) noexcept { return Xmm(rId); }
+//! Creates a 256-bit YMM register operand.
+static constexpr Ymm ymm(uint32_t rId) noexcept { return Ymm(rId); }
+//! Creates a 512-bit ZMM register operand.
+static constexpr Zmm zmm(uint32_t rId) noexcept { return Zmm(rId); }
+//! Creates a 64-bit MM register operand.
+static constexpr Mm mm(uint32_t rId) noexcept { return Mm(rId); }
+//! Creates a 64-bit K register operand.
+static constexpr KReg k(uint32_t rId) noexcept { return KReg(rId); }
+//! Creates a 32-bit or 64-bit control register operand.
+static constexpr CReg cr(uint32_t rId) noexcept { return CReg(rId); }
+//! Creates a 32-bit or 64-bit debug register operand.
+static constexpr DReg dr(uint32_t rId) noexcept { return DReg(rId); }
+//! Creates an 80-bit ST register operand.
+static constexpr St st(uint32_t rId) noexcept { return St(rId); }
+//! Creates a 128-bit BND register operand.
+static constexpr Bnd bnd(uint32_t rId) noexcept { return Bnd(rId); }
+//! Creates an 8192-bit TMM register operand.
+static constexpr Tmm tmm(uint32_t rId) noexcept { return Tmm(rId); }
+
+static constexpr GpbLo al = GpbLo(Gp::kIdAx);
+static constexpr GpbLo bl = GpbLo(Gp::kIdBx);
+static constexpr GpbLo cl = GpbLo(Gp::kIdCx);
+static constexpr GpbLo dl = GpbLo(Gp::kIdDx);
+static constexpr GpbLo spl = GpbLo(Gp::kIdSp);
+static constexpr GpbLo bpl = GpbLo(Gp::kIdBp);
+static constexpr GpbLo sil = GpbLo(Gp::kIdSi);
+static constexpr GpbLo dil = GpbLo(Gp::kIdDi);
+static constexpr GpbLo r8b = GpbLo(Gp::kIdR8);
+static constexpr GpbLo r9b = GpbLo(Gp::kIdR9);
+static constexpr GpbLo r10b = GpbLo(Gp::kIdR10);
+static constexpr GpbLo r11b = GpbLo(Gp::kIdR11);
+static constexpr GpbLo r12b = GpbLo(Gp::kIdR12);
+static constexpr GpbLo r13b = GpbLo(Gp::kIdR13);
+static constexpr GpbLo r14b = GpbLo(Gp::kIdR14);
+static constexpr GpbLo r15b = GpbLo(Gp::kIdR15);
+
+static constexpr GpbHi ah = GpbHi(Gp::kIdAx);
+static constexpr GpbHi bh = GpbHi(Gp::kIdBx);
+static constexpr GpbHi ch = GpbHi(Gp::kIdCx);
+static constexpr GpbHi dh = GpbHi(Gp::kIdDx);
+
+static constexpr Gpw ax = Gpw(Gp::kIdAx);
+static constexpr Gpw bx = Gpw(Gp::kIdBx);
+static constexpr Gpw cx = Gpw(Gp::kIdCx);
+static constexpr Gpw dx = Gpw(Gp::kIdDx);
+static constexpr Gpw sp = Gpw(Gp::kIdSp);
+static constexpr Gpw bp = Gpw(Gp::kIdBp);
+static constexpr Gpw si = Gpw(Gp::kIdSi);
+static constexpr Gpw di = Gpw(Gp::kIdDi);
+static constexpr Gpw r8w = Gpw(Gp::kIdR8);
+static constexpr Gpw r9w = Gpw(Gp::kIdR9);
+static constexpr Gpw r10w = Gpw(Gp::kIdR10);
+static constexpr Gpw r11w = Gpw(Gp::kIdR11);
+static constexpr Gpw r12w = Gpw(Gp::kIdR12);
+static constexpr Gpw r13w = Gpw(Gp::kIdR13);
+static constexpr Gpw r14w = Gpw(Gp::kIdR14);
+static constexpr Gpw r15w = Gpw(Gp::kIdR15);
+
+static constexpr Gpd eax = Gpd(Gp::kIdAx);
+static constexpr Gpd ebx = Gpd(Gp::kIdBx);
+static constexpr Gpd ecx = Gpd(Gp::kIdCx);
+static constexpr Gpd edx = Gpd(Gp::kIdDx);
+static constexpr Gpd esp = Gpd(Gp::kIdSp);
+static constexpr Gpd ebp = Gpd(Gp::kIdBp);
+static constexpr Gpd esi = Gpd(Gp::kIdSi);
+static constexpr Gpd edi = Gpd(Gp::kIdDi);
+static constexpr Gpd r8d = Gpd(Gp::kIdR8);
+static constexpr Gpd r9d = Gpd(Gp::kIdR9);
+static constexpr Gpd r10d = Gpd(Gp::kIdR10);
+static constexpr Gpd r11d = Gpd(Gp::kIdR11);
+static constexpr Gpd r12d = Gpd(Gp::kIdR12);
+static constexpr Gpd r13d = Gpd(Gp::kIdR13);
+static constexpr Gpd r14d = Gpd(Gp::kIdR14);
+static constexpr Gpd r15d = Gpd(Gp::kIdR15);
+
+static constexpr Gpq rax = Gpq(Gp::kIdAx);
+static constexpr Gpq rbx = Gpq(Gp::kIdBx);
+static constexpr Gpq rcx = Gpq(Gp::kIdCx);
+static constexpr Gpq rdx = Gpq(Gp::kIdDx);
+static constexpr Gpq rsp = Gpq(Gp::kIdSp);
+static constexpr Gpq rbp = Gpq(Gp::kIdBp);
+static constexpr Gpq rsi = Gpq(Gp::kIdSi);
+static constexpr Gpq rdi = Gpq(Gp::kIdDi);
+static constexpr Gpq r8 = Gpq(Gp::kIdR8);
+static constexpr Gpq r9 = Gpq(Gp::kIdR9);
+static constexpr Gpq r10 = Gpq(Gp::kIdR10);
+static constexpr Gpq r11 = Gpq(Gp::kIdR11);
+static constexpr Gpq r12 = Gpq(Gp::kIdR12);
+static constexpr Gpq r13 = Gpq(Gp::kIdR13);
+static constexpr Gpq r14 = Gpq(Gp::kIdR14);
+static constexpr Gpq r15 = Gpq(Gp::kIdR15);
+
+static constexpr Xmm xmm0 = Xmm(0);
+static constexpr Xmm xmm1 = Xmm(1);
+static constexpr Xmm xmm2 = Xmm(2);
+static constexpr Xmm xmm3 = Xmm(3);
+static constexpr Xmm xmm4 = Xmm(4);
+static constexpr Xmm xmm5 = Xmm(5);
+static constexpr Xmm xmm6 = Xmm(6);
+static constexpr Xmm xmm7 = Xmm(7);
+static constexpr Xmm xmm8 = Xmm(8);
+static constexpr Xmm xmm9 = Xmm(9);
+static constexpr Xmm xmm10 = Xmm(10);
+static constexpr Xmm xmm11 = Xmm(11);
+static constexpr Xmm xmm12 = Xmm(12);
+static constexpr Xmm xmm13 = Xmm(13);
+static constexpr Xmm xmm14 = Xmm(14);
+static constexpr Xmm xmm15 = Xmm(15);
+static constexpr Xmm xmm16 = Xmm(16);
+static constexpr Xmm xmm17 = Xmm(17);
+static constexpr Xmm xmm18 = Xmm(18);
+static constexpr Xmm xmm19 = Xmm(19);
+static constexpr Xmm xmm20 = Xmm(20);
+static constexpr Xmm xmm21 = Xmm(21);
+static constexpr Xmm xmm22 = Xmm(22);
+static constexpr Xmm xmm23 = Xmm(23);
+static constexpr Xmm xmm24 = Xmm(24);
+static constexpr Xmm xmm25 = Xmm(25);
+static constexpr Xmm xmm26 = Xmm(26);
+static constexpr Xmm xmm27 = Xmm(27);
+static constexpr Xmm xmm28 = Xmm(28);
+static constexpr Xmm xmm29 = Xmm(29);
+static constexpr Xmm xmm30 = Xmm(30);
+static constexpr Xmm xmm31 = Xmm(31);
+
+static constexpr Ymm ymm0 = Ymm(0);
+static constexpr Ymm ymm1 = Ymm(1);
+static constexpr Ymm ymm2 = Ymm(2);
+static constexpr Ymm ymm3 = Ymm(3);
+static constexpr Ymm ymm4 = Ymm(4);
+static constexpr Ymm ymm5 = Ymm(5);
+static constexpr Ymm ymm6 = Ymm(6);
+static constexpr Ymm ymm7 = Ymm(7);
+static constexpr Ymm ymm8 = Ymm(8);
+static constexpr Ymm ymm9 = Ymm(9);
+static constexpr Ymm ymm10 = Ymm(10);
+static constexpr Ymm ymm11 = Ymm(11);
+static constexpr Ymm ymm12 = Ymm(12);
+static constexpr Ymm ymm13 = Ymm(13);
+static constexpr Ymm ymm14 = Ymm(14);
+static constexpr Ymm ymm15 = Ymm(15);
+static constexpr Ymm ymm16 = Ymm(16);
+static constexpr Ymm ymm17 = Ymm(17);
+static constexpr Ymm ymm18 = Ymm(18);
+static constexpr Ymm ymm19 = Ymm(19);
+static constexpr Ymm ymm20 = Ymm(20);
+static constexpr Ymm ymm21 = Ymm(21);
+static constexpr Ymm ymm22 = Ymm(22);
+static constexpr Ymm ymm23 = Ymm(23);
+static constexpr Ymm ymm24 = Ymm(24);
+static constexpr Ymm ymm25 = Ymm(25);
+static constexpr Ymm ymm26 = Ymm(26);
+static constexpr Ymm ymm27 = Ymm(27);
+static constexpr Ymm ymm28 = Ymm(28);
+static constexpr Ymm ymm29 = Ymm(29);
+static constexpr Ymm ymm30 = Ymm(30);
+static constexpr Ymm ymm31 = Ymm(31);
+
+static constexpr Zmm zmm0 = Zmm(0);
+static constexpr Zmm zmm1 = Zmm(1);
+static constexpr Zmm zmm2 = Zmm(2);
+static constexpr Zmm zmm3 = Zmm(3);
+static constexpr Zmm zmm4 = Zmm(4);
+static constexpr Zmm zmm5 = Zmm(5);
+static constexpr Zmm zmm6 = Zmm(6);
+static constexpr Zmm zmm7 = Zmm(7);
+static constexpr Zmm zmm8 = Zmm(8);
+static constexpr Zmm zmm9 = Zmm(9);
+static constexpr Zmm zmm10 = Zmm(10);
+static constexpr Zmm zmm11 = Zmm(11);
+static constexpr Zmm zmm12 = Zmm(12);
+static constexpr Zmm zmm13 = Zmm(13);
+static constexpr Zmm zmm14 = Zmm(14);
+static constexpr Zmm zmm15 = Zmm(15);
+static constexpr Zmm zmm16 = Zmm(16);
+static constexpr Zmm zmm17 = Zmm(17);
+static constexpr Zmm zmm18 = Zmm(18);
+static constexpr Zmm zmm19 = Zmm(19);
+static constexpr Zmm zmm20 = Zmm(20);
+static constexpr Zmm zmm21 = Zmm(21);
+static constexpr Zmm zmm22 = Zmm(22);
+static constexpr Zmm zmm23 = Zmm(23);
+static constexpr Zmm zmm24 = Zmm(24);
+static constexpr Zmm zmm25 = Zmm(25);
+static constexpr Zmm zmm26 = Zmm(26);
+static constexpr Zmm zmm27 = Zmm(27);
+static constexpr Zmm zmm28 = Zmm(28);
+static constexpr Zmm zmm29 = Zmm(29);
+static constexpr Zmm zmm30 = Zmm(30);
+static constexpr Zmm zmm31 = Zmm(31);
+
+static constexpr Mm mm0 = Mm(0);
+static constexpr Mm mm1 = Mm(1);
+static constexpr Mm mm2 = Mm(2);
+static constexpr Mm mm3 = Mm(3);
+static constexpr Mm mm4 = Mm(4);
+static constexpr Mm mm5 = Mm(5);
+static constexpr Mm mm6 = Mm(6);
+static constexpr Mm mm7 = Mm(7);
+
+static constexpr KReg k0 = KReg(0);
+static constexpr KReg k1 = KReg(1);
+static constexpr KReg k2 = KReg(2);
+static constexpr KReg k3 = KReg(3);
+static constexpr KReg k4 = KReg(4);
+static constexpr KReg k5 = KReg(5);
+static constexpr KReg k6 = KReg(6);
+static constexpr KReg k7 = KReg(7);
+
+static constexpr SReg no_seg = SReg(SReg::kIdNone);
+static constexpr SReg es = SReg(SReg::kIdEs);
+static constexpr SReg cs = SReg(SReg::kIdCs);
+static constexpr SReg ss = SReg(SReg::kIdSs);
+static constexpr SReg ds = SReg(SReg::kIdDs);
+static constexpr SReg fs = SReg(SReg::kIdFs);
+static constexpr SReg gs = SReg(SReg::kIdGs);
+
+static constexpr CReg cr0 = CReg(0);
+static constexpr CReg cr1 = CReg(1);
+static constexpr CReg cr2 = CReg(2);
+static constexpr CReg cr3 = CReg(3);
+static constexpr CReg cr4 = CReg(4);
+static constexpr CReg cr5 = CReg(5);
+static constexpr CReg cr6 = CReg(6);
+static constexpr CReg cr7 = CReg(7);
+static constexpr CReg cr8 = CReg(8);
+static constexpr CReg cr9 = CReg(9);
+static constexpr CReg cr10 = CReg(10);
+static constexpr CReg cr11 = CReg(11);
+static constexpr CReg cr12 = CReg(12);
+static constexpr CReg cr13 = CReg(13);
+static constexpr CReg cr14 = CReg(14);
+static constexpr CReg cr15 = CReg(15);
+
+static constexpr DReg dr0 = DReg(0);
+static constexpr DReg dr1 = DReg(1);
+static constexpr DReg dr2 = DReg(2);
+static constexpr DReg dr3 = DReg(3);
+static constexpr DReg dr4 = DReg(4);
+static constexpr DReg dr5 = DReg(5);
+static constexpr DReg dr6 = DReg(6);
+static constexpr DReg dr7 = DReg(7);
+static constexpr DReg dr8 = DReg(8);
+static constexpr DReg dr9 = DReg(9);
+static constexpr DReg dr10 = DReg(10);
+static constexpr DReg dr11 = DReg(11);
+static constexpr DReg dr12 = DReg(12);
+static constexpr DReg dr13 = DReg(13);
+static constexpr DReg dr14 = DReg(14);
+static constexpr DReg dr15 = DReg(15);
+
+static constexpr St st0 = St(0);
+static constexpr St st1 = St(1);
+static constexpr St st2 = St(2);
+static constexpr St st3 = St(3);
+static constexpr St st4 = St(4);
+static constexpr St st5 = St(5);
+static constexpr St st6 = St(6);
+static constexpr St st7 = St(7);
+
+static constexpr Bnd bnd0 = Bnd(0);
+static constexpr Bnd bnd1 = Bnd(1);
+static constexpr Bnd bnd2 = Bnd(2);
+static constexpr Bnd bnd3 = Bnd(3);
+
+static constexpr Tmm tmm0 = Tmm(0);
+static constexpr Tmm tmm1 = Tmm(1);
+static constexpr Tmm tmm2 = Tmm(2);
+static constexpr Tmm tmm3 = Tmm(3);
+static constexpr Tmm tmm4 = Tmm(4);
+static constexpr Tmm tmm5 = Tmm(5);
+static constexpr Tmm tmm6 = Tmm(6);
+static constexpr Tmm tmm7 = Tmm(7);
+
+static constexpr Rip rip = Rip(0);
+
+#ifndef _DOXYGEN
+} // {regs}
+
+// Make `x86::regs` accessible through `x86` namespace as well.
+using namespace regs;
+#endif
+
+//! Memory operand specific to X86 and X86_64 architecture.
+class Mem : public BaseMem {
+public:
+  //! \name Constants
+  //! \{
+
+  //! Additional bits of operand's signature used by `x86::Mem`.
+  enum AdditionalBits : uint32_t {
+    // Memory address type (2 bits).
+    // |........|........|XX......|........|
+    kSignatureMemAddrTypeShift = 14,
+    kSignatureMemAddrTypeMask = 0x03u << kSignatureMemAddrTypeShift,
+
+    // Memory shift amount (2 bits).
+    // |........|......XX|........|........|
+    kSignatureMemShiftValueShift = 16,
+    kSignatureMemShiftValueMask = 0x03u << kSignatureMemShiftValueShift,
+
+    // Memory segment reg (3 bits).
+    // |........|...XXX..|........|........|
+    kSignatureMemSegmentShift = 18,
+    kSignatureMemSegmentMask = 0x07u << kSignatureMemSegmentShift,
+
+    // Memory broadcast type (3 bits).
+    // |........|XXX.....|........|........|
+    kSignatureMemBroadcastShift = 21,
+    kSignatureMemBroadcastMask = 0x7u << kSignatureMemBroadcastShift
+  };
+
+  //! Address type.
+  enum class AddrType : uint32_t {
+    //! Default address type, Assembler will select the best type when necessary.
+    kDefault = 0,
+    //! Absolute address type.
+    kAbs = 1,
+    //! Relative address type.
+    kRel = 2,
+
+    //! Maximum value of `AddrType`.
+    kMaxValue = kRel
+  };
+
+  //! Memory broadcast type.
+  enum class Broadcast : uint32_t {
+    //! No broadcast (regular memory operand).
+    kNone = 0,
+    //! Broadcast {1to2}.
+    k1To2 = 1,
+    //! Broadcast {1to4}.
+    k1To4 = 2,
+    //! Broadcast {1to8}.
+    k1To8 = 3,
+    //! Broadcast {1to16}.
+    k1To16 = 4,
+    //! Broadcast {1to32}.
+    k1To32 = 5,
+    //! Broadcast {1to64}.
+    k1To64 = 6,
+
+    //! Maximum value of `Broadcast`.
+    kMaxValue = k1To64
+  };
+
+  //! \}
+
+  //! \name Construction & Destruction
+  //! \{
+
+  //! Creates a default `Mem` operand that points to [0].
+  inline constexpr Mem() noexcept
+    : BaseMem() {}
+
+  inline constexpr Mem(const Mem& other) noexcept
+    : BaseMem(other) {}
+
+  inline explicit Mem(Globals::NoInit_) noexcept
+    : BaseMem(Globals::NoInit) {}
+
+  inline constexpr Mem(const Signature& signature, uint32_t baseId, uint32_t indexId, int32_t offset) noexcept
+    : BaseMem(signature, baseId, indexId, offset) {}
+
+  inline constexpr Mem(const Label& base, int32_t off, uint32_t size = 0, Signature signature = OperandSignature{0}) noexcept
+    : BaseMem(Signature::fromOpType(OperandType::kMem) |
+              Signature::fromMemBaseType(RegType::kLabelTag) |
+              Signature::fromSize(size) |
+              signature, base.id(), 0, off) {}
+
+  inline constexpr Mem(const Label& base, const BaseReg& index, uint32_t shift, int32_t off, uint32_t size = 0, Signature signature = OperandSignature{0}) noexcept
+    : BaseMem(Signature::fromOpType(OperandType::kMem) |
+              Signature::fromMemBaseType(RegType::kLabelTag) |
+              Signature::fromMemIndexType(index.type()) |
+              Signature::fromValue<kSignatureMemShiftValueMask>(shift) |
+              Signature::fromSize(size) |
+              signature, base.id(), index.id(), off) {}
+
+  inline constexpr Mem(const BaseReg& base, int32_t off, uint32_t size = 0, Signature signature = OperandSignature{0}) noexcept
+    : BaseMem(Signature::fromOpType(OperandType::kMem) |
+              Signature::fromMemBaseType(base.type()) |
+              Signature::fromSize(size) |
+              signature, base.id(), 0, off) {}
+
+  inline constexpr Mem(const BaseReg& base, const BaseReg& index, uint32_t shift, int32_t off, uint32_t size = 0, Signature signature = OperandSignature{0}) noexcept
+    : BaseMem(Signature::fromOpType(OperandType::kMem) |
+              Signature::fromMemBaseType(base.type()) |
+              Signature::fromMemIndexType(index.type()) |
+              Signature::fromValue<kSignatureMemShiftValueMask>(shift) |
+              Signature::fromSize(size) |
+              signature, base.id(), index.id(), off) {}
+
+  inline constexpr explicit Mem(uint64_t base, uint32_t size = 0, Signature signature = OperandSignature{0}) noexcept
+    : BaseMem(Signature::fromOpType(OperandType::kMem) |
+              Signature::fromSize(size) |
+              signature, uint32_t(base >> 32), 0, int32_t(uint32_t(base & 0xFFFFFFFFu))) {}
+
+  inline constexpr Mem(uint64_t base, const BaseReg& index, uint32_t shift = 0, uint32_t size = 0, Signature signature = OperandSignature{0}) noexcept
+    : BaseMem(Signature::fromOpType(OperandType::kMem) |
+              Signature::fromMemIndexType(index.type()) |
+              Signature::fromValue<kSignatureMemShiftValueMask>(shift) |
+              Signature::fromSize(size) |
+              signature, uint32_t(base >> 32), index.id(), int32_t(uint32_t(base & 0xFFFFFFFFu))) {}
+
+  //! \}
+
+  //! \name Overloaded Operators
+  //! \{
+
+  inline Mem& operator=(const Mem& other) noexcept = default;
+
+  //! \}
+
+  //! \name Clone
+  //! \{
+
+  //! Clones the memory operand.
+  inline constexpr Mem clone() const noexcept { return Mem(*this); }
+
+  //! Creates a new copy of this memory operand adjusted by `off`.
+  inline Mem cloneAdjusted(int64_t off) const noexcept {
+    Mem result(*this);
+    result.addOffset(off);
+    return result;
+  }
+
+  inline constexpr Mem cloneBroadcasted(Broadcast b) const noexcept {
+    return Mem((_signature & ~Signature{kSignatureMemBroadcastMask}) | Signature::fromValue<kSignatureMemBroadcastMask>(b), _baseId, _data[0], int32_t(_data[1]));
+  }
+
+  //! \}
+
+  //! \name Base & Index
+  //! \{
+
+  //! Converts memory `baseType` and `baseId` to `x86::Reg` instance.
+  //!
+  //! The memory must have a valid base register otherwise the result will be wrong.
+  inline Reg baseReg() const noexcept { return Reg::fromTypeAndId(baseType(), baseId()); }
+
+  //! Converts memory `indexType` and `indexId` to `x86::Reg` instance.
+  //!
+  //! The memory must have a valid index register otherwise the result will be wrong.
+  inline Reg indexReg() const noexcept { return Reg::fromTypeAndId(indexType(), indexId()); }
+
+  using BaseMem::setIndex;
+
+  inline void setIndex(const BaseReg& index, uint32_t shift) noexcept {
+    setIndex(index);
+    setShift(shift);
+  }
+
+  //! \}
+
+  //! \name Address Type
+  //! \{
+
+  //! Returns the address type of the memory operand.
+  //!
+  //! By default, address type of newly created memory operands is always \ref AddrType::kDefault.
+  inline constexpr AddrType addrType() const noexcept { return (AddrType)_signature.getField<kSignatureMemAddrTypeMask>(); }
+  //! Sets the address type to `addrType`.
+  inline void setAddrType(AddrType addrType) noexcept { _signature.setField<kSignatureMemAddrTypeMask>(uint32_t(addrType)); }
+  //! Resets the address type to \ref AddrType::kDefault.
+  inline void resetAddrType() noexcept { _signature.setField<kSignatureMemAddrTypeMask>(uint32_t(AddrType::kDefault)); }
+
+  //! Tests whether the address type is \ref AddrType::kAbs.
+  inline constexpr bool isAbs() const noexcept { return addrType() == AddrType::kAbs; }
+  //! Sets the address type to \ref AddrType::kAbs.
+  inline void setAbs() noexcept { setAddrType(AddrType::kAbs); }
+
+  //! Tests whether the address type is \ref AddrType::kRel.
+  inline constexpr bool isRel() const noexcept { return addrType() == AddrType::kRel; }
+  //! Sets the address type to \ref AddrType::kRel.
+  inline void setRel() noexcept { setAddrType(AddrType::kRel); }
+
+  //! \}
+
+  //! \name Segment
+  //! \{
+
+  //! Tests whether the memory operand has a segment override.
+  inline constexpr bool hasSegment() const noexcept { return _signature.hasField<kSignatureMemSegmentMask>(); }
+  //! Returns the associated segment override as `SReg` operand.
+  inline constexpr SReg segment() const noexcept { return SReg(segmentId()); }
+  //! Returns segment override register id, see `SReg::Id`.
+  inline constexpr uint32_t segmentId() const noexcept { return _signature.getField<kSignatureMemSegmentMask>(); }
+
+  //! Sets the segment override to the id of the segment register `seg`.
+  inline void setSegment(const SReg& seg) noexcept { setSegment(seg.id()); }
+  //! Sets the segment override to the segment register id `rId`, see `SReg::Id`.
+  inline void setSegment(uint32_t rId) noexcept { _signature.setField<kSignatureMemSegmentMask>(rId); }
+  //! Resets the segment override (stores 0 in the segment field of the signature).
+  inline void resetSegment() noexcept { _signature.setField<kSignatureMemSegmentMask>(0); }
+
+  //! \}
+
+  //! \name Shift
+  //! \{
+
+  //! Tests whether the memory operand has shift (aka scale) value.
+  inline constexpr bool hasShift() const noexcept { return _signature.hasField<kSignatureMemShiftValueMask>(); }
+  //! Returns the memory operand's shift (aka scale) value.
+  inline constexpr uint32_t shift() const noexcept { return _signature.getField<kSignatureMemShiftValueMask>(); }
+  //! Sets the memory operand's shift (aka scale) value.
+  inline void setShift(uint32_t shift) noexcept { _signature.setField<kSignatureMemShiftValueMask>(shift); }
+  //! Resets the memory operand's shift (aka scale) value to zero.
+  inline void resetShift() noexcept { _signature.setField<kSignatureMemShiftValueMask>(0); }
+
+  //! \}
+
+  //! \name Broadcast
+  //! \{
+
+  //! Tests whether the memory operand has broadcast {1tox}.
+  inline constexpr bool hasBroadcast() const noexcept { return _signature.hasField<kSignatureMemBroadcastMask>(); }
+  //! Returns the memory operand's broadcast.
+  inline constexpr Broadcast getBroadcast() const noexcept { return (Broadcast)_signature.getField<kSignatureMemBroadcastMask>(); }
+  //! Sets the memory operand's broadcast.
+  inline void setBroadcast(Broadcast b) noexcept { _signature.setField<kSignatureMemBroadcastMask>(uint32_t(b)); }
+  //! Resets the memory operand's broadcast to none.
+  inline void resetBroadcast() noexcept { _signature.setField<kSignatureMemBroadcastMask>(0); }
+
+  //! Returns a new `Mem` without a broadcast (the possible broadcast is cleared).
+  inline constexpr Mem _1to1() const noexcept { return cloneBroadcasted(Broadcast::kNone); }
+  //! Returns a new `Mem` with {1to2} broadcast (AVX-512).
+  inline constexpr Mem _1to2() const noexcept { return cloneBroadcasted(Broadcast::k1To2); }
+  //! Returns a new `Mem` with {1to4} broadcast (AVX-512).
+  inline constexpr Mem _1to4() const noexcept { return cloneBroadcasted(Broadcast::k1To4); }
+  //! Returns a new `Mem` with {1to8} broadcast (AVX-512).
+  inline constexpr Mem _1to8() const noexcept { return cloneBroadcasted(Broadcast::k1To8); }
+  //! Returns a new `Mem` with {1to16} broadcast (AVX-512).
+  inline constexpr Mem _1to16() const noexcept { return cloneBroadcasted(Broadcast::k1To16); }
+  //! Returns a new `Mem` with {1to32} broadcast (AVX-512).
+  inline constexpr Mem _1to32() const noexcept { return cloneBroadcasted(Broadcast::k1To32); }
+  //! Returns a new `Mem` with {1to64} broadcast (AVX-512).
+  inline constexpr Mem _1to64() const noexcept { return cloneBroadcasted(Broadcast::k1To64); }
+
+  //! \}
+};
+
+//! Creates `[base.reg + offset]` memory operand.
+static inline constexpr Mem ptr(const Gp& base, int32_t offset = 0, uint32_t size = 0) noexcept {
+  return Mem(base, offset, size);
+}
+//! Creates `[base.reg + (index << shift) + offset]` memory operand (scalar index).
+static inline constexpr Mem ptr(const Gp& base, const Gp& index, uint32_t shift = 0, int32_t offset = 0, uint32_t size = 0) noexcept {
+  return Mem(base, index, shift, offset, size);
+}
+//! Creates `[base.reg + (index << shift) + offset]` memory operand (vector index).
+static inline constexpr Mem ptr(const Gp& base, const Vec& index, uint32_t shift = 0, int32_t offset = 0, uint32_t size = 0) noexcept {
+  return Mem(base, index, shift, offset, size);
+}
+
+//! Creates `[base + offset]` memory operand.
+static inline constexpr Mem ptr(const Label& base, int32_t offset = 0, uint32_t size = 0) noexcept {
+  return Mem(base, offset, size);
+}
+//! Creates `[base + (index << shift) + offset]` memory operand.
+static inline constexpr Mem ptr(const Label& base, const Gp& index, uint32_t shift = 0, int32_t offset = 0, uint32_t size = 0) noexcept {
+  return Mem(base, index, shift, offset, size);
+}
+//! Creates `[base + (index << shift) + offset]` memory operand.
+static inline constexpr Mem ptr(const Label& base, const Vec& index, uint32_t shift = 0, int32_t offset = 0, uint32_t size = 0) noexcept {
+  return Mem(base, index, shift, offset, size);
+}
+
+//! Creates `[rip + offset]` memory operand.
+static inline constexpr Mem ptr(const Rip& rip_, int32_t offset = 0, uint32_t size = 0) noexcept {
+  return Mem(rip_, offset, size);
+}
+
+//! Creates `[base]` memory operand from a 64-bit absolute address (address type left at its default).
+static inline constexpr Mem ptr(uint64_t base, uint32_t size = 0) noexcept {
+  return Mem(base, size);
+}
+//! Creates `[base + (index.reg << shift)]` memory operand from a 64-bit absolute address (register index).
+static inline constexpr Mem ptr(uint64_t base, const Reg& index, uint32_t shift = 0, uint32_t size = 0) noexcept {
+  return Mem(base, index, shift, size);
+}
+//! Creates `[base + (index.reg << shift)]` memory operand from a 64-bit absolute address (vector index).
+static inline constexpr Mem ptr(uint64_t base, const Vec& index, uint32_t shift = 0, uint32_t size = 0) noexcept {
+  return Mem(base, index, shift, size);
+}
+
+//! Creates `[base]` memory operand with its address type set to \ref Mem::AddrType::kAbs.
+static inline constexpr Mem ptr_abs(uint64_t base, uint32_t size = 0) noexcept {
+  return Mem(base, size, OperandSignature::fromValue<Mem::kSignatureMemAddrTypeMask>(Mem::AddrType::kAbs));
+}
+//! Creates `[base + (index.reg << shift)]` memory operand with its address type set to \ref Mem::AddrType::kAbs (register index).
+static inline constexpr Mem ptr_abs(uint64_t base, const Reg& index, uint32_t shift = 0, uint32_t size = 0) noexcept {
+  return Mem(base, index, shift, size, OperandSignature::fromValue<Mem::kSignatureMemAddrTypeMask>(Mem::AddrType::kAbs));
+}
+//! Creates `[base + (index.reg << shift)]` memory operand with its address type set to \ref Mem::AddrType::kAbs (vector index).
+static inline constexpr Mem ptr_abs(uint64_t base, const Vec& index, uint32_t shift = 0, uint32_t size = 0) noexcept {
+  return Mem(base, index, shift, size, OperandSignature::fromValue<Mem::kSignatureMemAddrTypeMask>(Mem::AddrType::kAbs));
+}
+
+//! Creates `[base]` memory operand with its address type set to \ref Mem::AddrType::kRel.
+static inline constexpr Mem ptr_rel(uint64_t base, uint32_t size = 0) noexcept {
+  return Mem(base, size, OperandSignature::fromValue<Mem::kSignatureMemAddrTypeMask>(Mem::AddrType::kRel));
+}
+//! Creates `[base + (index.reg << shift)]` memory operand with its address type set to \ref Mem::AddrType::kRel (register index).
+static inline constexpr Mem ptr_rel(uint64_t base, const Reg& index, uint32_t shift = 0, uint32_t size = 0) noexcept {
+  return Mem(base, index, shift, size, OperandSignature::fromValue<Mem::kSignatureMemAddrTypeMask>(Mem::AddrType::kRel));
+}
+//! Creates `[base + (index.reg << shift)]` memory operand with its address type set to \ref Mem::AddrType::kRel (vector index).
+static inline constexpr Mem ptr_rel(uint64_t base, const Vec& index, uint32_t shift = 0, uint32_t size = 0) noexcept {
+  return Mem(base, index, shift, size, OperandSignature::fromValue<Mem::kSignatureMemAddrTypeMask>(Mem::AddrType::kRel));
+}
+
+#define ASMJIT_MEM_PTR(FUNC, SIZE)                                                    \
+  static constexpr Mem FUNC(const Gp& base, int32_t offset = 0) noexcept {            \
+    return Mem(base, offset, SIZE);                                                   \
+  }                                                                                   \
+  static constexpr Mem FUNC(const Gp& base, const Gp& index, uint32_t shift = 0, int32_t offset = 0) noexcept { \
+    return Mem(base, index, shift, offset, SIZE);                                     \
+  }                                                                                   \
+  static constexpr Mem FUNC(const Gp& base, const Vec& index, uint32_t shift = 0, int32_t offset = 0) noexcept { \
+    return Mem(base, index, shift, offset, SIZE);                                     \
+  }                                                                                   \
+  static constexpr Mem FUNC(const Label& base, int32_t offset = 0) noexcept {         \
+    return Mem(base, offset, SIZE);                                                   \
+  }                                                                                   \
+  static constexpr Mem FUNC(const Label& base, const Gp& index, uint32_t shift = 0, int32_t offset = 0) noexcept { \
+    return Mem(base, index, shift, offset, SIZE);                                     \
+  }                                                                                   \
+  static constexpr Mem FUNC(const Rip& rip_, int32_t offset = 0) noexcept {           \
+    return Mem(rip_, offset, SIZE);                                                   \
+  }                                                                                   \
+  static constexpr Mem FUNC(uint64_t base) noexcept {                                 \
+    return Mem(base, SIZE);                                                           \
+  }                                                                                   \
+  static constexpr Mem FUNC(uint64_t base, const Gp& index, uint32_t shift = 0) noexcept { \
+    return Mem(base, index, shift, SIZE);                                             \
+  }                                                                                   \
+  static constexpr Mem FUNC(uint64_t base, const Vec& index, uint32_t shift = 0) noexcept { \
+    return Mem(base, index, shift, SIZE);                                             \
+  }                                                                                   \
+                                                                                      \
+  static constexpr Mem FUNC##_abs(uint64_t base) noexcept {                           \
+    return Mem(base, SIZE,                                                            \
+      OperandSignature::fromValue<Mem::kSignatureMemAddrTypeMask>(Mem::AddrType::kAbs)); \
+  }                                                                                   \
+  static constexpr Mem FUNC##_abs(uint64_t base, const Gp& index, uint32_t shift = 0) noexcept { \
+    return Mem(base, index, shift, SIZE,                                              \
+      OperandSignature::fromValue<Mem::kSignatureMemAddrTypeMask>(Mem::AddrType::kAbs)); \
+  }                                                                                   \
+  static constexpr Mem FUNC##_abs(uint64_t base, const Vec& index, uint32_t shift = 0) noexcept { \
+    return Mem(base, index, shift, SIZE,                                              \
+      OperandSignature::fromValue<Mem::kSignatureMemAddrTypeMask>(Mem::AddrType::kAbs)); \
+  }                                                                                   \
+                                                                                      \
+  static constexpr Mem FUNC##_rel(uint64_t base) noexcept {                           \
+    return Mem(base, SIZE,                                                            \
+      OperandSignature::fromValue<Mem::kSignatureMemAddrTypeMask>(Mem::AddrType::kRel)); \
+  }                                                                                   \
+  static constexpr Mem FUNC##_rel(uint64_t base, const Gp& index, uint32_t shift = 0) noexcept { \
+    return Mem(base, index, shift, SIZE,                                              \
+      OperandSignature::fromValue<Mem::kSignatureMemAddrTypeMask>(Mem::AddrType::kRel)); \
+  }                                                                                   \
+  static constexpr Mem FUNC##_rel(uint64_t base, const Vec& index, uint32_t shift = 0) noexcept { \
+    return Mem(base, index, shift, SIZE,                                              \
+      OperandSignature::fromValue<Mem::kSignatureMemAddrTypeMask>(Mem::AddrType::kRel)); \
+  }
+
+// Definition of memory operand constructors that use platform independent naming.
+ASMJIT_MEM_PTR(ptr_8, 1)
+ASMJIT_MEM_PTR(ptr_16, 2)
+ASMJIT_MEM_PTR(ptr_32, 4)
+ASMJIT_MEM_PTR(ptr_48, 6)
+ASMJIT_MEM_PTR(ptr_64, 8)
+ASMJIT_MEM_PTR(ptr_80, 10)
+ASMJIT_MEM_PTR(ptr_128, 16)
+ASMJIT_MEM_PTR(ptr_256, 32)
+ASMJIT_MEM_PTR(ptr_512, 64)
+
+// Definition of memory operand constructors that use X86-specific convention.
+ASMJIT_MEM_PTR(byte_ptr, 1)
+ASMJIT_MEM_PTR(word_ptr, 2)
+ASMJIT_MEM_PTR(dword_ptr, 4)
+ASMJIT_MEM_PTR(fword_ptr, 6)
+ASMJIT_MEM_PTR(qword_ptr, 8)
+ASMJIT_MEM_PTR(tbyte_ptr, 10)
+ASMJIT_MEM_PTR(tword_ptr, 10)
+ASMJIT_MEM_PTR(oword_ptr, 16)
+ASMJIT_MEM_PTR(dqword_ptr, 16)
+ASMJIT_MEM_PTR(qqword_ptr, 32)
+ASMJIT_MEM_PTR(xmmword_ptr, 16)
+ASMJIT_MEM_PTR(ymmword_ptr, 32)
+ASMJIT_MEM_PTR(zmmword_ptr, 64)
+
+#undef ASMJIT_MEM_PTR
+
+//! \}
+
+ASMJIT_END_SUB_NAMESPACE
+
+//! \cond INTERNAL
+ASMJIT_BEGIN_NAMESPACE
+ASMJIT_DEFINE_TYPE_ID(x86::Gpb, TypeId::kInt8);
+ASMJIT_DEFINE_TYPE_ID(x86::Gpw, TypeId::kInt16);
+ASMJIT_DEFINE_TYPE_ID(x86::Gpd, TypeId::kInt32);
+ASMJIT_DEFINE_TYPE_ID(x86::Gpq, TypeId::kInt64);
+ASMJIT_DEFINE_TYPE_ID(x86::Mm , TypeId::kMmx64);
+ASMJIT_DEFINE_TYPE_ID(x86::Xmm, TypeId::kInt32x4);
+ASMJIT_DEFINE_TYPE_ID(x86::Ymm, TypeId::kInt32x8);
+ASMJIT_DEFINE_TYPE_ID(x86::Zmm, TypeId::kInt32x16);
+ASMJIT_END_NAMESPACE
+//! \endcond
+
+#endif // ASMJIT_X86_X86OPERAND_H_INCLUDED
diff --git a/lib/lepton/asmjit/x86/x86rapass.cpp b/lib/lepton/asmjit/x86/x86rapass.cpp
new file mode 100644
index 0000000000..02870d95ff
--- /dev/null
+++ b/lib/lepton/asmjit/x86/x86rapass.cpp
@@ -0,0 +1,1509 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#include "../core/api-build_p.h"
+#if !defined(ASMJIT_NO_X86) && !defined(ASMJIT_NO_COMPILER)
+
+#include "../core/cpuinfo.h"
+#include "../core/support.h"
+#include "../core/type.h"
+#include "../x86/x86assembler.h"
+#include "../x86/x86compiler.h"
+#include "../x86/x86instapi_p.h"
+#include "../x86/x86instdb_p.h"
+#include "../x86/x86emithelper_p.h"
+#include "../x86/x86rapass_p.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(x86)
+
+// x86::X86RAPass - Utilities
+// ==========================
+
+static ASMJIT_FORCE_INLINE uint64_t raImmMaskFromSize(uint32_t size) noexcept { // Returns an all-ones mask for an operand of `size` bytes (zero for sizes above 8).
+  ASMJIT_ASSERT(size > 0 && size < 256);
+  static constexpr uint64_t masks[] = { // Indexed by ctz(size); the trailing comments give the byte size of each entry.
+    0x00000000000000FFu, //   1
+    0x000000000000FFFFu, //   2
+    0x00000000FFFFFFFFu, //   4
+    0xFFFFFFFFFFFFFFFFu, //   8
+    0x0000000000000000u, //  16
+    0x0000000000000000u, //  32
+    0x0000000000000000u, //  64
+    0x0000000000000000u, // 128
+    0x0000000000000000u  // 256
+  };
+  return masks[Support::ctz(size)]; // NOTE(review): assumes `size` is a power of two - other sizes select the entry of their lowest set bit.
+}
+
+static const RegMask raConsecutiveLeadCountToRegMaskFilter[5] = { // Indexed by consecutive lead count; ANDed into the lead register's use/out mask in onInst().
+  0xFFFFFFFFu, // [0] No consecutive.
+  0x00000000u, // [1] Invalid, never used.
+  0x55555555u, // [2] Even registers.
+  0x00000000u, // [3] Invalid, never used.
+  0x11111111u  // [4] Every fourth register.
+};
+
+static ASMJIT_FORCE_INLINE RATiedFlags raUseOutFlagsFromRWFlags(OpRWFlags rwFlags) noexcept { // Translates operand kRead/kWrite/kRegMem bits to tied-register flags through a lookup table.
+  static constexpr RATiedFlags map[] = { // Indexed by the masked (kRW | kRegMem) bits of `rwFlags`.
+    RATiedFlags::kNone,                                            // No access
+    RATiedFlags::kRead  | RATiedFlags::kUse,                       // kRead
+    RATiedFlags::kWrite | RATiedFlags::kOut,                       // kWrite
+    RATiedFlags::kRW    | RATiedFlags::kUse,                       // kRW
+    RATiedFlags::kNone,                                            // kRegMem alone (no access)
+    RATiedFlags::kRead  | RATiedFlags::kUse | RATiedFlags::kUseRM, // kRead  | kRegMem
+    RATiedFlags::kWrite | RATiedFlags::kOut | RATiedFlags::kOutRM, // kWrite | kRegMem
+    RATiedFlags::kRW    | RATiedFlags::kUse | RATiedFlags::kUseRM  // kRW    | kRegMem
+  };
+
+  return map[uint32_t(rwFlags & (OpRWFlags::kRW | OpRWFlags::kRegMem))]; // All other bits are ignored.
+}
+
+static ASMJIT_FORCE_INLINE RATiedFlags raRegRwFlags(OpRWFlags flags) noexcept { // Tied flags of a register operand.
+  return raUseOutFlagsFromRWFlags(flags); // Register R/W bits are translated as-is, no shifting needed.
+}
+
+static ASMJIT_FORCE_INLINE RATiedFlags raMemBaseRwFlags(OpRWFlags flags) noexcept { // Tied flags of a memory operand's BASE register.
+  constexpr uint32_t kBaseRwShift = Support::ConstCTZ<uint32_t(OpRWFlags::kMemBaseRW)>::value; // Bit position of the base R/W field.
+  return raUseOutFlagsFromRWFlags(OpRWFlags(uint32_t(flags) >> kBaseRwShift) & OpRWFlags::kRW); // Move the field down to the kRW bits and translate it.
+}
+
+static ASMJIT_FORCE_INLINE RATiedFlags raMemIndexRwFlags(OpRWFlags flags) noexcept { // Tied flags of a memory operand's INDEX register.
+  constexpr uint32_t kIndexRwShift = Support::ConstCTZ<uint32_t(OpRWFlags::kMemIndexRW)>::value; // Bit position of the index R/W field.
+  return raUseOutFlagsFromRWFlags(OpRWFlags(uint32_t(flags) >> kIndexRwShift) & OpRWFlags::kRW); // Move the field down to the kRW bits and translate it.
+}
+
+// x86::RACFGBuilder
+// =================
+
+class RACFGBuilder : public RACFGBuilderT<RACFGBuilder> { // X86 implementation of the CFG builder callbacks (onInst, onInvoke, onRet, ...).
+public:
+  Arch _arch;       // Target architecture, taken from the compiler attached to the pass.
+  bool _is64Bit;    // True when the pass reports an 8-byte register size.
+  bool _avxEnabled; // Cached `pass->avxEnabled()`; used by choose() to select SSE or AVX instruction forms.
+
+  inline RACFGBuilder(X86RAPass* pass) noexcept
+    : RACFGBuilderT<RACFGBuilder>(pass),
+      _arch(pass->cc()->arch()),
+      _is64Bit(pass->registerSize() == 8),
+      _avxEnabled(pass->avxEnabled()) {
+  }
+
+  inline Compiler* cc() const noexcept { return static_cast<Compiler*>(_cc); } // The compiler as x86::Compiler.
+
+  inline uint32_t choose(uint32_t sseInst, uint32_t avxInst) const noexcept { // Returns `avxInst` when AVX is enabled, `sseInst` otherwise.
+    return _avxEnabled ? avxInst : sseInst;
+  }
+
+  Error onInst(InstNode* inst, InstControlFlow& cf, RAInstBuilder& ib) noexcept; // Collects register constraints of one instruction.
+
+  Error onBeforeInvoke(InvokeNode* invokeNode) noexcept;                // Emits argument/return moves around a call.
+  Error onInvoke(InvokeNode* invokeNode, RAInstBuilder& ib) noexcept;   // Registers call args, returns and clobbers.
+
+  Error moveVecToPtr(InvokeNode* invokeNode, const FuncValue& arg, const Vec& src, BaseReg* out) noexcept;    // Spills a vector and yields its address in `out`.
+  Error moveImmToRegArg(InvokeNode* invokeNode, const FuncValue& arg, const Imm& imm_, BaseReg* out) noexcept; // Loads an immediate into a new register `out`.
+  Error moveImmToStackArg(InvokeNode* invokeNode, const FuncValue& arg, const Imm& imm_) noexcept;             // Stores an immediate to the argument's stack slot.
+  Error moveRegToStackArg(InvokeNode* invokeNode, const FuncValue& arg, const BaseReg& reg) noexcept;          // Stores a register to the argument's stack slot.
+
+  Error onBeforeRet(FuncRetNode* funcRet) noexcept;
+  Error onRet(FuncRetNode* funcRet, RAInstBuilder& ib) noexcept;
+};
+
+// x86::RACFGBuilder - OnInst
+// ==========================
+
+Error RACFGBuilder::onInst(InstNode* inst, InstControlFlow& cf, RAInstBuilder& ib) noexcept { // Adds the tied registers (with allocation constraints) of `inst` to `ib` and reports its control flow type in `cf`.
+  InstRWInfo rwInfo;
+
+  InstId instId = inst->id();
+  if (Inst::isDefinedId(instId)) { // Instructions with an undefined id contribute nothing and leave `cf` untouched.
+    uint32_t opCount = inst->opCount();
+    const Operand* opArray = inst->operands();
+    ASMJIT_PROPAGATE(InstInternal::queryRWInfo(_arch, inst->baseInst(), opArray, opCount, &rwInfo));
+
+    const InstDB::InstInfo& instInfo = InstDB::infoById(instId);
+    bool hasGpbHiConstraint = false; // Set when a GPB-HI register is used in 64-bit mode; register masks are patched after the operand loop.
+    uint32_t singleRegOps = 0;       // Number of leading operands that are virtual registers (only incremented while it equals the operand index).
+
+    // Copy instruction RW flags to instruction builder except kMovOp, which is propagated manually later.
+    ib.addInstRWFlags(rwInfo.instFlags() & ~InstRWFlags::kMovOp);
+
+    // Mask of all operand types used by the instruction - can be used as an optimization later.
+    uint32_t opTypesMask = 0u;
+
+    if (opCount) {
+      // The mask is for all registers, but we are mostly interested in AVX-512 registers at the moment. The mask
+      // will be combined with all available registers of the Compiler at the end so it never uses more registers
+      // than available.
+      RegMask instructionAllowedRegs = 0xFFFFFFFFu;
+
+      uint32_t consecutiveOffset = 0;                      // Running offset (+1/+2/+3) handed to consecutive non-lead registers.
+      uint32_t consecutiveLeadId = Globals::kInvalidId;    // Work id of the consecutive lead register, once seen.
+      uint32_t consecutiveParent = Globals::kInvalidId;    // Work id of the previous register in the consecutive sequence, passed to ib.add().
+
+      if (instInfo.isEvex()) {
+        // EVEX instruction and VEX instructions that can be encoded with EVEX have the possibility to use 32 SIMD
+        // registers (XMM/YMM/ZMM).
+        if (instInfo.isVex() && !instInfo.isEvexCompatible()) {
+          if (instInfo.isEvexKRegOnly()) {
+            // EVEX encodable only if the first operand is K register (compare instructions).
+            if (!Reg::isKReg(opArray[0]))
+              instructionAllowedRegs = 0xFFFFu;
+          }
+          else if (instInfo.isEvexTwoOpOnly()) {
+            // EVEX encodable only if the instruction has two operands (gather instructions).
+            if (opCount != 2)
+              instructionAllowedRegs = 0xFFFFu;
+          }
+          else {
+            instructionAllowedRegs = 0xFFFFu;
+          }
+        }
+      }
+      else if (instInfo.isEvexTransformable()) {
+        ib.addAggregatedFlags(RATiedFlags::kInst_IsTransformable);
+      }
+      else {
+        // Not EVEX, restrict everything to [0-15] registers.
+        instructionAllowedRegs = 0xFFFFu;
+      }
+
+      for (uint32_t i = 0; i < opCount; i++) {
+        const Operand& op = opArray[i];
+        const OpRWInfo& opRwInfo = rwInfo.operand(i);
+
+        opTypesMask |= 1u << uint32_t(op.opType());
+
+        if (op.isReg()) {
+          // Register Operand
+          // ----------------
+          const Reg& reg = op.as<Reg>();
+
+          RATiedFlags flags = raRegRwFlags(opRwInfo.opFlags());
+          RegMask allowedRegs = instructionAllowedRegs;
+
+          // X86-specific constraints related to LO|HI general purpose registers. This is only required when the
+          // register is part of the encoding. If the register is fixed we won't restrict anything as it doesn't
+          // restrict encoding of other registers.
+          if (reg.isGpb() && !opRwInfo.hasOpFlag(OpRWFlags::kRegPhysId)) {
+            flags |= RATiedFlags::kX86_Gpb;
+            if (!_is64Bit) {
+              // Restrict to first four - AL|AH|BL|BH|CL|CH|DL|DH. In 32-bit mode it's not possible to access
+              // SIL|DIL, etc, so this is just enough.
+              allowedRegs = 0x0Fu;
+            }
+            else {
+              // If we encountered GPB-HI register the situation is much more complicated than in 32-bit mode.
+              // We need to patch all registers to not use ID higher than 7 and all GPB-LO registers to not use
+              // index higher than 3. Instead of doing the patching here we just set a flag and will do it later,
+              // to not complicate this loop.
+              if (reg.isGpbHi()) {
+                hasGpbHiConstraint = true;
+                allowedRegs = 0x0Fu;
+              }
+            }
+          }
+
+          uint32_t vIndex = Operand::virtIdToIndex(reg.id());
+          if (vIndex < Operand::kVirtIdCount) { // Only virtual registers are tied; other register ids are skipped.
+            RAWorkReg* workReg;
+            ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(vIndex, &workReg));
+
+            // Use RW instead of Write in case that not the whole register is overwritten. This is important
+            // for liveness as we cannot kill a register that will be used. For example `mov al, 0xFF` is not
+            // a write-only operation if user allocated the whole `rax` register.
+            if ((flags & RATiedFlags::kRW) == RATiedFlags::kWrite) {
+              if (workReg->regByteMask() & ~(opRwInfo.writeByteMask() | opRwInfo.extendByteMask())) {
+                // Not write-only operation.
+                flags = (flags & ~RATiedFlags::kOut) | (RATiedFlags::kRead | RATiedFlags::kUse);
+              }
+            }
+
+            // Do not use RegMem flag if changing Reg to Mem requires additional CPU feature that may not be enabled.
+            if (rwInfo.rmFeature() && Support::test(flags, RATiedFlags::kUseRM | RATiedFlags::kOutRM)) {
+              flags &= ~(RATiedFlags::kUseRM | RATiedFlags::kOutRM);
+            }
+
+            RegGroup group = workReg->group();
+            RegMask useRegs = _pass->_availableRegs[group] & allowedRegs;
+            RegMask outRegs = useRegs;
+
+            uint32_t useId = BaseReg::kIdBad;
+            uint32_t outId = BaseReg::kIdBad;
+
+            uint32_t useRewriteMask = 0;
+            uint32_t outRewriteMask = 0;
+
+            if (opRwInfo.consecutiveLeadCount()) {
+              // There must be a single consecutive register lead, otherwise the RW data is invalid.
+              if (consecutiveLeadId != Globals::kInvalidId)
+                return DebugUtils::errored(kErrorInvalidState);
+
+              // A consecutive lead register cannot be used as a consecutive +1/+2/+3 register, the registers must be distinct.
+              if (RATiedReg::consecutiveDataFromFlags(flags) != 0)
+                return DebugUtils::errored(kErrorNotConsecutiveRegs);
+
+              flags |= RATiedFlags::kLeadConsecutive | RATiedReg::consecutiveDataToFlags(opRwInfo.consecutiveLeadCount() - 1);
+              consecutiveLeadId = workReg->workId();
+
+              RegMask filter = raConsecutiveLeadCountToRegMaskFilter[opRwInfo.consecutiveLeadCount()]; // Restricts the lead to every 2nd/4th register.
+              if (Support::test(flags, RATiedFlags::kUse)) {
+                flags |= RATiedFlags::kUseConsecutive;
+                useRegs &= filter;
+              }
+              else {
+                flags |= RATiedFlags::kOutConsecutive;
+                outRegs &= filter;
+              }
+            }
+
+            if (Support::test(flags, RATiedFlags::kUse)) {
+              useRewriteMask = Support::bitMask(inst->getRewriteIndex(&reg._baseId));
+              if (opRwInfo.hasOpFlag(OpRWFlags::kRegPhysId)) {
+                useId = opRwInfo.physId();
+                flags |= RATiedFlags::kUseFixed;
+              }
+              else if (opRwInfo.hasOpFlag(OpRWFlags::kConsecutive)) {
+                if (consecutiveLeadId == Globals::kInvalidId)
+                  return DebugUtils::errored(kErrorInvalidState);
+
+                if (consecutiveLeadId == workReg->workId())
+                  return DebugUtils::errored(kErrorOverlappedRegs);
+
+                flags |= RATiedFlags::kUseConsecutive | RATiedReg::consecutiveDataToFlags(++consecutiveOffset);
+              }
+            }
+            else {
+              outRewriteMask = Support::bitMask(inst->getRewriteIndex(&reg._baseId));
+              if (opRwInfo.hasOpFlag(OpRWFlags::kRegPhysId)) {
+                outId = opRwInfo.physId();
+                flags |= RATiedFlags::kOutFixed;
+              }
+              else if (opRwInfo.hasOpFlag(OpRWFlags::kConsecutive)) {
+                if (consecutiveLeadId == Globals::kInvalidId)
+                  return DebugUtils::errored(kErrorInvalidState);
+
+                if (consecutiveLeadId == workReg->workId())
+                  return DebugUtils::errored(kErrorOverlappedRegs);
+
+                flags |= RATiedFlags::kOutConsecutive | RATiedReg::consecutiveDataToFlags(++consecutiveOffset);
+              }
+            }
+
+            ASMJIT_PROPAGATE(ib.add(workReg, flags, useRegs, useId, useRewriteMask, outRegs, outId, outRewriteMask, opRwInfo.rmSize(), consecutiveParent));
+            if (singleRegOps == i)
+              singleRegOps++;
+
+            if (Support::test(flags, RATiedFlags::kLeadConsecutive | RATiedFlags::kUseConsecutive | RATiedFlags::kOutConsecutive))
+              consecutiveParent = workReg->workId();
+          }
+        }
+        else if (op.isMem()) {
+          // Memory Operand
+          // --------------
+          const Mem& mem = op.as<Mem>();
+          ib.addForbiddenFlags(RATiedFlags::kUseRM | RATiedFlags::kOutRM);
+
+          if (mem.isRegHome()) {
+            RAWorkReg* workReg;
+            ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(Operand::virtIdToIndex(mem.baseId()), &workReg));
+            _pass->getOrCreateStackSlot(workReg);
+          }
+          else if (mem.hasBaseReg()) {
+            uint32_t vIndex = Operand::virtIdToIndex(mem.baseId());
+            if (vIndex < Operand::kVirtIdCount) {
+              RAWorkReg* workReg;
+              ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(vIndex, &workReg));
+
+              RATiedFlags flags = raMemBaseRwFlags(opRwInfo.opFlags());
+              RegGroup group = workReg->group();
+              RegMask inOutRegs = _pass->_availableRegs[group]; // Base register is not restricted by `instructionAllowedRegs`.
+
+              uint32_t useId = BaseReg::kIdBad;
+              uint32_t outId = BaseReg::kIdBad;
+
+              uint32_t useRewriteMask = 0;
+              uint32_t outRewriteMask = 0;
+
+              if (Support::test(flags, RATiedFlags::kUse)) {
+                useRewriteMask = Support::bitMask(inst->getRewriteIndex(&mem._baseId));
+                if (opRwInfo.hasOpFlag(OpRWFlags::kMemPhysId)) {
+                  useId = opRwInfo.physId();
+                  flags |= RATiedFlags::kUseFixed;
+                }
+              }
+              else {
+                outRewriteMask = Support::bitMask(inst->getRewriteIndex(&mem._baseId));
+                if (opRwInfo.hasOpFlag(OpRWFlags::kMemPhysId)) {
+                  outId = opRwInfo.physId();
+                  flags |= RATiedFlags::kOutFixed;
+                }
+              }
+
+              ASMJIT_PROPAGATE(ib.add(workReg, flags, inOutRegs, useId, useRewriteMask, inOutRegs, outId, outRewriteMask));
+            }
+          }
+
+          if (mem.hasIndexReg()) {
+            uint32_t vIndex = Operand::virtIdToIndex(mem.indexId());
+            if (vIndex < Operand::kVirtIdCount) {
+              RAWorkReg* workReg;
+              ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(vIndex, &workReg));
+
+              RATiedFlags flags = raMemIndexRwFlags(opRwInfo.opFlags()); // NOTE(review): only selects the use/out rewrite mask below; ib.add() always receives kUse | kRead.
+              RegGroup group = workReg->group();
+              RegMask inOutRegs = _pass->_availableRegs[group] & instructionAllowedRegs;
+
+              // Index registers have never fixed id on X86/x64.
+              const uint32_t useId = BaseReg::kIdBad;
+              const uint32_t outId = BaseReg::kIdBad;
+
+              uint32_t useRewriteMask = 0;
+              uint32_t outRewriteMask = 0;
+
+              if (Support::test(flags, RATiedFlags::kUse))
+                useRewriteMask = Support::bitMask(inst->getRewriteIndex(&mem._data[Operand::kDataMemIndexId]));
+              else
+                outRewriteMask = Support::bitMask(inst->getRewriteIndex(&mem._data[Operand::kDataMemIndexId]));
+
+              ASMJIT_PROPAGATE(ib.add(workReg, RATiedFlags::kUse | RATiedFlags::kRead, inOutRegs, useId, useRewriteMask, inOutRegs, outId, outRewriteMask));
+            }
+          }
+        }
+      }
+    }
+
+    // Handle extra operand (either REP {cx|ecx|rcx} or AVX-512 {k} selector).
+    if (inst->hasExtraReg()) {
+      uint32_t vIndex = Operand::virtIdToIndex(inst->extraReg().id());
+      if (vIndex < Operand::kVirtIdCount) {
+        RAWorkReg* workReg;
+        ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(vIndex, &workReg));
+
+        RegGroup group = workReg->group();
+        RegMask inOutRegs = _pass->_availableRegs[group];
+        uint32_t rewriteMask = Support::bitMask(inst->getRewriteIndex(&inst->extraReg()._id));
+
+        if (group == RegGroup::kX86_K) {
+          // AVX-512 mask selector {k} register - read-only, allocable to any register except {k0}.
+          ASMJIT_PROPAGATE(ib.add(workReg, RATiedFlags::kUse | RATiedFlags::kRead, inOutRegs, BaseReg::kIdBad, rewriteMask, inOutRegs, BaseReg::kIdBad, 0));
+          singleRegOps = 0; // Keeps `singleRegOps == opCount` false (for opCount > 0), so no same-register hint is applied below.
+        }
+        else {
+          // REP {cx|ecx|rcx} register - read & write, allocable to {cx|ecx|rcx} only.
+          ASMJIT_PROPAGATE(ib.add(workReg, RATiedFlags::kUse | RATiedFlags::kRW, inOutRegs, Gp::kIdCx, rewriteMask, inOutRegs, Gp::kIdBad, 0));
+        }
+      }
+      else {
+        RegGroup group = inst->extraReg().group();
+        if (group == RegGroup::kX86_K && inst->extraReg().id() != 0)
+          singleRegOps = 0; // Same as above, for a non-virtual {k} selector other than id 0.
+      }
+    }
+
+    // If this instruction has move semantics then check whether it could be eliminated if all virtual registers
+    // are allocated into the same register. Take into account the virtual size of the destination register as that's
+    // more important than a physical register size in this case.
+    if (rwInfo.hasInstFlag(InstRWFlags::kMovOp) && !inst->hasExtraReg() && Support::bitTest(opTypesMask, uint32_t(OperandType::kReg))) {
+      // AVX+ move instructions have 3 operand form - the first two operands must be the same to guarantee move semantics.
+      if (opCount == 2 || (opCount == 3 && opArray[0] == opArray[1])) {
+        uint32_t vIndex = Operand::virtIdToIndex(opArray[0].as<Reg>().id());
+        if (vIndex < Operand::kVirtIdCount) {
+          const VirtReg* vReg = _cc->virtRegByIndex(vIndex);
+          const OpRWInfo& opRwInfo = rwInfo.operand(0);
+
+          uint64_t remainingByteMask = vReg->workReg()->regByteMask() & ~opRwInfo.writeByteMask(); // Bytes of the virtual register the instruction does not write.
+          if (remainingByteMask == 0u || (remainingByteMask & opRwInfo.extendByteMask()) == 0)
+            ib.addInstRWFlags(InstRWFlags::kMovOp);
+        }
+      }
+    }
+
+    // Handle X86 constraints.
+    if (hasGpbHiConstraint) {
+      for (RATiedReg& tiedReg : ib) {
+        RegMask filter = tiedReg.hasFlag(RATiedFlags::kX86_Gpb) ? 0x0Fu : 0xFFu; // GPB registers: ids 0-3, everything else: ids 0-7.
+        tiedReg._useRegMask &= filter;
+        tiedReg._outRegMask &= filter;
+      }
+    }
+
+    if (ib.tiedRegCount() == 1) {
+      // Handle special cases of some instructions where all operands share the same
+      // register. In such case the single operand becomes read-only or write-only.
+      InstSameRegHint sameRegHint = InstSameRegHint::kNone;
+      if (singleRegOps == opCount) {
+        sameRegHint = instInfo.sameRegHint();
+      }
+      else if (opCount == 2 && inst->op(1).isImm()) {
+        // Handle some tricks used by X86 asm.
+        const BaseReg& reg = inst->op(0).as<BaseReg>();
+        const Imm& imm = inst->op(1).as<Imm>();
+
+        const RAWorkReg* workReg = _pass->workRegById(ib[0]->workId());
+        uint32_t workRegSize = workReg->signature().size();
+
+        switch (inst->id()) {
+          case Inst::kIdOr: {
+            // Sets the value of the destination register to -1, previous content unused.
+            if (reg.size() >= 4 || reg.size() >= workRegSize) {
+              if (imm.value() == -1 || imm.valueAs<uint64_t>() == raImmMaskFromSize(reg.size()))
+                sameRegHint = InstSameRegHint::kWO;
+            }
+            ASMJIT_FALLTHROUGH; // `or reg, 0` is handled by the shared zero-immediate check below.
+          }
+
+          case Inst::kIdAdd:
+          case Inst::kIdAnd:
+          case Inst::kIdRol:
+          case Inst::kIdRor:
+          case Inst::kIdSar:
+          case Inst::kIdShl:
+          case Inst::kIdShr:
+          case Inst::kIdSub:
+          case Inst::kIdXor: {
+            // Updates [E|R]FLAGS without changing the content.
+            if (reg.size() != 4 || reg.size() >= workRegSize) {
+              if (imm.value() == 0)
+                sameRegHint = InstSameRegHint::kRO;
+            }
+            break;
+          }
+        }
+      }
+
+      switch (sameRegHint) {
+        case InstSameRegHint::kNone:
+          break;
+        case InstSameRegHint::kRO:
+          ib[0]->makeReadOnly();
+          break;
+        case InstSameRegHint::kWO:
+          ib[0]->makeWriteOnly();
+          break;
+      }
+    }
+
+    cf = instInfo.controlFlow();
+  }
+
+  return kErrorOk;
+}
+
+// x86::RACFGBuilder - OnInvoke
+// ============================
+
+Error RACFGBuilder::onBeforeInvoke(InvokeNode* invokeNode) noexcept {
+  const FuncDetail& fd = invokeNode->detail();
+  uint32_t argCount = invokeNode->argCount();
+
+  cc()->_setCursor(invokeNode->prev());
+  RegType nativeRegType = cc()->_gpSignature.regType();
+
+  for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) {
+    const FuncValuePack& argPack = fd.argPack(argIndex);
+    for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
+      if (!argPack[valueIndex])
+        break;
+
+      const FuncValue& arg = argPack[valueIndex];
+      const Operand& op = invokeNode->arg(argIndex, valueIndex);
+
+      if (op.isNone())
+        continue;
+
+      if (op.isReg()) {
+        const Reg& reg = op.as<Reg>();
+        RAWorkReg* workReg;
+        ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(Operand::virtIdToIndex(reg.id()), &workReg));
+
+        if (arg.isReg()) {
+          RegGroup regGroup = workReg->group();
+          RegGroup argGroup = Reg::groupOf(arg.regType());
+
+          if (arg.isIndirect()) {
+            if (reg.isGp()) {
+              if (reg.type() != nativeRegType)
+                return DebugUtils::errored(kErrorInvalidAssignment);
+              // It's considered allocated if this is an indirect argument and the user used GP.
+              continue;
+            }
+
+            BaseReg indirectReg;
+            moveVecToPtr(invokeNode, arg, reg.as<Vec>(), &indirectReg);
+            invokeNode->_args[argIndex][valueIndex] = indirectReg;
+          }
+          else {
+            if (regGroup != argGroup) {
+              // TODO: Conversion is not supported.
+              return DebugUtils::errored(kErrorInvalidAssignment);
+            }
+          }
+        }
+        else {
+          if (arg.isIndirect()) {
+            if (reg.isGp()) {
+              if (reg.type() != nativeRegType)
+                return DebugUtils::errored(kErrorInvalidAssignment);
+
+              ASMJIT_PROPAGATE(moveRegToStackArg(invokeNode, arg, reg));
+              continue;
+            }
+
+            BaseReg indirectReg;
+            moveVecToPtr(invokeNode, arg, reg.as<Vec>(), &indirectReg);
+            ASMJIT_PROPAGATE(moveRegToStackArg(invokeNode, arg, indirectReg));
+          }
+          else {
+            ASMJIT_PROPAGATE(moveRegToStackArg(invokeNode, arg, reg));
+          }
+        }
+      }
+      else if (op.isImm()) {
+        if (arg.isReg()) {
+          BaseReg reg;
+          ASMJIT_PROPAGATE(moveImmToRegArg(invokeNode, arg, op.as<Imm>(), &reg));
+          invokeNode->_args[argIndex][valueIndex] = reg;
+        }
+        else {
+          ASMJIT_PROPAGATE(moveImmToStackArg(invokeNode, arg, op.as<Imm>()));
+        }
+      }
+    }
+  }
+
+  cc()->_setCursor(invokeNode);
+  if (fd.hasFlag(CallConvFlags::kCalleePopsStack) && fd.argStackSize() != 0)
+    ASMJIT_PROPAGATE(cc()->sub(cc()->zsp(), fd.argStackSize()));
+
+  if (fd.hasRet()) {
+    for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
+      const FuncValue& ret = fd.ret(valueIndex);
+      if (!ret)
+        break;
+
+      const Operand& op = invokeNode->ret(valueIndex);
+      if (op.isReg()) {
+        const Reg& reg = op.as<Reg>();
+        RAWorkReg* workReg;
+        ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(Operand::virtIdToIndex(reg.id()), &workReg));
+
+        if (ret.isReg()) {
+          if (ret.regType() == RegType::kX86_St) {
+            if (workReg->group() != RegGroup::kVec)
+              return DebugUtils::errored(kErrorInvalidAssignment);
+
+            Reg dst(workReg->signature(), workReg->virtId());
+            Mem mem;
+
+            TypeId typeId = TypeUtils::scalarOf(workReg->typeId());
+            if (ret.hasTypeId())
+              typeId = ret.typeId();
+
+            switch (typeId) {
+              case TypeId::kFloat32:
+                ASMJIT_PROPAGATE(_pass->useTemporaryMem(mem, 4, 4));
+                mem.setSize(4);
+                ASMJIT_PROPAGATE(cc()->fstp(mem));
+                ASMJIT_PROPAGATE(cc()->emit(choose(Inst::kIdMovss, Inst::kIdVmovss), dst.as<Xmm>(), mem));
+                break;
+
+              case TypeId::kFloat64:
+                ASMJIT_PROPAGATE(_pass->useTemporaryMem(mem, 8, 4));
+                mem.setSize(8);
+                ASMJIT_PROPAGATE(cc()->fstp(mem));
+                ASMJIT_PROPAGATE(cc()->emit(choose(Inst::kIdMovsd, Inst::kIdVmovsd), dst.as<Xmm>(), mem));
+                break;
+
+              default:
+                return DebugUtils::errored(kErrorInvalidAssignment);
+            }
+          }
+          else {
+            RegGroup regGroup = workReg->group();
+            RegGroup retGroup = Reg::groupOf(ret.regType());
+
+            if (regGroup != retGroup) {
+              // TODO: Conversion is not supported.
+              return DebugUtils::errored(kErrorInvalidAssignment);
+            }
+          }
+        }
+      }
+    }
+  }
+
+  // This block has function call(s).
+  _curBlock->addFlags(RABlockFlags::kHasFuncCalls);
+  _pass->func()->frame().addAttributes(FuncAttributes::kHasFuncCalls);
+  _pass->func()->frame().updateCallStackSize(fd.argStackSize());
+
+  return kErrorOk;
+}
+
+Error RACFGBuilder::onInvoke(InvokeNode* invokeNode, RAInstBuilder& ib) noexcept { // Registers the call's register arguments, register returns and clobbered registers in `ib`.
+  uint32_t argCount = invokeNode->argCount();
+  const FuncDetail& fd = invokeNode->detail();
+
+  for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) {
+    const FuncValuePack& argPack = fd.argPack(argIndex);
+    for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
+      if (!argPack[valueIndex])
+        continue; // NOTE(review): onBeforeInvoke() uses `break` for an empty pack slot; here the remaining slots are still visited.
+
+      const FuncValue& arg = argPack[valueIndex];
+      const Operand& op = invokeNode->arg(argIndex, valueIndex);
+
+      if (op.isNone())
+        continue;
+
+      if (op.isReg()) {
+        const Reg& reg = op.as<Reg>();
+        RAWorkReg* workReg;
+        ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(Operand::virtIdToIndex(reg.id()), &workReg));
+
+        if (arg.isIndirect()) {
+          RegGroup regGroup = workReg->group();
+          if (regGroup != RegGroup::kGp) // An indirect argument must already be held by a GP register at this point.
+            return DebugUtils::errored(kErrorInvalidState);
+          ASMJIT_PROPAGATE(ib.addCallArg(workReg, arg.regId()));
+        }
+        else if (arg.isReg()) {
+          RegGroup regGroup = workReg->group();
+          RegGroup argGroup = Reg::groupOf(arg.regType());
+
+          if (regGroup == argGroup) { // Mismatching groups are silently skipped here.
+            ASMJIT_PROPAGATE(ib.addCallArg(workReg, arg.regId()));
+          }
+        }
+      }
+    }
+  }
+
+  for (uint32_t retIndex = 0; retIndex < Globals::kMaxValuePack; retIndex++) {
+    const FuncValue& ret = fd.ret(retIndex);
+    if (!ret)
+      break;
+
+    // X87 (ST) return values are not handled here, they are skipped by the check below.
+    const Operand& op = invokeNode->ret(retIndex);
+    if (ret.regType() == RegType::kX86_St)
+      continue;
+
+    if (op.isReg()) {
+      const Reg& reg = op.as<Reg>();
+      RAWorkReg* workReg;
+      ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(Operand::virtIdToIndex(reg.id()), &workReg));
+
+      if (ret.isReg()) {
+        RegGroup regGroup = workReg->group();
+        RegGroup retGroup = Reg::groupOf(ret.regType());
+
+        if (regGroup == retGroup) { // Mismatching groups are silently skipped here.
+          ASMJIT_PROPAGATE(ib.addCallRet(workReg, ret.regId()));
+        }
+      }
+      else {
+        return DebugUtils::errored(kErrorInvalidAssignment); // A register operand cannot receive a non-register return value.
+      }
+    }
+  }
+
+  // Setup clobbered registers.
+  for (RegGroup group : RegGroupVirtValues{})
+    ib._clobbered[group] = Support::lsbMask<RegMask>(_pass->_physRegCount[group]) & ~fd.preservedRegs(group); // All physical registers of the group minus those `fd` reports as preserved.
+
+  return kErrorOk;
+}
+
+// x86::RACFGBuilder - MoveVecToPtr
+// ================================
+
+static inline OperandSignature x86VecRegSignatureBySize(uint32_t size) noexcept { // Selects the XMM (size < 32), YMM (size < 64) or ZMM register signature.
+  const uint32_t signatureBits = size < 32 ? uint32_t(Xmm::kSignature) : size < 64 ? uint32_t(Ymm::kSignature) : uint32_t(Zmm::kSignature);
+  return OperandSignature{signatureBits};
+}
+
+Error RACFGBuilder::moveVecToPtr(InvokeNode* invokeNode, const FuncValue& arg, const Vec& src, BaseReg* out) noexcept { // Stores vector `src` into the call's argument stack area and returns a new GP virtual register in `out` holding its address.
+  DebugUtils::unused(invokeNode);
+  ASMJIT_ASSERT(arg.isReg()); // NOTE(review): onBeforeInvoke() also calls this from its non-register (stack) argument branch - confirm the assertion is intended.
+
+  uint32_t argSize = TypeUtils::sizeOf(arg.typeId());
+  if (argSize == 0)
+    return DebugUtils::errored(kErrorInvalidState);
+
+  if (argSize < 16) // Use at least a 16-byte slot.
+    argSize = 16;
+
+  uint32_t argStackOffset = Support::alignUp(invokeNode->detail()._argStackSize, argSize); // The slot is appended after the current argument stack, aligned to its own size.
+  _funcNode->frame().updateCallStackAlignment(argSize);
+  invokeNode->detail()._argStackSize = argStackOffset + argSize;
+
+  Vec vecReg(x86VecRegSignatureBySize(argSize), src.id()); // Same register id as `src`, viewed with the signature chosen for `argSize`.
+  Mem vecPtr = ptr(_pass->_sp.as<Gp>(), int32_t(argStackOffset));
+
+  uint32_t vMovInstId = choose(Inst::kIdMovaps, Inst::kIdVmovaps);
+  if (argSize > 16) // Slots larger than 16 bytes always use the AVX form.
+    vMovInstId = Inst::kIdVmovaps;
+
+  ASMJIT_PROPAGATE(cc()->_newReg(out, ArchTraits::byArch(cc()->arch()).regTypeToTypeId(cc()->_gpSignature.regType()), nullptr));
+
+  VirtReg* vReg = cc()->virtRegById(out->id());
+  vReg->setWeight(BaseRAPass::kCallArgWeight);
+
+  ASMJIT_PROPAGATE(cc()->lea(out->as<Gp>(), vecPtr));                   // out = address of the slot
+  ASMJIT_PROPAGATE(cc()->emit(vMovInstId, ptr(out->as<Gp>()), vecReg)); // [out] = vector
+
+  if (arg.isStack()) { // Stack-passed argument: also write the pointer into the argument's own stack slot.
+    Mem stackPtr = ptr(_pass->_sp.as<Gp>(), arg.stackOffset());
+    ASMJIT_PROPAGATE(cc()->mov(stackPtr, out->as<Gp>()));
+  }
+
+  return kErrorOk;
+}
+
+// x86::RACFGBuilder - MoveImmToRegArg
+// ===================================
+
+Error RACFGBuilder::moveImmToRegArg(InvokeNode* invokeNode, const FuncValue& arg, const Imm& imm_, BaseReg* out) noexcept { // Creates a new virtual register in `out` and emits `mov out, imm` for an immediate passed in a register.
+  DebugUtils::unused(invokeNode);
+  ASMJIT_ASSERT(arg.isReg());
+
+  Imm imm(imm_); // Local copy, extended below according to the argument's type.
+  TypeId rTypeId = TypeId::kUInt32; // 32-bit register unless a 64-bit value does not fit into uint32.
+
+  switch (arg.typeId()) {
+    case TypeId::kInt8: imm.signExtend8Bits(); goto MovU32;
+    case TypeId::kUInt8: imm.zeroExtend8Bits(); goto MovU32;
+    case TypeId::kInt16: imm.signExtend16Bits(); goto MovU32;
+    case TypeId::kUInt16: imm.zeroExtend16Bits(); goto MovU32;
+
+    case TypeId::kInt32:
+    case TypeId::kUInt32:
+MovU32:
+      imm.zeroExtend32Bits(); // Shared tail of all types that are at most 32 bits wide.
+      break;
+
+    case TypeId::kInt64:
+    case TypeId::kUInt64:
+      // Moving to GPD automatically zero extends in 64-bit mode.
+      if (imm.isUInt32()) {
+        imm.zeroExtend32Bits();
+        break;
+      }
+
+      rTypeId = TypeId::kUInt64;
+      break;
+
+    default:
+      return DebugUtils::errored(kErrorInvalidAssignment); // Other argument types cannot take an immediate here.
+  }
+
+  ASMJIT_PROPAGATE(cc()->_newReg(out, rTypeId, nullptr));
+  cc()->virtRegById(out->id())->setWeight(BaseRAPass::kCallArgWeight);
+
+  return cc()->mov(out->as<x86::Gp>(), imm);
+}
+
+// x86::RACFGBuilder - MoveImmToStackArg
+// =====================================
+
+Error RACFGBuilder::moveImmToStackArg(InvokeNode* invokeNode, const FuncValue& arg, const Imm& imm_) noexcept { // Stores an immediate argument to its stack slot using one or two `mov [sp + offset], imm` instructions.
+  DebugUtils::unused(invokeNode);
+  ASMJIT_ASSERT(arg.isStack());
+
+  Mem stackPtr = ptr(_pass->_sp.as<Gp>(), arg.stackOffset());
+  Imm imm[2]; // [0] = low (or only) part, [1] = high 32 bits when the value is split.
+
+  stackPtr.setSize(4); // Default to 4-byte stores.
+  imm[0] = imm_;
+  uint32_t nMovs = 0;
+
+  // One stack entry has the same size as the native register size. That means that if we want to move a 32-bit
+  // integer on the stack in 64-bit mode, we need to extend it to a 64-bit integer first. In 32-bit mode, pushing
+  // a 64-bit on stack is done in two steps by pushing low and high parts separately.
+  switch (arg.typeId()) {
+    case TypeId::kInt8: imm[0].signExtend8Bits(); goto MovU32;
+    case TypeId::kUInt8: imm[0].zeroExtend8Bits(); goto MovU32;
+    case TypeId::kInt16: imm[0].signExtend16Bits(); goto MovU32;
+    case TypeId::kUInt16: imm[0].zeroExtend16Bits(); goto MovU32;
+
+    case TypeId::kInt32:
+    case TypeId::kUInt32:
+    case TypeId::kFloat32:
+MovU32:
+      imm[0].zeroExtend32Bits(); // Shared tail of all types that are at most 32 bits wide.
+      nMovs = 1;
+      break;
+
+    case TypeId::kInt64:
+    case TypeId::kUInt64:
+    case TypeId::kFloat64:
+    case TypeId::kMmx32:
+    case TypeId::kMmx64:
+      if (_is64Bit && imm[0].isInt32()) { // One 8-byte store is enough in 64-bit mode when the value fits into int32.
+        stackPtr.setSize(8);
+        nMovs = 1;
+        break;
+      }
+
+      imm[1].setValue(imm[0].uint32Hi()); // Otherwise split into two 32-bit halves: low at the offset, high right after it.
+      imm[0].zeroExtend32Bits();
+      nMovs = 2;
+      break;
+
+    default:
+      return DebugUtils::errored(kErrorInvalidAssignment);
+  }
+
+  for (uint32_t i = 0; i < nMovs; i++) {
+    ASMJIT_PROPAGATE(cc()->mov(stackPtr, imm[i]));
+    stackPtr.addOffsetLo32(int32_t(stackPtr.size())); // Advance by the store size to address the next part.
+  }
+
+  return kErrorOk;
+}
+
+// x86::RACFGBuilder - MoveRegToStackArg
+// =====================================
+// Stores `reg` into the stack slot of `arg`; the move/extension sequence is chosen from destination and source type ids.
+Error RACFGBuilder::moveRegToStackArg(InvokeNode* invokeNode, const FuncValue& arg, const BaseReg& reg) noexcept {
+  DebugUtils::unused(invokeNode);
+  ASMJIT_ASSERT(arg.isStack());
+
+  Mem stackPtr = ptr(_pass->_sp.as<Gp>(), arg.stackOffset());
+  Reg r0, r1; // r0 = register that is stored; r1 = narrower source view used by the extending instruction.
+
+  VirtReg* vr = cc()->virtRegById(reg.id());
+  uint32_t registerSize = cc()->registerSize();
+  InstId instId = 0; // Extending instruction (movsx/movzx/movsxd) used by the Extend* labels below.
+
+  TypeId dstTypeId = arg.typeId();
+  TypeId srcTypeId = vr->typeId();
+
+  switch (dstTypeId) {
+    case TypeId::kInt64:
+    case TypeId::kUInt64:
+      // Extend BYTE->QWORD (GP).
+      if (TypeUtils::isGp8(srcTypeId)) {
+        r1.setRegT<RegType::kX86_GpbLo>(reg.id());
+
+        instId = (dstTypeId == TypeId::kInt64 && srcTypeId == TypeId::kInt8) ? Inst::kIdMovsx : Inst::kIdMovzx;
+        goto ExtendMovGpXQ;
+      }
+
+      // Extend WORD->QWORD (GP).
+      if (TypeUtils::isGp16(srcTypeId)) {
+        r1.setRegT<RegType::kX86_Gpw>(reg.id());
+
+        instId = (dstTypeId == TypeId::kInt64 && srcTypeId == TypeId::kInt16) ? Inst::kIdMovsx : Inst::kIdMovzx;
+        goto ExtendMovGpXQ;
+      }
+
+      // Extend DWORD->QWORD (GP).
+      if (TypeUtils::isGp32(srcTypeId)) {
+        r1.setRegT<RegType::kX86_Gpd>(reg.id());
+
+        instId = Inst::kIdMovsxd;
+        if (dstTypeId == TypeId::kInt64 && srcTypeId == TypeId::kInt32)
+          goto ExtendMovGpXQ;
+        else
+          goto ZeroExtendGpDQ;
+      }
+
+      // Move QWORD (GP).
+      if (TypeUtils::isGp64(srcTypeId)) goto MovGpQ;
+      if (TypeUtils::isMmx(srcTypeId)) goto MovMmQ;
+      if (TypeUtils::isVec(srcTypeId)) goto MovXmmQ;
+      break;
+
+    case TypeId::kInt32:
+    case TypeId::kUInt32:
+    case TypeId::kInt16:
+    case TypeId::kUInt16:
+      // DWORD <- WORD (Zero|Sign Extend).
+      if (TypeUtils::isGp16(srcTypeId)) {
+        bool isDstSigned = dstTypeId == TypeId::kInt16 || dstTypeId == TypeId::kInt32;
+        bool isSrcSigned = srcTypeId == TypeId::kInt8  || srcTypeId == TypeId::kInt16;
+
+        r1.setRegT<RegType::kX86_Gpw>(reg.id());
+        instId = isDstSigned && isSrcSigned ? Inst::kIdMovsx : Inst::kIdMovzx;
+        goto ExtendMovGpD;
+      }
+
+      // DWORD <- BYTE (Zero|Sign Extend).
+      if (TypeUtils::isGp8(srcTypeId)) {
+        bool isDstSigned = dstTypeId == TypeId::kInt16 || dstTypeId == TypeId::kInt32;
+        bool isSrcSigned = srcTypeId == TypeId::kInt8  || srcTypeId == TypeId::kInt16;
+
+        r1.setRegT<RegType::kX86_GpbLo>(reg.id());
+        instId = isDstSigned && isSrcSigned ? Inst::kIdMovsx : Inst::kIdMovzx;
+        goto ExtendMovGpD;
+      }
+      ASMJIT_FALLTHROUGH;
+
+    case TypeId::kInt8:
+    case TypeId::kUInt8:
+      if (TypeUtils::isInt(srcTypeId)) goto MovGpD;
+      if (TypeUtils::isMmx(srcTypeId)) goto MovMmD;
+      if (TypeUtils::isVec(srcTypeId)) goto MovXmmD;
+      break;
+
+    case TypeId::kMmx32:
+    case TypeId::kMmx64:
+      // Extend BYTE->QWORD (GP).
+      if (TypeUtils::isGp8(srcTypeId)) {
+        r1.setRegT<RegType::kX86_GpbLo>(reg.id());
+
+        instId = Inst::kIdMovzx;
+        goto ExtendMovGpXQ;
+      }
+
+      // Extend WORD->QWORD (GP).
+      if (TypeUtils::isGp16(srcTypeId)) {
+        r1.setRegT<RegType::kX86_Gpw>(reg.id());
+
+        instId = Inst::kIdMovzx;
+        goto ExtendMovGpXQ;
+      }
+
+      if (TypeUtils::isGp32(srcTypeId)) goto ZeroExtendGpDQ; // Sets the store size and r0, then continues at ExtendMovGpDQ.
+      if (TypeUtils::isGp64(srcTypeId)) goto MovGpQ;
+      if (TypeUtils::isMmx(srcTypeId)) goto MovMmQ;
+      if (TypeUtils::isVec(srcTypeId)) goto MovXmmQ;
+      break;
+
+    case TypeId::kFloat32:
+    case TypeId::kFloat32x1:
+      if (TypeUtils::isVec(srcTypeId)) goto MovXmmD;
+      break;
+
+    case TypeId::kFloat64:
+    case TypeId::kFloat64x1:
+      if (TypeUtils::isVec(srcTypeId)) goto MovXmmQ;
+      break;
+
+    default:
+      if (TypeUtils::isVec(dstTypeId) && reg.as<Reg>().isVec()) {
+        stackPtr.setSize(TypeUtils::sizeOf(dstTypeId));
+        uint32_t vMovInstId = choose(Inst::kIdMovaps, Inst::kIdVmovaps);
+
+        if (TypeUtils::isVec128(dstTypeId))
+          r0.setRegT<RegType::kX86_Xmm>(reg.id());
+        else if (TypeUtils::isVec256(dstTypeId))
+          r0.setRegT<RegType::kX86_Ymm>(reg.id());
+        else if (TypeUtils::isVec512(dstTypeId))
+          r0.setRegT<RegType::kX86_Zmm>(reg.id());
+        else
+          break;
+
+        return cc()->emit(vMovInstId, stackPtr, r0);
+      }
+      break;
+  }
+  return DebugUtils::errored(kErrorInvalidAssignment); // Reached by every `break` above - no supported conversion.
+
+  // Extend+Move Gp.
+ExtendMovGpD:
+  stackPtr.setSize(4);
+  r0.setRegT<RegType::kX86_Gpd>(reg.id());
+
+  ASMJIT_PROPAGATE(cc()->emit(instId, r0, r1));
+  ASMJIT_PROPAGATE(cc()->emit(Inst::kIdMov, stackPtr, r0));
+  return kErrorOk;
+
+ExtendMovGpXQ:
+  if (registerSize == 8) {
+    stackPtr.setSize(8);
+    r0.setRegT<RegType::kX86_Gpq>(reg.id());
+
+    ASMJIT_PROPAGATE(cc()->emit(instId, r0, r1));
+    ASMJIT_PROPAGATE(cc()->emit(Inst::kIdMov, stackPtr, r0));
+  }
+  else {
+    stackPtr.setSize(4);
+    r0.setRegT<RegType::kX86_Gpd>(reg.id());
+
+    ASMJIT_PROPAGATE(cc()->emit(instId, r0, r1));
+
+ExtendMovGpDQ:
+    ASMJIT_PROPAGATE(cc()->emit(Inst::kIdMov, stackPtr, r0)); // Requires r0 and the 4-byte store size to be set up.
+    stackPtr.addOffsetLo32(4);
+    ASMJIT_PROPAGATE(cc()->emit(Inst::kIdAnd, stackPtr, 0)); // AND with 0 clears the following 4 bytes of the slot.
+  }
+  return kErrorOk;
+
+ZeroExtendGpDQ:
+  stackPtr.setSize(4);
+  r0.setRegT<RegType::kX86_Gpd>(reg.id());
+  goto ExtendMovGpDQ;
+
+MovGpD:
+  stackPtr.setSize(4);
+  r0.setRegT<RegType::kX86_Gpd>(reg.id());
+  return cc()->emit(Inst::kIdMov, stackPtr, r0);
+
+MovGpQ:
+  stackPtr.setSize(8);
+  r0.setRegT<RegType::kX86_Gpq>(reg.id());
+  return cc()->emit(Inst::kIdMov, stackPtr, r0);
+
+MovMmD:
+  stackPtr.setSize(4);
+  r0.setRegT<RegType::kX86_Mm>(reg.id());
+  return cc()->emit(choose(Inst::kIdMovd, Inst::kIdVmovd), stackPtr, r0);
+
+MovMmQ:
+  stackPtr.setSize(8);
+  r0.setRegT<RegType::kX86_Mm>(reg.id());
+  return cc()->emit(choose(Inst::kIdMovq, Inst::kIdVmovq), stackPtr, r0);
+
+MovXmmD:
+  stackPtr.setSize(4);
+  r0.setRegT<RegType::kX86_Xmm>(reg.id());
+  return cc()->emit(choose(Inst::kIdMovss, Inst::kIdVmovss), stackPtr, r0);
+
+MovXmmQ:
+  stackPtr.setSize(8);
+  r0.setRegT<RegType::kX86_Xmm>(reg.id());
+  return cc()->emit(choose(Inst::kIdMovlps, Inst::kIdVmovlps), stackPtr, r0);
+}
+
+// x86::RACFGBuilder - OnRet
+// =========================
+// For register operands returned in an x87 ST register: stores the vector register to temporary memory and FLDs it.
+Error RACFGBuilder::onBeforeRet(FuncRetNode* funcRet) noexcept {
+  const FuncDetail& funcDetail = _pass->func()->detail();
+  const Operand* opArray = funcRet->operands();
+  uint32_t opCount = funcRet->opCount();
+
+  cc()->_setCursor(funcRet->prev()); // Position the cursor at the node preceding `funcRet`.
+
+  for (uint32_t i = 0; i < opCount; i++) {
+    const Operand& op = opArray[i];
+    const FuncValue& ret = funcDetail.ret(i);
+
+    if (!op.isReg())
+      continue;
+
+    if (ret.regType() == RegType::kX86_St) {
+      const Reg& reg = op.as<Reg>();
+      uint32_t vIndex = Operand::virtIdToIndex(reg.id());
+
+      if (vIndex < Operand::kVirtIdCount) { // Only virtual registers are handled here.
+        RAWorkReg* workReg;
+        ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(vIndex, &workReg));
+
+        if (workReg->group() != RegGroup::kVec)
+          return DebugUtils::errored(kErrorInvalidAssignment);
+
+        Reg src(workReg->signature(), workReg->virtId());
+        Mem mem;
+
+        TypeId typeId = TypeUtils::scalarOf(workReg->typeId());
+        if (ret.hasTypeId())
+          typeId = ret.typeId(); // The return value's own type id, when present, takes precedence.
+
+        switch (typeId) {
+          case TypeId::kFloat32:
+            ASMJIT_PROPAGATE(_pass->useTemporaryMem(mem, 4, 4));
+            mem.setSize(4);
+            ASMJIT_PROPAGATE(cc()->emit(choose(Inst::kIdMovss, Inst::kIdVmovss), mem, src.as<Xmm>()));
+            ASMJIT_PROPAGATE(cc()->fld(mem));
+            break;
+
+          case TypeId::kFloat64:
+            ASMJIT_PROPAGATE(_pass->useTemporaryMem(mem, 8, 4));
+            mem.setSize(8);
+            ASMJIT_PROPAGATE(cc()->emit(choose(Inst::kIdMovsd, Inst::kIdVmovsd), mem, src.as<Xmm>()));
+            ASMJIT_PROPAGATE(cc()->fld(mem));
+            break;
+
+          default:
+            return DebugUtils::errored(kErrorInvalidAssignment);
+        }
+      }
+    }
+  }
+
+  return kErrorOk;
+}
+// Adds each virtual register operand of `funcRet` that is not returned in x87 ST to `ib` as a Use|Read tied register.
+Error RACFGBuilder::onRet(FuncRetNode* funcRet, RAInstBuilder& ib) noexcept {
+  const FuncDetail& funcDetail = _pass->func()->detail();
+  const Operand* opArray = funcRet->operands();
+  uint32_t opCount = funcRet->opCount();
+
+  for (uint32_t i = 0; i < opCount; i++) {
+    const Operand& op = opArray[i];
+    if (op.isNone()) continue;
+
+    const FuncValue& ret = funcDetail.ret(i);
+    if (ASMJIT_UNLIKELY(!ret.isReg()))
+      return DebugUtils::errored(kErrorInvalidAssignment);
+
+    // Not handled here...
+    if (ret.regType() == RegType::kX86_St)
+      continue;
+
+    if (op.isReg()) {
+      // Register return value.
+      const Reg& reg = op.as<Reg>();
+      uint32_t vIndex = Operand::virtIdToIndex(reg.id());
+
+      if (vIndex < Operand::kVirtIdCount) {
+        RAWorkReg* workReg;
+        ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(vIndex, &workReg));
+
+        RegGroup group = workReg->group();
+        RegMask inOutRegs = _pass->_availableRegs[group];
+        ASMJIT_PROPAGATE(ib.add(workReg, RATiedFlags::kUse | RATiedFlags::kRead, inOutRegs, ret.regId(), 0, inOutRegs, BaseReg::kIdBad, 0));
+      }
+    }
+    else {
+      return DebugUtils::errored(kErrorInvalidAssignment); // Non-register operands are not accepted.
+    }
+  }
+
+  return kErrorOk;
+}
+
+// x86::X86RAPass - Construction & Destruction
+// ===========================================
+// The constructor points `_iEmitHelper` at the `_emitHelper` member of this pass.
+X86RAPass::X86RAPass() noexcept
+  : BaseRAPass() { _iEmitHelper = &_emitHelper; }
+X86RAPass::~X86RAPass() noexcept {}
+
+// x86::X86RAPass - OnInit & OnDone
+// ================================
+// onInit() sets up the emit helper, register counts, allocatable register masks, scratch registers, and SP/FP.
+void X86RAPass::onInit() noexcept {
+  Arch arch = cc()->arch();
+  uint32_t baseRegCount = Environment::is32Bit(arch) ? 8u : 16u; // 8 registers in 32-bit mode, 16 otherwise.
+  uint32_t simdRegCount = baseRegCount;
+
+  if (Environment::is64Bit(arch) && _func->frame().isAvx512Enabled())
+    simdRegCount = 32u; // 32 vector registers only in 64-bit mode with AVX-512 enabled on the frame.
+
+  bool avxEnabled = _func->frame().isAvxEnabled();
+  bool avx512Enabled = _func->frame().isAvx512Enabled();
+
+  _emitHelper._emitter = _cb;
+  _emitHelper._avxEnabled = avxEnabled || avx512Enabled; // AVX-512 being enabled also turns the AVX flag on.
+  _emitHelper._avx512Enabled = avx512Enabled;
+
+  _archTraits = &ArchTraits::byArch(arch);
+  _physRegCount.set(RegGroup::kGp, baseRegCount);
+  _physRegCount.set(RegGroup::kVec, simdRegCount);
+  _physRegCount.set(RegGroup::kX86_K, 8);
+  _physRegCount.set(RegGroup::kX86_MM, 8);
+  _buildPhysIndex();
+
+  _availableRegCount = _physRegCount;
+  _availableRegs[RegGroup::kGp] = Support::lsbMask<RegMask>(_physRegCount.get(RegGroup::kGp));
+  _availableRegs[RegGroup::kVec] = Support::lsbMask<RegMask>(_physRegCount.get(RegGroup::kVec));
+  _availableRegs[RegGroup::kX86_K] = Support::lsbMask<RegMask>(_physRegCount.get(RegGroup::kX86_K)) ^ 1u; // Bit 0 cleared.
+  _availableRegs[RegGroup::kX86_MM] = Support::lsbMask<RegMask>(_physRegCount.get(RegGroup::kX86_MM));
+
+  _scratchRegIndexes[0] = uint8_t(Gp::kIdCx);
+  _scratchRegIndexes[1] = uint8_t(baseRegCount - 1); // Highest-numbered GP register of the current mode.
+
+  // The architecture specific setup makes implicitly all registers available. So
+  // make unavailable all registers that are special and cannot be used in general.
+  bool hasFP = _func->frame().hasPreservedFP();
+
+  makeUnavailable(RegGroup::kGp, Gp::kIdSp);            // ESP|RSP used as a stack-pointer (SP).
+  if (hasFP) makeUnavailable(RegGroup::kGp, Gp::kIdBp); // EBP|RBP used as a frame-pointer (FP).
+
+  _sp = cc()->zsp();
+  _fp = cc()->zbp();
+}
+
+void X86RAPass::onDone() noexcept {} // Nothing to do for x86.
+
+// x86::X86RAPass - BuildCFG
+// =========================
+// Runs an x86 `RACFGBuilder` constructed over this pass and returns its result.
+Error X86RAPass::buildCFG() noexcept {
+  return RACFGBuilder(this).run();
+}
+
+// x86::X86RAPass - Rewrite
+// ========================
+// Maps a VEX instruction id to the EVEX instruction id that `_rewrite()` substitutes when a register id above 15 is used.
+static InstId transformVexToEvex(InstId instId) {
+  switch (instId) {
+    case Inst::kIdVbroadcastf128: return Inst::kIdVbroadcastf32x4;
+    case Inst::kIdVbroadcasti128: return Inst::kIdVbroadcasti32x4;
+    case Inst::kIdVextractf128: return Inst::kIdVextractf32x4;
+    case Inst::kIdVextracti128: return Inst::kIdVextracti32x4;
+    case Inst::kIdVinsertf128: return Inst::kIdVinsertf32x4;
+    case Inst::kIdVinserti128: return Inst::kIdVinserti32x4;
+    case Inst::kIdVmovdqa: return Inst::kIdVmovdqa32;
+    case Inst::kIdVmovdqu: return Inst::kIdVmovdqu32;
+    case Inst::kIdVpand: return Inst::kIdVpandd;
+    case Inst::kIdVpandn: return Inst::kIdVpandnd;
+    case Inst::kIdVpor: return Inst::kIdVpord;
+    case Inst::kIdVpxor: return Inst::kIdVpxord;
+    case Inst::kIdVroundpd: return Inst::kIdVrndscalepd;
+    case Inst::kIdVroundps: return Inst::kIdVrndscaleps;
+    case Inst::kIdVroundsd: return Inst::kIdVrndscalesd;
+    case Inst::kIdVroundss: return Inst::kIdVrndscaless;
+
+    default:
+      // This should never happen as only transformable instructions should go this path.
+      ASMJIT_ASSERT(false);
+      return 0; // Returned only when the assertion above is compiled out.
+  }
+}
+// Rewrites nodes in [first, stop): virtual -> physical register ids, VEX -> EVEX, no-op moves, FuncRet, stack slots.
+ASMJIT_FAVOR_SPEED Error X86RAPass::_rewrite(BaseNode* first, BaseNode* stop) noexcept {
+  uint32_t virtCount = cc()->_vRegArray.size(); // Upper bound used to validate register-home memory operands below.
+
+  BaseNode* node = first;
+  while (node != stop) {
+    BaseNode* next = node->next(); // Fetched up-front because `node` may be removed below.
+    if (node->isInst()) {
+      InstNode* inst = node->as<InstNode>();
+      RAInst* raInst = node->passData<RAInst>();
+
+      Operand* operands = inst->operands();
+      uint32_t opCount = inst->opCount();
+      uint32_t maxRegId = 0; // Highest physical id written into this instruction; drives the VEX -> EVEX switch.
+
+      uint32_t i;
+
+      // Rewrite virtual registers into physical registers.
+      if (raInst) {
+        // This data is allocated by Zone passed to `runOnFunction()`, which will be reset after the RA pass finishes.
+        // So reset this data to prevent having a dead pointer after the RA pass is complete.
+        node->resetPassData();
+
+        // If the instruction contains pass data (raInst) then it was a subject for register allocation and must be
+        // rewritten to use physical regs.
+        RATiedReg* tiedRegs = raInst->tiedRegs();
+        uint32_t tiedCount = raInst->tiedCount();
+
+        for (i = 0; i < tiedCount; i++) {
+          RATiedReg* tiedReg = &tiedRegs[i];
+
+          Support::BitWordIterator<uint32_t> useIt(tiedReg->useRewriteMask());
+          uint32_t useId = tiedReg->useId();
+          while (useIt.hasNext()) {
+            maxRegId = Support::max(maxRegId, useId);
+            inst->rewriteIdAtIndex(useIt.next(), useId);
+          }
+
+          Support::BitWordIterator<uint32_t> outIt(tiedReg->outRewriteMask());
+          uint32_t outId = tiedReg->outId();
+          while (outIt.hasNext()) {
+            maxRegId = Support::max(maxRegId, outId);
+            inst->rewriteIdAtIndex(outIt.next(), outId);
+          }
+        }
+
+        // Transform VEX instruction to EVEX when necessary.
+        if (raInst->isTransformable()) {
+          if (maxRegId > 15) {
+            inst->setId(transformVexToEvex(inst->id()));
+          }
+        }
+
+        // Remove moves that do not do anything.
+        //
+        // Usually these moves are inserted during code generation and originally they used different registers. If RA
+        // allocated these into the same register such redundant mov would appear.
+        if (raInst->hasInstRWFlag(InstRWFlags::kMovOp) && !inst->hasExtraReg()) {
+          if (inst->opCount() == 2) {
+            if (inst->op(0) == inst->op(1)) {
+              cc()->removeNode(node);
+              goto Next; // The node is gone, skip the stack slot rewrite for it.
+            }
+          }
+        }
+
+        if (ASMJIT_UNLIKELY(node->type() != NodeType::kInst)) {
+          // FuncRet terminates the flow, it must either be removed if the exit label is next to it (optimization) or
+          // patched to an architecture dependent jump instruction that jumps to the function's exit before the epilog.
+          if (node->type() == NodeType::kFuncRet) {
+            RABlock* block = raInst->block();
+            if (!isNextTo(node, _func->exitNode())) {
+              cc()->_setCursor(node->prev());
+              ASMJIT_PROPAGATE(emitJump(_func->exitNode()->label()));
+            }
+
+            BaseNode* prev = node->prev(); // Read after the optional jump was emitted, so it may be that jump.
+            cc()->removeNode(node);
+            block->setLast(prev);
+          }
+        }
+      }
+
+      // Rewrite stack slot addresses.
+      for (i = 0; i < opCount; i++) {
+        Operand& op = operands[i];
+        if (op.isMem()) {
+          BaseMem& mem = op.as<BaseMem>();
+          if (mem.isRegHome()) {
+            uint32_t virtIndex = Operand::virtIdToIndex(mem.baseId());
+            if (ASMJIT_UNLIKELY(virtIndex >= virtCount))
+              return DebugUtils::errored(kErrorInvalidVirtId);
+
+            VirtReg* virtReg = cc()->virtRegByIndex(virtIndex);
+            RAWorkReg* workReg = virtReg->workReg();
+            ASMJIT_ASSERT(workReg != nullptr);
+
+            RAStackSlot* slot = workReg->stackSlot();
+            int32_t offset = slot->offset();
+
+            mem._setBase(_sp.type(), slot->baseRegId()); // Base becomes the slot's base register ...
+            mem.clearRegHome();
+            mem.addOffsetLo32(offset);                   // ... and the slot offset is added to the displacement.
+          }
+        }
+      }
+    }
+
+Next:
+    node = next;
+  }
+
+  return kErrorOk;
+}
+
+// x86::X86RAPass - OnEmit
+// =======================
+// Emits a register-to-register move of work register `workId` from physical id `srcPhysId` to `dstPhysId`.
+Error X86RAPass::emitMove(uint32_t workId, uint32_t dstPhysId, uint32_t srcPhysId) noexcept {
+  RAWorkReg* wReg = workRegById(workId);
+  BaseReg dst(wReg->signature(), dstPhysId);
+  BaseReg src(wReg->signature(), srcPhysId);
+
+  const char* comment = nullptr; // Stays null unless RA annotation is enabled below.
+
+#ifndef ASMJIT_NO_LOGGING
+  if (hasDiagnosticOption(DiagnosticOptions::kRAAnnotate)) {
+    _tmpString.assignFormat("<MOVE> %s", workRegById(workId)->name());
+    comment = _tmpString.data();
+  }
+#endif
+
+  return _emitHelper.emitRegMove(dst, src, wReg->typeId(), comment);
+}
+// Emits XCHG between physical registers `aPhysId` and `bPhysId`, which hold work registers `aWorkId` and `bWorkId`.
+Error X86RAPass::emitSwap(uint32_t aWorkId, uint32_t aPhysId, uint32_t bWorkId, uint32_t bPhysId) noexcept {
+  RAWorkReg* waReg = workRegById(aWorkId);
+  RAWorkReg* wbReg = workRegById(bWorkId);
+
+  bool is64Bit = Support::max(waReg->typeId(), wbReg->typeId()) >= TypeId::kInt64; // Larger type id picks the width.
+  OperandSignature sign = is64Bit ? OperandSignature{RegTraits<RegType::kX86_Gpq>::kSignature}
+                                  : OperandSignature{RegTraits<RegType::kX86_Gpd>::kSignature};
+
+#ifndef ASMJIT_NO_LOGGING
+  if (hasDiagnosticOption(DiagnosticOptions::kRAAnnotate)) {
+    _tmpString.assignFormat("<SWAP> %s, %s", waReg->name(), wbReg->name());
+    cc()->setInlineComment(_tmpString.data());
+  }
+#endif
+
+  return cc()->emit(Inst::kIdXchg, Reg(sign, aPhysId), Reg(sign, bPhysId));
+}
+// Emits a load of work register `workId` from its memory location (`workRegAsMem`) into physical register `dstPhysId`.
+Error X86RAPass::emitLoad(uint32_t workId, uint32_t dstPhysId) noexcept {
+  RAWorkReg* wReg = workRegById(workId);
+  BaseReg dstReg(wReg->signature(), dstPhysId);
+  BaseMem srcMem(workRegAsMem(wReg));
+
+  const char* comment = nullptr; // Stays null unless RA annotation is enabled below.
+
+#ifndef ASMJIT_NO_LOGGING
+  if (hasDiagnosticOption(DiagnosticOptions::kRAAnnotate)) {
+    _tmpString.assignFormat("<LOAD> %s", workRegById(workId)->name());
+    comment = _tmpString.data();
+  }
+#endif
+
+  return _emitHelper.emitRegMove(dstReg, srcMem, wReg->typeId(), comment);
+}
+// Emits a store of physical register `srcPhysId` into the memory location (`workRegAsMem`) of work register `workId`.
+Error X86RAPass::emitSave(uint32_t workId, uint32_t srcPhysId) noexcept {
+  RAWorkReg* wReg = workRegById(workId);
+  BaseMem dstMem(workRegAsMem(wReg));
+  BaseReg srcReg(wReg->signature(), srcPhysId);
+
+  const char* comment = nullptr; // Stays null unless RA annotation is enabled below.
+
+#ifndef ASMJIT_NO_LOGGING
+  if (hasDiagnosticOption(DiagnosticOptions::kRAAnnotate)) {
+    _tmpString.assignFormat("<SAVE> %s", workRegById(workId)->name());
+    comment = _tmpString.data();
+  }
+#endif
+
+  return _emitHelper.emitRegMove(dstMem, srcReg, wReg->typeId(), comment);
+}
+// Emits an unconditional JMP to `label`.
+Error X86RAPass::emitJump(const Label& label) noexcept {
+  return cc()->jmp(label);
+}
+// Emits the extra code needed before a variadic call in 64-bit mode; any other call emits nothing.
+Error X86RAPass::emitPreCall(InvokeNode* invokeNode) noexcept {
+  if (invokeNode->detail().hasVarArgs() && cc()->is64Bit()) {
+    const FuncDetail& fd = invokeNode->detail();
+    uint32_t argCount = invokeNode->argCount();
+
+    switch (invokeNode->detail().callConv().id()) {
+      case CallConvId::kX64SystemV: {
+        // AL register contains the number of arguments passed in XMM register(s).
+        uint32_t n = 0;
+        for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) {
+          const FuncValuePack& argPack = fd.argPack(argIndex);
+          for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
+            const FuncValue& arg = argPack[valueIndex];
+            if (!arg)
+              break; // First unset value ends the pack.
+
+            if (arg.isReg() && Reg::groupOf(arg.regType()) == RegGroup::kVec)
+              n++;
+          }
+        }
+
+        if (!n)
+          ASMJIT_PROPAGATE(cc()->xor_(eax, eax)); // Zero count: XOR is used instead of MOV with an immediate.
+        else
+          ASMJIT_PROPAGATE(cc()->mov(eax, n));
+        break;
+      }
+
+      case CallConvId::kX64Windows: {
+        // Each double-precision argument passed in XMM must be also passed in GP.
+        for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) {
+          const FuncValuePack& argPack = fd.argPack(argIndex);
+          for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
+            const FuncValue& arg = argPack[valueIndex];
+            if (!arg)
+              break; // First unset value ends the pack.
+
+            if (arg.isReg() && Reg::groupOf(arg.regType()) == RegGroup::kVec) {
+              Gp dst = gpq(fd.callConv().passedOrder(RegGroup::kGp)[argIndex]); // GP register of the same position.
+              Xmm src = xmm(arg.regId());
+              ASMJIT_PROPAGATE(cc()->emit(choose(Inst::kIdMovq, Inst::kIdVmovq), dst, src));
+            }
+          }
+        }
+        break;
+      }
+
+      default:
+        return DebugUtils::errored(kErrorInvalidState); // Variadic call with any other calling convention.
+    }
+  }
+
+  return kErrorOk;
+}
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // !ASMJIT_NO_X86 && !ASMJIT_NO_COMPILER
diff --git a/lib/lepton/asmjit/x86/x86rapass_p.h b/lib/lepton/asmjit/x86/x86rapass_p.h
new file mode 100644
index 0000000000..b9603c21af
--- /dev/null
+++ b/lib/lepton/asmjit/x86/x86rapass_p.h
@@ -0,0 +1,94 @@
+// This file is part of AsmJit project <https://asmjit.com>
+//
+// See asmjit.h or LICENSE.md for license and copyright information
+// SPDX-License-Identifier: Zlib
+
+#ifndef ASMJIT_X86_X86RAPASS_P_H_INCLUDED
+#define ASMJIT_X86_X86RAPASS_P_H_INCLUDED
+
+#include "../core/api-config.h"
+#ifndef ASMJIT_NO_COMPILER
+
+#include "../core/compiler.h"
+#include "../core/rabuilders_p.h"
+#include "../core/rapass_p.h"
+#include "../x86/x86assembler.h"
+#include "../x86/x86compiler.h"
+#include "../x86/x86emithelper_p.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(x86)
+
+//! \cond INTERNAL
+//! \addtogroup asmjit_x86
+//! \{
+
+//! X86 register allocation pass.
+//!
+//! Takes care of generating function prologs and epilogs, and also performs register allocation.
+class X86RAPass : public BaseRAPass {
+public:
+  ASMJIT_NONCOPYABLE(X86RAPass)
+  typedef BaseRAPass Base;
+  //! Emit helper embedded in the pass, see `emitHelper()`.
+  EmitHelper _emitHelper;
+
+  //! \name Construction & Destruction
+  //! \{
+
+  X86RAPass() noexcept;
+  virtual ~X86RAPass() noexcept;
+
+  //! \}
+
+  //! \name Accessors
+  //! \{
+
+  //! Returns the compiler casted to `x86::Compiler`.
+  inline Compiler* cc() const noexcept { return static_cast<Compiler*>(_cb); }
+
+  //! Returns emit helper.
+  inline EmitHelper* emitHelper() noexcept { return &_emitHelper; }
+
+  inline bool avxEnabled() const noexcept { return _emitHelper._avxEnabled; } //!< Returns the emit helper's AVX flag.
+  inline bool avx512Enabled() const noexcept { return _emitHelper._avx512Enabled; } //!< Returns the emit helper's AVX-512 flag.
+
+  //! \}
+
+  //! \name Utilities
+  //! \{
+
+  inline uint32_t choose(uint32_t sseInstId, uint32_t avxInstId) noexcept {
+    return avxEnabled() ? avxInstId : sseInstId; // AVX instruction id when AVX is enabled, SSE id otherwise.
+  }
+
+  //! \}
+
+  //! \name Interface
+  //! \{
+
+  void onInit() noexcept override;
+  void onDone() noexcept override;
+
+  Error buildCFG() noexcept override;
+
+  Error _rewrite(BaseNode* first, BaseNode* stop) noexcept override;
+
+  Error emitMove(uint32_t workId, uint32_t dstPhysId, uint32_t srcPhysId) noexcept override;
+  Error emitSwap(uint32_t aWorkId, uint32_t aPhysId, uint32_t bWorkId, uint32_t bPhysId) noexcept override;
+
+  Error emitLoad(uint32_t workId, uint32_t dstPhysId) noexcept override;
+  Error emitSave(uint32_t workId, uint32_t srcPhysId) noexcept override;
+
+  Error emitJump(const Label& label) noexcept override;
+  Error emitPreCall(InvokeNode* invokeNode) noexcept override;
+
+  //! \}
+};
+
+//! \}
+//! \endcond
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // !ASMJIT_NO_COMPILER
+#endif // ASMJIT_X86_X86RAPASS_P_H_INCLUDED

From 5b42064fcf5efba0cb7e3f470ad4ab78b7ee9444 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Thu, 22 Dec 2022 22:50:24 -0500
Subject: [PATCH 25/79] add docs for lepton pair and bond style

---
 doc/src/bond_lepton.rst       |  89 +++++++++++++++++++++++++
 doc/src/lepton_expression.rst |  97 +++++++++++++++++++++++++++
 doc/src/pair_lepton.rst       | 121 ++++++++++++++++++++++++++++++++++
 3 files changed, 307 insertions(+)
 create mode 100644 doc/src/bond_lepton.rst
 create mode 100644 doc/src/lepton_expression.rst
 create mode 100644 doc/src/pair_lepton.rst

diff --git a/doc/src/bond_lepton.rst b/doc/src/bond_lepton.rst
new file mode 100644
index 0000000000..3f4b305b6e
--- /dev/null
+++ b/doc/src/bond_lepton.rst
@@ -0,0 +1,89 @@
+.. index:: bond_style lepton
+.. index:: bond_style lepton/omp
+
+bond_style lepton command
+=========================
+
+Accelerator Variants: *lepton/omp*
+
+Syntax
+""""""
+
+.. code-block:: LAMMPS
+
+   bond_style lepton
+
+Examples
+""""""""
+
+.. code-block:: LAMMPS
+
+   bond_style lepton
+   bond_coeff 1 1.5 "k*r^2; k=250.0"
+   bond_coeff 2 1.1 "k2*r^2 + k3*r^3 + k4*r^4; k2=300.0; k3=-100.0; k4=50.0"
+   bond_coeff 3 1.3 "k*r^2; k=350.0"
+
+   bond_coeff 5 1.0 2.0 1.2
+
+Description
+"""""""""""
+
+Bond style *lepton* computes bonded interactions between two atoms
+based on evaluating strings.  The potential function must be provided as an
+expression string using "r" as the distance variable relative to the
+reference distance :math:`r_0` which is provided as a bond coefficient.
+For example ``"200.0*(r)^2"`` represents a harmonic potential with a
+force constant *K* of 200.0 energy units:
+
+.. math::
+
+   U_{bond} = K (r_i - r_0)^2 = K r^2 \qquad r = r_i-r_0
+
+The `Lepton library <https://simtk.org/projects/lepton>`_, that the
+*lepton* bond style interfaces with, evaluates this expression string at
+run time to compute the pairwise energy.  It also creates an analytical
+representation of the differentiation of this expression with respect to
+"r" and then uses that to compute the force between the pairs of atoms forming
+bonds as defined by the topology data.
+
+The following coefficients must be defined for each bond type via the
+:doc:`bond_coeff <bond_coeff>` command as in the example above, or in
+the data file or restart files read by the :doc:`read_data <read_data>`
+or :doc:`read_restart <read_restart>` commands:
+
+* Lepton expression (energy units)
+* :math:`r_0` (distance)
+
+The Lepton expression must be either enclosed in quotes or must not
+contain any whitespace so that LAMMPS recognizes it as a single keyword.
+More on valid Lepton expressions below.  The :math:`r_0` is the
+"equilibrium distance".  The potential energy function in the Lepton
+expression is shifted in such a way, that the potential energy is 0 for
+a bond length *r* of :math:`r_0`.
+
+----------
+
+.. include:: lepton_expression.rst
+
+----------
+
+.. include:: accel_styles.rst
+
+----------
+
+Restrictions
+""""""""""""
+
+This bond style can only be used if LAMMPS was built with the LEPTON
+package.  See the :doc:`Build package <Build_package>` page for more
+info.
+
+Related commands
+""""""""""""""""
+
+:doc:`bond_coeff <bond_coeff>`, :doc:`delete_bonds <delete_bonds>`
+
+Default
+"""""""
+
+none
diff --git a/doc/src/lepton_expression.rst b/doc/src/lepton_expression.rst
new file mode 100644
index 0000000000..ace4d42ab2
--- /dev/null
+++ b/doc/src/lepton_expression.rst
@@ -0,0 +1,97 @@
+
+Lepton expression syntax and features
+"""""""""""""""""""""""""""""""""""""
+
+Lepton supports the following operators in expressions:
+
+.. table_from_list::
+   :columns: 14
+
+   * \+
+   * Add
+   *
+   * \-
+   * Subtract
+   *
+   * \*
+   * Multiply
+   *
+   * \/
+   * Divide
+   *
+   * \^
+   * Power
+
+The following mathematical functions are available:
+
+.. table_from_list::
+   :columns: 4
+
+   * sqrt(x)
+   * Square root
+   * exp(x)
+   * Exponential
+   * log(x)
+   * Natural logarithm
+   * sin(x)
+   * Sine (angle in radians)
+   * cos(x)
+   * Cosine (angle in radians)
+   * sec(x)
+   * Secant (angle in radians)
+   * csc(x)
+   * Cosecant (angle in radians)
+   * tan(x)
+   * Tangent (angle in radians)
+   * cot(x)
+   * Cotangent (angle in radians)
+   * asin(x)
+   * Inverse sine (in radians)
+   * acos(x)
+   * Inverse cosine (in radians)
+   * atan(x)
+   * Inverse tangent (in radians)
+   * sinh(x)
+   * Hyperbolic sine
+   * cosh(x)
+   * Hyperbolic cosine
+   * tanh(x)
+   * Hyperbolic tangent
+   * erf(x)
+   * Error function
+   * erfc(x)
+   * Complementary Error function
+   * abs(x)
+   * Absolute value
+   * min(x,y)
+   * Minimum of two values
+   * max(x,y)
+   * Maximum of two values
+   * delta(x)
+   * delta(x) is 1 for ``x = 0``, otherwise 0
+   * step(x)
+   * step(x) is 0 for ``x < 0``, otherwise 1
+
+Numbers may be given in either decimal or exponential form.  All of the following are valid
+numbers: ``5``, ``-3.1``, ``1e6``, and ``3.12e-2``.
+
+An expression may be followed by definitions for intermediate values that appear in the
+expression. A semicolon ";" is used as a delimiter between value definitions. For example,
+the expression:
+
+.. code-block:: C
+
+   a^2+a*b+b^2; a=a1+a2; b=b1+b2
+
+is exactly equivalent to
+
+.. code-block:: C
+
+   (a1+a2)^2+(a1+a2)*(b1+b2)+(b1+b2)^2
+
+The definition of an intermediate value may itself involve other
+intermediate values. Whitespace and quotation characters ('\'' and '"')
+are ignored.  All uses of a value must appear *before* that value’s
+definition.  For efficiency reasons, the expression string is parsed,
+optimized, and then stored in an internal, pre-parsed representation for
+evaluation.
diff --git a/doc/src/pair_lepton.rst b/doc/src/pair_lepton.rst
new file mode 100644
index 0000000000..eb5978b21e
--- /dev/null
+++ b/doc/src/pair_lepton.rst
@@ -0,0 +1,121 @@
+.. index:: pair_style lepton
+.. index:: pair_style lepton/omp
+
+pair_style lepton command
+=========================
+
+Accelerator Variants: *lepton/omp*
+
+Syntax
+""""""
+
+.. code-block:: LAMMPS
+
+   pair_style style args
+
+* style = *lepton*
+* args = list of arguments for a particular style
+
+.. parsed-literal::
+
+    *lepton* args = cutoff
+      cutoff = global cutoff for the interactions (distance units)
+
+Examples
+""""""""
+
+.. code-block:: LAMMPS
+
+   pair_style lepton 2.5
+   pair_coeff * * "k*((r-r0)^2*step(r0-r)); k=200; r0=1.5" 2.0
+   pair_coeff 1 2 "4.0*eps*((sig/r)^12 - (sig/r)^6);eps=1.0;sig=1.0" 1.12246204830937
+   pair_coeff 2 2 "eps*(2.0*(sig/r)^9 - 3.0*(sig/r)^6);eps=1.0;sig=1.0"
+
+Description
+"""""""""""
+
+Pair style *lepton* computes spherical pairwise interactions based on
+evaluating strings.  The potential function must be provided as an
+expression string using "r" as the distance variable, for example
+`"200.0*(r-1.5)^2"` represents a harmonic potential with equilibrium
+distance :math:`r_0` of 1.5 distance units and a force constant *K* of
+200.0 energy units:
+
+.. math::
+
+   U_{ij} = K (r-r_0)^2
+
+The `Lepton library <https://simtk.org/projects/lepton>`_, that the
+*lepton* pair style interfaces with, evaluates this expression string at
+run time to compute the pairwise energy.  It also creates an
+analytical representation of the differentiation of this expression with
+respect to "r" and then uses that to compute the force between the pairs
+of particles within the given cutoff.
+
+The following coefficients must be defined for each pair of atom types
+via the :doc:`pair_coeff <pair_coeff>` command as in the examples above,
+or in the data file or restart files read by the :doc:`read_data
+<read_data>` or :doc:`read_restart <read_restart>` commands:
+
+* Lepton expression (energy units)
+* cutoff (distance units)
+
+The Lepton expression must be either enclosed in quotes or must not
+contain any whitespace so that LAMMPS recognizes it as a single keyword.
+More on valid Lepton expressions below.  The last coefficient is
+optional; it allows setting the cutoff for a pair of atom types to a
+different value than the global cutoff.
+
+----------
+
+.. include:: lepton_expression.rst
+
+----------
+
+.. include:: accel_styles.rst
+
+----------
+
+Mixing, shift, table, tail correction, restart, rRESPA info
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+Pair style *lepton* does **not** support mixing.  Thus, expressions for
+all I,J pairs must be specified explicitly.
+
+This pair style supports the :doc:`pair_modify <pair_modify>`
+shift option for the energy of the pair interaction.
+
+The :doc:`pair_modify <pair_modify>` table options are not relevant for
+this pair style.
+
+This pair style does not support the :doc:`pair_modify <pair_modify>`
+tail option for adding long-range tail corrections to energy and
+pressure.
+
+This pair style writes its information to :doc:`binary restart files
+<restart>`, so pair_style and pair_coeff commands do not need to be
+specified in an input script that reads a restart file.
+
+This pair style can only be used via the *pair* keyword of the
+:doc:`run_style respa <run_style>` command.  It does not support the
+*inner*, *middle*, *outer* keywords.
+
+----------
+
+Restrictions
+""""""""""""
+
+The *lepton* pair style is part of the LEPTON package and only enabled if
+LAMMPS was built with this package.  See the :doc:`Build package
+<Build_package>` page for more info.
+
+Related commands
+""""""""""""""""
+
+:doc:`pair_coeff <pair_coeff>`, :doc:`pair_style python <pair_python>`,
+:doc:`pair_style table <pair_table>`, :doc:`pair_write <pair_write>`
+
+Default
+"""""""
+
+none

From a8c881aaf379fabf4126b08e04c8dda8514e6a82 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Thu, 22 Dec 2022 23:09:09 -0500
Subject: [PATCH 26/79] try to address linker issues with asmjit on older Linux
 machines

---
 cmake/Modules/Packages/LEPTON.cmake | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/cmake/Modules/Packages/LEPTON.cmake b/cmake/Modules/Packages/LEPTON.cmake
index 28ade58636..e35237500d 100644
--- a/cmake/Modules/Packages/LEPTON.cmake
+++ b/cmake/Modules/Packages/LEPTON.cmake
@@ -16,8 +16,12 @@ endif()
 
 add_library(lmplepton STATIC ${LEPTON_SOURCES} ${ASMJIT_SOURCES})
 set_target_properties(lmplepton PROPERTIES OUTPUT_NAME lammps_lmplepton${LAMMPS_MACHINE})
-target_compile_definitions(lmplepton PUBLIC -DLEPTON_BUILDING_STATIC_LIBRARY=1)
+target_compile_definitions(lmplepton PUBLIC LEPTON_BUILDING_STATIC_LIBRARY=1)
 target_include_directories(lmplepton PUBLIC ${LEPTON_SOURCE_DIR}/include)
+if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+  find_library(LIB_RT rt QUIET)
+  target_link_libraries(lmplepton PUBLIC ${LIB_RT})
+endif()
 
 if(LEPTON_ENABLE_JIT)
   target_compile_definitions(lmplepton PUBLIC "LEPTON_USE_JIT=1;ASMJIT_BUILD_X86=1;ASMJIT_EMBED=1;ASMJIT_BUILD_RELEASE=1")

From 09871a01780ac9ad6249262d539c22481cff1734 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Thu, 22 Dec 2022 23:32:06 -0500
Subject: [PATCH 27/79] mention JIT

---
 doc/src/Build_extras.rst     | 5 +++++
 doc/src/Packages_details.rst | 5 ++---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/doc/src/Build_extras.rst b/doc/src/Build_extras.rst
index 976e6e723d..ce67dd40de 100644
--- a/doc/src/Build_extras.rst
+++ b/doc/src/Build_extras.rst
@@ -890,6 +890,11 @@ included in the LAMMPS source distribution in the ``lib/lepton`` folder.
       LAMMPS. No additional settings are normally needed besides
       ``-D PKG_LEPTON=yes``.
 
+      On x86 hardware the Lepton library will also include a just-in-time
+      compiler for faster execution.  This is auto detected but can
+      be explicitly disabled by setting ``-D LEPTON_ENABLE_JIT=no``
+      (or enabled by setting it to yes).
+
    .. tab:: Traditional make
 
       Before building LAMMPS, one must build the Lepton library in lib/lepton.
diff --git a/doc/src/Packages_details.rst b/doc/src/Packages_details.rst
index 96ab174a10..53845f29d8 100644
--- a/doc/src/Packages_details.rst
+++ b/doc/src/Packages_details.rst
@@ -1401,9 +1401,8 @@ expressions.  This is a more lightweight and efficient alternative
 for evaluating custom potential function to an embedded Python
 interpreter as used in the :ref:`PYTHON package <PKG-PYTHON>`.
 On the other hand, since the potentials are evaluated form analytical
-expressions, they are more accurate than what can be done with
-:ref:`tabulated potentials <tabulate>`.  Using the runtime evaluation
-comes with a significant increase in runtime.
+expressions, they are more precise than what can be done with
+:ref:`tabulated potentials <tabulate>`.
 
 **Authors:** Axel Kohlmeyer (Temple U).  Lepton itself is developed
 by Peter Eastman at Stanford University.

From 3a6492fc4293978ffcf8e83a9aa23c74d56ae38e Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Thu, 22 Dec 2022 23:46:21 -0500
Subject: [PATCH 28/79] use JIT compiler only on Linux for now

---
 cmake/Modules/Packages/LEPTON.cmake | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/cmake/Modules/Packages/LEPTON.cmake b/cmake/Modules/Packages/LEPTON.cmake
index e35237500d..d15ce7518c 100644
--- a/cmake/Modules/Packages/LEPTON.cmake
+++ b/cmake/Modules/Packages/LEPTON.cmake
@@ -2,9 +2,7 @@ set(LEPTON_SOURCE_DIR ${LAMMPS_LIB_SOURCE_DIR}/lepton)
 
 file(GLOB LEPTON_SOURCES ${LEPTON_SOURCE_DIR}/src/[^.]*.cpp)
 
-if((CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86_64") OR
-   (CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "amd64") OR
-   (CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "AMD64"))
+if(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86_64")
    option(LEPTON_ENABLE_JIT "Enable Just-In-Time compiler for Lepton" ON)
 else()
    option(LEPTON_ENABLE_JIT "Enable Just-In-Time compiler for Lepton" OFF)

From acf683e9d025bbe40e6d70d4173c89d179b57927 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Fri, 23 Dec 2022 01:35:05 -0500
Subject: [PATCH 29/79] define ASMJIT_STATIC to work around Windows issues

---
 cmake/Modules/Packages/LEPTON.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmake/Modules/Packages/LEPTON.cmake b/cmake/Modules/Packages/LEPTON.cmake
index d15ce7518c..fb532bad83 100644
--- a/cmake/Modules/Packages/LEPTON.cmake
+++ b/cmake/Modules/Packages/LEPTON.cmake
@@ -22,7 +22,7 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
 endif()
 
 if(LEPTON_ENABLE_JIT)
-  target_compile_definitions(lmplepton PUBLIC "LEPTON_USE_JIT=1;ASMJIT_BUILD_X86=1;ASMJIT_EMBED=1;ASMJIT_BUILD_RELEASE=1")
+  target_compile_definitions(lmplepton PUBLIC "LEPTON_USE_JIT=1;ASMJIT_BUILD_X86=1;ASMJIT_EMBED=1;ASMJIT_STATIC=1;ASMJIT_BUILD_RELEASE=1")
   target_include_directories(lmplepton PUBLIC ${LEPTON_SOURCE_DIR})
 endif()
 

From 749adf3a593ade4a0873ec695e03a029124ec657 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Fri, 23 Dec 2022 05:32:35 -0500
Subject: [PATCH 30/79] one more tweak to allow more x86 platforms to use JIT
 with Lepton

---
 cmake/Modules/Packages/LEPTON.cmake | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/cmake/Modules/Packages/LEPTON.cmake b/cmake/Modules/Packages/LEPTON.cmake
index fb532bad83..a40047bae0 100644
--- a/cmake/Modules/Packages/LEPTON.cmake
+++ b/cmake/Modules/Packages/LEPTON.cmake
@@ -2,7 +2,9 @@ set(LEPTON_SOURCE_DIR ${LAMMPS_LIB_SOURCE_DIR}/lepton)
 
 file(GLOB LEPTON_SOURCES ${LEPTON_SOURCE_DIR}/src/[^.]*.cpp)
 
-if(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86_64")
+if((CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "amd64") OR
+   (CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "AMD64") OR
+   (CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86_64"))
    option(LEPTON_ENABLE_JIT "Enable Just-In-Time compiler for Lepton" ON)
 else()
    option(LEPTON_ENABLE_JIT "Enable Just-In-Time compiler for Lepton" OFF)
@@ -22,7 +24,7 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
 endif()
 
 if(LEPTON_ENABLE_JIT)
-  target_compile_definitions(lmplepton PUBLIC "LEPTON_USE_JIT=1;ASMJIT_BUILD_X86=1;ASMJIT_EMBED=1;ASMJIT_STATIC=1;ASMJIT_BUILD_RELEASE=1")
+  target_compile_definitions(lmplepton PUBLIC "LEPTON_USE_JIT=1;ASMJIT_BUILD_X86=1;ASMJIT_STATIC=1;ASMJIT_BUILD_RELEASE=1")
   target_include_directories(lmplepton PUBLIC ${LEPTON_SOURCE_DIR})
 endif()
 

From b67dcd7ca3c81ebc035b64d855a0459c5651fa88 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Fri, 23 Dec 2022 06:17:33 -0500
Subject: [PATCH 31/79] small tweaks

---
 doc/src/lepton_expression.rst | 5 ++++-
 src/LEPTON/pair_lepton.cpp    | 3 +--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/doc/src/lepton_expression.rst b/doc/src/lepton_expression.rst
index ace4d42ab2..c286e82359 100644
--- a/doc/src/lepton_expression.rst
+++ b/doc/src/lepton_expression.rst
@@ -91,7 +91,10 @@ is exactly equivalent to
 
 The definition of an intermediate value may itself involve other
 intermediate values. Whitespace and quotation characters ('\'' and '"')
-are ignored.  All uses of a value must appear *before* that value’s
+are ignored.  All uses of a value must appear *before* that value's
 definition.  For efficiency reasons, the expression string is parsed,
 optimized, and then stored in an internal, pre-parsed representation for
 evaluation.
+
+Evaluating Lepton expressions is typically between 2 and 4 times
+slower than the corresponding compiled and optimized C++ code.
diff --git a/src/LEPTON/pair_lepton.cpp b/src/LEPTON/pair_lepton.cpp
index 39b5ade806..c84a910876 100644
--- a/src/LEPTON/pair_lepton.cpp
+++ b/src/LEPTON/pair_lepton.cpp
@@ -188,8 +188,7 @@ void PairLepton::allocate()
 
 void PairLepton::settings(int narg, char **arg)
 {
-  if (narg != 1) error->all(FLERR, "Illegal pair_style command");
-
+  if (narg != 1) error->all(FLERR, "Incorrect number of arguments for pair_style lepton command");
   cut_global = utils::numeric(FLERR, arg[0], false, lmp);
 }
 

From 132a4cbc9121a2a1ff196014035f88627b01be81 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Fri, 23 Dec 2022 12:13:10 -0500
Subject: [PATCH 32/79] update traditional build for updated Lepton library and
 inclusion of asmjit

---
 lib/lepton/Common.mk             | 130 +++++++++++++++++++++++++++++++
 lib/lepton/Install.py            |  70 ++++++++++++++++-
 lib/lepton/Makefile.lammps.empty |   2 -
 lib/lepton/Makefile.mpi          |  33 +-------
 lib/lepton/Makefile.serial       |  35 +--------
 lib/lepton/README.md             |  19 +----
 src/LEPTON/Install.sh            |   2 +-
 7 files changed, 207 insertions(+), 84 deletions(-)
 create mode 100644 lib/lepton/Common.mk
 mode change 120000 => 100755 lib/lepton/Install.py

diff --git a/lib/lepton/Common.mk b/lib/lepton/Common.mk
new file mode 100644
index 0000000000..a43e425e46
--- /dev/null
+++ b/lib/lepton/Common.mk
@@ -0,0 +1,130 @@
+# -*- makefile -*-
+# common settings for Lepton library makefiles
+
+SRC= \
+    src/CompiledExpression.cpp \
+    src/CompiledVectorExpression.cpp \
+    src/ExpressionProgram.cpp \
+    src/ExpressionTreeNode.cpp \
+    src/Operation.cpp \
+    src/ParsedExpression.cpp \
+    src/Parser.cpp \
+    src/Utils.cpp
+OBJ=$(SRC:src/%.cpp=build/lepton.%.o)
+
+JITARM= \
+    asmjit/arm/a64assembler.cpp \
+    asmjit/arm/a64builder.cpp \
+    asmjit/arm/a64compiler.cpp \
+    asmjit/arm/a64emithelper.cpp \
+    asmjit/arm/a64formatter.cpp \
+    asmjit/arm/a64func.cpp \
+    asmjit/arm/a64instapi.cpp \
+    asmjit/arm/a64instdb.cpp \
+    asmjit/arm/a64operand.cpp \
+    asmjit/arm/a64rapass.cpp \
+    asmjit/arm/armformatter.cpp
+JITX86 = \
+    asmjit/x86/x86assembler.cpp \
+    asmjit/x86/x86builder.cpp \
+    asmjit/x86/x86compiler.cpp \
+    asmjit/x86/x86emithelper.cpp \
+    asmjit/x86/x86formatter.cpp \
+    asmjit/x86/x86func.cpp \
+    asmjit/x86/x86instapi.cpp \
+    asmjit/x86/x86instdb.cpp \
+    asmjit/x86/x86operand.cpp \
+    asmjit/x86/x86rapass.cpp
+JITCORE= \
+    asmjit/core/archtraits.cpp \
+    asmjit/core/assembler.cpp \
+    asmjit/core/builder.cpp \
+    asmjit/core/codeholder.cpp \
+    asmjit/core/codewriter.cpp \
+    asmjit/core/compiler.cpp \
+    asmjit/core/constpool.cpp \
+    asmjit/core/cpuinfo.cpp \
+    asmjit/core/emithelper.cpp \
+    asmjit/core/emitter.cpp \
+    asmjit/core/emitterutils.cpp \
+    asmjit/core/environment.cpp \
+    asmjit/core/errorhandler.cpp \
+    asmjit/core/formatter.cpp \
+    asmjit/core/funcargscontext.cpp \
+    asmjit/core/func.cpp \
+    asmjit/core/globals.cpp \
+    asmjit/core/inst.cpp \
+    asmjit/core/jitallocator.cpp \
+    asmjit/core/jitruntime.cpp \
+    asmjit/core/logger.cpp \
+    asmjit/core/operand.cpp \
+    asmjit/core/osutils.cpp \
+    asmjit/core/ralocal.cpp \
+    asmjit/core/rapass.cpp \
+    asmjit/core/rastack.cpp \
+    asmjit/core/string.cpp \
+    asmjit/core/support.cpp \
+    asmjit/core/target.cpp \
+    asmjit/core/type.cpp \
+    asmjit/core/virtmem.cpp \
+    asmjit/core/zone.cpp \
+    asmjit/core/zonehash.cpp \
+    asmjit/core/zonelist.cpp \
+    asmjit/core/zonestack.cpp \
+    asmjit/core/zonetree.cpp \
+    asmjit/core/zonevector.cpp
+
+JITOBJ=$(JITX86:asmjit/x86/%.cpp=build/x86.%.o) \
+       $(JITARM:asmjit/arm/%.cpp=build/arm.%.o) \
+       $(JITCORE:asmjit/core/%.cpp=build/core.%.o)
+
+ENABLE_JIT=0
+ifeq ($(shell uname -m),x86_64)
+ENABLE_JIT=1
+endif
+ifeq ($(shell uname -m),amd64)
+ENABLE_JIT=1
+endif
+
+EXTRAMAKE=Makefile.lammps.empty
+INC=-I include
+DEF=-DLEPTON_BUILDING_STATIC_LIBRARY=1
+
+LIB=liblmplepton.a
+
+ifeq ($(ENABLE_JIT),1)
+OBJ += $(JITOBJ)
+INC += -I .
+DEF += -DLEPTON_USE_JIT=1 -DASMJIT_BUILD_X86=1 -DASMJIT_STATIC=1 -DASMJIT_BUILD_RELEASE=1
+endif
+
+all: $(LIB) Makefile.lammps
+
+build:
+	mkdir -p build
+
+build/lepton.%.o: src/%.cpp build
+	$(CXX) $(INC) $(DEF) $(CXXFLAGS) -c $< -o $@
+
+build/arm.%.o: asmjit/arm/%.cpp build
+	$(CXX) $(INC) $(DEF) $(CXXFLAGS) -c $< -o $@
+
+build/x86.%.o: asmjit/x86/%.cpp build
+	$(CXX) $(INC) $(DEF) $(CXXFLAGS) -c $< -o $@
+
+build/core.%.o: asmjit/core/%.cpp build
+	$(CXX) $(INC) $(DEF) $(CXXFLAGS) -c $< -o $@
+
+Makefile.lammps:
+	cp $(EXTRAMAKE) $@
+	sed -i -e 's,^.*lepton_SYSINC *=.*$$,lepton_SYSINC = $(DEF),' $@
+
+.PHONY: all lib clean
+
+$(LIB) : $(OBJ)
+	$(AR) $(ARFLAGS) $@ $^
+
+clean:
+	rm -f build/*.o $(LIB) *~ Makefile.lammps
+
+
diff --git a/lib/lepton/Install.py b/lib/lepton/Install.py
deleted file mode 120000
index ffe709d44c..0000000000
--- a/lib/lepton/Install.py
+++ /dev/null
@@ -1 +0,0 @@
-../Install.py
\ No newline at end of file
diff --git a/lib/lepton/Install.py b/lib/lepton/Install.py
new file mode 100755
index 0000000000..e3b1b836e1
--- /dev/null
+++ b/lib/lepton/Install.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+
+"""
+Install.py tool to build the Lepton library
+"""
+
+from __future__ import print_function
+import sys, os, subprocess
+from argparse import ArgumentParser
+
+sys.path.append('..')
+from install_helpers import get_cpus, fullpath
+
+parser = ArgumentParser(prog='Install.py',
+                        description="LAMMPS Lepton library build wrapper script")
+
+HELP = """
+Syntax from src dir: make lib-lepton args="-m machine"
+Syntax from lib dir: python Install.py -m machine
+
+specify -m
+
+Examples:
+
+make lib-lepton args="-m serial" # build Lepton lib with same settings as in the serial Makefile in src
+python Install.py -m mpi         # build Lepton lib with same settings as in the mpi Makefile in src
+"""
+
+# parse and process arguments
+
+parser.add_argument("-m", "--machine",
+                    help="suffix of a <libname>/Makefile.* file used for compiling this library")
+
+args = parser.parse_args()
+
+# print help message and exit, if no machine suffix is given ("-m" is the only option)
+if not args.machine:
+  parser.print_help()
+  sys.exit(HELP)
+
+machine = args.machine
+
+# set lib from working dir
+
+cwd = fullpath('.')
+lib = os.path.basename(cwd)
+
+if not os.path.exists("Makefile.%s" % machine):
+  sys.exit("lib/%s/Makefile.%s does not exist" % (lib, machine))
+
+# make the library with parallel make
+n_cpus = get_cpus()
+
+print("Building liblmp%s.a ..." % lib)
+cmd = "make -f Makefile.%s clean; make -f Makefile.%s -j%d" % (machine, machine, n_cpus)
+try:
+  txt = subprocess.check_output(cmd, shell=True, stderr=subprocess.STDOUT)
+  print(txt.decode('UTF-8'))
+except subprocess.CalledProcessError as e:
+  print("Make failed with:\n %s" % e.output.decode('UTF-8'))
+  sys.exit(1)
+
+if os.path.exists("liblmp%s.a" % lib):
+  print("Build was successful")
+else:
+  sys.exit("Build of lib/%s/liblmp%s.a was NOT successful" % (lib, lib))
+
+if not os.path.exists("Makefile.lammps"):
+  print("WARNING: lib/%s/Makefile.lammps was NOT created" % lib)
diff --git a/lib/lepton/Makefile.lammps.empty b/lib/lepton/Makefile.lammps.empty
index 9e74c23b1d..57d5846c1b 100644
--- a/lib/lepton/Makefile.lammps.empty
+++ b/lib/lepton/Makefile.lammps.empty
@@ -1,6 +1,4 @@
 # Settings that the LAMMPS build will import when this package library is used
-# The default settings assume that HDF5 support is integrated into the standard
-# distribution and search paths and thus only needs to link the HDF5 library.
 
 lepton_SYSINC =
 lepton_SYSLIB =
diff --git a/lib/lepton/Makefile.mpi b/lib/lepton/Makefile.mpi
index 3d9cc49310..ee5d9aafae 100644
--- a/lib/lepton/Makefile.mpi
+++ b/lib/lepton/Makefile.mpi
@@ -1,37 +1,8 @@
-EXTRAMAKE=Makefile.lammps.empty
+# -*- makefile -*-
 
 CC=mpicxx
 CXXFLAGS=-D_DEFAULT_SOURCE -O2 -Wall -fPIC -std=c++11
-INC=-I include
 AR=ar
 ARFLAGS=rc
-LIB=liblmplepton.a
-SRC=src/CompiledExpression.cpp \
-    src/ExpressionProgram.cpp \
-    src/ExpressionTreeNode.cpp \
-    src/Operation.cpp \
-    src/ParsedExpression.cpp \
-    src/Parser.cpp \
-    src/Utils.cpp
-
-OBJ=$(SRC:src/%.cpp=build/%.o)
-
-all: $(LIB) Makefile.lammps
-
-build:
-	mkdir -p build
-
-build/%.o: src/%.cpp build
-	$(CXX) $(INC) $(CXXFLAGS) -c $< -o $@
-
-Makefile.lammps:
-	cp $(EXTRAMAKE) $@
-
-.PHONY: all lib clean
-
-$(LIB) : $(OBJ)
-	$(AR) $(ARFLAGS) $@ $^
-
-clean:
-	rm -f build/*.o $(LIB) *~
 
+include Common.mk
diff --git a/lib/lepton/Makefile.serial b/lib/lepton/Makefile.serial
index c83951774e..bebe2ce873 100644
--- a/lib/lepton/Makefile.serial
+++ b/lib/lepton/Makefile.serial
@@ -1,37 +1,8 @@
-EXTRAMAKE=Makefile.lammps.empty
+# -*- makefile -*-
 
 CC=g++
-CXXFLAGS=-D_DEFAULT_SOURCE -O2 -Wall -fPIC -std=c++11
-INC=-I include
+CXXFLAGS=-D_DEFAULT_SOURCE -O3 -DNDEBUG -Wall -fPIC -std=c++11 -ffast-math -msse4.2
 AR=ar
 ARFLAGS=rc
-LIB=liblmplepton.a
-SRC=src/CompiledExpression.cpp \
-    src/ExpressionProgram.cpp \
-    src/ExpressionTreeNode.cpp \
-    src/Operation.cpp \
-    src/ParsedExpression.cpp \
-    src/Parser.cpp \
-    src/Utils.cpp
-
-OBJ=$(SRC:src/%.cpp=build/%.o)
-
-all: $(LIB) Makefile.lammps
-
-build:
-	mkdir -p build
-
-build/%.o: src/%.cpp build
-	$(CXX) $(INC) $(CXXFLAGS) -c $< -o $@
-
-Makefile.lammps:
-	cp $(EXTRAMAKE) $@
-
-.PHONY: all lib clean
-
-$(LIB) : $(OBJ)
-	$(AR) $(ARFLAGS) $@ $^
-
-clean:
-	rm -f build/*.o $(LIB) *~
 
+include Common.mk
diff --git a/lib/lepton/README.md b/lib/lepton/README.md
index d2e4240c92..d83fe7ffc1 100644
--- a/lib/lepton/README.md
+++ b/lib/lepton/README.md
@@ -22,22 +22,7 @@ analysis. Here are some of its major features:
 - Computing analytic derivatives.
 - Representing parsed expressions in two different forms (tree or program) suitable for
   further analysis or processing.
+- Support for just-in-time compilation via the asmjit library on x86 (autodetected).
+  This should make evaluation about 2 times faster.
 
 Lepton was originally created for use in the [OpenMM project](https://openmm.org)
-ch5md is developed by Pierre de Buyl and is released under the 3-clause BSD
-license that can be found in the file LICENSE.
-
-To use the h5md dump style in lammps, execute
-make -f Makefile.h5cc
-in this directory then
-make yes-h5md
-in the src directory of LAMMPS to rebuild LAMMPS.
-
-Note that you must have the h5cc compiler installed to use
-Makefile.h5cc.  It should be part
-
-If HDF5 is not in a standard system location, edit Makefile.lammps accordingly.
-
-In the case of 2015 and more recent debian and ubuntu systems where concurrent
-serial and mpi are possible, use the full platform depedent path, i.e.
-`HDF5_PATH=/usr/lib/x86_64-linux-gnu/hdf5/serial`
diff --git a/src/LEPTON/Install.sh b/src/LEPTON/Install.sh
index 937ddf28bc..b7e80c9d1c 100755
--- a/src/LEPTON/Install.sh
+++ b/src/LEPTON/Install.sh
@@ -38,7 +38,7 @@ if (test $1 = 1) then
 
   if (test -e ../Makefile.package) then
     sed -i -e 's/[^ \t]*lepton[^ \t]* //g' ../Makefile.package
-    sed -i -e 's|^PKG_INC =[ \t]*|&-I..\/..\/lib\/lepton\/include |' ../Makefile.package
+    sed -i -e 's|^PKG_INC =[ \t]*|&-I..\/..\/lib\/lepton\/include -I..\/..\/lib\/lepton |' ../Makefile.package
     sed -i -e 's|^PKG_PATH =[ \t]*|&-L..\/..\/lib\/lepton$(LIBOBJDIR) |' ../Makefile.package
     sed -i -e 's|^PKG_LIB =[ \t]*|&-llmplepton |' ../Makefile.package
     sed -i -e 's|^PKG_SYSINC =[ \t]*|&$(lepton_SYSINC) |' ../Makefile.package

From a2af2b413519098d9fa08e892e870d42d3ed3977 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Fri, 23 Dec 2022 12:15:36 -0500
Subject: [PATCH 33/79] add versionadded tags

---
 doc/src/Packages_details.rst | 2 ++
 doc/src/bond_lepton.rst      | 2 ++
 doc/src/pair_lepton.rst      | 2 ++
 3 files changed, 6 insertions(+)

diff --git a/doc/src/Packages_details.rst b/doc/src/Packages_details.rst
index 53845f29d8..e4bc425aa2 100644
--- a/doc/src/Packages_details.rst
+++ b/doc/src/Packages_details.rst
@@ -1407,6 +1407,8 @@ expressions, they are more precise than what can be done with
 **Authors:** Axel Kohlmeyer (Temple U).  Lepton itself is developed
 by Peter Eastman at Stanford University.
 
+.. versionadded:: TBD
+
 **Install:**
 
 This package has :ref:`specific installation instructions <lepton>` on
diff --git a/doc/src/bond_lepton.rst b/doc/src/bond_lepton.rst
index 3f4b305b6e..c08d1284ff 100644
--- a/doc/src/bond_lepton.rst
+++ b/doc/src/bond_lepton.rst
@@ -28,6 +28,8 @@ Examples
 Description
 """""""""""
 
+.. versionadded:: TBD
+
 Bond style *lepton* computes bonded interactions between two atom
 based on evaluating strings.  The potential function must be provided as an
 expression string using "r" as the distance variable relative to the
diff --git a/doc/src/pair_lepton.rst b/doc/src/pair_lepton.rst
index eb5978b21e..570388dc13 100644
--- a/doc/src/pair_lepton.rst
+++ b/doc/src/pair_lepton.rst
@@ -34,6 +34,8 @@ Examples
 Description
 """""""""""
 
+.. versionadded:: TBD
+
 Pair style *lepton* computes spherical pairwise interactions based on
 evaluating strings.  The potential function must be provided as an
 expression string using "r" as the distance variable, for example

From 67f0c48781632298155bf0437b33da699128927b Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Fri, 23 Dec 2022 15:34:01 -0500
Subject: [PATCH 34/79] add angle styles lepton and lepton/omp

---
 src/LEPTON/angle_lepton.cpp                   | 384 ++++++++++++++++++
 src/LEPTON/angle_lepton.h                     |  53 +++
 src/LEPTON/bond_lepton.cpp                    |   4 +-
 src/LEPTON/bond_lepton.h                      |   2 +-
 src/OPENMP/angle_lepton_omp.cpp               | 190 +++++++++
 src/OPENMP/angle_lepton_omp.h                 |  44 ++
 unittest/force-styles/tests/angle-lepton.yaml |  89 ++++
 7 files changed, 763 insertions(+), 3 deletions(-)
 create mode 100644 src/LEPTON/angle_lepton.cpp
 create mode 100644 src/LEPTON/angle_lepton.h
 create mode 100644 src/OPENMP/angle_lepton_omp.cpp
 create mode 100644 src/OPENMP/angle_lepton_omp.h
 create mode 100644 unittest/force-styles/tests/angle-lepton.yaml

diff --git a/src/LEPTON/angle_lepton.cpp b/src/LEPTON/angle_lepton.cpp
new file mode 100644
index 0000000000..39294d16a0
--- /dev/null
+++ b/src/LEPTON/angle_lepton.cpp
@@ -0,0 +1,384 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "angle_lepton.h"
+
+#include "atom.h"
+#include "comm.h"
+#include "domain.h"
+#include "error.h"
+#include "force.h"
+#include "math_const.h"
+#include "memory.h"
+#include "neighbor.h"
+
+#include <cmath>
+
+#include "LMP_Lepton.h"
+
+using namespace LAMMPS_NS;
+using MathConst::DEG2RAD;
+using MathConst::RAD2DEG;
+
+static constexpr double SMALL = 0.001;
+
+/* ---------------------------------------------------------------------- */
+
+AngleLepton::AngleLepton(LAMMPS *_lmp) :
+    Angle(_lmp), theta0(nullptr), type2expression(nullptr), offset(nullptr)
+{
+  writedata = 1;
+  reinitflag = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+AngleLepton::~AngleLepton()
+{
+  if (allocated) {
+    memory->destroy(setflag);
+    memory->destroy(theta0);
+    memory->destroy(type2expression);
+    memory->destroy(offset);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AngleLepton::compute(int eflag, int vflag)
+{
+  // one ev_init() call is sufficient before dispatching on evflag/eflag/newton_bond
+  ev_init(eflag, vflag);
+  if (evflag) {
+    if (eflag) {
+      if (force->newton_bond)
+        eval<1, 1, 1>();
+      else
+        eval<1, 1, 0>();
+    } else {
+      if (force->newton_bond)
+        eval<1, 0, 1>();
+      else
+        eval<1, 0, 0>();
+    }
+  } else {
+    if (force->newton_bond)
+      eval<0, 0, 1>();
+    else
+      eval<0, 0, 0>();
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND> void AngleLepton::eval()
+{
+  std::vector<LMP_Lepton::CompiledExpression> angleforce;
+  std::vector<LMP_Lepton::CompiledExpression> anglepot;
+  for (const auto &expr : expressions) {
+    auto parsed = LMP_Lepton::Parser::parse(expr);
+    angleforce.emplace_back(parsed.differentiate("theta").createCompiledExpression());
+    if (EFLAG) anglepot.emplace_back(parsed.createCompiledExpression());
+  }
+
+  const double *const *const x = atom->x;
+  double *const *const f = atom->f;
+  const int *const *const anglelist = neighbor->anglelist;
+  const int nanglelist = neighbor->nanglelist;
+  const int nlocal = atom->nlocal;
+
+  for (int n = 0; n < nanglelist; n++) {
+    const int i1 = anglelist[n][0];
+    const int i2 = anglelist[n][1];
+    const int i3 = anglelist[n][2];
+    const int type = anglelist[n][3];
+
+    // 1st bond
+
+    const double delx1 = x[i1][0] - x[i2][0];
+    const double dely1 = x[i1][1] - x[i2][1];
+    const double delz1 = x[i1][2] - x[i2][2];
+
+    const double rsq1 = delx1 * delx1 + dely1 * dely1 + delz1 * delz1;
+    const double r1 = sqrt(rsq1);
+
+    // 2nd bond
+
+    const double delx2 = x[i3][0] - x[i2][0];
+    const double dely2 = x[i3][1] - x[i2][1];
+    const double delz2 = x[i3][2] - x[i2][2];
+
+    const double rsq2 = delx2 * delx2 + dely2 * dely2 + delz2 * delz2;
+    const double r2 = sqrt(rsq2);
+
+    // angle (cos and sin)
+
+    double c = delx1 * delx2 + dely1 * dely2 + delz1 * delz2;
+    c /= r1 * r2;
+
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+
+    double s = sqrt(1.0 - c * c);
+    if (s < SMALL) s = SMALL;
+    s = 1.0 / s;
+
+    // force and energy
+
+    const double dtheta = acos(c) - theta0[type];
+    const int idx = type2expression[type];
+    double &theta_for = angleforce[idx].getVariableReference("theta");
+    theta_for = dtheta;
+
+    const double a = -angleforce[idx].evaluate() * s;
+    const double a11 = a * c / rsq1;
+    const double a12 = -a / (r1 * r2);
+    const double a22 = a * c / rsq2;
+
+    double f1[3], f3[3];
+    f1[0] = a11 * delx1 + a12 * delx2;
+    f1[1] = a11 * dely1 + a12 * dely2;
+    f1[2] = a11 * delz1 + a12 * delz2;
+    f3[0] = a22 * delx2 + a12 * delx1;
+    f3[1] = a22 * dely2 + a12 * dely1;
+    f3[2] = a22 * delz2 + a12 * delz1;
+
+    // apply force to each of 3 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += f1[0];
+      f[i1][1] += f1[1];
+      f[i1][2] += f1[2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= f1[0] + f3[0];
+      f[i2][1] -= f1[1] + f3[1];
+      f[i2][2] -= f1[2] + f3[2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += f3[0];
+      f[i3][1] += f3[1];
+      f[i3][2] += f3[2];
+    }
+
+    double eangle = 0.0;
+    if (EFLAG) {
+      double &theta_pot = anglepot[idx].getVariableReference("theta");
+      theta_pot = dtheta;
+      eangle = anglepot[idx].evaluate() - offset[type];
+    }
+    if (EVFLAG)
+      ev_tally(i1, i2, i3, nlocal, NEWTON_BOND, eangle, f1, f3, delx1, dely1, delz1, delx2, dely2,
+               delz2);
+  }
+}
+
+/* ---- allocate per-angle-type arrays (type index 1..nangletypes), setflag cleared ---- */
+
+void AngleLepton::allocate()
+{
+  allocated = 1;
+  const int np1 = atom->nangletypes + 1;    // +1 because index 0 is unused, types start at 1
+
+  memory->create(theta0, np1, "angle:theta0");
+  memory->create(type2expression, np1, "angle:type2expression");
+  memory->create(offset, np1, "angle:offset");
+  memory->create(setflag, np1, "angle:setflag");
+  for (int i = 1; i < np1; i++) setflag[i] = 0;    // no type has coefficients yet
+}
+
+/* ----------------------------------------------------------------------
+   set coeffs for one or more types; args = type range, theta0 (degrees), expression
+------------------------------------------------------------------------- */
+
+void AngleLepton::coeff(int narg, char **arg)
+{
+  if (narg != 3) error->all(FLERR, "Incorrect number of args for angle coefficients");
+  if (!allocated) allocate();
+
+  int ilo, ihi;
+  utils::bounds(FLERR, arg[0], 1, atom->nangletypes, ilo, ihi, error);
+
+  double theta0_one = utils::numeric(FLERR, arg[1], false, lmp);
+
+  // remove whitespace and quotes from expression string and then
+  // check if the expression can be parsed and evaluated without error
+  std::string exp_one = LMP_Lepton::condense(arg[2]);
+  double offset_one = 0.0;
+  try {
+    auto parsed = LMP_Lepton::Parser::parse(exp_one);
+    auto anglepot = parsed.createCompiledExpression();
+    auto angleforce = parsed.differentiate("theta").createCompiledExpression();
+    double &theta_pot = anglepot.getVariableReference("theta");
+    double &theta_for = angleforce.getVariableReference("theta");
+    theta_for = theta_pot = 0.0;
+    offset_one = anglepot.evaluate();    // energy at theta == 0, subtracted from later energies
+    angleforce.evaluate();               // result unused: only checks that evaluation succeeds
+  } catch (std::exception &e) {
+    error->all(FLERR, e.what());
+  }
+
+  std::size_t idx = 0;    // position of exp_one in expressions, or expressions.size() if new
+  for (const auto &exp : expressions) {
+    if (exp == exp_one) break;
+    ++idx;
+  }
+
+  // if not found, add to list
+  if ((expressions.size() == 0) || (idx == expressions.size())) expressions.push_back(exp_one);
+
+  // store settings for all types in range; theta0 is converted from degrees to radians
+
+  int count = 0;
+  for (int i = ilo; i <= ihi; i++) {
+    theta0[i] = DEG2RAD * theta0_one;
+    type2expression[i] = idx;
+    offset[i] = offset_one;
+    setflag[i] = 1;
+    count++;
+  }
+
+  if (count == 0) error->all(FLERR, "Incorrect args for angle coefficients");
+}
+
+/* ---- return reference angle theta0 (stored in radians) of angle type i ---- */
+
+double AngleLepton::equilibrium_angle(int i)
+{
+  return theta0[i];
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes out coeffs and the list of expression strings to restart file
+------------------------------------------------------------------------- */
+
+void AngleLepton::write_restart(FILE *fp)
+{
+  fwrite(&theta0[1], sizeof(double), atom->nangletypes, fp);
+  fwrite(&type2expression[1], sizeof(int), atom->nangletypes, fp);
+  fwrite(&offset[1], sizeof(double), atom->nangletypes, fp);
+
+  int num = expressions.size();
+  int maxlen = 0;
+  for (const auto &exp : expressions) maxlen = MAX(maxlen, (int) exp.size());
+  ++maxlen;    // account for the terminating null byte
+
+  fwrite(&num, sizeof(int), 1, fp);
+  fwrite(&maxlen, sizeof(int), 1, fp);
+  for (const auto &exp : expressions) {
+    int n = exp.size() + 1;    // string length including the terminating null byte
+    fwrite(&n, sizeof(int), 1, fp);
+    fwrite(exp.c_str(), sizeof(char), n, fp);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 reads coeffs and expression strings from restart file, bcasts them
+------------------------------------------------------------------------- */
+
+void AngleLepton::read_restart(FILE *fp)
+{
+  allocate();
+
+  if (comm->me == 0) {
+    utils::sfread(FLERR, &theta0[1], sizeof(double), atom->nangletypes, fp, nullptr, error);
+    utils::sfread(FLERR, &type2expression[1], sizeof(int), atom->nangletypes, fp, nullptr, error);
+    utils::sfread(FLERR, &offset[1], sizeof(double), atom->nangletypes, fp, nullptr, error);
+  }
+  MPI_Bcast(&theta0[1], atom->nangletypes, MPI_DOUBLE, 0, world);
+  MPI_Bcast(&type2expression[1], atom->nangletypes, MPI_INT, 0, world);
+  MPI_Bcast(&offset[1], atom->nangletypes, MPI_DOUBLE, 0, world);
+  for (int i = 1; i <= atom->nangletypes; i++) setflag[i] = 1;    // every type now has coeffs
+
+  int num, maxlen, len;    // len is only assigned and used on proc 0
+  if (comm->me == 0) {
+    utils::sfread(FLERR, &num, sizeof(int), 1, fp, nullptr, error);
+    utils::sfread(FLERR, &maxlen, sizeof(int), 1, fp, nullptr, error);
+  }
+  MPI_Bcast(&num, 1, MPI_INT, 0, world);
+  MPI_Bcast(&maxlen, 1, MPI_INT, 0, world);
+
+  char *buf = new char[maxlen];    // sized for the longest string as stored by write_restart()
+
+  for (int i = 0; i < num; ++i) {    // NOTE(review): len is not checked against maxlen
+    if (comm->me == 0) {
+      utils::sfread(FLERR, &len, sizeof(int), 1, fp, nullptr, error);
+      utils::sfread(FLERR, buf, sizeof(char), len, fp, nullptr, error);
+    }
+    MPI_Bcast(buf, maxlen, MPI_CHAR, 0, world);
+    expressions.push_back(buf);    // buf holds a null-terminated string, see write_restart()
+  }
+
+  delete[] buf;
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes to data file: type, theta0 in degrees, expression string
+------------------------------------------------------------------------- */
+
+void AngleLepton::write_data(FILE *fp)
+{
+  for (int i = 1; i <= atom->nangletypes; i++)
+    fprintf(fp, "%d %g %s\n", i, RAD2DEG * theta0[i], expressions[type2expression[i]].c_str());
+}
+
+/* ---- energy of a single angle of the given type; i2 is the central atom ---- */
+
+double AngleLepton::single(int type, int i1, int i2, int i3)
+{
+  double **x = atom->x;
+
+  double delx1 = x[i1][0] - x[i2][0];
+  double dely1 = x[i1][1] - x[i2][1];
+  double delz1 = x[i1][2] - x[i2][2];
+  domain->minimum_image(delx1, dely1, delz1);
+  double r1 = sqrt(delx1 * delx1 + dely1 * dely1 + delz1 * delz1);
+
+  double delx2 = x[i3][0] - x[i2][0];
+  double dely2 = x[i3][1] - x[i2][1];
+  double delz2 = x[i3][2] - x[i2][2];
+  domain->minimum_image(delx2, dely2, delz2);
+  double r2 = sqrt(delx2 * delx2 + dely2 * dely2 + delz2 * delz2);
+
+  double c = delx1 * delx2 + dely1 * dely2 + delz1 * delz2;
+  c /= r1 * r2;
+  if (c > 1.0) c = 1.0;    // clamp so that acos() below gets a valid argument
+  if (c < -1.0) c = -1.0;
+
+  double dtheta = acos(c) - theta0[type];    // angle relative to theta0, in radians
+  auto parsed = LMP_Lepton::Parser::parse(expressions[type2expression[type]]);
+  auto anglepot = parsed.createCompiledExpression();    // parsed and compiled on every call
+  double &theta_pot = anglepot.getVariableReference("theta");
+  theta_pot = dtheta;
+  return anglepot.evaluate() - offset[type];    // offset is the expression value at theta == 0
+}
+
+/* ----------------------------------------------------------------------
+   return ptr to internal members upon request; only "theta0" is available
+------------------------------------------------------------------------ */
+
+void *AngleLepton::extract(const char *str, int &dim)
+{
+  dim = 1;
+  if (str) {
+    std::string keyword(str);
+    if (keyword == "theta0") return (void *) theta0;
+  }
+  return nullptr;    // null or unknown keyword
+}
diff --git a/src/LEPTON/angle_lepton.h b/src/LEPTON/angle_lepton.h
new file mode 100644
index 0000000000..67d2718fb6
--- /dev/null
+++ b/src/LEPTON/angle_lepton.h
@@ -0,0 +1,53 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef ANGLE_CLASS
+// clang-format off
+AngleStyle(lepton,AngleLepton);
+// clang-format on
+#else
+
+#ifndef LMP_ANGLE_LEPTON_H
+#define LMP_ANGLE_LEPTON_H
+
+#include "angle.h"
+
+namespace LAMMPS_NS {
+
+class AngleLepton : public Angle {
+ public:
+  AngleLepton(class LAMMPS *);
+  ~AngleLepton() override;
+  void compute(int, int) override;
+  void coeff(int, char **) override;
+  double equilibrium_angle(int) override;
+  void write_restart(FILE *) override;
+  void read_restart(FILE *) override;
+  void write_data(FILE *) override;
+  double single(int, int, int, int) override;
+  void *extract(const char *, int &) override;
+
+ protected:
+  std::vector<std::string> expressions;    // unique Lepton expression strings
+  double *theta0;                          // reference angle per angle type, in radians
+  int *type2expression;                    // per angle type: index into expressions
+  double *offset;                          // per angle type: expression value at theta = 0
+
+  virtual void allocate();
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND> void eval();
+};
+}    // namespace LAMMPS_NS
+#endif
+#endif
diff --git a/src/LEPTON/bond_lepton.cpp b/src/LEPTON/bond_lepton.cpp
index fb55571ca1..ae509a056b 100644
--- a/src/LEPTON/bond_lepton.cpp
+++ b/src/LEPTON/bond_lepton.cpp
@@ -156,7 +156,7 @@ void BondLepton::allocate()
 }
 
 /* ----------------------------------------------------------------------
-   set coeffs for one type
+   set coeffs for one or more types
 ------------------------------------------------------------------------- */
 
 void BondLepton::coeff(int narg, char **arg)
@@ -192,7 +192,7 @@ void BondLepton::coeff(int narg, char **arg)
     ++idx;
   }
 
-  // not found, add to list
+  // if not found, add to list
   if ((expressions.size() == 0) || (idx == expressions.size())) expressions.push_back(exp_one);
 
   int count = 0;
diff --git a/src/LEPTON/bond_lepton.h b/src/LEPTON/bond_lepton.h
index e91dda3187..9e693298a7 100644
--- a/src/LEPTON/bond_lepton.h
+++ b/src/LEPTON/bond_lepton.h
@@ -46,7 +46,7 @@ class BondLepton : public Bond {
   virtual void allocate();
 
  private:
-  template <int EVFLAG, int EFLAG, int NEWTON_PAIR> void eval();
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND> void eval();
 };
 }    // namespace LAMMPS_NS
 #endif
diff --git a/src/OPENMP/angle_lepton_omp.cpp b/src/OPENMP/angle_lepton_omp.cpp
new file mode 100644
index 0000000000..84ca5052d0
--- /dev/null
+++ b/src/OPENMP/angle_lepton_omp.cpp
@@ -0,0 +1,190 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "angle_lepton_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "omp_compat.h"
+
+#include <cmath>
+
+#include "LMP_Lepton.h"
+#include "suffix.h"
+using namespace LAMMPS_NS;
+
+static constexpr double SMALL = 0.001;
+
+/* ---- construct as AngleLepton with OpenMP thread support, set OMP suffix flag ---- */
+
+AngleLeptonOMP::AngleLeptonOMP(class LAMMPS *lmp) : AngleLepton(lmp), ThrOMP(lmp, THR_ANGLE)
+{
+  suffix_flag |= Suffix::OMP;
+}
+
+/* ---- threaded force computation: every thread evaluates its share of the angles ---- */
+
+void AngleLeptonOMP::compute(int eflag, int vflag)
+{
+  ev_init(eflag, vflag);
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nanglelist;    // number of entries in the angle list
+
+#if defined(_OPENMP)
+#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag, vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, nullptr, thr);
+
+    if (inum > 0) {    // pick eval<EVFLAG,EFLAG,NEWTON_BOND> matching the runtime flags
+      if (evflag) {
+        if (eflag) {
+          if (force->newton_bond)
+            eval<1, 1, 1>(ifrom, ito, thr);
+          else
+            eval<1, 1, 0>(ifrom, ito, thr);
+        } else {
+          if (force->newton_bond)
+            eval<1, 0, 1>(ifrom, ito, thr);
+          else
+            eval<1, 0, 0>(ifrom, ito, thr);
+        }
+      } else {
+        if (force->newton_bond)
+          eval<0, 0, 1>(ifrom, ito, thr);
+        else
+          eval<0, 0, 0>(ifrom, ito, thr);
+      }
+    }
+    thr->timer(Timer::BOND);
+    reduce_thr(this, eflag, vflag, thr);
+  }    // end of omp parallel region
+}
+
+/* ---- force, energy, and virial contributions of angles nfrom <= n < nto ---- */
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void AngleLeptonOMP::eval(int nfrom, int nto, ThrData *const thr)
+{
+  std::vector<LMP_Lepton::CompiledExpression> angleforce;    // d(expr)/d(theta) per expression
+  std::vector<LMP_Lepton::CompiledExpression> anglepot;    // stays empty unless EFLAG is set
+  for (const auto &expr : expressions) {
+    auto parsed = LMP_Lepton::Parser::parse(expr);
+    angleforce.emplace_back(parsed.differentiate("theta").createCompiledExpression());
+    if (EFLAG) anglepot.emplace_back(parsed.createCompiledExpression());
+  }
+
+  const auto *_noalias const x = (dbl3_t *) atom->x[0];
+  auto *_noalias const f = (dbl3_t *) thr->get_f()[0];
+  const int4_t *_noalias const anglelist = (int4_t *) neighbor->anglelist[0];
+  const int nlocal = atom->nlocal;
+
+  for (int n = nfrom; n < nto; n++) {
+    const int i1 = anglelist[n].a;
+    const int i2 = anglelist[n].b;
+    const int i3 = anglelist[n].c;
+    const int type = anglelist[n].t;
+
+    // 1st bond
+
+    const double delx1 = x[i1].x - x[i2].x;
+    const double dely1 = x[i1].y - x[i2].y;
+    const double delz1 = x[i1].z - x[i2].z;
+
+    const double rsq1 = delx1 * delx1 + dely1 * dely1 + delz1 * delz1;
+    const double r1 = sqrt(rsq1);
+
+    // 2nd bond
+
+    const double delx2 = x[i3].x - x[i2].x;
+    const double dely2 = x[i3].y - x[i2].y;
+    const double delz2 = x[i3].z - x[i2].z;
+
+    const double rsq2 = delx2 * delx2 + dely2 * dely2 + delz2 * delz2;
+    const double r2 = sqrt(rsq2);
+
+    // angle (cos and sin)
+
+    double c = delx1 * delx2 + dely1 * dely2 + delz1 * delz2;
+    c /= r1 * r2;
+
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+
+    double s = sqrt(1.0 - c * c);
+    if (s < SMALL) s = SMALL;    // keeps the 1/s below finite
+    s = 1.0 / s;
+
+    // force and energy
+
+    const double dtheta = acos(c) - theta0[type];    // angle relative to theta0, in radians
+    const int idx = type2expression[type];
+    double &theta_for = angleforce[idx].getVariableReference("theta");
+    theta_for = dtheta;
+
+    const double a = -angleforce[idx].evaluate() * s;    // s holds 1/sin(theta) at this point
+    const double a11 = a * c / rsq1;
+    const double a12 = -a / (r1 * r2);
+    const double a22 = a * c / rsq2;
+
+    double f1[3], f3[3];
+    f1[0] = a11 * delx1 + a12 * delx2;
+    f1[1] = a11 * dely1 + a12 * dely2;
+    f1[2] = a11 * delz1 + a12 * delz2;
+    f3[0] = a22 * delx2 + a12 * delx1;
+    f3[1] = a22 * dely2 + a12 * dely1;
+    f3[2] = a22 * delz2 + a12 * delz1;
+
+    // apply force to each of 3 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1].x += f1[0];
+      f[i1].y += f1[1];
+      f[i1].z += f1[2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2].x -= f1[0] + f3[0];
+      f[i2].y -= f1[1] + f3[1];
+      f[i2].z -= f1[2] + f3[2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3].x += f3[0];
+      f[i3].y += f3[1];
+      f[i3].z += f3[2];
+    }
+
+    double eangle = 0.0;
+    if (EFLAG) {
+      double &theta_pot = anglepot[idx].getVariableReference("theta");
+      theta_pot = dtheta;
+      eangle = anglepot[idx].evaluate() - offset[type];    // shifted to be zero at theta0
+    }
+    if (EVFLAG)
+      ev_tally_thr(this, i1, i2, i3, nlocal, NEWTON_BOND, eangle, f1, f3, delx1, dely1, delz1,
+                   delx2, dely2, delz2, thr);
+  }
+}
diff --git a/src/OPENMP/angle_lepton_omp.h b/src/OPENMP/angle_lepton_omp.h
new file mode 100644
index 0000000000..ef5988aaa2
--- /dev/null
+++ b/src/OPENMP/angle_lepton_omp.h
@@ -0,0 +1,44 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef ANGLE_CLASS
+// clang-format off
+AngleStyle(lepton/omp,AngleLeptonOMP);
+// clang-format on
+#else
+
+#ifndef LMP_ANGLE_LEPTON_OMP_H
+#define LMP_ANGLE_LEPTON_OMP_H
+
+#include "angle_lepton.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class AngleLeptonOMP : public AngleLepton, public ThrOMP {
+
+ public:
+  AngleLeptonOMP(class LAMMPS *lmp);
+  void compute(int, int) override;    // multi-threaded version of the force computation
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData *const thr);    // handles angles ifrom <= n < ito
+};
+}    // namespace LAMMPS_NS
+#endif
+#endif
diff --git a/unittest/force-styles/tests/angle-lepton.yaml b/unittest/force-styles/tests/angle-lepton.yaml
new file mode 100644
index 0000000000..3b02f8c3a6
--- /dev/null
+++ b/unittest/force-styles/tests/angle-lepton.yaml
@@ -0,0 +1,89 @@
+---
+lammps_version: 22 Dec 2022
+date_generated: Fri Dec 23 15:10:29 2022
+epsilon: 7.5e-13
+skip_tests:
+prerequisites: ! |
+  atom full
+  angle lepton
+pre_commands: ! |
+  echo screen
+post_commands: ! ""
+input_file: in.fourmol
+angle_style: lepton
+angle_coeff: ! |
+  1 110.1 "k*theta^2; k=75.0"
+  2 111.0 "k*theta^2; k=45.0"
+  3 120.0 "k*theta^2; k=50.0"
+  4 108.5 "k*theta^2; k=100.0"
+equilibrium: 4 1.9216075064457567 1.9373154697137058 2.0943951023931953 1.8936822384138476
+extract: ! |
+  theta0 1
+natoms: 29
+init_energy: 41.53081789649104
+init_stress: ! |2-
+   8.9723357320869297e+01 -8.7188643750026529e+01 -2.5347135708427655e+00  9.2043419883119782e+01 -2.8187238090404904e+01 -1.5291148024926793e+00
+init_forces: ! |2
+    1  4.7865489310693540e+01  7.8760925902181516e+00 -3.2694525514709866e+01
+    2 -1.1124882516177341e+00 -9.0075464203887403e+00 -7.2431691227364459e+00
+    3 -5.9057050592859328e+00  5.3263619873546261e+01  5.2353380124691469e+01
+    4 -1.6032230038990633e+01 -2.4560529343731403e+01  1.2891625920422307e+01
+    5 -4.4802331573497639e+01 -4.8300919461089379e+01 -2.3310767889219324e+01
+    6  4.7083124388174824e+01 -9.5212933434476312e+00 -3.2526392870546800e+01
+    7 -1.6208182775476303e+01  1.4458587960739102e+01 -3.5314745459502710e+00
+    8 -6.5664612141881040e+00 -2.5126850154274202e+01  8.2187944731423329e+01
+    9 -1.5504395262358301e+01  1.6121044185227817e+01 -4.2007069622477866e-01
+   10  9.9863759179365275e+00  4.1873540105704549e+01 -6.6085640966037403e+01
+   11 -2.0441876158908627e+01 -6.5186824168985984e+00  9.0023620309811072e+00
+   12 -1.0772126658369565e+01 -1.0807367300158219e+01 -9.6049647456797871e+00
+   13  2.8847886813946291e+00  7.2973241014859198e+00 -1.0414233993842981e-01
+   14  1.5267407478336393e+01 -9.4754911480231776e+00 -6.6307012925544200e+00
+   15  1.2402914209534773e+01 -6.2644630791613967e+00  1.8484576795819933e+01
+   16  3.8927757686508357e-01  1.0690061587911176e+01  6.1542759189377696e+00
+   17  1.4664194297570785e+00 -1.9971277376602425e+00  1.0776844613215999e+00
+   18  1.5785371874873322e-01  1.6495665212200166e+00 -6.6944747776990434e+00
+   19 -1.9328033033421670e+00 -2.4078805870919706e+00  2.8669575541313534e+00
+   20  1.7749495845934338e+00  7.5831406587195394e-01  3.8275172235676900e+00
+   21  3.4186149299343742e+00  4.2795410364249484e+00 -1.2789555411020650e+01
+   22 -6.0875600315279677e+00 -4.1504951869796605e+00  4.5212856070195766e+00
+   23  2.6689451015935934e+00 -1.2904584944528752e-01  8.2682698040010738e+00
+   24 -1.3053945393770587e+00  5.0741459325183271e+00 -3.0209518576073018e+00
+   25 -1.0471133765834284e+00 -3.5082261409793856e+00  5.7374874908501228e-01
+   26  2.3525079159604871e+00 -1.5659197915389413e+00  2.4472031085222894e+00
+   27 -2.8720725187343754e-01  2.3577465459557132e+00 -8.0312673032168869e-01
+   28 -6.2799575211500369e-01 -1.4097313073755862e+00  3.2747938980616453e-02
+   29  9.1520300398844123e-01 -9.4801523858012704e-01  7.7037879134107223e-01
+run_energy: 41.28323739029462
+run_stress: ! |2-
+   8.8236221596506681e+01 -8.6492260623309562e+01 -1.7439609731970940e+00  9.0601855980531312e+01 -2.8735005690484968e+01 -2.6097632235197477e+00
+run_forces: ! |2
+    1  4.7316793853445830e+01  8.2815577813110188e+00 -3.2021703111755464e+01
+    2 -1.1508196824491330e+00 -9.3814982172707460e+00 -7.5761211707510139e+00
+    3 -5.1083163691832576e+00  5.2667553294971619e+01  5.1784852458007592e+01
+    4 -1.6078177452605999e+01 -2.4156048365236213e+01  1.3140924677013103e+01
+    5 -4.4915734474022280e+01 -4.8095168640411821e+01 -2.3331149037574161e+01
+    6  4.7077916942842350e+01 -9.5906213020090156e+00 -3.2570331503075487e+01
+    7 -1.6228599672412471e+01  1.4485102617342370e+01 -3.5441153194985300e+00
+    8 -6.5097893981550730e+00 -2.5117582302614530e+01  8.2131369512416001e+01
+    9 -1.5527440970965937e+01  1.6147270375910470e+01 -4.0812004993325646e-01
+   10  1.0070812216240984e+01  4.1571532807578805e+01 -6.5968810328796337e+01
+   11 -2.0431584971707451e+01 -6.4817395192247664e+00  8.9879981618991636e+00
+   12 -1.0884695976714678e+01 -1.1067390190389006e+01 -9.1551242768940568e+00
+   13  2.8052913970098801e+00  7.1296301666594912e+00  1.3173039168682621e-02
+   14  1.5254877537873529e+01 -8.9700095533297350e+00 -6.5719846903613162e+00
+   15  1.2392009100170984e+01 -6.0827695435257292e+00  1.7929674392339596e+01
+   16  4.7158712437377481e-01  1.0631038523396533e+01  6.0960085687560355e+00
+   17  1.4458707962589659e+00 -1.9708579331587350e+00  1.0634586790394520e+00
+   18  1.4201882413835909e-01  1.4265339757773337e+00 -5.7663956896747992e+00
+   19 -1.6609130686729365e+00 -2.0735307593211125e+00  2.4755525101127143e+00
+   20  1.5188942445345774e+00  6.4699678354377899e-01  3.2908431795620849e+00
+   21  3.2242729509516406e+00  4.0079233768386153e+00 -1.2047892238650988e+01
+   22 -5.7215184687399772e+00 -3.8871624402883409e+00  4.2679223469272234e+00
+   23  2.4972455177883366e+00 -1.2076093655027398e-01  7.7799698917237645e+00
+   24 -1.1661978296905471e+00  4.5271404898674854e+00 -2.6925565853370195e+00
+   25 -9.2712094527152167e-01 -3.1291890525017125e+00  5.1208215565053827e-01
+   26  2.0933187749620688e+00 -1.3979514373657731e+00  2.1804744296864813e+00
+   27 -2.6804542538020537e-01  2.1830651328698103e+00 -7.3931790038945400e-01
+   28 -5.7927072943128310e-01 -1.3052929090347909e+00  2.8365455885795865e-02
+   29  8.4731615481148848e-01 -8.7777222383501941e-01  7.1095244450365813e-01
+...

From 3d7082499d71cb54ab0f7b6b6ac0f0f4546e6431 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Fri, 23 Dec 2022 16:07:14 -0500
Subject: [PATCH 35/79] update docs to include angle style lepton

---
 doc/src/Commands_bond.rst    |  1 +
 doc/src/Packages_details.rst | 17 +++----
 doc/src/angle_lepton.rst     | 92 ++++++++++++++++++++++++++++++++++++
 doc/src/angle_style.rst      |  3 +-
 doc/src/bond_lepton.rst      | 15 +++---
 5 files changed, 111 insertions(+), 17 deletions(-)
 create mode 100644 doc/src/angle_lepton.rst

diff --git a/doc/src/Commands_bond.rst b/doc/src/Commands_bond.rst
index f5e5edcc5a..d067dfea7b 100644
--- a/doc/src/Commands_bond.rst
+++ b/doc/src/Commands_bond.rst
@@ -94,6 +94,7 @@ OPT.
    * :doc:`fourier/simple (o) <angle_fourier_simple>`
    * :doc:`gaussian <angle_gaussian>`
    * :doc:`harmonic (iko) <angle_harmonic>`
+   * :doc:`lepton (o) <angle_lepton>`
    * :doc:`mesocnt <angle_mesocnt>`
    * :doc:`mm3 <angle_mm3>`
    * :doc:`quartic (o) <angle_quartic>`
diff --git a/doc/src/Packages_details.rst b/doc/src/Packages_details.rst
index e4bc425aa2..dc99e478fa 100644
--- a/doc/src/Packages_details.rst
+++ b/doc/src/Packages_details.rst
@@ -1392,17 +1392,17 @@ LEPTON package
 
 **Contents:**
 
-Styles for pair, bond forces that evaluate the potential function from a
-string using the `Lepton mathematical expression parser
+Styles for pair, bond, and angle forces that evaluate the potential
+function from a string using the `Lepton mathematical expression parser
 <https://simtk.org/projects/lepton>`_.  Lepton is a C++ library that is
 bundled with `OpenMM <https://openmm.org/>`_ and can be used for
 parsing, evaluating, differentiating, and analyzing mathematical
-expressions.  This is a more lightweight and efficient alternative
-for evaluating custom potential function to an embedded Python
-interpreter as used in the :ref:`PYTHON package <PKG-PYTHON>`.
-On the other hand, since the potentials are evaluated form analytical
-expressions, they are more precise than what can be done with
-:ref:`tabulated potentials <tabulate>`.
+expressions.  This is a more lightweight and efficient alternative for
+evaluating custom potential functions to an embedded Python interpreter
+as used in the :ref:`PYTHON package <PKG-PYTHON>`.  On the other hand,
+since the potentials are evaluated from analytical expressions, they are
+more precise than what can be done with :ref:`tabulated potentials
+<tabulate>`.
 
 **Authors:** Axel Kohlmeyer (Temple U).  Lepton itself is developed
 by Peter Eastman at Stanford University.
@@ -1420,6 +1420,7 @@ the :doc:`Build extras <Build_extras>` page.
 * lib/lepton/README.md
 * :doc:`pair_style lepton <pair_lepton>`
 * :doc:`bond_style lepton <bond_lepton>`
+* :doc:`angle_style lepton <angle_lepton>`
 
 ----------
 
diff --git a/doc/src/angle_lepton.rst b/doc/src/angle_lepton.rst
new file mode 100644
index 0000000000..d65f3b6d73
--- /dev/null
+++ b/doc/src/angle_lepton.rst
@@ -0,0 +1,92 @@
+.. index:: angle_style lepton
+.. index:: angle_style lepton/omp
+
+angle_style lepton command
+==========================
+
+Accelerator Variants: *lepton/omp*
+
+Syntax
+""""""
+
+.. code-block:: LAMMPS
+
+   angle_style lepton
+
+Examples
+""""""""
+
+.. code-block:: LAMMPS
+
+   angle_style lepton
+
+   angle_coeff  1 120.0  "k*theta^2; k=250.0"
+   angle_coeff  2  90.0  "k2*theta^2 + k3*theta^3 + k4*theta^4; k2=300.0; k3=-100.0; k4=50.0"
+   angle_coeff  3 109.45 "k*theta^2; k=350.0"
+
+Description
+"""""""""""
+
+.. versionadded:: TBD
+
+Angle style *lepton* computes angular interactions between three atoms
+based on evaluating strings.  The potential function must be provided as an
+expression string using "theta" as the angle variable relative to the
+reference angle :math:`\theta_0` which is provided as an angle coefficient.
+For example `"200.0*(theta)^2"` represents a harmonic potential with a
+force constant *K* of 200.0 energy units:
+
+.. math::
+
+   U_{angle,i} = K (\theta_i - \theta_0)^2 = K \theta^2 \qquad \theta = \theta_i - \theta_0
+
+The `Lepton library <https://simtk.org/projects/lepton>`_, that the
+*lepton* angle style interfaces with, evaluates this expression string at
+run time to compute the angle energy.  It also creates an analytical
+representation of the derivative of this expression with respect to
+"theta" and then uses that to compute the forces on the three atoms forming
+each angle as defined by the topology data.
+
+The following coefficients must be defined for each angle type via the
+:doc:`angle_coeff <angle_coeff>` command as in the example above, or in
+the data file or restart files read by the :doc:`read_data <read_data>`
+or :doc:`read_restart <read_restart>` commands:
+
+* :math:`\theta_0` (degrees)
+* Lepton expression (energy units)
+
+The Lepton expression must be either enclosed in quotes or must not
+contain any whitespace so that LAMMPS recognizes it as a single keyword.
+More on valid Lepton expressions below.  The :math:`\theta_0`
+coefficient is the "equilibrium angle".  It is entered in degrees, but
+internally converted to radians.  Thus the expression must assume "theta"
+is in radians.  The potential energy function in the Lepton expression
+is shifted in such a way, that the potential energy is 0 for an angle
+:math:`\theta_i == \theta_0`.
+
+----------
+
+.. include:: lepton_expression.rst
+
+----------
+
+.. include:: accel_styles.rst
+
+----------
+
+Restrictions
+""""""""""""
+
+This angle style can only be used if LAMMPS was built with the LEPTON
+package.  See the :doc:`Build package <Build_package>` page for more
+info.
+
+Related commands
+""""""""""""""""
+
+:doc:`angle_coeff <angle_coeff>`
+
+Default
+"""""""
+
+none
diff --git a/doc/src/angle_style.rst b/doc/src/angle_style.rst
index 024481ce68..1f1ae72647 100644
--- a/doc/src/angle_style.rst
+++ b/doc/src/angle_style.rst
@@ -10,7 +10,7 @@ Syntax
 
    angle_style style
 
-* style = *none* or *zero* or *hybrid* or *amoeba* or *charmm* or *class2* or *class2/p6* or *cosine* or *cosine/buck6d* or *cosine/delta* or *cosine/periodic* or *cosine/shift* or *cosine/shift/exp* or *cosine/squared* or *cross* or *dipole* or *fourier* or *fourier/simple* or *gaussian* or *harmonic* or *mm3* or *quartic* or *spica* or *table*
+* style = *none* or *zero* or *hybrid* or *amoeba* or *charmm* or *class2* or *class2/p6* or *cosine* or *cosine/buck6d* or *cosine/delta* or *cosine/periodic* or *cosine/shift* or *cosine/shift/exp* or *cosine/squared* or *cross* or *dipole* or *fourier* or *fourier/simple* or *gaussian* or *harmonic* or *lepton* or *mm3* or *quartic* or *spica* or *table*
 
 Examples
 """"""""
@@ -90,6 +90,7 @@ of (g,i,k,o,t) to indicate which accelerated styles exist.
 * :doc:`fourier/simple <angle_fourier_simple>` - angle with a single cosine term
 * :doc:`gaussian <angle_gaussian>` - multi-centered Gaussian-based angle potential
 * :doc:`harmonic <angle_harmonic>` - harmonic angle
+* :doc:`lepton <angle_lepton>` - angle potential from evaluating a string
 * :doc:`mesocnt <angle_mesocnt>` - piecewise harmonic and linear angle for bending-buckling of nanotubes
 * :doc:`mm3 <angle_mm3>` - anharmonic angle
 * :doc:`quartic <angle_quartic>` - angle with cubic and quartic terms
diff --git a/doc/src/bond_lepton.rst b/doc/src/bond_lepton.rst
index c08d1284ff..faa4101f47 100644
--- a/doc/src/bond_lepton.rst
+++ b/doc/src/bond_lepton.rst
@@ -19,27 +19,26 @@ Examples
 .. code-block:: LAMMPS
 
    bond_style lepton
+
    1 1.5 "k*r^2; k=250.0"
    2 1.1 "k2*r^2 + k3*r^3 + k4*r^4; k2=300.0; k3=-100.0; k4=50.0"
    3 1.3 "k*r^2; k=350.0"
 
-   bond_coeff 5 1.0 2.0 1.2
-
 Description
 """""""""""
 
 .. versionadded:: TBD
 
-Bond style *lepton* computes bonded interactions between two atom
-based on evaluating strings.  The potential function must be provided as an
+Bond style *lepton* computes bonded interactions between two atoms based
+on evaluating strings.  The potential function must be provided as an
 expression string using "r" as the distance variable relative to the
 reference distance :math:`r_0` which is provided as a bond coefficient.
-For example `"200.0*(r)^2"` represents a harmonic potential with a
-force constant *K* of 200.0 energy units:
+For example `"200.0*(r)^2"` represents a harmonic potential with a force
+constant *K* of 200.0 energy units:
 
 .. math::
 
-   U_{bond} = K (r_i - r_0)^2 = K r^2 \qquad r = r_i-r_0
+   U_{bond,i} = K (r_i - r_0)^2 = K r^2 \qquad r = r_i - r_0
 
 The `Lepton library <https://simtk.org/projects/lepton>`_, that the
 *lepton* bond style interfaces with, evaluates this expression string at
@@ -61,7 +60,7 @@ contain any whitespace so that LAMMPS recognizes it as a single keyword.
 More on valid Lepton expressions below.  The :math:`r_0` it the
 "equilibrium distance".  The potential energy function in the Lepton
 expression is shifted in such a way, that the potential energy is 0 for
-a bond length *r* of :math:`r_0`.
+a bond length :math:`r_i == r_0`.
 
 ----------
 

From 7b3866d04cab1d0b1d20572e078b246f03c25b92 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Sat, 24 Dec 2022 00:08:03 -0500
Subject: [PATCH 36/79] move lepton utilities to lepton_utils namespace in
 LEPTON package

---
 lib/lepton/Common.mk                          |   3 +-
 lib/lepton/include/LMP_Lepton.h               |   8 --
 src/.gitignore                                |   6 +
 src/LEPTON/angle_lepton.cpp                   |   3 +-
 src/LEPTON/bond_lepton.cpp                    |   4 +-
 src/LEPTON/lepton_utils.cpp                   | 110 ++++++++++++++++++
 .../Utils.cpp => src/LEPTON/lepton_utils.h    |  28 ++---
 src/LEPTON/pair_lepton.cpp                    |   4 +-
 src/OPENMP/angle_lepton_omp.h                 |   4 -
 src/OPENMP/bond_lepton_omp.h                  |   4 -
 src/OPENMP/pair_lepton_omp.h                  |   4 -
 11 files changed, 138 insertions(+), 40 deletions(-)
 create mode 100644 src/LEPTON/lepton_utils.cpp
 rename lib/lepton/src/Utils.cpp => src/LEPTON/lepton_utils.h (56%)

diff --git a/lib/lepton/Common.mk b/lib/lepton/Common.mk
index a43e425e46..40fecc9368 100644
--- a/lib/lepton/Common.mk
+++ b/lib/lepton/Common.mk
@@ -8,8 +8,7 @@ SRC= \
     src/ExpressionTreeNode.cpp \
     src/Operation.cpp \
     src/ParsedExpression.cpp \
-    src/Parser.cpp \
-    src/Utils.cpp
+    src/Parser.cpp
 OBJ=$(SRC:src/%.cpp=build/lepton.%.o)
 
 JITARM= \
diff --git a/lib/lepton/include/LMP_Lepton.h b/lib/lepton/include/LMP_Lepton.h
index d277bd2761..73b6b6fa38 100644
--- a/lib/lepton/include/LMP_Lepton.h
+++ b/lib/lepton/include/LMP_Lepton.h
@@ -32,8 +32,6 @@
  * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
  * -------------------------------------------------------------------------- */
 
-#include <string>
-
 #include "lepton/CompiledExpression.h"
 #include "lepton/CustomFunction.h"
 #include "lepton/ExpressionProgram.h"
@@ -42,10 +40,4 @@
 #include "lepton/ParsedExpression.h"
 #include "lepton/Parser.h"
 
-// utility functions
-namespace LMP_Lepton
-{
-  /// remove whitespace and quotes from expression string
-  std::string condense(const std::string &);
-}
 #endif /*LMP_LEPTON_H_*/
diff --git a/src/.gitignore b/src/.gitignore
index 8662671ce3..7dd4ad0f3c 100644
--- a/src/.gitignore
+++ b/src/.gitignore
@@ -56,6 +56,12 @@
 
 /pair_lepton.cpp
 /pair_lepton.h
+/bond_lepton.cpp
+/bond_lepton.h
+/angle_lepton.cpp
+/angle_lepton.h
+/lepton_utils.cpp
+/lepton_utils.h
 
 /pair_pace.cpp
 /pair_pace.h
diff --git a/src/LEPTON/angle_lepton.cpp b/src/LEPTON/angle_lepton.cpp
index 39294d16a0..5ea2425941 100644
--- a/src/LEPTON/angle_lepton.cpp
+++ b/src/LEPTON/angle_lepton.cpp
@@ -29,6 +29,7 @@
 #include <cmath>
 
 #include "LMP_Lepton.h"
+#include "lepton_utils.h"
 
 using namespace LAMMPS_NS;
 using MathConst::DEG2RAD;
@@ -219,7 +220,7 @@ void AngleLepton::coeff(int narg, char **arg)
 
   // remove whitespace and quotes from expression string and then
   // check if the expression can be parsed and evaluated without error
-  std::string exp_one = LMP_Lepton::condense(arg[2]);
+  std::string exp_one = LeptonUtils::condense(arg[2]);
   double offset_one = 0.0;
   try {
     auto parsed = LMP_Lepton::Parser::parse(exp_one);
diff --git a/src/LEPTON/bond_lepton.cpp b/src/LEPTON/bond_lepton.cpp
index ae509a056b..3c6e45eed4 100644
--- a/src/LEPTON/bond_lepton.cpp
+++ b/src/LEPTON/bond_lepton.cpp
@@ -27,7 +27,7 @@
 #include <cmath>
 
 #include "LMP_Lepton.h"
-
+#include "lepton_utils.h"
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
@@ -171,7 +171,7 @@ void BondLepton::coeff(int narg, char **arg)
 
   // remove whitespace and quotes from expression string and then
   // check if the expression can be parsed and evaluated without error
-  std::string exp_one = LMP_Lepton::condense(arg[2]);
+  std::string exp_one = LeptonUtils::condense(arg[2]);
   double offset_one = 0.0;
   try {
     auto parsed = LMP_Lepton::Parser::parse(exp_one);
diff --git a/src/LEPTON/lepton_utils.cpp b/src/LEPTON/lepton_utils.cpp
new file mode 100644
index 0000000000..d0cfa576cb
--- /dev/null
+++ b/src/LEPTON/lepton_utils.cpp
@@ -0,0 +1,110 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lepton_utils.h"
+
+#include "error.h"
+#include "input.h"
+#include "lammps.h"
+#include "variable.h"
+
+#include "fmt/args.h"
+
+#include <cctype>
+#include <exception>
+#include <unordered_set>
+
+namespace LeptonUtils {
+/// exception thrown by substitute() for a reference to a LAMMPS variable that does not exist
+class VariableException : public std::exception {
+ public:
+  VariableException() = delete;    // the message requires both constructor arguments
+  explicit VariableException(const std::string &var, const std::string &expr) :
+      message(fmt::format("Variable {} in expression {} does not exist", var, expr))
+  {
+  }
+  const char *what() const noexcept override { return message.c_str(); }
+
+ private:
+  std::string message;    // complete error text returned by what()
+};
+}    // namespace LeptonUtils
+
+/// remove whitespace and quotes from expression string; other characters keep their order
+std::string LeptonUtils::condense(const std::string &in)
+{
+  std::string out;
+  for (const auto &c : in)    // cast below: <cctype> functions are undefined for negative char
+    if (!isspace(static_cast<unsigned char>(c)) && (c != '"') && (c != '\'')) out.push_back(c);
+  return out;
+}
+
+/// Replace each "v_name" reference in an expression with the value of the LAMMPS
+/// variable "name" as returned by Variable::retrieve().  Every distinct variable is
+/// retrieved only once, so repeated references to it receive the same value.  A "v_"
+/// is only treated as reference at the start of an identifier, so that names like
+/// "conv_r" or a single "v" are copied verbatim, as are literal '{' and '}' characters.
+/// \param in   expression string
+/// \param lmp  LAMMPS instance providing the variables
+/// \return     expression string with all variable references replaced
+/// \throws VariableException if a referenced variable does not exist
+std::string LeptonUtils::substitute(const std::string &in, LAMMPS_NS::LAMMPS *lmp)
+{
+  // <cctype> functions are undefined for negative char values, hence the cast
+  const auto is_word = [](char c) {
+    return isalnum(static_cast<unsigned char>(c)) || (c == '_');
+  };
+
+  std::string format, name;
+  std::unordered_set<std::string> vars;
+  const std::size_t len = in.size();
+
+  // convert the expression to a {fmt} format string with one named field per reference
+  std::size_t i = 0;
+  while (i < len) {
+    const char c = in[i];
+    // a reference must begin at an identifier boundary, otherwise the "v_" is
+    // part of a longer name and belongs to the expression itself
+    const bool at_start = (i == 0) || !is_word(in[i - 1]);
+    if (at_start && (c == 'v') && (i + 1 < len) && (in[i + 1] == '_')) {
+      // collect the variable name that follows the "v_" prefix
+      name.clear();
+      for (i += 2; (i < len) && is_word(in[i]); ++i) name.push_back(in[i]);
+      format += '{' + name + '}';
+      vars.insert(name);
+    } else {
+      // literal braces must be doubled or {fmt} would parse them as replacement fields
+      if ((c == '{') || (c == '}')) format.push_back(c);
+      format.push_back(c);
+      ++i;
+    }
+  }
+
+  // retrieve each distinct variable once and bind its value to the field of the same name
+  auto variable = lmp->input->variable;
+  fmt::dynamic_format_arg_store<fmt::format_context> args;
+  for (const auto &v : vars) {
+    const char *val = variable->retrieve(v.c_str());
+    if (val)
+      args.push_back(fmt::arg(v.c_str(), val));
+    else
+      throw VariableException(v, in);
+  }
+
+  // expand the named fields with the retrieved values
+  return fmt::vformat(format, args);
+}
diff --git a/lib/lepton/src/Utils.cpp b/src/LEPTON/lepton_utils.h
similarity index 56%
rename from lib/lepton/src/Utils.cpp
rename to src/LEPTON/lepton_utils.h
index 839da6cda2..89512e7552 100644
--- a/lib/lepton/src/Utils.cpp
+++ b/src/LEPTON/lepton_utils.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    https://www.lammps.org/, Sandia National Laboratories
    LAMMPS development team: developers@lammps.org
@@ -11,21 +11,21 @@
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
-/* ----------------------------------------------------------------------
-   Contributing author: Axel Kohlmeyer (Temple U)
-------------------------------------------------------------------------- */
+#include <string>
 
-#include "LMP_Lepton.h"
+// forward declarations
 
-#include <cctype>
-
-/// remove whitespace and quotes from expression string
-std::string LMP_Lepton::condense(const std::string & in)
-{
-  std::string out;
-  for (const auto &c : in)
-    if (!isspace(c) && (c != '"') && (c != '\'')) out.push_back(c);
-  return out;
+namespace LAMMPS_NS {
+class LAMMPS;
 }
 
+// utility functions and classes
 
+namespace LeptonUtils {
+
+/// remove whitespace and quotes from expression string
+std::string condense(const std::string &);
+/// substitute LAMMPS variable references with their value
+std::string substitute(const std::string &, LAMMPS_NS::LAMMPS *);
+
+}    // namespace LeptonUtils
diff --git a/src/LEPTON/pair_lepton.cpp b/src/LEPTON/pair_lepton.cpp
index c84a910876..641e8136e3 100644
--- a/src/LEPTON/pair_lepton.cpp
+++ b/src/LEPTON/pair_lepton.cpp
@@ -28,6 +28,8 @@
 #include <cmath>
 
 #include "LMP_Lepton.h"
+#include "lepton_utils.h"
+#include <cmath>
 
 using namespace LAMMPS_NS;
 
@@ -210,7 +212,7 @@ void PairLepton::coeff(int narg, char **arg)
 
   // remove whitespace and quotes from expression string and then
   // check if the expression can be parsed and evaluated without error
-  std::string exp_one = LMP_Lepton::condense(arg[2]);
+  auto exp_one = LeptonUtils::condense(arg[2]);
   try {
     auto parsed = LMP_Lepton::Parser::parse(exp_one);
     auto pairpot = parsed.createCompiledExpression();
diff --git a/src/OPENMP/angle_lepton_omp.h b/src/OPENMP/angle_lepton_omp.h
index ef5988aaa2..e2f269c808 100644
--- a/src/OPENMP/angle_lepton_omp.h
+++ b/src/OPENMP/angle_lepton_omp.h
@@ -11,10 +11,6 @@
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
-/* ----------------------------------------------------------------------
-   Contributing author: Axel Kohlmeyer (Temple U)
-------------------------------------------------------------------------- */
-
 #ifdef ANGLE_CLASS
 // clang-format off
 AngleStyle(lepton/omp,AngleLeptonOMP);
diff --git a/src/OPENMP/bond_lepton_omp.h b/src/OPENMP/bond_lepton_omp.h
index bdcc36434e..7b755e5e9d 100644
--- a/src/OPENMP/bond_lepton_omp.h
+++ b/src/OPENMP/bond_lepton_omp.h
@@ -11,10 +11,6 @@
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
-/* ----------------------------------------------------------------------
-   Contributing author: Axel Kohlmeyer (Temple U)
-------------------------------------------------------------------------- */
-
 #ifdef BOND_CLASS
 // clang-format off
 BondStyle(lepton/omp,BondLeptonOMP);
diff --git a/src/OPENMP/pair_lepton_omp.h b/src/OPENMP/pair_lepton_omp.h
index 7d658dba1c..706e463bda 100644
--- a/src/OPENMP/pair_lepton_omp.h
+++ b/src/OPENMP/pair_lepton_omp.h
@@ -11,10 +11,6 @@
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
-/* ----------------------------------------------------------------------
-   Contributing author: Axel Kohlmeyer (Temple U)
-------------------------------------------------------------------------- */
-
 #ifdef PAIR_CLASS
 // clang-format off
 PairStyle(lepton/omp,PairLeptonOMP);

From 30a6a8a54e5049edcc09f2aa0d1f469a7ccd2fb8 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Sat, 24 Dec 2022 00:35:22 -0500
Subject: [PATCH 37/79] add support for substituting LAMMPS variables in Lepton
 expressions

---
 src/LEPTON/angle_lepton.cpp     | 21 ++++++-------
 src/LEPTON/bond_lepton.cpp      | 33 +++++++++++----------
 src/LEPTON/pair_lepton.cpp      | 52 +++++++++++++++++----------------
 src/OPENMP/angle_lepton_omp.cpp | 23 ++++++++-------
 src/OPENMP/bond_lepton_omp.cpp  | 25 +++++++++-------
 src/OPENMP/pair_lepton_omp.cpp  | 19 +++++++-----
 6 files changed, 91 insertions(+), 82 deletions(-)

diff --git a/src/LEPTON/angle_lepton.cpp b/src/LEPTON/angle_lepton.cpp
index 5ea2425941..8893b422f1 100644
--- a/src/LEPTON/angle_lepton.cpp
+++ b/src/LEPTON/angle_lepton.cpp
@@ -91,7 +91,7 @@ template <int EVFLAG, int EFLAG, int NEWTON_BOND> void AngleLepton::eval()
   std::vector<LMP_Lepton::CompiledExpression> angleforce;
   std::vector<LMP_Lepton::CompiledExpression> anglepot;
   for (const auto &expr : expressions) {
-    auto parsed = LMP_Lepton::Parser::parse(expr);
+    auto parsed = LMP_Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp));
     angleforce.emplace_back(parsed.differentiate("theta").createCompiledExpression());
     if (EFLAG) anglepot.emplace_back(parsed.createCompiledExpression());
   }
@@ -142,8 +142,7 @@ template <int EVFLAG, int EFLAG, int NEWTON_BOND> void AngleLepton::eval()
 
     const double dtheta = acos(c) - theta0[type];
     const int idx = type2expression[type];
-    double &theta_for = angleforce[idx].getVariableReference("theta");
-    theta_for = dtheta;
+    angleforce[idx].getVariableReference("theta") = dtheta;
 
     const double a = -angleforce[idx].evaluate() * s;
     const double a11 = a * c / rsq1;
@@ -180,8 +179,7 @@ template <int EVFLAG, int EFLAG, int NEWTON_BOND> void AngleLepton::eval()
 
     double eangle = 0.0;
     if (EFLAG) {
-      double &theta_pot = anglepot[idx].getVariableReference("theta");
-      theta_pot = dtheta;
+      anglepot[idx].getVariableReference("theta") = dtheta;
       eangle = anglepot[idx].evaluate() - offset[type];
     }
     if (EVFLAG)
@@ -223,12 +221,11 @@ void AngleLepton::coeff(int narg, char **arg)
   std::string exp_one = LeptonUtils::condense(arg[2]);
   double offset_one = 0.0;
   try {
-    auto parsed = LMP_Lepton::Parser::parse(exp_one);
+    auto parsed = LMP_Lepton::Parser::parse(LeptonUtils::substitute(exp_one, lmp));
     auto anglepot = parsed.createCompiledExpression();
     auto angleforce = parsed.differentiate("theta").createCompiledExpression();
-    double &theta_pot = anglepot.getVariableReference("theta");
-    double &theta_for = angleforce.getVariableReference("theta");
-    theta_for = theta_pot = 0.0;
+    anglepot.getVariableReference("theta") = 0.0;
+    angleforce.getVariableReference("theta") = 0.0;
     offset_one = anglepot.evaluate();
     angleforce.evaluate();
   } catch (std::exception &e) {
@@ -363,10 +360,10 @@ double AngleLepton::single(int type, int i1, int i2, int i3)
   if (c < -1.0) c = -1.0;
 
   double dtheta = acos(c) - theta0[type];
-  auto parsed = LMP_Lepton::Parser::parse(expressions[type2expression[type]]);
+  auto expr = expressions[type2expression[type]];
+  auto parsed = LMP_Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp));
   auto anglepot = parsed.createCompiledExpression();
-  double &theta_pot = anglepot.getVariableReference("theta");
-  theta_pot = dtheta;
+  anglepot.getVariableReference("theta") = dtheta;
   return anglepot.evaluate() - offset[type];
 }
 
diff --git a/src/LEPTON/bond_lepton.cpp b/src/LEPTON/bond_lepton.cpp
index 3c6e45eed4..7ac999c5b2 100644
--- a/src/LEPTON/bond_lepton.cpp
+++ b/src/LEPTON/bond_lepton.cpp
@@ -82,10 +82,14 @@ template <int EVFLAG, int EFLAG, int NEWTON_BOND> void BondLepton::eval()
 {
   std::vector<LMP_Lepton::CompiledExpression> bondforce;
   std::vector<LMP_Lepton::CompiledExpression> bondpot;
-  for (const auto &expr : expressions) {
-    auto parsed = LMP_Lepton::Parser::parse(expr);
-    bondforce.emplace_back(parsed.differentiate("r").createCompiledExpression());
-    if (EFLAG) bondpot.emplace_back(parsed.createCompiledExpression());
+  try {
+    for (const auto &expr : expressions) {
+      auto parsed = LMP_Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp));
+      bondforce.emplace_back(parsed.differentiate("r").createCompiledExpression());
+      if (EFLAG) bondpot.emplace_back(parsed.createCompiledExpression());
+    }
+  } catch (std::exception &e) {
+    error->all(FLERR, e.what());
   }
 
   const double *const *const x = atom->x;
@@ -112,8 +116,7 @@ template <int EVFLAG, int EFLAG, int NEWTON_BOND> void BondLepton::eval()
 
     double fbond = 0.0;
     if (r > 0.0) {
-      double &r_for = bondforce[idx].getVariableReference("r");
-      r_for = dr;
+      bondforce[idx].getVariableReference("r") = dr;
       fbond = -bondforce[idx].evaluate() / r;
     }
 
@@ -133,8 +136,7 @@ template <int EVFLAG, int EFLAG, int NEWTON_BOND> void BondLepton::eval()
 
     double ebond = 0.0;
     if (EFLAG) {
-      double &r_pot = bondpot[idx].getVariableReference("r");
-      r_pot = dr;
+      bondpot[idx].getVariableReference("r") = dr;
       ebond = bondpot[idx].evaluate() - offset[type];
     }
     if (EVFLAG) ev_tally(i1, i2, nlocal, NEWTON_BOND, ebond, fbond, delx, dely, delz);
@@ -174,12 +176,11 @@ void BondLepton::coeff(int narg, char **arg)
   std::string exp_one = LeptonUtils::condense(arg[2]);
   double offset_one = 0.0;
   try {
-    auto parsed = LMP_Lepton::Parser::parse(exp_one);
+    auto parsed = LMP_Lepton::Parser::parse(LeptonUtils::substitute(exp_one, lmp));
     auto bondpot = parsed.createCompiledExpression();
     auto bondforce = parsed.differentiate("r").createCompiledExpression();
-    double &r_pot = bondpot.getVariableReference("r");
-    double &r_for = bondforce.getVariableReference("r");
-    r_for = r_pot = 0.0;
+    bondpot.getVariableReference("r") = 0.0;
+    bondforce.getVariableReference("r") = 0.0;
     offset_one = bondpot.evaluate();
     bondforce.evaluate();
   } catch (std::exception &e) {
@@ -297,12 +298,12 @@ double BondLepton::single(int type, double rsq, int /*i*/, int /*j*/, double &ff
   const double r = sqrt(rsq);
   const double dr = r - r0[type];
 
-  auto parsed = LMP_Lepton::Parser::parse(expressions[type2expression[type]]);
+  auto expr = expressions[type2expression[type]];
+  auto parsed = LMP_Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp));
   auto bondpot = parsed.createCompiledExpression();
   auto bondforce = parsed.differentiate("r").createCompiledExpression();
-  double &r_for = bondforce.getVariableReference("r");
-  double &r_pot = bondpot.getVariableReference("r");
-  r_for = r_pot = dr;
+  bondforce.getVariableReference("r") = dr;
+  bondpot.getVariableReference("r") = dr;
 
   // force and energy
 
diff --git a/src/LEPTON/pair_lepton.cpp b/src/LEPTON/pair_lepton.cpp
index 641e8136e3..bc4e97f2b6 100644
--- a/src/LEPTON/pair_lepton.cpp
+++ b/src/LEPTON/pair_lepton.cpp
@@ -25,8 +25,6 @@
 #include "neigh_list.h"
 #include "update.h"
 
-#include <cmath>
-
 #include "LMP_Lepton.h"
 #include "lepton_utils.h"
 #include <cmath>
@@ -103,10 +101,14 @@ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> void PairLepton::eval()
 
   std::vector<LMP_Lepton::CompiledExpression> pairforce;
   std::vector<LMP_Lepton::CompiledExpression> pairpot;
-  for (const auto &expr : expressions) {
-    auto parsed = LMP_Lepton::Parser::parse(expr);
-    pairforce.emplace_back(parsed.differentiate("r").createCompiledExpression());
-    if (EFLAG) pairpot.emplace_back(parsed.createCompiledExpression());
+  try {
+    for (const auto &expr : expressions) {
+      auto parsed = LMP_Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp));
+      pairforce.emplace_back(parsed.differentiate("r").createCompiledExpression());
+      if (EFLAG) pairpot.emplace_back(parsed.createCompiledExpression());
+    }
+  } catch (std::exception &e) {
+    error->all(FLERR, e.what());
   }
 
   // loop over neighbors of my atoms
@@ -150,8 +152,7 @@ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> void PairLepton::eval()
 
         double evdwl = 0.0;
         if (EFLAG) {
-          double &r_pot = pairpot[idx].getVariableReference("r");
-          r_pot = r;
+          pairpot[idx].getVariableReference("r") = r;
           evdwl = pairpot[idx].evaluate() - offset[itype][jtype];
           evdwl *= factor_lj;
         }
@@ -214,12 +215,11 @@ void PairLepton::coeff(int narg, char **arg)
   // check if the expression can be parsed and evaluated without error
   auto exp_one = LeptonUtils::condense(arg[2]);
   try {
-    auto parsed = LMP_Lepton::Parser::parse(exp_one);
-    auto pairpot = parsed.createCompiledExpression();
+    auto parsed = LMP_Lepton::Parser::parse(LeptonUtils::substitute(exp_one, lmp));
     auto pairforce = parsed.differentiate("r").createCompiledExpression();
-    double &r_pot = pairpot.getVariableReference("r");
-    double &r_for = pairforce.getVariableReference("r");
-    r_for = r_pot = 1.0;
+    auto pairpot = parsed.createCompiledExpression();
+    pairpot.getVariableReference("r") = 1.0;
+    pairforce.getVariableReference("r") = 1.0;
     pairpot.evaluate();
     pairforce.evaluate();
   } catch (std::exception &e) {
@@ -254,14 +254,16 @@ double PairLepton::init_one(int i, int j)
 {
   if (setflag[i][j] == 0) error->all(FLERR, "All pair coeffs are not set");
 
+  offset[i][j] = 0.0;
   if (offset_flag) {
-    auto parsed = LMP_Lepton::Parser::parse(expressions[type2expression[i][j]]);
-    auto pairpot = parsed.createCompiledExpression();
-    double &r_pot = pairpot.getVariableReference("r");
-    r_pot = cut[i][j];
-    offset[i][j] = pairpot.evaluate();
-  } else
-    offset[i][j] = 0.0;
+    try {
+      auto expr = LeptonUtils::substitute(expressions[type2expression[i][j]], lmp);
+      auto pairpot = LMP_Lepton::Parser::parse(expr).createCompiledExpression();
+      pairpot.getVariableReference("r") = cut[i][j];
+      offset[i][j] = pairpot.evaluate();
+    } catch (std::exception &) {
+    }
+  }
 
   cut[j][i] = cut[i][j];
   type2expression[j][i] = type2expression[i][j];
@@ -399,15 +401,15 @@ void PairLepton::write_data_all(FILE *fp)
 double PairLepton::single(int /* i */, int /* j */, int itype, int jtype, double rsq,
                           double /* factor_coul */, double factor_lj, double &fforce)
 {
-  auto parsed = LMP_Lepton::Parser::parse(expressions[type2expression[itype][jtype]]);
+  auto expr = expressions[type2expression[itype][jtype]];
+  auto parsed = LMP_Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp));
   auto pairpot = parsed.createCompiledExpression();
   auto pairforce = parsed.differentiate("r").createCompiledExpression();
 
-  double r = sqrt(rsq);
-  double &r_pot = pairpot.getVariableReference("r");
-  double &r_for = pairforce.getVariableReference("r");
+  const double r = sqrt(rsq);
+  pairpot.getVariableReference("r") = r;
+  pairforce.getVariableReference("r") = r;
 
-  r_pot = r_for = r;
   fforce = -pairforce.evaluate() / r * factor_lj;
   return (pairpot.evaluate() - offset[itype][jtype]) * factor_lj;
 }
diff --git a/src/OPENMP/angle_lepton_omp.cpp b/src/OPENMP/angle_lepton_omp.cpp
index 84ca5052d0..f9a6c6790b 100644
--- a/src/OPENMP/angle_lepton_omp.cpp
+++ b/src/OPENMP/angle_lepton_omp.cpp
@@ -20,12 +20,13 @@
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
-#include "omp_compat.h"
+#include "suffix.h"
 
 #include <cmath>
 
 #include "LMP_Lepton.h"
-#include "suffix.h"
+#include "lepton_utils.h"
+#include "omp_compat.h"
 using namespace LAMMPS_NS;
 
 static constexpr double SMALL = 0.001;
@@ -90,10 +91,14 @@ void AngleLeptonOMP::eval(int nfrom, int nto, ThrData *const thr)
 {
   std::vector<LMP_Lepton::CompiledExpression> angleforce;
   std::vector<LMP_Lepton::CompiledExpression> anglepot;
-  for (const auto &expr : expressions) {
-    auto parsed = LMP_Lepton::Parser::parse(expr);
-    angleforce.emplace_back(parsed.differentiate("theta").createCompiledExpression());
-    if (EFLAG) anglepot.emplace_back(parsed.createCompiledExpression());
+  try {
+    for (const auto &expr : expressions) {
+      auto parsed = LMP_Lepton::Parser::parse(LeptonUtils::substitute(expr, Pointers::lmp));
+      angleforce.emplace_back(parsed.differentiate("theta").createCompiledExpression());
+      if (EFLAG) anglepot.emplace_back(parsed.createCompiledExpression());
+    }
+  } catch (std::exception &e) {
+    error->all(FLERR, e.what());
   }
 
   const auto *_noalias const x = (dbl3_t *) atom->x[0];
@@ -141,8 +146,7 @@ void AngleLeptonOMP::eval(int nfrom, int nto, ThrData *const thr)
 
     const double dtheta = acos(c) - theta0[type];
     const int idx = type2expression[type];
-    double &theta_for = angleforce[idx].getVariableReference("theta");
-    theta_for = dtheta;
+    angleforce[idx].getVariableReference("theta") = dtheta;
 
     const double a = -angleforce[idx].evaluate() * s;
     const double a11 = a * c / rsq1;
@@ -179,8 +183,7 @@ void AngleLeptonOMP::eval(int nfrom, int nto, ThrData *const thr)
 
     double eangle = 0.0;
     if (EFLAG) {
-      double &theta_pot = anglepot[idx].getVariableReference("theta");
-      theta_pot = dtheta;
+      anglepot[idx].getVariableReference("theta") = dtheta;
       eangle = anglepot[idx].evaluate() - offset[type];
     }
     if (EVFLAG)
diff --git a/src/OPENMP/bond_lepton_omp.cpp b/src/OPENMP/bond_lepton_omp.cpp
index 560256076f..4b578ee73c 100644
--- a/src/OPENMP/bond_lepton_omp.cpp
+++ b/src/OPENMP/bond_lepton_omp.cpp
@@ -20,17 +20,18 @@
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
-#include "omp_compat.h"
+#include "suffix.h"
 
 #include <cmath>
 
 #include "LMP_Lepton.h"
-#include "suffix.h"
+#include "lepton_utils.h"
+#include "omp_compat.h"
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-BondLeptonOMP::BondLeptonOMP(class LAMMPS *lmp) : BondLepton(lmp), ThrOMP(lmp, THR_BOND)
+BondLeptonOMP::BondLeptonOMP(class LAMMPS *_lmp) : BondLepton(_lmp), ThrOMP(_lmp, THR_BOND)
 {
   suffix_flag |= Suffix::OMP;
 }
@@ -88,10 +89,14 @@ void BondLeptonOMP::eval(int nfrom, int nto, ThrData *const thr)
 {
   std::vector<LMP_Lepton::CompiledExpression> bondforce;
   std::vector<LMP_Lepton::CompiledExpression> bondpot;
-  for (const auto &expr : expressions) {
-    auto parsed = LMP_Lepton::Parser::parse(expr);
-    bondforce.emplace_back(parsed.differentiate("r").createCompiledExpression());
-    if (EFLAG) bondpot.emplace_back(parsed.createCompiledExpression());
+  try {
+    for (const auto &expr : expressions) {
+      auto parsed = LMP_Lepton::Parser::parse(LeptonUtils::substitute(expr, Pointers::lmp));
+      bondforce.emplace_back(parsed.differentiate("r").createCompiledExpression());
+      if (EFLAG) bondpot.emplace_back(parsed.createCompiledExpression());
+    }
+  } catch (std::exception &e) {
+    error->all(FLERR, e.what());
   }
 
   const auto *_noalias const x = (dbl3_t *) atom->x[0];
@@ -117,8 +122,7 @@ void BondLeptonOMP::eval(int nfrom, int nto, ThrData *const thr)
 
     double fbond = 0.0;
     if (r > 0.0) {
-      double &r_for = bondforce[idx].getVariableReference("r");
-      r_for = dr;
+      bondforce[idx].getVariableReference("r") = dr;
       fbond = -bondforce[idx].evaluate() / r;
     }
 
@@ -138,8 +142,7 @@ void BondLeptonOMP::eval(int nfrom, int nto, ThrData *const thr)
 
     double ebond = 0.0;
     if (EFLAG) {
-      double &r_pot = bondpot[idx].getVariableReference("r");
-      r_pot = dr;
+      bondpot[idx].getVariableReference("r") = dr;
       ebond = bondpot[idx].evaluate() - offset[type];
     }
     if (EVFLAG)
diff --git a/src/OPENMP/pair_lepton_omp.cpp b/src/OPENMP/pair_lepton_omp.cpp
index b3abfdd34b..ad10163747 100644
--- a/src/OPENMP/pair_lepton_omp.cpp
+++ b/src/OPENMP/pair_lepton_omp.cpp
@@ -23,6 +23,7 @@
 #include <cmath>
 
 #include "LMP_Lepton.h"
+#include "lepton_utils.h"
 #include "omp_compat.h"
 using namespace LAMMPS_NS;
 
@@ -95,10 +96,14 @@ void PairLeptonOMP::eval(int iifrom, int iito, ThrData *const thr)
 
   std::vector<LMP_Lepton::CompiledExpression> pairforce;
   std::vector<LMP_Lepton::CompiledExpression> pairpot;
-  for (const auto &expr : expressions) {
-    auto parsed = LMP_Lepton::Parser::parse(expr);
-    pairforce.emplace_back(parsed.differentiate("r").createCompiledExpression());
-    if (EFLAG) pairpot.emplace_back(parsed.createCompiledExpression());
+  try {
+    for (const auto &expr : expressions) {
+      auto parsed = LMP_Lepton::Parser::parse(LeptonUtils::substitute(expr, Pointers::lmp));
+      pairforce.emplace_back(parsed.differentiate("r").createCompiledExpression());
+      if (EFLAG) pairpot.emplace_back(parsed.createCompiledExpression());
+    }
+  } catch (std::exception &e) {
+    error->all(FLERR, e.what());
   }
 
   // loop over neighbors of my atoms
@@ -127,8 +132,7 @@ void PairLeptonOMP::eval(int iifrom, int iito, ThrData *const thr)
       if (rsq < cutsq[itype][jtype]) {
         const double r = sqrt(rsq);
         const int idx = type2expression[itype][jtype];
-        double &r_for = pairforce[idx].getVariableReference("r");
-        r_for = r;
+        pairforce[idx].getVariableReference("r") = r;
         const double fpair = -pairforce[idx].evaluate() / r * factor_lj;
 
         fxtmp += delx * fpair;
@@ -142,8 +146,7 @@ void PairLeptonOMP::eval(int iifrom, int iito, ThrData *const thr)
 
         double evdwl = 0.0;
         if (EFLAG) {
-          double &r_pot = pairpot[idx].getVariableReference("r");
-          r_pot = r;
+          pairpot[idx].getVariableReference("r") = r;
           evdwl = pairpot[idx].evaluate() - offset[itype][jtype];
           evdwl *= factor_lj;
         }

From be01ec2e07b09baabc41fb341d65f2fec039e668 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Sat, 24 Dec 2022 00:55:57 -0500
Subject: [PATCH 38/79] document variable substitution

---
 doc/src/lepton_expression.rst                 | 23 ++++++++++++++++---
 unittest/force-styles/tests/angle-lepton.yaml |  3 +--
 2 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/doc/src/lepton_expression.rst b/doc/src/lepton_expression.rst
index c286e82359..618c5239f8 100644
--- a/doc/src/lepton_expression.rst
+++ b/doc/src/lepton_expression.rst
@@ -72,8 +72,25 @@ The following mathematical functions are available:
    * step(x)
    * step(x) is 0 for `x < 0`, otherwise 1
 
-Numbers may be given in either decimal or exponential form.  All of the following are valid
-numbers: `5`, `-3.1`, `1e6`, and `3.12e-2`.
+Numbers may be given in either decimal or exponential form.  All of the
+following are valid numbers: `5`, `-3.1`, `1e6`, and `3.12e-2`.
+
+As an extension to the standard Lepton syntax, it is also possible to
+use LAMMPS :doc:`variables <variable>` in the format "v_name".  Before
+evaluating the expression, "v_name" will be replaced with the value of
+the variable "name".  This is compatible with all kinds of scalar
+variables, but not with vectors, arrays, local, or per-atom
+variables.  If necessary, a custom scalar variable needs to be defined
+that can access the desired (single) item from a non-scalar variable.
+As an example, the following lines will instruct LAMMPS to ramp
+the force constant for a harmonic bond from 100.0 to 200.0 during the
+next run;
+
+.. code-block:: LAMMPS
+
+   variable fconst equal ramp(100.0, 200)
+   bond_style lepton
+   bond_coeff 1 1.5 "v_fconst * (r^2)"
 
 An expression may be followed by definitions for intermediate values that appear in the
 expression. A semicolon ";" is used as a delimiter between value definitions. For example,
@@ -96,5 +113,5 @@ definition.  For efficiency reasons, the expression string is parsed,
 optimized, and then stored in an internal, pre-parsed representation for
 evaluation.
 
-Evaluating Lepton expressions is typically between 2 and 4 times
+Evaluating Lepton expressions is typically between 2.5 and 5 times
 slower than the corresponding compiled and optimized C++ code.
diff --git a/unittest/force-styles/tests/angle-lepton.yaml b/unittest/force-styles/tests/angle-lepton.yaml
index 3b02f8c3a6..ea108cfdb1 100644
--- a/unittest/force-styles/tests/angle-lepton.yaml
+++ b/unittest/force-styles/tests/angle-lepton.yaml
@@ -6,8 +6,7 @@ skip_tests:
 prerequisites: ! |
   atom full
   angle lepton
-pre_commands: ! |
-  echo screen
+pre_commands: ! ""
 post_commands: ! ""
 input_file: in.fourmol
 angle_style: lepton

From 24e5fafd7f52d920fda2f763df6fd07ecfc205fa Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Sat, 24 Dec 2022 15:31:57 -0500
Subject: [PATCH 39/79] more documentation tweaks and corrections. make
 consistent across package styles

---
 doc/src/angle_lepton.rst      | 40 ++++++++++++++++++-----------------
 doc/src/bond_lepton.rst       | 34 +++++++++++++++--------------
 doc/src/lepton_expression.rst | 11 +++++++---
 doc/src/pair_lepton.rst       | 38 +++++++++++++++++----------------
 4 files changed, 67 insertions(+), 56 deletions(-)

diff --git a/doc/src/angle_lepton.rst b/doc/src/angle_lepton.rst
index d65f3b6d73..7365dd9ed9 100644
--- a/doc/src/angle_lepton.rst
+++ b/doc/src/angle_lepton.rst
@@ -20,30 +20,31 @@ Examples
 
    angle_style lepton
 
-   angle_coeff  1 120.0  "k*theta^2; k=250.0"
-   angle_coeff  2  90.0  "k2*theta^2 + k3*theta^3 + k4*theta^4; k2=300.0; k3=-100.0; k4=50.0"
-   angle_coeff  3 109.45 "k*theta^2; k=350.0"
+   angle_coeff  1  120.0  "k*theta^2; k=250.0"
+   angle_coeff  2   90.0  "k2*theta^2 + k3*theta^3 + k4*theta^4; k2=300.0; k3=-100.0; k4=50.0"
+   angle_coeff  3  109.47 "k*theta^2; k=350.0"
 
 Description
 """""""""""
 
 .. versionadded:: TBD
 
-Angle style *lepton* computes angular interactions between three atoms
-based on evaluating strings.  The potential function must be provided as an
-expression string using "theta" as the angle variable relative to the
-reference angle :math:`\theta_0` which is provided as an angle coefficient.
-For example `"200.0*(theta)^2"` represents a harmonic potential with a
-force constant *K* of 200.0 energy units:
+Angle style *lepton* computes angular interactions between three atoms,
+between which an angle has been defined, based on evaluating strings.
+The potential function must be provided as an expression string using
+"theta" as the angle variable relative to the reference angle
+:math:`\theta_0` which is provided as an angle coefficient.  For example
+`"200.0*theta^2"` represents a harmonic potential with a force constant
+*K* of 200.0 energy units:
 
 .. math::
 
-   U_{angle,i} = K (\theta_i - \theta_0)^2 = K \theta^2 \qquad theta = \theta_i - \theta_0
+   U_{angle,i} = K (\theta_i - \theta_0)^2 = K \theta^2 \qquad \theta = \theta_i - \theta_0
 
 The `Lepton library <https://simtk.org/projects/lepton>`_, that the
 *lepton* angle style interfaces with, evaluates this expression string at
 run time to compute the pairwise energy.  It also creates an analytical
-representation of the differentiation of this expression with respect to
+representation of the first derivative of this expression with respect to
 "r" and then uses that to compute the force between the pairs atoms forming
 angles as defined by the topology data.
 
@@ -59,10 +60,10 @@ The Lepton expression must be either enclosed in quotes or must not
 contain any whitespace so that LAMMPS recognizes it as a single keyword.
 More on valid Lepton expressions below.  The :math:`\theta_0`
 coefficient is the "equilibrium angle".  It is entered in degrees, but
-internally converted to radians.  Thus the expression must assume "theta"
-is in radians.  The potential energy function in the Lepton expression
-is shifted in such a way, that the potential energy is 0 for a angle
-:math:`\theta_i == \theta_0`.
+internally converted to radians.  Thus the expression must assume
+"theta" is in radians.  The potential energy function in the Lepton
+expression is shifted in such a way, that the potential energy is 0 for
+an angle :math:`\theta_i == \theta_0`.
 
 ----------
 
@@ -77,14 +78,15 @@ is shifted in such a way, that the potential energy is 0 for a angle
 Restrictions
 """"""""""""
 
-This angle style can only be used if LAMMPS was built with the LEPTON
-package.  See the :doc:`Build package <Build_package>` page for more
-info.
+This angle style is part of the LEPTON package and only enabled if LAMMPS
+was built with this package.  See the :doc:`Build package
+<Build_package>` page for more info.
 
 Related commands
 """"""""""""""""
 
-:doc:`angle_coeff <angle_coeff>`
+:doc:`angle_coeff <angle_coeff>`, :doc:`angle_style table <angle_table>`,
+:doc:`bond_style lepton <bond_lepton>`
 
 Default
 """""""
diff --git a/doc/src/bond_lepton.rst b/doc/src/bond_lepton.rst
index faa4101f47..bcd10e8033 100644
--- a/doc/src/bond_lepton.rst
+++ b/doc/src/bond_lepton.rst
@@ -20,21 +20,22 @@ Examples
 
    bond_style lepton
 
-   1 1.5 "k*r^2; k=250.0"
-   2 1.1 "k2*r^2 + k3*r^3 + k4*r^4; k2=300.0; k3=-100.0; k4=50.0"
-   3 1.3 "k*r^2; k=350.0"
+   bond_coeff  1  1.5 "k*r^2; k=250.0"
+   bond_coeff  2  1.1 "k2*r^2 + k3*r^3 + k4*r^4; k2=300.0; k3=-100.0; k4=50.0"
+   bond_coeff  3  1.3 "k*r^2; k=350.0"
 
 Description
 """""""""""
 
 .. versionadded:: TBD
 
-Bond style *lepton* computes bonded interactions between two atom based
-on evaluating strings.  The potential function must be provided as an
-expression string using "r" as the distance variable relative to the
-reference distance :math:`r_0` which is provided as a bond coefficient.
-For example `"200.0*(r)^2"` represents a harmonic potential with a force
-constant *K* of 200.0 energy units:
+Bond style *lepton* computes bonded interactions between two atoms,
+between which a bond has been defined, based on evaluating strings.  The
+potential function must be provided as an expression string using "r" as
+the distance variable relative to the reference distance :math:`r_0`
+which is provided as a bond coefficient.  For example `"200.0*r^2"`
+represents a harmonic potential with a force constant *K* of 200.0
+energy units:
 
 .. math::
 
@@ -43,12 +44,12 @@ constant *K* of 200.0 energy units:
 The `Lepton library <https://simtk.org/projects/lepton>`_, that the
 *lepton* bond style interfaces with, evaluates this expression string at
 run time to compute the pairwise energy.  It also creates an analytical
-representation of the differentiation of this expression with respect to
+representation of the first derivative of this expression with respect to
 "r" and then uses that to compute the force between the pairs atoms forming
 bonds as defined by the topology data.
 
 The following coefficients must be defined for each bond type via the
-:doc:`bond_coeff <bond_coeff>` command as in the example above, or in
+:doc:`bond_coeff <bond_coeff>` command as in the examples above, or in
 the data file or restart files read by the :doc:`read_data <read_data>`
 or :doc:`read_restart <read_restart>` commands:
 
@@ -57,7 +58,7 @@ or :doc:`read_restart <read_restart>` commands:
 
 The Lepton expression must be either enclosed in quotes or must not
 contain any whitespace so that LAMMPS recognizes it as a single keyword.
-More on valid Lepton expressions below.  The :math:`r_0` it the
+More on valid Lepton expressions below.  The :math:`r_0` is the
 "equilibrium distance".  The potential energy function in the Lepton
 expression is shifted in such a way, that the potential energy is 0 for
 a bond length :math:`r_i == r_0`.
@@ -75,14 +76,15 @@ a bond length :math:`r_i == r_0`.
 Restrictions
 """"""""""""
 
-This bond style can only be used if LAMMPS was built with the LEPTON
-package.  See the :doc:`Build package <Build_package>` page for more
-info.
+This bond style is part of the LEPTON package and only enabled if LAMMPS
+was built with this package.  See the :doc:`Build package
+<Build_package>` page for more info.
 
 Related commands
 """"""""""""""""
 
-:doc:`bond_coeff <bond_coeff>`, :doc:`delete_bonds <delete_bonds>`
+:doc:`bond_coeff <bond_coeff>`, :doc:`bond_style table <bond_table>`,
+:doc:`bond_write <bond_write>`, :doc:`angle_style lepton <angle_lepton>`
 
 Default
 """""""
diff --git a/doc/src/lepton_expression.rst b/doc/src/lepton_expression.rst
index 618c5239f8..9ecebc921e 100644
--- a/doc/src/lepton_expression.rst
+++ b/doc/src/lepton_expression.rst
@@ -84,7 +84,7 @@ variables.  If necessary, a custom scalar variable needs to be defined
 that can access the desired (single) item from a non-scalar variable.
 As an example, the following lines will instruct LAMMPS to ramp
 the force constant for a harmonic bond from 100.0 to 200.0 during the
-next run;
+next run:
 
 .. code-block:: LAMMPS
 
@@ -113,5 +113,10 @@ definition.  For efficiency reasons, the expression string is parsed,
 optimized, and then stored in an internal, pre-parsed representation for
 evaluation.
 
-Evaluating Lepton expressions is typically between 2.5 and 5 times
-slower than the corresponding compiled and optimized C++ code.
+Evaluating a Lepton expression is typically between 2.5 and 5 times
+slower than the corresponding compiled and optimized C++ code.  If
+additional speed or GPU acceleration (via GPU or KOKKOS) is required,
+the interaction can be represented as a table.  Suitable table files
+can be created either internally using the :doc:`pair_write <pair_write>`
+or :doc:`bond_write <bond_write>` command or through the Python scripts
+in the :ref:`tools/tabulate <tabulate>` folder.
diff --git a/doc/src/pair_lepton.rst b/doc/src/pair_lepton.rst
index 570388dc13..5cf11b6f91 100644
--- a/doc/src/pair_lepton.rst
+++ b/doc/src/pair_lepton.rst
@@ -27,9 +27,10 @@ Examples
 .. code-block:: LAMMPS
 
    pair_style lepton 2.5
-   pair_coeff * * "k*((r-r0)^2*step(r0-r)); k=200; r0=1.5" 2.0
-   pair_coeff 1 2 "4.0*eps*((sig/r)^12 - (sig/r)^6);eps=1.0;sig=1.0" 1.12246204830937
-   pair_coeff 2 2 "eps*(2.0*(sig/r)^9 - 3.0*(sig/r)^6);eps=1.0;sig=1.0"
+
+   pair_coeff  * *  "k*((r-r0)^2*step(r0-r)); k=200; r0=1.5" 2.0
+   pair_coeff  1 2  "4.0*eps*((sig/r)^12 - (sig/r)^6);eps=1.0;sig=1.0" 1.12246204830937
+   pair_coeff  2 2  "eps*(2.0*(sig/r)^9 - 3.0*(sig/r)^6);eps=1.0;sig=1.0"
 
 Description
 """""""""""
@@ -37,11 +38,11 @@ Description
 .. versionadded:: TBD
 
 Pair style *lepton* computes spherical pairwise interactions based on
-evaluating strings.  The potential function must be provided as an
-expression string using "r" as the distance variable, for example
-`"200.0*(r-1.5)^2"` represents a harmonic potential with equilibrium
-distance :math:`r_0` of 1.5 distance units and a force constant *K* of
-200.0 energy units:
+evaluating strings between neighboring atoms within the given cutoff.
+The potential function must be provided as an expression string using
+"r" as the distance variable. For example `"200.0*(r-1.5)^2"` represents
+a harmonic potential around the distance :math:`r_0` of 1.5 distance
+units and a force constant *K* of 200.0 energy units:
 
 .. math::
 
@@ -50,7 +51,7 @@ distance :math:`r_0` of 1.5 distance units and a force constant *K* of
 The `Lepton library <https://simtk.org/projects/lepton>`_, that the
 *lepton* pair style interfaces with, evaluates this expression string at
 run time to compute the pairwise energy.  It also creates an
-analytical representation of the differentiation of this expression with
+analytical representation of the first derivative of this expression with
 respect to "r" and then uses that to compute the force between the pairs
 of particles within the given cutoff.
 
@@ -81,18 +82,19 @@ different value than the global cutoff.
 Mixing, shift, table, tail correction, restart, rRESPA info
 """""""""""""""""""""""""""""""""""""""""""""""""""""""""""
 
-Pair style *lepton* does **not** support mixing.  Thus, expressions for
-all I,J pairs must be specified explicitly.
+Pair style *lepton* does not support mixing.  Thus, expressions for
+*all* I,J pairs must be specified explicitly.
 
-This pair style supports the :doc:`pair_modify <pair_modify>`
-shift option for the energy of the pair interaction.
+This pair style supports the :doc:`pair_modify shift <pair_modify>`
+option for shifting the energy of the pair interaction so that it is
+0 at the cutoff.
 
-The :doc:`pair_modify <pair_modify>` table options are not relevant for
+The :doc:`pair_modify table <pair_modify>` options are not relevant for
 the this pair style.
 
-This pair style does not support the :doc:`pair_modify <pair_modify>`
-tail option for adding long-range tail corrections to energy and
-pressure.
+This pair style does not support the :doc:`pair_modify tail
+<pair_modify>` option for adding long-range tail corrections to energy
+and pressure.
 
 This pair style writes its information to :doc:`binary restart files
 <restart>`, so pair_style and pair_coeff commands do not need to be
@@ -107,7 +109,7 @@ This pair style can only be used via the *pair* keyword of the
 Restrictions
 """"""""""""
 
-The *lepton* pair style is part of the LEPTON package and only enabled if
+This pair style is part of the LEPTON package and only enabled if
 LAMMPS was built with this package.  See the :doc:`Build package
 <Build_package>` page for more info.
 

From 5a99cf0dd5adab2f4543ac77010e6d3a749a774f Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Mon, 26 Dec 2022 16:44:49 -0500
Subject: [PATCH 40/79] add dihedral style lepton including /omp variant

---
 doc/src/Commands_bond.rst          |   1 +
 doc/src/dihedral_lepton.rst        |  89 +++++
 src/.gitignore                     |   2 +
 src/LEPTON/dihedral_lepton.cpp     | 515 +++++++++++++++++++++++++++++
 src/LEPTON/dihedral_lepton.h       |  50 +++
 src/OPENMP/dihedral_lepton_omp.cpp | 333 +++++++++++++++++++
 src/OPENMP/dihedral_lepton_omp.h   |  40 +++
 7 files changed, 1030 insertions(+)
 create mode 100644 doc/src/dihedral_lepton.rst
 create mode 100644 src/LEPTON/dihedral_lepton.cpp
 create mode 100644 src/LEPTON/dihedral_lepton.h
 create mode 100644 src/OPENMP/dihedral_lepton_omp.cpp
 create mode 100644 src/OPENMP/dihedral_lepton_omp.h

diff --git a/doc/src/Commands_bond.rst b/doc/src/Commands_bond.rst
index d067dfea7b..cfc896aa0e 100644
--- a/doc/src/Commands_bond.rst
+++ b/doc/src/Commands_bond.rst
@@ -129,6 +129,7 @@ OPT.
    * :doc:`fourier (io) <dihedral_fourier>`
    * :doc:`harmonic (iko) <dihedral_harmonic>`
    * :doc:`helix (o) <dihedral_helix>`
+   * :doc:`lepton (o) <dihedral_lepton>`
    * :doc:`multi/harmonic (o) <dihedral_multi_harmonic>`
    * :doc:`nharmonic (o) <dihedral_nharmonic>`
    * :doc:`opls (iko) <dihedral_opls>`
diff --git a/doc/src/dihedral_lepton.rst b/doc/src/dihedral_lepton.rst
new file mode 100644
index 0000000000..e030c3b7c4
--- /dev/null
+++ b/doc/src/dihedral_lepton.rst
@@ -0,0 +1,89 @@
+.. index:: dihedral_style lepton
+.. index:: dihedral_style lepton/omp
+
+dihedral_style lepton command
+=============================
+
+Accelerator Variants: *lepton/omp*
+
+Syntax
+""""""
+
+.. code-block:: LAMMPS
+
+   dihedral_style lepton
+
+Examples
+""""""""
+
+.. code-block:: LAMMPS
+
+   dihedral_style lepton
+
+   dihedral_coeff  1 "k*(1 + d*cos(n*phi)); k=75.0; d=1; n=2"
+   dihedral_coeff  2 "45*(1-cos(4*phi))"
+   dihedral_coeff  2 "k2*cos(phi) + k3*cos(phi)^2; k2=100.0; k3=50.0"
+   dihedral_coeff  3 "k*(phi-phi0)^2; k=85.0; phi0=2.0944"
+
+Description
+"""""""""""
+
+.. versionadded:: TBD
+
+Dihedral style *lepton* computes dihedral interactions between four
+atoms forming a dihedral angle with a custom potential function.  The
+potential function must be provided as an expression string using "phi"
+as the dihedral angle variable.  For example `"200.0*(phi-2.0944)^2"`
+represents a :doc:`quadratic dihedral <dihedral_quadratic>` potential
+around a 120 degree (2.0944 radian) dihedral angle with a force constant
+*K* of 200.0 energy units:
+
+.. math::
+
+   U_{dihedral,i} = K (\phi_i - \phi_0)^2
+
+The `Lepton library <https://simtk.org/projects/lepton>`_, that the
+*lepton* dihedral style interfaces with, evaluates this expression
+string at run time to compute the dihedral energy.  It also creates an
+analytical representation of the first derivative of this expression
+with respect to "phi" and then uses that to compute the force between
+the dihedral atoms as defined by the topology data.
+
+The potential function expression for each dihedral type is provided via the
+:doc:`dihedral_coeff <dihedral_coeff>` command as in the example above, or in
+the data file or restart files read by the :doc:`read_data <read_data>`
+or :doc:`read_restart <read_restart>` commands.  The expression is in energy units.
+
+The Lepton expression must be either enclosed in quotes or must not
+contain any whitespace so that LAMMPS recognizes it as a single keyword.
+More on valid Lepton expressions below.  Dihedral angles are internally
+computed in radians and thus the expression must assume "phi" is in
+radians.
+
+----------
+
+.. include:: lepton_expression.rst
+
+----------
+
+.. include:: accel_styles.rst
+
+----------
+
+Restrictions
+""""""""""""
+
+This dihedral style is part of the LEPTON package and only enabled if LAMMPS
+was built with this package.  See the :doc:`Build package
+<Build_package>` page for more info.
+
+Related commands
+""""""""""""""""
+
+:doc:`dihedral_coeff <dihedral_coeff>`, :doc:`dihedral_style table <dihedral_table>`,
+:doc:`bond_style lepton <bond_lepton>`, :doc:`angle_style lepton <angle_lepton>`
+
+Default
+"""""""
+
+none
diff --git a/src/.gitignore b/src/.gitignore
index 7dd4ad0f3c..4c55e31a83 100644
--- a/src/.gitignore
+++ b/src/.gitignore
@@ -60,6 +60,8 @@
 /bond_lepton.h
 /angle_lepton.cpp
 /angle_lepton.h
+/dihedral_lepton.cpp
+/dihedral_lepton.h
 /lepton_utils.cpp
 /lepton_utils.h
 
diff --git a/src/LEPTON/dihedral_lepton.cpp b/src/LEPTON/dihedral_lepton.cpp
new file mode 100644
index 0000000000..acd0a4574d
--- /dev/null
+++ b/src/LEPTON/dihedral_lepton.cpp
@@ -0,0 +1,515 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+   Using parts of dihedral_table.cpp by Andrew Jewett (jewett.aij at gmail)
+------------------------------------------------------------------------- */
+
+#include "dihedral_lepton.h"
+
+#include "atom.h"
+#include "comm.h"
+#include "domain.h"
+#include "error.h"
+#include "force.h"
+#include "math_const.h"
+#include "math_extra.h"
+#include "memory.h"
+#include "neighbor.h"
+
+#include <cmath>
+
+#include "LMP_Lepton.h"
+#include "lepton_utils.h"
+
+using namespace LAMMPS_NS;
+using MathConst::DEG2RAD;
+using MathConst::MY_2PI;
+using MathConst::RAD2DEG;
+using MathExtra::cross3;
+using MathExtra::dot3;
+using MathExtra::norm3;
+
+static constexpr int g_dim = 3;    // number of Cartesian components (d=0,1,2) of the vectors below
+
+/* ---------------------------------------------------------------------- */
+
+DihedralLepton::DihedralLepton(LAMMPS *_lmp) : Dihedral(_lmp), type2expression(nullptr)
+{
+  writedata = 1;    // NOTE(review): presumably announces that write_data() is implemented - confirm
+}
+
+/* ---------------------------------------------------------------------- */
+
+DihedralLepton::~DihedralLepton()
+{
+  // the per-type arrays are only created by allocate(), which also sets "allocated"
+  if (!allocated) return;
+
+  memory->destroy(setflag);
+  memory->destroy(type2expression);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void DihedralLepton::compute(int eflag, int vflag)
+{
+  ev_init(eflag, vflag);    // called exactly once; evflag is tested right below
+  // pick the eval<EVFLAG, EFLAG, NEWTON_BOND> instantiation that matches the run-time flags
+  if (evflag) {
+    if (eflag) {
+      if (force->newton_bond)
+        eval<1, 1, 1>();
+      else
+        eval<1, 1, 0>();
+    } else {
+      if (force->newton_bond)
+        eval<1, 0, 1>();
+      else
+        eval<1, 0, 0>();
+    }
+  } else {
+    if (force->newton_bond)
+      eval<0, 0, 1>();
+    else
+      eval<0, 0, 0>();
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND> void DihedralLepton::eval()
+{    // add the forces (and, if requested, energy/virial tally) of all dihedrals in the neighbor list
+  std::vector<LMP_Lepton::CompiledExpression> dihedralforce;    // dU/dphi, one per expression
+  std::vector<LMP_Lepton::CompiledExpression> dihedralpot;      // U(phi), only filled if EFLAG
+  try {
+    for (const auto &expr : expressions) {
+      auto parsed = LMP_Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp));
+      dihedralforce.emplace_back(parsed.differentiate("phi").createCompiledExpression());
+      if (EFLAG) dihedralpot.emplace_back(parsed.createCompiledExpression());
+    }
+  } catch (std::exception &e) {
+    error->all(FLERR, e.what());
+  }
+
+  const double *const *const x = atom->x;
+  double *const *const f = atom->f;
+  const int *const *const dihedrallist = neighbor->dihedrallist;
+  const int ndihedrallist = neighbor->ndihedrallist;
+  const int nlocal = atom->nlocal;
+
+  // The dihedral angle "phi" is the angle between n123 and n234, the normals
+  // of the planes defined by atoms i1,i2,i3, and i2,i3,i4.
+  //
+  // Definitions of vectors: vb12, vb23, vb34, perp12on23
+  //                         proj12on23, perp34on23, proj34on23
+  //
+  //  Note: The positions of the 4 atoms are labeled x[i1], x[i2], x[i3], x[i4]
+  //        (which are also vectors)
+  //
+  //             proj12on23                          proj34on23
+  //             --------->                         ----------->
+  //                           .
+  //                          .
+  //                         .
+  //                  x[i2] .                       x[i3]
+  //    .                __@----------vb23-------->@ . . . .           .
+  //   /|\                /|                        \                  |
+  //    |                /                           \                 |
+  //    |               /                             \                |
+  // perp12on23        /                               \               |
+  //    |             /                                 \          perp34on23
+  //    |          vb12                                  \             |
+  //    |           /                                   vb34           |
+  //    |          /                                       \           |
+  //    |         /                                         \          |
+  //    |        /                                           \         |
+  //            @                                             \        |
+  //                                                          _\|     \|/
+  //         x[i1]                                              @
+  //
+  //                                                           x[i4]
+  //
+
+  double vb12[g_dim];    // displacement vector from atom i1 towards atom i2
+  //     vb12[d]       = x[i2][d] - x[i1][d]      (for d=0,1,2)
+  double vb23[g_dim];    // displacement vector from atom i2 towards atom i3
+  //     vb23[d]       = x[i3][d] - x[i2][d]      (for d=0,1,2)
+  double vb34[g_dim];    // displacement vector from atom i3 towards atom i4
+  //     vb34[d]       = x[i4][d] - x[i3][d]      (for d=0,1,2)
+
+  //  n123 & n234: These two unit vectors are normal to the planes
+  //               defined by atoms 1,2,3 and 2,3,4.
+  double n123[g_dim];    //n123=vb23 x vb12 / |vb23 x vb12|  ("x" is cross product)
+  double n234[g_dim];    //n234=vb23 x vb34 / |vb23 x vb34|  ("x" is cross product)
+
+  double proj12on23[g_dim];
+  //    proj12on23[d] = vb23[d] * dot3(vb12,vb23) / |vb23|^2
+  double proj34on23[g_dim];
+  //    proj34on23[d] = vb23[d] * dot3(vb23,vb34) / |vb23|^2
+  double perp12on23[g_dim];
+  //    perp12on23[d] = vb12[d] - proj12on23[d]
+  double perp34on23[g_dim];
+  //    perp34on23[d] = vb34[d] - proj34on23[d]
+
+  double f1[3], f2[3], f3[3], f4[3];    // forces on atoms i1..i4 of the current dihedral
+
+  for (int n = 0; n < ndihedrallist; n++) {
+    const int i1 = dihedrallist[n][0];
+    const int i2 = dihedrallist[n][1];
+    const int i3 = dihedrallist[n][2];
+    const int i4 = dihedrallist[n][3];
+    const int type = dihedrallist[n][4];
+
+    // ------ Step 1: Compute the dihedral angle "phi" ------
+    //
+
+    // get_phi() calculates the dihedral angle.
+    // This function also calculates the vectors:
+    // vb12, vb23, vb34, n123, and n234, which we will need later.
+
+    const double phi = get_phi(x[i1], x[i2], x[i3], x[i4], domain, vb12, vb23, vb34, n123, n234);
+
+    // ------ Step 2: Compute the gradient of phi with atomic position: ------
+    //
+    // Gradient variables:
+    //
+    // dphi_dx1, dphi_dx2, dphi_dx3, dphi_dx4 are the gradients of phi with
+    // respect to the atomic positions of atoms i1, i2, i3, i4, respectively.
+    // As an example, consider dphi_dx1.  The d'th element is:
+    double dphi_dx1[g_dim];    //                 d phi
+    double dphi_dx2[g_dim];    // dphi_dx1[d] = ----------    (partial derivatives)
+    double dphi_dx3[g_dim];    //               d x[i1][d]
+    double dphi_dx4[g_dim];    //where d=0,1,2 corresponds to x,y,z  (if g_dim==3)
+
+    double dot123 = dot3(vb12, vb23);
+    double dot234 = dot3(vb23, vb34);
+    double L23sqr = dot3(vb23, vb23);
+    double L23 = sqrt(L23sqr);    // (central bond length)
+    double inv_L23sqr = 0.0;
+    double inv_L23 = 0.0;
+    if (L23sqr != 0.0) {    // the inverses stay 0 for a zero-length central bond
+      inv_L23sqr = 1.0 / L23sqr;
+      inv_L23 = 1.0 / L23;
+    }
+    double neg_inv_L23 = -inv_L23;
+    double dot123_over_L23sqr = dot123 * inv_L23sqr;
+    double dot234_over_L23sqr = dot234 * inv_L23sqr;
+
+    for (int d = 0; d < g_dim; ++d) {
+      // See figure above for visual definitions of these vectors:
+      proj12on23[d] = vb23[d] * dot123_over_L23sqr;
+      proj34on23[d] = vb23[d] * dot234_over_L23sqr;
+      perp12on23[d] = vb12[d] - proj12on23[d];
+      perp34on23[d] = vb34[d] - proj34on23[d];
+    }
+
+    // --- Compute the gradient vectors dphi/dx1 and dphi/dx4: ---
+
+    // These two gradients point in the direction of n123 and n234, and are
+    // scaled by the inverse distances of atoms 1 and 4 from the central axis.
+    // Distance of atom 1 to central axis:
+    double perp12on23_len = sqrt(dot3(perp12on23, perp12on23));
+    // Distance of atom 4 to central axis:
+    double perp34on23_len = sqrt(dot3(perp34on23, perp34on23));
+
+    double inv_perp12on23 = 0.0;
+    if (perp12on23_len != 0.0) inv_perp12on23 = 1.0 / perp12on23_len;
+    double inv_perp34on23 = 0.0;
+    if (perp34on23_len != 0.0) inv_perp34on23 = 1.0 / perp34on23_len;
+
+    for (int d = 0; d < g_dim; ++d) {
+      dphi_dx1[d] = n123[d] * inv_perp12on23;
+      dphi_dx4[d] = n234[d] * inv_perp34on23;
+    }
+
+    // --- Compute the gradient vectors dphi/dx2 and dphi/dx3: ---
+    //
+    // This is more tricky because atoms 2 and 3 are shared by both planes
+    // 123 and 234 (the angle between which defines "phi").  Moving either
+    // one of these atoms affects both the 123 and 234 planes.
+    // Both the 123 and 234 planes intersect with the plane perpendicular to the
+    // central bond axis (vb23).  The two lines where these intersections occur
+    // will shift when you move either atom 2 or atom 3.  The angle between
+    // these lines is the dihedral angle, phi.  We can define four quantities:
+    // dphi123_dx2 is the change in "phi" due to the movement of the 123 plane
+    //             ...as a result of moving atom 2.
+    // dphi234_dx2 is the change in "phi" due to the movement of the 234 plane
+    //             ...as a result of moving atom 2.
+    // dphi123_dx3 is the change in "phi" due to the movement of the 123 plane
+    //             ...as a result of moving atom 3.
+    // dphi234_dx3 is the change in "phi" due to the movement of the 234 plane
+    //             ...as a result of moving atom 3.
+
+    double proj12on23_len = dot123 * inv_L23;
+    double proj34on23_len = dot234 * inv_L23;
+    // Interpretation:
+    //The magnitude of "proj12on23_len" is the length of the proj12on23 vector.
+    //The sign is positive if it points in the same direction as the central
+    //bond (vb23).  Otherwise it is negative.  The same goes for "proj34on23".
+    //(In the example figure in the comment above, both variables are positive.)
+
+    // The formula used in the 8 lines below is explained here:
+    //   "supporting_information/doc/gradient_formula_explanation/"
+    double dphi123_dx2_coef = neg_inv_L23 * (L23 + proj12on23_len);
+    double dphi234_dx2_coef = inv_L23 * proj34on23_len;
+
+    double dphi234_dx3_coef = neg_inv_L23 * (L23 + proj34on23_len);
+    double dphi123_dx3_coef = inv_L23 * proj12on23_len;
+
+    for (int d = 0; d < g_dim; ++d) {
+      // Recall that the n123 and n234 plane normal vectors are proportional to
+      // the dphi/dx1 and dphi/dx4 gradients vectors
+      // It turns out we can save slightly more CPU cycles by expressing
+      // dphi/dx2 and dphi/dx3 as linear combinations of dphi/dx1 and dphi/dx4
+      // which we computed already (instead of n123 & n234).
+      dphi_dx2[d] = dphi123_dx2_coef * dphi_dx1[d] + dphi234_dx2_coef * dphi_dx4[d];
+      dphi_dx3[d] = dphi123_dx3_coef * dphi_dx1[d] + dphi234_dx3_coef * dphi_dx4[d];
+    }
+    // ----- Step 3: Evaluate -dU/dphi with the compiled expression of this dihedral type -----
+    const int idx = type2expression[type];
+    dihedralforce[idx].getVariableReference("phi") = phi;
+    double m_du_dphi = -dihedralforce[idx].evaluate();
+
+    // ----- Step 4: Calculate the force direction in real space -----
+
+    // chain rule:
+    //          d U          d U      d phi
+    // -f  =   -----   =    -----  *  -----
+    //          d x         d phi      d x
+    for (int d = 0; d < g_dim; ++d) {
+      f1[d] = m_du_dphi * dphi_dx1[d];
+      f2[d] = m_du_dphi * dphi_dx2[d];
+      f3[d] = m_du_dphi * dphi_dx3[d];
+      f4[d] = m_du_dphi * dphi_dx4[d];
+    }
+
+    // apply force to each of 4 atoms; with NEWTON_BOND == 0 only to atoms with index < nlocal
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += f1[0];
+      f[i1][1] += f1[1];
+      f[i1][2] += f1[2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] += f2[0];
+      f[i2][1] += f2[1];
+      f[i2][2] += f2[2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += f3[0];
+      f[i3][1] += f3[1];
+      f[i3][2] += f3[2];
+    }
+
+    if (NEWTON_BOND || i4 < nlocal) {
+      f[i4][0] += f4[0];
+      f[i4][1] += f4[1];
+      f[i4][2] += f4[2];
+    }
+
+    double edihedral = 0.0;    // stays 0 unless the energy is requested
+    if (EFLAG) {
+      dihedralpot[idx].getVariableReference("phi") = phi;
+      edihedral = dihedralpot[idx].evaluate();
+    }
+    if (EVFLAG)
+      ev_tally(i1, i2, i3, i4, nlocal, NEWTON_BOND, edihedral, f1, f3, f4, -vb12[0], -vb12[1],
+               -vb12[2], vb23[0], vb23[1], vb23[2], vb34[0], vb34[1], vb34[2]);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void DihedralLepton::allocate()
+{
+  allocated = 1;
+
+  // dihedral types are numbered from 1, so both per-type arrays get one extra slot
+  const int nslots = atom->ndihedraltypes + 1;
+  memory->create(type2expression, nslots, "dihedral:type2expression");
+  memory->create(setflag, nslots, "dihedral:setflag");
+  for (int itype = 1; itype < nslots; ++itype) setflag[itype] = 0;    // nothing is set yet
+}
+
+/* ----------------------------------------------------------------------
+   set coeffs for one or more types
+------------------------------------------------------------------------- */
+
+void DihedralLepton::coeff(int narg, char **arg)
+{
+  if (narg != 2) error->all(FLERR, "Incorrect number of args for dihedral coefficients");
+  if (!allocated) allocate();
+
+  // translate the type argument into the inclusive range tlo..thi
+  int tlo, thi;
+  utils::bounds(FLERR, arg[0], 1, atom->ndihedraltypes, tlo, thi, error);
+
+  // strip whitespace and quotes, then do a trial parse, differentiation, and
+  // evaluation at phi = 0 so that a broken expression is rejected right away
+  const std::string condensed = LeptonUtils::condense(arg[1]);
+  try {
+    auto tree = LMP_Lepton::Parser::parse(LeptonUtils::substitute(condensed, lmp));
+    auto energy = tree.createCompiledExpression();
+    auto gradient = tree.differentiate("phi").createCompiledExpression();
+    energy.getVariableReference("phi") = 0.0;
+    gradient.getVariableReference("phi") = 0.0;
+    gradient.evaluate();
+  } catch (std::exception &err) {
+    error->all(FLERR, err.what());
+  }
+
+  // look for an identical expression that is already stored; "slot" ends up
+  // equal to expressions.size() when there is none and a new entry is appended
+  std::size_t slot = 0;
+  while ((slot < expressions.size()) && (expressions[slot] != condensed)) ++slot;
+  if (slot == expressions.size()) expressions.push_back(condensed);
+
+  // point every type in the range at that entry and flag it as set
+  int nset = 0;
+  for (int itype = tlo; itype <= thi; ++itype) {
+    setflag[itype] = 1;
+    type2expression[itype] = slot;
+    ++nset;
+  }
+
+  // an empty range means nothing was assigned
+  if (nset == 0) error->all(FLERR, "Incorrect args for dihedral coefficients");
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes out coeffs to restart file
+------------------------------------------------------------------------- */
+
+void DihedralLepton::write_restart(FILE *fp)
+{
+  // per-type expression indices first (types are numbered from 1)
+  fwrite(&type2expression[1], sizeof(int), atom->ndihedraltypes, fp);
+
+  // buffer size a reader needs: longest expression plus its terminating null byte
+  int bufsize = 1;
+  for (const auto &text : expressions) bufsize = MAX(bufsize, (int) text.size() + 1);
+  const int nexpr = expressions.size();
+  fwrite(&nexpr, sizeof(int), 1, fp);
+  fwrite(&bufsize, sizeof(int), 1, fp);
+  for (const auto &text : expressions) {    // byte count, then the null-terminated string
+    const int nbytes = text.size() + 1;
+    fwrite(&nbytes, sizeof(int), 1, fp);
+    fwrite(text.c_str(), sizeof(char), nbytes, fp);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 reads coeffs from restart file, bcasts them
+------------------------------------------------------------------------- */
+
+void DihedralLepton::read_restart(FILE *fp)
+{
+  allocate();
+
+  if (comm->me == 0) {
+    utils::sfread(FLERR, &type2expression[1], sizeof(int), atom->ndihedraltypes, fp, nullptr,
+                  error);
+  }
+  MPI_Bcast(&type2expression[1], atom->ndihedraltypes, MPI_INT, 0, world);
+  for (int i = 1; i <= atom->ndihedraltypes; i++) setflag[i] = 1;    // every type is now set
+
+  int num, maxlen, len;    // expression count, buffer size, length of current string record
+  if (comm->me == 0) {
+    utils::sfread(FLERR, &num, sizeof(int), 1, fp, nullptr, error);
+    utils::sfread(FLERR, &maxlen, sizeof(int), 1, fp, nullptr, error);
+  }
+  MPI_Bcast(&num, 1, MPI_INT, 0, world);
+  MPI_Bcast(&maxlen, 1, MPI_INT, 0, world);
+
+  char *buf = new char[maxlen];    // scratch buffer reused for every expression
+
+  for (int i = 0; i < num; ++i) {
+    if (comm->me == 0) {
+      utils::sfread(FLERR, &len, sizeof(int), 1, fp, nullptr, error);
+      utils::sfread(FLERR, buf, sizeof(char), len, fp, nullptr, error);
+    }
+    MPI_Bcast(buf, maxlen, MPI_CHAR, 0, world);    // len is only known on rank 0, send it all
+    expressions.push_back(buf);    // relies on the null terminator stored by write_restart()
+  }
+
+  delete[] buf;
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes to data file
+------------------------------------------------------------------------- */
+
+void DihedralLepton::write_data(FILE *fp)
+{
+  for (int i = 1; i <= atom->ndihedraltypes; i++)    // one line per type: id and expression
+    fprintf(fp, "%d %s\n", i, expressions[type2expression[i]].c_str());
+}
+
+// --------------------------------------------
+// ------- Calculate the dihedral angle -------
+// --------------------------------------------
+
+double DihedralLepton::get_phi(double const *x1,    // array holding x,y,z coords of atom 1
+                               double const *x2,    // :       :      :      :        2
+                               double const *x3,    // :       :      :      :        3
+                               double const *x4,    // :       :      :      :        4
+                               Domain *domain,      // <- periodic boundary information
+                               // The following arrays are of doubles with g_dim elements.
+                               // (g_dim is a constant known at compile time, usually 3).
+                               // Their contents are calculated by this function.
+                               // Space for these vectors must be allocated in advance.
+                               // (This is not hidden internally because these vectors
+                               //  may be needed outside the function, later on.)
+                               double *vb12,    // will store x2-x1
+                               double *vb23,    // will store x3-x2
+                               double *vb34,    // will store x4-x3
+                               double *n123,    // will store normal to plane x1,x2,x3
+                               double *n234)    // will store normal to plane x2,x3,x4
+    const
+{
+  for (int d = 0; d < g_dim; ++d) {
+    vb12[d] = x2[d] - x1[d];    // 1st bond
+    vb23[d] = x3[d] - x2[d];    // 2nd bond
+    vb34[d] = x4[d] - x3[d];    // 3rd bond
+  }
+
+  // Consider periodic boundary conditions:
+  domain->minimum_image(vb12[0], vb12[1], vb12[2]);
+  domain->minimum_image(vb23[0], vb23[1], vb23[2]);
+  domain->minimum_image(vb34[0], vb34[1], vb34[2]);
+
+  //--- Compute the normal to the planes formed by atoms 1,2,3 and 2,3,4 ---
+
+  cross3(vb23, vb12, n123);    // <- n123=vb23 x vb12
+  cross3(vb23, vb34, n234);    // <- n234=vb23 x vb34
+
+  norm3(n123);
+  norm3(n234);
+
+  double cos_phi = -dot3(n123, n234);
+
+  if (cos_phi > 1.0)    // clamp to [-1,1], the valid argument range of acos()
+    cos_phi = 1.0;
+  else if (cos_phi < -1.0)
+    cos_phi = -1.0;
+
+  double phi = acos(cos_phi);
+
+  if (dot3(n123, vb34) > 0.0) {
+    phi = -phi;       // (Note: Negative dihedral angles are possible only in 3-D.)
+    phi += MY_2PI;    // <- This ensures phi is always in the range 0 to 2*PI
+  }
+  return phi;
+}
diff --git a/src/LEPTON/dihedral_lepton.h b/src/LEPTON/dihedral_lepton.h
new file mode 100644
index 0000000000..4baf46c057
--- /dev/null
+++ b/src/LEPTON/dihedral_lepton.h
@@ -0,0 +1,50 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef DIHEDRAL_CLASS
+// clang-format off
+DihedralStyle(lepton,DihedralLepton);
+// clang-format on
+#else
+
+#ifndef LMP_DIHEDRAL_LEPTON_H
+#define LMP_DIHEDRAL_LEPTON_H
+
+#include "dihedral.h"
+
+namespace LAMMPS_NS {
+
+class DihedralLepton : public Dihedral {
+ public:
+  DihedralLepton(class LAMMPS *);
+  ~DihedralLepton() override;
+  void compute(int, int) override;
+  void coeff(int, char **) override;    // args: dihedral type range, expression string
+  void write_restart(FILE *) override;
+  void read_restart(FILE *) override;
+  void write_data(FILE *) override;
+
+ protected:
+  std::vector<std::string> expressions;    // distinct expression strings, shared between types
+  int *type2expression;                    // per dihedral type: index into expressions
+
+  virtual void allocate();
+  double get_phi(double const *, double const *, double const *, double const *,
+                 class Domain *domain, double *, double *, double *, double *, double *) const;
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND> void eval();    // templated force kernel
+};
+}    // namespace LAMMPS_NS
+#endif
+#endif
diff --git a/src/OPENMP/dihedral_lepton_omp.cpp b/src/OPENMP/dihedral_lepton_omp.cpp
new file mode 100644
index 0000000000..655d0807d4
--- /dev/null
+++ b/src/OPENMP/dihedral_lepton_omp.cpp
@@ -0,0 +1,333 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "dihedral_lepton_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "suffix.h"
+#include "math_extra.h"
+
+#include <cmath>
+
+#include "LMP_Lepton.h"
+#include "lepton_utils.h"
+#include "omp_compat.h"
+using namespace LAMMPS_NS;
+using MathExtra::dot3;
+
+static constexpr int g_dim = 3;
+
+/* ---------------------------------------------------------------------- */
+
+DihedralLeptonOMP::DihedralLeptonOMP(class LAMMPS *lmp) :
+    DihedralLepton(lmp), ThrOMP(lmp, THR_DIHEDRAL)
+{
+  suffix_flag |= Suffix::OMP;    // flag this style as the /omp suffix variant
+}
+
+/* ---------------------------------------------------------------------- */
+
+void DihedralLeptonOMP::compute(int eflag, int vflag)
+{
+  ev_init(eflag, vflag);
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->ndihedrallist;    // number of dihedrals to process
+
+#if defined(_OPENMP)
+#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag, vflag)
+#endif
+  {
+    int ifrom, ito, tid;    // per-thread values; eval() processes indices [ifrom, ito)
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, nullptr, thr);
+
+    if (inum > 0) {    // pick the eval<EVFLAG, EFLAG, NEWTON_BOND> instance from run-time flags
+      if (evflag) {
+        if (eflag) {
+          if (force->newton_bond)
+            eval<1, 1, 1>(ifrom, ito, thr);
+          else
+            eval<1, 1, 0>(ifrom, ito, thr);
+        } else {
+          if (force->newton_bond)
+            eval<1, 0, 1>(ifrom, ito, thr);
+          else
+            eval<1, 0, 0>(ifrom, ito, thr);
+        }
+      } else {
+        if (force->newton_bond)
+          eval<0, 0, 1>(ifrom, ito, thr);
+        else
+          eval<0, 0, 0>(ifrom, ito, thr);
+      }
+    }
+    thr->timer(Timer::BOND);
+    reduce_thr(this, eflag, vflag, thr);
+  }    // end of omp parallel region
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void DihedralLeptonOMP::eval(int nfrom, int nto, ThrData *const thr)
+{
+  std::vector<LMP_Lepton::CompiledExpression> dihedralforce;
+  std::vector<LMP_Lepton::CompiledExpression> dihedralpot;
+  try {
+    for (const auto &expr : expressions) {
+      auto parsed = LMP_Lepton::Parser::parse(LeptonUtils::substitute(expr, Pointers::lmp));
+      dihedralforce.emplace_back(parsed.differentiate("phi").createCompiledExpression());
+      if (EFLAG) dihedralpot.emplace_back(parsed.createCompiledExpression());
+    }
+  } catch (std::exception &e) {
+    error->all(FLERR, e.what());
+  }
+
+  const double *const *const x = atom->x;
+  auto *_noalias const f = (dbl3_t *) thr->get_f()[0];
+  const int * const * const dihedrallist = neighbor->dihedrallist;
+  const int nlocal = atom->nlocal;
+
+  // The dihedral angle "phi" is the angle between n123 and n234
+  // the planes defined by atoms i1,i2,i3, and i2,i3,i4.
+  //
+  // Definitions of vectors: vb12, vb23, vb34, perp12on23
+  //                         proj12on23, perp34on23, proj34on23
+  //
+  //  Note: The positions of the 4 atoms are labeled x[i1], x[i2], x[i3], x[i4]
+  //        (which are also vectors)
+  //
+  //             proj12on23                          proj34on23
+  //             --------->                         ----------->
+  //                           .
+  //                          .
+  //                         .
+  //                  x[i2] .                       x[i3]
+  //    .                __@----------vb23-------->@ . . . .           .
+  //   /|\                /|                        \                  |
+  //    |                /                           \                 |
+  //    |               /                             \                |
+  // perp12vs23        /                               \               |
+  //    |             /                                 \          perp34vs23
+  //    |          vb12                                  \             |
+  //    |           /                                   vb34           |
+  //    |          /                                       \           |
+  //    |         /                                         \          |
+  //    |        /                                           \         |
+  //            @                                             \        |
+  //                                                          _\|     \|/
+  //         x[i1]                                              @
+  //
+  //                                                           x[i4]
+  //
+
+  double vb12[g_dim];    // displacement vector from atom i1 towards atom i2
+  //     vb12[d]       = x[i2][d] - x[i1][d]      (for d=0,1,2)
+  double vb23[g_dim];    // displacement vector from atom i2 towards atom i3
+  //     vb23[d]       = x[i3][d] - x[i2][d]      (for d=0,1,2)
+  double vb34[g_dim];    // displacement vector from atom i3 towards atom i4
+  //     vb34[d]       = x[i4][d] - x[i3][d]      (for d=0,1,2)
+
+  //  n123 & n234: These two unit vectors are normal to the planes
+  //               defined by atoms 1,2,3 and 2,3,4.
+  double n123[g_dim];    //n123=vb23 x vb12 / |vb23 x vb12|  ("x" is cross product)
+  double n234[g_dim];    //n234=vb23 x vb34 / |vb23 x vb34|  ("x" is cross product)
+
+  double proj12on23[g_dim];
+  //    proj12on23[d] = vb23[d] * dot3(vb12,vb23) / |vb23|^2
+  double proj34on23[g_dim];
+  //    proj34on23[d] = vb23[d] * dot3(vb23,vb34) / |vb23|^2
+  double perp12on23[g_dim];
+  //    perp12on23[d] = vb12[d] - proj12on23[d]
+  double perp34on23[g_dim];
+  //    perp34on23[d] = vb34[d] - proj34on23[d]
+
+  double f1[3], f2[3], f3[3], f4[3];
+
+  for (int n = nfrom; n < nto; n++) {
+    const int i1 = dihedrallist[n][0];
+    const int i2 = dihedrallist[n][1];
+    const int i3 = dihedrallist[n][2];
+    const int i4 = dihedrallist[n][3];
+    const int type = dihedrallist[n][4];
+
+    // ------ Step 1: Compute the dihedral angle "phi" ------
+    //
+
+    // get_phi() calculates the dihedral angle.
+    // This function also calculates the vectors:
+    // vb12, vb23, vb34, n123, and n234, which we will need later.
+
+    const double phi = get_phi(x[i1], x[i2], x[i3], x[i4], domain, vb12, vb23, vb34, n123, n234);
+
+    // ------ Step 2: Compute the gradient of phi with atomic position: ------
+    //
+    // Gradient variables:
+    //
+    // dphi_dx1, dphi_dx2, dphi_dx3, dphi_dx4 are the gradients of phi with
+    // respect to the atomic positions of atoms i1, i2, i3, i4, respectively.
+    // As an example, consider dphi_dx1.  The d'th element is:
+    double dphi_dx1[g_dim];    //                 d phi
+    double dphi_dx2[g_dim];    // dphi_dx1[d] = ----------    (partial derivatives)
+    double dphi_dx3[g_dim];    //               d x[i1][d]
+    double dphi_dx4[g_dim];    //where d=0,1,2 corresponds to x,y,z  (if g_dim==3)
+
+    double dot123 = dot3(vb12, vb23);
+    double dot234 = dot3(vb23, vb34);
+    double L23sqr = dot3(vb23, vb23);
+    double L23 = sqrt(L23sqr);    // (central bond length)
+    double inv_L23sqr = 0.0;
+    double inv_L23 = 0.0;
+    if (L23sqr != 0.0) {
+      inv_L23sqr = 1.0 / L23sqr;
+      inv_L23 = 1.0 / L23;
+    }
+    double neg_inv_L23 = -inv_L23;
+    double dot123_over_L23sqr = dot123 * inv_L23sqr;
+    double dot234_over_L23sqr = dot234 * inv_L23sqr;
+
+    for (int d = 0; d < g_dim; ++d) {
+      // See figure above for a visual definitions of these vectors:
+      proj12on23[d] = vb23[d] * dot123_over_L23sqr;
+      proj34on23[d] = vb23[d] * dot234_over_L23sqr;
+      perp12on23[d] = vb12[d] - proj12on23[d];
+      perp34on23[d] = vb34[d] - proj34on23[d];
+    }
+
+    // --- Compute the gradient vectors dphi/dx1 and dphi/dx4: ---
+
+    // These two gradients point in the direction of n123 and n234,
+    // and are scaled by the distances of atoms 1 and 4 from the central axis.
+    // Distance of atom 1 to central axis:
+    double perp12on23_len = sqrt(dot3(perp12on23, perp12on23));
+    // Distance of atom 4 to central axis:
+    double perp34on23_len = sqrt(dot3(perp34on23, perp34on23));
+
+    double inv_perp12on23 = 0.0;
+    if (perp12on23_len != 0.0) inv_perp12on23 = 1.0 / perp12on23_len;
+    double inv_perp34on23 = 0.0;
+    if (perp34on23_len != 0.0) inv_perp34on23 = 1.0 / perp34on23_len;
+
+    for (int d = 0; d < g_dim; ++d) {
+      dphi_dx1[d] = n123[d] * inv_perp12on23;
+      dphi_dx4[d] = n234[d] * inv_perp34on23;
+    }
+
+    // --- Compute the gradient vectors dphi/dx2 and dphi/dx3: ---
+    //
+    // This is more tricky because atoms 2 and 3 are shared by both planes
+    // 123 and 234 (the angle between which defines "phi").  Moving either
+    // one of these atoms affects both the 123 and 234 planes.
+    // Both the 123 and 234 planes intersect with the plane perpendicular to the
+    // central bond axis (vb23).  The two lines where these intersections occur
+    // will shift when you move either atom 2 or atom 3.  The angle between
+    // these lines is the dihedral angle, phi.  We can define four quantities:
+    // dphi123_dx2 is the change in "phi" due to the movement of the 123 plane
+    //             ...as a result of moving atom 2.
+    // dphi234_dx2 is the change in "phi" due to the movement of the 234 plane
+    //             ...as a result of moving atom 2.
+    // dphi123_dx3 is the change in "phi" due to the movement of the 123 plane
+    //             ...as a result of moving atom 3.
+    // dphi234_dx3 is the change in "phi" due to the movement of the 234 plane
+    //             ...as a result of moving atom 3.
+
+    double proj12on23_len = dot123 * inv_L23;
+    double proj34on23_len = dot234 * inv_L23;
+    // Interpretation:
+    // The magnitude of "proj12on23_len" is the length of the proj12on23 vector.
+    // The sign is positive if it points in the same direction as the central
+    // bond (vb23).  Otherwise it is negative.  The same goes for "proj34on23".
+    // (In the example figure in the comment above, both variables are positive.)
+
+    // The formula used in the lines below is explained here:
+    //   "supporting_information/doc/gradient_formula_explanation/"
+    double dphi123_dx2_coef = neg_inv_L23 * (L23 + proj12on23_len);
+    double dphi234_dx2_coef = inv_L23 * proj34on23_len;
+
+    double dphi234_dx3_coef = neg_inv_L23 * (L23 + proj34on23_len);
+    double dphi123_dx3_coef = inv_L23 * proj12on23_len;
+
+    for (int d = 0; d < g_dim; ++d) {
+      // Recall that the n123 and n234 plane normal vectors are proportional to
+      // the dphi/dx1 and dphi/dx4 gradient vectors.
+      // It turns out we can save slightly more CPU cycles by expressing
+      // dphi/dx2 and dphi/dx3 as linear combinations of dphi/dx1 and dphi/dx4
+      // which we computed already (instead of n123 & n234).
+      dphi_dx2[d] = dphi123_dx2_coef * dphi_dx1[d] + dphi234_dx2_coef * dphi_dx4[d];
+      dphi_dx3[d] = dphi123_dx3_coef * dphi_dx1[d] + dphi234_dx3_coef * dphi_dx4[d];
+    }
+
+    const int idx = type2expression[type];    // Step 3: evaluate -dU/dphi for this type
+    dihedralforce[idx].getVariableReference("phi") = phi;
+    double m_du_dphi = -dihedralforce[idx].evaluate();
+
+    // ----- Step 4: Calculate the force direction in real space -----
+
+    // chain rule:
+    //          d U          d U      d phi
+    // -f  =   -----   =    -----  *  -----
+    //          d x         d phi      d x
+    for (int d = 0; d < g_dim; ++d) {
+      f1[d] = m_du_dphi * dphi_dx1[d];
+      f2[d] = m_du_dphi * dphi_dx2[d];
+      f3[d] = m_du_dphi * dphi_dx3[d];
+      f4[d] = m_du_dphi * dphi_dx4[d];
+    }
+
+    // apply force to each of 4 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1].x += f1[0];
+      f[i1].y += f1[1];
+      f[i1].z += f1[2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2].x += f2[0];
+      f[i2].y += f2[1];
+      f[i2].z += f2[2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3].x += f3[0];
+      f[i3].y += f3[1];
+      f[i3].z += f3[2];
+    }
+
+    if (NEWTON_BOND || i4 < nlocal) {
+      f[i4].x += f4[0];
+      f[i4].y += f4[1];
+      f[i4].z += f4[2];
+    }
+
+    double edihedral = 0.0;
+    if (EFLAG) {
+      dihedralpot[idx].getVariableReference("phi") = phi;
+      edihedral = dihedralpot[idx].evaluate();
+    }
+    if (EVFLAG)
+      ev_tally_thr(this, i1, i2, i3, i4, nlocal, NEWTON_BOND, edihedral, f1, f3, f4, -vb12[0],
+                   -vb12[1], -vb12[2], vb23[0], vb23[1], vb23[2], vb34[0], vb34[1], vb34[2], thr);
+  }
+}
diff --git a/src/OPENMP/dihedral_lepton_omp.h b/src/OPENMP/dihedral_lepton_omp.h
new file mode 100644
index 0000000000..174aea8f26
--- /dev/null
+++ b/src/OPENMP/dihedral_lepton_omp.h
@@ -0,0 +1,40 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef DIHEDRAL_CLASS
+// clang-format off
+DihedralStyle(lepton/omp,DihedralLeptonOMP);
+// clang-format on
+#else
+
+#ifndef LMP_DIHEDRAL_LEPTON_OMP_H
+#define LMP_DIHEDRAL_LEPTON_OMP_H
+
+#include "dihedral_lepton.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class DihedralLeptonOMP : public DihedralLepton, public ThrOMP {
+
+ public:
+  DihedralLeptonOMP(class LAMMPS *lmp);
+  void compute(int, int) override;    // threaded replacement for DihedralLepton::compute()
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData *const thr);    // handles list entries [ifrom, ito)
+};
+}    // namespace LAMMPS_NS
+#endif
+#endif

From a5742a91471595aa7faa944ce614d8670e2b14ad Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Mon, 26 Dec 2022 16:45:49 -0500
Subject: [PATCH 41/79] make lepton package docs more consistent

---
 doc/src/angle_lepton.rst | 24 ++++++++++++------------
 doc/src/bond_lepton.rst  | 18 +++++++++---------
 2 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/doc/src/angle_lepton.rst b/doc/src/angle_lepton.rst
index 7365dd9ed9..ea948c6a5f 100644
--- a/doc/src/angle_lepton.rst
+++ b/doc/src/angle_lepton.rst
@@ -29,12 +29,12 @@ Description
 
 .. versionadded:: TBD
 
-Angle style *lepton* computes angular interactions between three atoms,
-between which an angle has been defined, based on evaluating strings.
-The potential function must be provided as an expression string using
-"theta" as the angle variable relative to the reference angle
-:math:`\theta_0` which is provided as an angle coefficient.  For example
-`"200.0*theta^2"` represents a harmonic potential with a force constant
+Angle style *lepton* computes angular interactions between three atoms
+with a custom potential function.  The potential function must be
+provided as an expression string using "theta" as the angle variable
+relative to the reference angle :math:`\theta_0` which is provided as an
+angle coefficient.  For example `"200.0*theta^2"` represents a
+:doc:`harmonic angle <angle_harmonic>` potential with a force constant
 *K* of 200.0 energy units:
 
 .. math::
@@ -42,11 +42,11 @@ The potential function must be provided as an expression string using
    U_{angle,i} = K (\theta_i - \theta_0)^2 = K \theta^2 \qquad \theta = \theta_i - \theta_0
 
 The `Lepton library <https://simtk.org/projects/lepton>`_, that the
-*lepton* angle style interfaces with, evaluates this expression string at
-run time to compute the pairwise energy.  It also creates an analytical
-representation of the first derivative of this expression with respect to
-"r" and then uses that to compute the force between the pairs atoms forming
-angles as defined by the topology data.
+*lepton* angle style interfaces with, evaluates this expression string
+at run time to compute the pairwise energy.  It also creates an
+analytical representation of the first derivative of this expression
+with respect to "theta" and then uses that to compute the force between
+the angle atoms as defined by the topology data.
 
 The following coefficients must be defined for each angle type via the
 :doc:`angle_coeff <angle_coeff>` command as in the example above, or in
@@ -86,7 +86,7 @@ Related commands
 """"""""""""""""
 
 :doc:`angle_coeff <angle_coeff>`, :doc:`angle_style table <angle_table>`,
-:doc:`bond_style lepton <bond_lepton>`
+:doc:`bond_style lepton <bond_lepton>`, :doc:`dihedral_style lepton <dihedral_lepton>`
 
 Default
 """""""
diff --git a/doc/src/bond_lepton.rst b/doc/src/bond_lepton.rst
index bcd10e8033..91f040e183 100644
--- a/doc/src/bond_lepton.rst
+++ b/doc/src/bond_lepton.rst
@@ -29,13 +29,12 @@ Description
 
 .. versionadded:: TBD
 
-Bond style *lepton* computes bonded interactions between two atoms,
-between which a bond has been defined, based on evaluating strings.  The
-potential function must be provided as an expression string using "r" as
-the distance variable relative to the reference distance :math:`r_0`
-which is provided as a bond coefficient.  For example `"200.0*r^2"`
-represents a harmonic potential with a force constant *K* of 200.0
-energy units:
+Bond style *lepton* computes bonded interactions between two atoms with
+a custom function.  The potential function must be provided as an
+expression string using "r" as the distance variable relative to the
+reference distance :math:`r_0` which is provided as a bond coefficient.
+For example `"200.0*r^2"` represents a harmonic potential with a force
+constant *K* of 200.0 energy units:
 
 .. math::
 
@@ -45,7 +44,7 @@ The `Lepton library <https://simtk.org/projects/lepton>`_, that the
 *lepton* bond style interfaces with, evaluates this expression string at
 run time to compute the pairwise energy.  It also creates an analytical
 representation of the first derivative of this expression with respect to
-"r" and then uses that to compute the force between the pairs atoms forming
+"r" and then uses that to compute the force between the atom pairs forming
 bonds as defined by the topology data.
 
 The following coefficients must be defined for each bond type via the
@@ -84,7 +83,8 @@ Related commands
 """"""""""""""""
 
 :doc:`bond_coeff <bond_coeff>`, :doc:`bond_style table <bond_table>`,
-:doc:`bond_write <bond_write>`, :doc:`angle_style lepton <angle_lepton>`
+:doc:`bond_write <bond_write>`, :doc:`angle_style lepton <angle_lepton>`,
+:doc:`dihedral_style lepton <dihedral_lepton>`
 
 Default
 """""""

From 989ec1b85912aff02433a81cd3378fdbff1b57df Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Tue, 27 Dec 2022 10:57:43 -0500
Subject: [PATCH 42/79] remove lmp/LMP_ prefix from Lepton namespace and files
 to share it with colvars

---
 cmake/Modules/Packages/COLVARS.cmake          | 15 ++++----------
 cmake/Modules/Packages/LEPTON.cmake           | 20 +++++++++++--------
 lib/lepton/Common.mk                          |  2 +-
 lib/lepton/Install.py                         |  6 +++---
 lib/lepton/include/{LMP_Lepton.h => Lepton.h} |  0
 .../include/lepton/CompiledExpression.h       |  4 ++--
 .../include/lepton/CompiledVectorExpression.h |  4 ++--
 lib/lepton/include/lepton/CustomFunction.h    |  4 ++--
 lib/lepton/include/lepton/Exception.h         |  4 ++--
 lib/lepton/include/lepton/ExpressionProgram.h |  4 ++--
 .../include/lepton/ExpressionTreeNode.h       |  4 ++--
 lib/lepton/include/lepton/Operation.h         |  4 ++--
 lib/lepton/include/lepton/ParsedExpression.h  |  4 ++--
 lib/lepton/include/lepton/Parser.h            |  4 ++--
 lib/lepton/src/CompiledExpression.cpp         |  2 +-
 lib/lepton/src/CompiledVectorExpression.cpp   |  2 +-
 lib/lepton/src/ExpressionProgram.cpp          |  2 +-
 lib/lepton/src/ExpressionTreeNode.cpp         |  2 +-
 lib/lepton/src/Operation.cpp                  |  2 +-
 lib/lepton/src/ParsedExpression.cpp           |  6 +++---
 lib/lepton/src/Parser.cpp                     |  4 ++--
 src/LEPTON/Install.sh                         |  2 +-
 src/LEPTON/angle_lepton.cpp                   | 12 +++++------
 src/LEPTON/bond_lepton.cpp                    | 12 +++++------
 src/LEPTON/dihedral_lepton.cpp                | 10 +++++-----
 src/LEPTON/pair_lepton.cpp                    | 14 ++++++-------
 26 files changed, 73 insertions(+), 76 deletions(-)
 rename lib/lepton/include/{LMP_Lepton.h => Lepton.h} (100%)

diff --git a/cmake/Modules/Packages/COLVARS.cmake b/cmake/Modules/Packages/COLVARS.cmake
index da5b685536..2a29553a67 100644
--- a/cmake/Modules/Packages/COLVARS.cmake
+++ b/cmake/Modules/Packages/COLVARS.cmake
@@ -8,13 +8,9 @@ option(COLVARS_DEBUG "Debugging messages for Colvars (quite verbose)" OFF)
 option(COLVARS_LEPTON "Build and link the Lepton library" ON)
 
 if(COLVARS_LEPTON)
-  set(LEPTON_DIR ${LAMMPS_LIB_SOURCE_DIR}/colvars/lepton)
-  file(GLOB LEPTON_SOURCES ${LEPTON_DIR}/src/[^.]*.cpp)
-  add_library(lepton STATIC ${LEPTON_SOURCES})
-  # Change the define below to LEPTON_BUILDING_SHARED_LIBRARY when linking Lepton as a DLL with MSVC
-  target_compile_definitions(lepton PRIVATE -DLEPTON_BUILDING_STATIC_LIBRARY)
-  set_target_properties(lepton PROPERTIES OUTPUT_NAME lammps_lepton${LAMMPS_MACHINE})
-  target_include_directories(lepton PRIVATE ${LEPTON_DIR}/include)
+  if(NOT LEPTON_SOURCE_DIR)
+    include(Packages/LEPTON)
+  endif()
 endif()
 
 add_library(colvars STATIC ${COLVARS_SOURCES})
@@ -35,9 +31,6 @@ if(COLVARS_DEBUG)
 endif()
 
 if(COLVARS_LEPTON)
-  target_link_libraries(lammps PRIVATE lepton)
   target_compile_definitions(colvars PRIVATE -DLEPTON)
-  # Disable the line below when linking Lepton as a DLL with MSVC
-  target_compile_definitions(colvars PRIVATE -DLEPTON_USE_STATIC_LIBRARIES)
-  target_include_directories(colvars PUBLIC ${LEPTON_DIR}/include)
+  target_link_libraries(colvars PRIVATE lepton)
 endif()
diff --git a/cmake/Modules/Packages/LEPTON.cmake b/cmake/Modules/Packages/LEPTON.cmake
index a40047bae0..df8a201c6b 100644
--- a/cmake/Modules/Packages/LEPTON.cmake
+++ b/cmake/Modules/Packages/LEPTON.cmake
@@ -1,3 +1,7 @@
+# avoid including this file twice
+if(LEPTON_SOURCE_DIR)
+   return()
+endif()
 set(LEPTON_SOURCE_DIR ${LAMMPS_LIB_SOURCE_DIR}/lepton)
 
 file(GLOB LEPTON_SOURCES ${LEPTON_SOURCE_DIR}/src/[^.]*.cpp)
@@ -14,18 +18,18 @@ if(LEPTON_ENABLE_JIT)
   file(GLOB ASMJIT_SOURCES ${LEPTON_SOURCE_DIR}/asmjit/*/[^.]*.cpp)
 endif()
 
-add_library(lmplepton STATIC ${LEPTON_SOURCES} ${ASMJIT_SOURCES})
-set_target_properties(lmplepton PROPERTIES OUTPUT_NAME lammps_lmplepton${LAMMPS_MACHINE})
-target_compile_definitions(lmplepton PUBLIC LEPTON_BUILDING_STATIC_LIBRARY=1)
-target_include_directories(lmplepton PUBLIC ${LEPTON_SOURCE_DIR}/include)
+add_library(lepton STATIC ${LEPTON_SOURCES} ${ASMJIT_SOURCES})
+set_target_properties(lepton PROPERTIES OUTPUT_NAME lammps_lepton${LAMMPS_MACHINE})
+target_compile_definitions(lepton PUBLIC LEPTON_BUILDING_STATIC_LIBRARY=1)
+target_include_directories(lepton PUBLIC ${LEPTON_SOURCE_DIR}/include)
 if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
   find_library(LIB_RT rt QUIET)
-  target_link_libraries(lmplepton PUBLIC ${LIB_RT})
+  target_link_libraries(lepton PUBLIC ${LIB_RT})
 endif()
 
 if(LEPTON_ENABLE_JIT)
-  target_compile_definitions(lmplepton PUBLIC "LEPTON_USE_JIT=1;ASMJIT_BUILD_X86=1;ASMJIT_STATIC=1;ASMJIT_BUILD_RELEASE=1")
-  target_include_directories(lmplepton PUBLIC ${LEPTON_SOURCE_DIR})
+  target_compile_definitions(lepton PUBLIC "LEPTON_USE_JIT=1;ASMJIT_BUILD_X86=1;ASMJIT_STATIC=1;ASMJIT_BUILD_RELEASE=1")
+  target_include_directories(lepton PUBLIC ${LEPTON_SOURCE_DIR})
 endif()
 
-target_link_libraries(lammps PRIVATE lmplepton)
+target_link_libraries(lammps PRIVATE lepton)
diff --git a/lib/lepton/Common.mk b/lib/lepton/Common.mk
index 40fecc9368..4dc2b95d94 100644
--- a/lib/lepton/Common.mk
+++ b/lib/lepton/Common.mk
@@ -89,7 +89,7 @@ EXTRAMAKE=Makefile.lammps.empty
 INC=-I include
 DEF=-DLEPTON_BUILDING_STATIC_LIBRARY=1
 
-LIB=liblmplepton.a
+LIB=liblepton.a
 
 ifeq ($(ENABLE_JIT),1)
 OBJ += $(JITOBJ)
diff --git a/lib/lepton/Install.py b/lib/lepton/Install.py
index e3b1b836e1..84ac16b0d0 100755
--- a/lib/lepton/Install.py
+++ b/lib/lepton/Install.py
@@ -51,7 +51,7 @@ if not os.path.exists("Makefile.%s" % machine):
 # make the library with parallel make
 n_cpus = get_cpus()
 
-print("Building liblmp%s.a ..." % lib)
+print("Building lib%s.a ..." % lib)
 cmd = "make -f Makefile.%s clean; make -f Makefile.%s -j%d" % (machine, machine, n_cpus)
 try:
   txt = subprocess.check_output(cmd, shell=True, stderr=subprocess.STDOUT)
@@ -60,10 +60,10 @@ except subprocess.CalledProcessError as e:
   print("Make failed with:\n %s" % e.output.decode('UTF-8'))
   sys.exit(1)
 
-if os.path.exists("liblmp%s.a" % lib):
+if os.path.exists("lib%s.a" % lib):
   print("Build was successful")
 else:
-  sys.exit("Build of lib/%s/liblmp%s.a was NOT successful" % (lib, lib))
+  sys.exit("Build of lib/%s/lib%s.a was NOT successful" % (lib, lib))
 
 if not os.path.exists("Makefile.lammps"):
   print("WARNING: lib/%s/Makefile.lammps was NOT created" % lib)
diff --git a/lib/lepton/include/LMP_Lepton.h b/lib/lepton/include/Lepton.h
similarity index 100%
rename from lib/lepton/include/LMP_Lepton.h
rename to lib/lepton/include/Lepton.h
diff --git a/lib/lepton/include/lepton/CompiledExpression.h b/lib/lepton/include/lepton/CompiledExpression.h
index 6c940e081c..82d66d5c6a 100644
--- a/lib/lepton/include/lepton/CompiledExpression.h
+++ b/lib/lepton/include/lepton/CompiledExpression.h
@@ -47,7 +47,7 @@
 #endif
 #endif
 
-namespace LMP_Lepton {
+namespace Lepton {
 
 class Operation;
 class ParsedExpression;
@@ -119,6 +119,6 @@ private:
 #endif
 };
 
-} // namespace LMP_Lepton
+} // namespace Lepton
 
 #endif /*LEPTON_COMPILED_EXPRESSION_H_*/
diff --git a/lib/lepton/include/lepton/CompiledVectorExpression.h b/lib/lepton/include/lepton/CompiledVectorExpression.h
index e097e3eae1..ea3586f1b0 100644
--- a/lib/lepton/include/lepton/CompiledVectorExpression.h
+++ b/lib/lepton/include/lepton/CompiledVectorExpression.h
@@ -48,7 +48,7 @@
 #endif
 #endif
 
-namespace LMP_Lepton {
+namespace Lepton {
 
 class Operation;
 class ParsedExpression;
@@ -140,6 +140,6 @@ private:
 #endif
 };
 
-} // namespace LMP_Lepton
+} // namespace Lepton
 
 #endif /*LEPTON_VECTOR_EXPRESSION_H_*/
diff --git a/lib/lepton/include/lepton/CustomFunction.h b/lib/lepton/include/lepton/CustomFunction.h
index 4b8121a87f..cbfff26637 100644
--- a/lib/lepton/include/lepton/CustomFunction.h
+++ b/lib/lepton/include/lepton/CustomFunction.h
@@ -34,7 +34,7 @@
 
 #include "windowsIncludes.h"
 
-namespace LMP_Lepton {
+namespace Lepton {
 
 /**
  * This class is the interface for defining your own function that may be included in expressions.
@@ -104,6 +104,6 @@ private:
     int numArgs;
 };
 
-} // namespace LMP_Lepton
+} // namespace Lepton
 
 #endif /*LEPTON_CUSTOM_FUNCTION_H_*/
diff --git a/lib/lepton/include/lepton/Exception.h b/lib/lepton/include/lepton/Exception.h
index 413b08f52e..5ad55714d1 100644
--- a/lib/lepton/include/lepton/Exception.h
+++ b/lib/lepton/include/lepton/Exception.h
@@ -35,7 +35,7 @@
 #include <exception>
 #include <string>
 
-namespace LMP_Lepton {
+namespace Lepton {
 
 /**
  * This class is used for all exceptions thrown by Lepton.
@@ -54,6 +54,6 @@ private:
     std::string message;
 };
 
-} // namespace LMP_Lepton
+} // namespace Lepton
 
 #endif /*LEPTON_EXCEPTION_H_*/
diff --git a/lib/lepton/include/lepton/ExpressionProgram.h b/lib/lepton/include/lepton/ExpressionProgram.h
index 3737cf8082..a49a9094d0 100644
--- a/lib/lepton/include/lepton/ExpressionProgram.h
+++ b/lib/lepton/include/lepton/ExpressionProgram.h
@@ -38,7 +38,7 @@
 #include <string>
 #include <vector>
 
-namespace LMP_Lepton {
+namespace Lepton {
 
 class ParsedExpression;
 
@@ -98,6 +98,6 @@ private:
     int maxArgs, stackSize;
 };
 
-} // namespace LMP_Lepton
+} // namespace Lepton
 
 #endif /*LEPTON_EXPRESSION_PROGRAM_H_*/
diff --git a/lib/lepton/include/lepton/ExpressionTreeNode.h b/lib/lepton/include/lepton/ExpressionTreeNode.h
index eba791fbaa..dde26103cb 100644
--- a/lib/lepton/include/lepton/ExpressionTreeNode.h
+++ b/lib/lepton/include/lepton/ExpressionTreeNode.h
@@ -36,7 +36,7 @@
 #include <string>
 #include <vector>
 
-namespace LMP_Lepton {
+namespace Lepton {
 
 class Operation;
 class ParsedExpression;
@@ -106,6 +106,6 @@ private:
     mutable int tag;
 };
 
-} // namespace LMP_Lepton
+} // namespace Lepton
 
 #endif /*LEPTON_EXPRESSION_TREE_NODE_H_*/
diff --git a/lib/lepton/include/lepton/Operation.h b/lib/lepton/include/lepton/Operation.h
index bde9cfe37f..165a56b332 100644
--- a/lib/lepton/include/lepton/Operation.h
+++ b/lib/lepton/include/lepton/Operation.h
@@ -42,7 +42,7 @@
 #include <sstream>
 #include <algorithm>
 
-namespace LMP_Lepton {
+namespace Lepton {
 
 class ExpressionTreeNode;
 
@@ -1188,6 +1188,6 @@ public:
     ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
 };
 
-} // namespace LMP_Lepton
+} // namespace Lepton
 
 #endif /*LEPTON_OPERATION_H_*/
diff --git a/lib/lepton/include/lepton/ParsedExpression.h b/lib/lepton/include/lepton/ParsedExpression.h
index 05081f677c..e2a7572c4a 100644
--- a/lib/lepton/include/lepton/ParsedExpression.h
+++ b/lib/lepton/include/lepton/ParsedExpression.h
@@ -37,7 +37,7 @@
 #include <map>
 #include <string>
 
-namespace LMP_Lepton {
+namespace Lepton {
 
 class CompiledExpression;
 class ExpressionProgram;
@@ -137,6 +137,6 @@ LEPTON_EXPORT std::ostream& operator<<(std::ostream& out, const ExpressionTreeNo
 
 LEPTON_EXPORT std::ostream& operator<<(std::ostream& out, const ParsedExpression& exp);
 
-} // namespace LMP_Lepton
+} // namespace Lepton
 
 #endif /*LEPTON_PARSED_EXPRESSION_H_*/
diff --git a/lib/lepton/include/lepton/Parser.h b/lib/lepton/include/lepton/Parser.h
index 9eefe3f59e..63d5988d5f 100644
--- a/lib/lepton/include/lepton/Parser.h
+++ b/lib/lepton/include/lepton/Parser.h
@@ -37,7 +37,7 @@
 #include <string>
 #include <vector>
 
-namespace LMP_Lepton {
+namespace Lepton {
 
 class CustomFunction;
 class ExpressionTreeNode;
@@ -72,6 +72,6 @@ private:
     static Operation* getFunctionOperation(const std::string& name, const std::map<std::string, CustomFunction*>& customFunctions);
 };
 
-} // namespace LMP_Lepton
+} // namespace Lepton
 
 #endif /*LEPTON_PARSER_H_*/
diff --git a/lib/lepton/src/CompiledExpression.cpp b/lib/lepton/src/CompiledExpression.cpp
index b85c3a08f7..61dd942c62 100644
--- a/lib/lepton/src/CompiledExpression.cpp
+++ b/lib/lepton/src/CompiledExpression.cpp
@@ -34,7 +34,7 @@
 #include "lepton/ParsedExpression.h"
 #include <utility>
 
-using namespace LMP_Lepton;
+using namespace Lepton;
 using namespace std;
 #ifdef LEPTON_USE_JIT
     using namespace asmjit;
diff --git a/lib/lepton/src/CompiledVectorExpression.cpp b/lib/lepton/src/CompiledVectorExpression.cpp
index 7e4dfcad9c..c8262b3873 100644
--- a/lib/lepton/src/CompiledVectorExpression.cpp
+++ b/lib/lepton/src/CompiledVectorExpression.cpp
@@ -35,7 +35,7 @@
 #include <algorithm>
 #include <utility>
 
-using namespace LMP_Lepton;
+using namespace Lepton;
 using namespace std;
 #ifdef LEPTON_USE_JIT
 using namespace asmjit;
diff --git a/lib/lepton/src/ExpressionProgram.cpp b/lib/lepton/src/ExpressionProgram.cpp
index 74c545287b..bbbae8533f 100644
--- a/lib/lepton/src/ExpressionProgram.cpp
+++ b/lib/lepton/src/ExpressionProgram.cpp
@@ -33,7 +33,7 @@
 #include "lepton/Operation.h"
 #include "lepton/ParsedExpression.h"
 
-using namespace LMP_Lepton;
+using namespace Lepton;
 using namespace std;
 
 ExpressionProgram::ExpressionProgram() : maxArgs(0), stackSize(0) {
diff --git a/lib/lepton/src/ExpressionTreeNode.cpp b/lib/lepton/src/ExpressionTreeNode.cpp
index 3b34a0a1c0..758515b123 100644
--- a/lib/lepton/src/ExpressionTreeNode.cpp
+++ b/lib/lepton/src/ExpressionTreeNode.cpp
@@ -34,7 +34,7 @@
 #include "lepton/Operation.h"
 #include <utility>
 
-using namespace LMP_Lepton;
+using namespace Lepton;
 using namespace std;
 
 ExpressionTreeNode::ExpressionTreeNode(Operation* operation, const vector<ExpressionTreeNode>& children) : operation(operation), children(children) {
diff --git a/lib/lepton/src/Operation.cpp b/lib/lepton/src/Operation.cpp
index 08deff8584..8bddc8d1c8 100644
--- a/lib/lepton/src/Operation.cpp
+++ b/lib/lepton/src/Operation.cpp
@@ -34,7 +34,7 @@
 #include "lepton/ExpressionTreeNode.h"
 #include "MSVC_erfc.h"
 
-using namespace LMP_Lepton;
+using namespace Lepton;
 using namespace std;
 
 static bool isZero(const ExpressionTreeNode& node) {
diff --git a/lib/lepton/src/ParsedExpression.cpp b/lib/lepton/src/ParsedExpression.cpp
index a6f41ae354..6a9df8097a 100644
--- a/lib/lepton/src/ParsedExpression.cpp
+++ b/lib/lepton/src/ParsedExpression.cpp
@@ -37,7 +37,7 @@
 #include <limits>
 #include <vector>
 
-using namespace LMP_Lepton;
+using namespace Lepton;
 using namespace std;
 
 ParsedExpression::ParsedExpression() : rootNode(ExpressionTreeNode()) {
@@ -402,7 +402,7 @@ ExpressionTreeNode ParsedExpression::renameNodeVariables(const ExpressionTreeNod
     return ExpressionTreeNode(node.getOperation().clone(), children);
 }
 
-ostream& LMP_Lepton::operator<<(ostream& out, const ExpressionTreeNode& node) {
+ostream& Lepton::operator<<(ostream& out, const ExpressionTreeNode& node) {
     if (node.getOperation().isInfixOperator() && node.getChildren().size() == 2) {
         out << "(" << node.getChildren()[0] << ")" << node.getOperation().getName() << "(" << node.getChildren()[1] << ")";
     }
@@ -424,7 +424,7 @@ ostream& LMP_Lepton::operator<<(ostream& out, const ExpressionTreeNode& node) {
     return out;
 }
 
-ostream& LMP_Lepton::operator<<(ostream& out, const ParsedExpression& exp) {
+ostream& Lepton::operator<<(ostream& out, const ParsedExpression& exp) {
     out << exp.getRootNode();
     return out;
 }
diff --git a/lib/lepton/src/Parser.cpp b/lib/lepton/src/Parser.cpp
index d094b8e4e4..2829b443b6 100644
--- a/lib/lepton/src/Parser.cpp
+++ b/lib/lepton/src/Parser.cpp
@@ -38,7 +38,7 @@
 #include <cctype>
 #include <iostream>
 
-using namespace LMP_Lepton;
+using namespace Lepton;
 using namespace std;
 
 static const string Digits = "0123456789";
@@ -47,7 +47,7 @@ static const bool LeftAssociative[] = {true, true, true, true, false};
 static const int Precedence[] = {0, 0, 1, 1, 3};
 static const Operation::Id OperationId[] = {Operation::ADD, Operation::SUBTRACT, Operation::MULTIPLY, Operation::DIVIDE, Operation::POWER};
 
-class LMP_Lepton::ParseToken {
+class Lepton::ParseToken {
 public:
     enum Type {Number, Operator, Variable, Function, LeftParen, RightParen, Comma, Whitespace};
 
diff --git a/src/LEPTON/Install.sh b/src/LEPTON/Install.sh
index b7e80c9d1c..6696d0c776 100755
--- a/src/LEPTON/Install.sh
+++ b/src/LEPTON/Install.sh
@@ -40,7 +40,7 @@ if (test $1 = 1) then
     sed -i -e 's/[^ \t]*lepton[^ \t]* //g' ../Makefile.package
     sed -i -e 's|^PKG_INC =[ \t]*|&-I..\/..\/lib\/lepton\/include -I..\/..\/lib\/lepton |' ../Makefile.package
     sed -i -e 's|^PKG_PATH =[ \t]*|&-L..\/..\/lib\/lepton$(LIBOBJDIR) |' ../Makefile.package
-    sed -i -e 's|^PKG_LIB =[ \t]*|&-llmplepton |' ../Makefile.package
+    sed -i -e 's|^PKG_LIB =[ \t]*|&-llepton |' ../Makefile.package
     sed -i -e 's|^PKG_SYSINC =[ \t]*|&$(lepton_SYSINC) |' ../Makefile.package
     sed -i -e 's|^PKG_SYSLIB =[ \t]*|&$(lepton_SYSLIB) |' ../Makefile.package
     sed -i -e 's|^PKG_SYSPATH =[ \t]*|&$(lepton_SYSPATH) |' ../Makefile.package
diff --git a/src/LEPTON/angle_lepton.cpp b/src/LEPTON/angle_lepton.cpp
index 8893b422f1..e985509b1f 100644
--- a/src/LEPTON/angle_lepton.cpp
+++ b/src/LEPTON/angle_lepton.cpp
@@ -28,7 +28,7 @@
 
 #include <cmath>
 
-#include "LMP_Lepton.h"
+#include "Lepton.h"
 #include "lepton_utils.h"
 
 using namespace LAMMPS_NS;
@@ -88,10 +88,10 @@ void AngleLepton::compute(int eflag, int vflag)
 
 template <int EVFLAG, int EFLAG, int NEWTON_BOND> void AngleLepton::eval()
 {
-  std::vector<LMP_Lepton::CompiledExpression> angleforce;
-  std::vector<LMP_Lepton::CompiledExpression> anglepot;
+  std::vector<Lepton::CompiledExpression> angleforce;
+  std::vector<Lepton::CompiledExpression> anglepot;
   for (const auto &expr : expressions) {
-    auto parsed = LMP_Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp));
+    auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp));
     angleforce.emplace_back(parsed.differentiate("theta").createCompiledExpression());
     if (EFLAG) anglepot.emplace_back(parsed.createCompiledExpression());
   }
@@ -221,7 +221,7 @@ void AngleLepton::coeff(int narg, char **arg)
   std::string exp_one = LeptonUtils::condense(arg[2]);
   double offset_one = 0.0;
   try {
-    auto parsed = LMP_Lepton::Parser::parse(LeptonUtils::substitute(exp_one, lmp));
+    auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(exp_one, lmp));
     auto anglepot = parsed.createCompiledExpression();
     auto angleforce = parsed.differentiate("theta").createCompiledExpression();
     anglepot.getVariableReference("theta") = 0.0;
@@ -361,7 +361,7 @@ double AngleLepton::single(int type, int i1, int i2, int i3)
 
   double dtheta = acos(c) - theta0[type];
   auto expr = expressions[type2expression[type]];
-  auto parsed = LMP_Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp));
+  auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp));
   auto anglepot = parsed.createCompiledExpression();
   anglepot.getVariableReference("theta") = dtheta;
   return anglepot.evaluate() - offset[type];
diff --git a/src/LEPTON/bond_lepton.cpp b/src/LEPTON/bond_lepton.cpp
index 7ac999c5b2..4a168902b8 100644
--- a/src/LEPTON/bond_lepton.cpp
+++ b/src/LEPTON/bond_lepton.cpp
@@ -26,7 +26,7 @@
 
 #include <cmath>
 
-#include "LMP_Lepton.h"
+#include "Lepton.h"
 #include "lepton_utils.h"
 using namespace LAMMPS_NS;
 
@@ -80,11 +80,11 @@ void BondLepton::compute(int eflag, int vflag)
 /* ---------------------------------------------------------------------- */
 template <int EVFLAG, int EFLAG, int NEWTON_BOND> void BondLepton::eval()
 {
-  std::vector<LMP_Lepton::CompiledExpression> bondforce;
-  std::vector<LMP_Lepton::CompiledExpression> bondpot;
+  std::vector<Lepton::CompiledExpression> bondforce;
+  std::vector<Lepton::CompiledExpression> bondpot;
   try {
     for (const auto &expr : expressions) {
-      auto parsed = LMP_Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp));
+      auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp));
       bondforce.emplace_back(parsed.differentiate("r").createCompiledExpression());
       if (EFLAG) bondpot.emplace_back(parsed.createCompiledExpression());
     }
@@ -176,7 +176,7 @@ void BondLepton::coeff(int narg, char **arg)
   std::string exp_one = LeptonUtils::condense(arg[2]);
   double offset_one = 0.0;
   try {
-    auto parsed = LMP_Lepton::Parser::parse(LeptonUtils::substitute(exp_one, lmp));
+    auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(exp_one, lmp));
     auto bondpot = parsed.createCompiledExpression();
     auto bondforce = parsed.differentiate("r").createCompiledExpression();
     bondpot.getVariableReference("r") = 0.0;
@@ -299,7 +299,7 @@ double BondLepton::single(int type, double rsq, int /*i*/, int /*j*/, double &ff
   const double dr = r - r0[type];
 
   auto expr = expressions[type2expression[type]];
-  auto parsed = LMP_Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp));
+  auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp));
   auto bondpot = parsed.createCompiledExpression();
   auto bondforce = parsed.differentiate("r").createCompiledExpression();
   bondforce.getVariableReference("r") = dr;
diff --git a/src/LEPTON/dihedral_lepton.cpp b/src/LEPTON/dihedral_lepton.cpp
index acd0a4574d..7549d771e0 100644
--- a/src/LEPTON/dihedral_lepton.cpp
+++ b/src/LEPTON/dihedral_lepton.cpp
@@ -30,7 +30,7 @@
 
 #include <cmath>
 
-#include "LMP_Lepton.h"
+#include "Lepton.h"
 #include "lepton_utils.h"
 
 using namespace LAMMPS_NS;
@@ -90,11 +90,11 @@ void DihedralLepton::compute(int eflag, int vflag)
 
 template <int EVFLAG, int EFLAG, int NEWTON_BOND> void DihedralLepton::eval()
 {
-  std::vector<LMP_Lepton::CompiledExpression> dihedralforce;
-  std::vector<LMP_Lepton::CompiledExpression> dihedralpot;
+  std::vector<Lepton::CompiledExpression> dihedralforce;
+  std::vector<Lepton::CompiledExpression> dihedralpot;
   try {
     for (const auto &expr : expressions) {
-      auto parsed = LMP_Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp));
+      auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp));
       dihedralforce.emplace_back(parsed.differentiate("phi").createCompiledExpression());
       if (EFLAG) dihedralpot.emplace_back(parsed.createCompiledExpression());
     }
@@ -359,7 +359,7 @@ void DihedralLepton::coeff(int narg, char **arg)
   // check if the expression can be parsed and evaluated without error
   std::string exp_one = LeptonUtils::condense(arg[1]);
   try {
-    auto parsed = LMP_Lepton::Parser::parse(LeptonUtils::substitute(exp_one, lmp));
+    auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(exp_one, lmp));
     auto dihedralpot = parsed.createCompiledExpression();
     auto dihedralforce = parsed.differentiate("phi").createCompiledExpression();
     dihedralpot.getVariableReference("phi") = 0.0;
diff --git a/src/LEPTON/pair_lepton.cpp b/src/LEPTON/pair_lepton.cpp
index bc4e97f2b6..ab23d18368 100644
--- a/src/LEPTON/pair_lepton.cpp
+++ b/src/LEPTON/pair_lepton.cpp
@@ -25,7 +25,7 @@
 #include "neigh_list.h"
 #include "update.h"
 
-#include "LMP_Lepton.h"
+#include "Lepton.h"
 #include "lepton_utils.h"
 #include <cmath>
 
@@ -99,11 +99,11 @@ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> void PairLepton::eval()
   const int *const *const firstneigh = list->firstneigh;
   double fxtmp, fytmp, fztmp;
 
-  std::vector<LMP_Lepton::CompiledExpression> pairforce;
-  std::vector<LMP_Lepton::CompiledExpression> pairpot;
+  std::vector<Lepton::CompiledExpression> pairforce;
+  std::vector<Lepton::CompiledExpression> pairpot;
   try {
     for (const auto &expr : expressions) {
-      auto parsed = LMP_Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp));
+      auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp));
       pairforce.emplace_back(parsed.differentiate("r").createCompiledExpression());
       if (EFLAG) pairpot.emplace_back(parsed.createCompiledExpression());
     }
@@ -215,7 +215,7 @@ void PairLepton::coeff(int narg, char **arg)
   // check if the expression can be parsed and evaluated without error
   auto exp_one = LeptonUtils::condense(arg[2]);
   try {
-    auto parsed = LMP_Lepton::Parser::parse(LeptonUtils::substitute(exp_one, lmp));
+    auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(exp_one, lmp));
     auto pairforce = parsed.differentiate("r").createCompiledExpression();
     auto pairpot = parsed.createCompiledExpression();
     pairpot.getVariableReference("r") = 1.0;
@@ -258,7 +258,7 @@ double PairLepton::init_one(int i, int j)
   if (offset_flag) {
     try {
       auto expr = LeptonUtils::substitute(expressions[type2expression[i][j]], lmp);
-      auto pairpot = LMP_Lepton::Parser::parse(expr).createCompiledExpression();
+      auto pairpot = Lepton::Parser::parse(expr).createCompiledExpression();
       pairpot.getVariableReference("r") = cut[i][j];
       offset[i][j] = pairpot.evaluate();
     } catch (std::exception &) {
@@ -402,7 +402,7 @@ double PairLepton::single(int /* i */, int /* j */, int itype, int jtype, double
                           double /* factor_coul */, double factor_lj, double &fforce)
 {
   auto expr = expressions[type2expression[itype][jtype]];
-  auto parsed = LMP_Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp));
+  auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp));
   auto pairpot = parsed.createCompiledExpression();
   auto pairforce = parsed.differentiate("r").createCompiledExpression();
 

From 7e984bfa2ca1fa276ee01e6823eb990b0b0cefa1 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Tue, 27 Dec 2022 11:16:09 -0500
Subject: [PATCH 43/79] update traditional make build support for sharing
 Lepton between LEPTON and COLVARS

---
 lib/colvars/Makefile.common      |  21 +-
 lib/colvars/Makefile.deps        | 697 +++++++++++++++++--------------
 lib/colvars/Makefile.lepton.deps |  50 ---
 lib/colvars/README               |  33 +-
 src/COLVARS/Install.sh           |  18 +-
 5 files changed, 398 insertions(+), 421 deletions(-)
 delete mode 100644 lib/colvars/Makefile.lepton.deps

diff --git a/lib/colvars/Makefile.common b/lib/colvars/Makefile.common
index 31a93652ae..f6d6ba84bb 100644
--- a/lib/colvars/Makefile.common
+++ b/lib/colvars/Makefile.common
@@ -61,22 +61,12 @@ COLVARS_SRCS = \
         colvarvalue.cpp \
         colvar_neuralnetworkcompute.cpp
 
-LEPTON_SRCS = \
-	lepton/src/CompiledExpression.cpp \
-	lepton/src/CompiledVectorExpression.cpp \
-	lepton/src/ExpressionProgram.cpp \
-	lepton/src/ExpressionTreeNode.cpp \
-	lepton/src/Operation.cpp \
-	lepton/src/ParsedExpression.cpp \
-	lepton/src/Parser.cpp
-
 # Allow to selectively turn off Lepton
 ifeq ($(COLVARS_LEPTON),no)
 LEPTON_INCFLAGS = 
 COLVARS_OBJS = $(COLVARS_SRCS:.cpp=.o)
 else
-LEPTON_INCFLAGS = -Ilepton/include -DLEPTON
-COLVARS_OBJS = $(COLVARS_SRCS:.cpp=.o) $(LEPTON_SRCS:.cpp=.o)
+LEPTON_INCFLAGS = -I../lepton/include -DLEPTON
 endif
 
 
@@ -97,12 +87,3 @@ Makefile.deps: $(COLVARS_SRCS)
 
 include Makefile.deps
 
-Makefile.lepton.deps: $(LEPTON_SRCS)
-	@echo > $@
-	@for src in $^ ; do \
-	  obj=`basename $$src .cpp`.o ; \
-	  $(CXX) $(CXXFLAGS) -MM $(LEPTON_INCFLAGS) \
-	    -MT '$$(COLVARS_OBJ_DIR)'$$obj $$src >> $@ ; \
-	  done
-
-include Makefile.lepton.deps
diff --git a/lib/colvars/Makefile.deps b/lib/colvars/Makefile.deps
index d26df41995..6619653af0 100644
--- a/lib/colvars/Makefile.deps
+++ b/lib/colvars/Makefile.deps
@@ -5,327 +5,367 @@ $(COLVARS_OBJ_DIR)colvaratoms.o: colvaratoms.cpp colvarmodule.h \
  colvaratoms.h colvardeps.h
 $(COLVARS_OBJ_DIR)colvarbias_abf.o: colvarbias_abf.cpp colvarmodule.h \
  colvars_version.h colvar.h colvarvalue.h colvartypes.h colvarparse.h \
- colvarparams.h colvardeps.h lepton/include/Lepton.h \
- lepton/include/lepton/CompiledExpression.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/windowsIncludes.h \
- lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/ExpressionProgram.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/Exception.h \
- lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
- colvarbias_abf.h colvarproxy.h colvarproxy_tcl.h colvarproxy_volmaps.h \
- colvarbias.h colvargrid.h colvar_UIestimator.h
+ colvarparams.h colvardeps.h ../lepton/include/Lepton.h \
+ ../lepton/include/lepton/CompiledExpression.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/windowsIncludes.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/ExpressionProgram.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/Operation.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/Exception.h \
+ ../lepton/include/lepton/ParsedExpression.h \
+ ../lepton/include/lepton/Parser.h colvarbias_abf.h colvarproxy.h \
+ colvarproxy_tcl.h colvarproxy_volmaps.h colvarbias.h colvargrid.h \
+ colvar_UIestimator.h
 $(COLVARS_OBJ_DIR)colvarbias_alb.o: colvarbias_alb.cpp colvarmodule.h \
  colvars_version.h colvarbias.h colvar.h colvarvalue.h colvartypes.h \
- colvarparse.h colvarparams.h colvardeps.h lepton/include/Lepton.h \
- lepton/include/lepton/CompiledExpression.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/windowsIncludes.h \
- lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/ExpressionProgram.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/Exception.h \
- lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
- colvarbias_alb.h
+ colvarparse.h colvarparams.h colvardeps.h ../lepton/include/Lepton.h \
+ ../lepton/include/lepton/CompiledExpression.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/windowsIncludes.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/ExpressionProgram.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/Operation.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/Exception.h \
+ ../lepton/include/lepton/ParsedExpression.h \
+ ../lepton/include/lepton/Parser.h colvarbias_alb.h
 $(COLVARS_OBJ_DIR)colvarbias.o: colvarbias.cpp colvarmodule.h \
  colvars_version.h colvarproxy.h colvartypes.h colvarvalue.h \
  colvarproxy_tcl.h colvarproxy_volmaps.h colvarbias.h colvar.h \
- colvarparse.h colvarparams.h colvardeps.h lepton/include/Lepton.h \
- lepton/include/lepton/CompiledExpression.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/windowsIncludes.h \
- lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/ExpressionProgram.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/Exception.h \
- lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
- colvargrid.h
+ colvarparse.h colvarparams.h colvardeps.h ../lepton/include/Lepton.h \
+ ../lepton/include/lepton/CompiledExpression.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/windowsIncludes.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/ExpressionProgram.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/Operation.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/Exception.h \
+ ../lepton/include/lepton/ParsedExpression.h \
+ ../lepton/include/lepton/Parser.h colvargrid.h
 $(COLVARS_OBJ_DIR)colvarbias_histogram.o: colvarbias_histogram.cpp \
  colvarmodule.h colvars_version.h colvarproxy.h colvartypes.h \
  colvarvalue.h colvarproxy_tcl.h colvarproxy_volmaps.h colvar.h \
- colvarparse.h colvarparams.h colvardeps.h lepton/include/Lepton.h \
- lepton/include/lepton/CompiledExpression.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/windowsIncludes.h \
- lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/ExpressionProgram.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/Exception.h \
- lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
- colvarbias_histogram.h colvarbias.h colvargrid.h
+ colvarparse.h colvarparams.h colvardeps.h ../lepton/include/Lepton.h \
+ ../lepton/include/lepton/CompiledExpression.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/windowsIncludes.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/ExpressionProgram.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/Operation.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/Exception.h \
+ ../lepton/include/lepton/ParsedExpression.h \
+ ../lepton/include/lepton/Parser.h colvarbias_histogram.h colvarbias.h \
+ colvargrid.h
 $(COLVARS_OBJ_DIR)colvarbias_histogram_reweight_amd.o: \
  colvarbias_histogram_reweight_amd.cpp \
  colvarbias_histogram_reweight_amd.h colvarbias_histogram.h colvarbias.h \
  colvar.h colvarmodule.h colvars_version.h colvarvalue.h colvartypes.h \
- colvarparse.h colvarparams.h colvardeps.h lepton/include/Lepton.h \
- lepton/include/lepton/CompiledExpression.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/windowsIncludes.h \
- lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/ExpressionProgram.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/Exception.h \
- lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
- colvargrid.h colvarproxy.h colvarproxy_tcl.h colvarproxy_volmaps.h
+ colvarparse.h colvarparams.h colvardeps.h ../lepton/include/Lepton.h \
+ ../lepton/include/lepton/CompiledExpression.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/windowsIncludes.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/ExpressionProgram.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/Operation.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/Exception.h \
+ ../lepton/include/lepton/ParsedExpression.h \
+ ../lepton/include/lepton/Parser.h colvargrid.h colvarproxy.h \
+ colvarproxy_tcl.h colvarproxy_volmaps.h
 $(COLVARS_OBJ_DIR)colvarbias_meta.o: colvarbias_meta.cpp colvarmodule.h \
  colvars_version.h colvarproxy.h colvartypes.h colvarvalue.h \
  colvarproxy_tcl.h colvarproxy_volmaps.h colvar.h colvarparse.h \
- colvarparams.h colvardeps.h lepton/include/Lepton.h \
- lepton/include/lepton/CompiledExpression.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/windowsIncludes.h \
- lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/ExpressionProgram.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/Exception.h \
- lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
- colvarbias_meta.h colvarbias.h colvargrid.h
+ colvarparams.h colvardeps.h ../lepton/include/Lepton.h \
+ ../lepton/include/lepton/CompiledExpression.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/windowsIncludes.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/ExpressionProgram.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/Operation.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/Exception.h \
+ ../lepton/include/lepton/ParsedExpression.h \
+ ../lepton/include/lepton/Parser.h colvarbias_meta.h colvarbias.h \
+ colvargrid.h
 $(COLVARS_OBJ_DIR)colvarbias_restraint.o: colvarbias_restraint.cpp \
  colvarmodule.h colvars_version.h colvarproxy.h colvartypes.h \
  colvarvalue.h colvarproxy_tcl.h colvarproxy_volmaps.h \
  colvarbias_restraint.h colvarbias.h colvar.h colvarparse.h \
- colvarparams.h colvardeps.h lepton/include/Lepton.h \
- lepton/include/lepton/CompiledExpression.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/windowsIncludes.h \
- lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/ExpressionProgram.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/Exception.h \
- lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h
+ colvarparams.h colvardeps.h ../lepton/include/Lepton.h \
+ ../lepton/include/lepton/CompiledExpression.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/windowsIncludes.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/ExpressionProgram.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/Operation.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/Exception.h \
+ ../lepton/include/lepton/ParsedExpression.h \
+ ../lepton/include/lepton/Parser.h
 $(COLVARS_OBJ_DIR)colvarcomp_alchlambda.o: colvarcomp_alchlambda.cpp \
  colvarmodule.h colvars_version.h colvarvalue.h colvartypes.h \
  colvarparse.h colvarparams.h colvar.h colvardeps.h \
- lepton/include/Lepton.h lepton/include/lepton/CompiledExpression.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/windowsIncludes.h \
- lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/ExpressionProgram.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/Exception.h \
- lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
- colvarcomp.h colvaratoms.h colvarproxy.h colvarproxy_tcl.h \
- colvarproxy_volmaps.h colvar_arithmeticpath.h colvar_geometricpath.h
+ ../lepton/include/Lepton.h ../lepton/include/lepton/CompiledExpression.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/windowsIncludes.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/ExpressionProgram.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/Operation.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/Exception.h \
+ ../lepton/include/lepton/ParsedExpression.h \
+ ../lepton/include/lepton/Parser.h colvarcomp.h colvaratoms.h \
+ colvarproxy.h colvarproxy_tcl.h colvarproxy_volmaps.h \
+ colvar_arithmeticpath.h colvar_geometricpath.h
 $(COLVARS_OBJ_DIR)colvarcomp_angles.o: colvarcomp_angles.cpp \
  colvarmodule.h colvars_version.h colvar.h colvarvalue.h colvartypes.h \
- colvarparse.h colvarparams.h colvardeps.h lepton/include/Lepton.h \
- lepton/include/lepton/CompiledExpression.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/windowsIncludes.h \
- lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/ExpressionProgram.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/Exception.h \
- lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
- colvarcomp.h colvaratoms.h colvarproxy.h colvarproxy_tcl.h \
- colvarproxy_volmaps.h colvar_arithmeticpath.h colvar_geometricpath.h
+ colvarparse.h colvarparams.h colvardeps.h ../lepton/include/Lepton.h \
+ ../lepton/include/lepton/CompiledExpression.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/windowsIncludes.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/ExpressionProgram.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/Operation.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/Exception.h \
+ ../lepton/include/lepton/ParsedExpression.h \
+ ../lepton/include/lepton/Parser.h colvarcomp.h colvaratoms.h \
+ colvarproxy.h colvarproxy_tcl.h colvarproxy_volmaps.h \
+ colvar_arithmeticpath.h colvar_geometricpath.h
 $(COLVARS_OBJ_DIR)colvarcomp_apath.o: colvarcomp_apath.cpp colvarmodule.h \
  colvars_version.h colvarvalue.h colvartypes.h colvarparse.h \
- colvarparams.h colvar.h colvardeps.h lepton/include/Lepton.h \
- lepton/include/lepton/CompiledExpression.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/windowsIncludes.h \
- lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/ExpressionProgram.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/Exception.h \
- lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
- colvarcomp.h colvaratoms.h colvarproxy.h colvarproxy_tcl.h \
- colvarproxy_volmaps.h colvar_arithmeticpath.h colvar_geometricpath.h
+ colvarparams.h colvar.h colvardeps.h ../lepton/include/Lepton.h \
+ ../lepton/include/lepton/CompiledExpression.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/windowsIncludes.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/ExpressionProgram.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/Operation.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/Exception.h \
+ ../lepton/include/lepton/ParsedExpression.h \
+ ../lepton/include/lepton/Parser.h colvarcomp.h colvaratoms.h \
+ colvarproxy.h colvarproxy_tcl.h colvarproxy_volmaps.h \
+ colvar_arithmeticpath.h colvar_geometricpath.h
 $(COLVARS_OBJ_DIR)colvarcomp_coordnums.o: colvarcomp_coordnums.cpp \
  colvarmodule.h colvars_version.h colvarparse.h colvarvalue.h \
  colvartypes.h colvarparams.h colvaratoms.h colvarproxy.h \
  colvarproxy_tcl.h colvarproxy_volmaps.h colvardeps.h colvar.h \
- lepton/include/Lepton.h lepton/include/lepton/CompiledExpression.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/windowsIncludes.h \
- lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/ExpressionProgram.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/Exception.h \
- lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
- colvarcomp.h colvar_arithmeticpath.h colvar_geometricpath.h
+ ../lepton/include/Lepton.h ../lepton/include/lepton/CompiledExpression.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/windowsIncludes.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/ExpressionProgram.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/Operation.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/Exception.h \
+ ../lepton/include/lepton/ParsedExpression.h \
+ ../lepton/include/lepton/Parser.h colvarcomp.h colvar_arithmeticpath.h \
+ colvar_geometricpath.h
 $(COLVARS_OBJ_DIR)colvarcomp.o: colvarcomp.cpp colvarmodule.h \
  colvars_version.h colvarvalue.h colvartypes.h colvar.h colvarparse.h \
- colvarparams.h colvardeps.h lepton/include/Lepton.h \
- lepton/include/lepton/CompiledExpression.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/windowsIncludes.h \
- lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/ExpressionProgram.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/Exception.h \
- lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
- colvarcomp.h colvaratoms.h colvarproxy.h colvarproxy_tcl.h \
- colvarproxy_volmaps.h colvar_arithmeticpath.h colvar_geometricpath.h
+ colvarparams.h colvardeps.h ../lepton/include/Lepton.h \
+ ../lepton/include/lepton/CompiledExpression.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/windowsIncludes.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/ExpressionProgram.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/Operation.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/Exception.h \
+ ../lepton/include/lepton/ParsedExpression.h \
+ ../lepton/include/lepton/Parser.h colvarcomp.h colvaratoms.h \
+ colvarproxy.h colvarproxy_tcl.h colvarproxy_volmaps.h \
+ colvar_arithmeticpath.h colvar_geometricpath.h
 $(COLVARS_OBJ_DIR)colvarcomp_distances.o: colvarcomp_distances.cpp \
  colvarmodule.h colvars_version.h colvarvalue.h colvartypes.h \
  colvarparse.h colvarparams.h colvar.h colvardeps.h \
- lepton/include/Lepton.h lepton/include/lepton/CompiledExpression.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/windowsIncludes.h \
- lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/ExpressionProgram.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/Exception.h \
- lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
- colvarcomp.h colvaratoms.h colvarproxy.h colvarproxy_tcl.h \
- colvarproxy_volmaps.h colvar_arithmeticpath.h colvar_geometricpath.h
+ ../lepton/include/Lepton.h ../lepton/include/lepton/CompiledExpression.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/windowsIncludes.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/ExpressionProgram.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/Operation.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/Exception.h \
+ ../lepton/include/lepton/ParsedExpression.h \
+ ../lepton/include/lepton/Parser.h colvarcomp.h colvaratoms.h \
+ colvarproxy.h colvarproxy_tcl.h colvarproxy_volmaps.h \
+ colvar_arithmeticpath.h colvar_geometricpath.h
 $(COLVARS_OBJ_DIR)colvarcomp_gpath.o: colvarcomp_gpath.cpp colvarmodule.h \
  colvars_version.h colvarvalue.h colvartypes.h colvarparse.h \
- colvarparams.h colvar.h colvardeps.h lepton/include/Lepton.h \
- lepton/include/lepton/CompiledExpression.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/windowsIncludes.h \
- lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/ExpressionProgram.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/Exception.h \
- lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
- colvarcomp.h colvaratoms.h colvarproxy.h colvarproxy_tcl.h \
- colvarproxy_volmaps.h colvar_arithmeticpath.h colvar_geometricpath.h
+ colvarparams.h colvar.h colvardeps.h ../lepton/include/Lepton.h \
+ ../lepton/include/lepton/CompiledExpression.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/windowsIncludes.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/ExpressionProgram.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/Operation.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/Exception.h \
+ ../lepton/include/lepton/ParsedExpression.h \
+ ../lepton/include/lepton/Parser.h colvarcomp.h colvaratoms.h \
+ colvarproxy.h colvarproxy_tcl.h colvarproxy_volmaps.h \
+ colvar_arithmeticpath.h colvar_geometricpath.h
 $(COLVARS_OBJ_DIR)colvarcomp_neuralnetwork.o: \
  colvarcomp_neuralnetwork.cpp colvarmodule.h colvars_version.h \
  colvarvalue.h colvartypes.h colvarparse.h colvarparams.h colvar.h \
- colvardeps.h lepton/include/Lepton.h \
- lepton/include/lepton/CompiledExpression.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/windowsIncludes.h \
- lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/ExpressionProgram.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/Exception.h \
- lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
- colvarcomp.h colvaratoms.h colvarproxy.h colvarproxy_tcl.h \
- colvarproxy_volmaps.h colvar_arithmeticpath.h colvar_geometricpath.h \
+ colvardeps.h ../lepton/include/Lepton.h \
+ ../lepton/include/lepton/CompiledExpression.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/windowsIncludes.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/ExpressionProgram.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/Operation.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/Exception.h \
+ ../lepton/include/lepton/ParsedExpression.h \
+ ../lepton/include/lepton/Parser.h colvarcomp.h colvaratoms.h \
+ colvarproxy.h colvarproxy_tcl.h colvarproxy_volmaps.h \
+ colvar_arithmeticpath.h colvar_geometricpath.h \
  colvar_neuralnetworkcompute.h
 $(COLVARS_OBJ_DIR)colvarcomp_combination.o: colvarcomp_combination.cpp \
  colvarcomp.h colvarmodule.h colvars_version.h colvar.h colvarvalue.h \
  colvartypes.h colvarparse.h colvarparams.h colvardeps.h \
- lepton/include/Lepton.h lepton/include/lepton/CompiledExpression.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/windowsIncludes.h \
- lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/ExpressionProgram.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/Exception.h \
- lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
- colvaratoms.h colvarproxy.h colvarproxy_tcl.h colvarproxy_volmaps.h \
- colvar_arithmeticpath.h colvar_geometricpath.h
+ ../lepton/include/Lepton.h ../lepton/include/lepton/CompiledExpression.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/windowsIncludes.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/ExpressionProgram.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/Operation.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/Exception.h \
+ ../lepton/include/lepton/ParsedExpression.h \
+ ../lepton/include/lepton/Parser.h colvaratoms.h colvarproxy.h \
+ colvarproxy_tcl.h colvarproxy_volmaps.h colvar_arithmeticpath.h \
+ colvar_geometricpath.h
 $(COLVARS_OBJ_DIR)colvarcomp_protein.o: colvarcomp_protein.cpp \
  colvarmodule.h colvars_version.h colvarvalue.h colvartypes.h \
  colvarparse.h colvarparams.h colvar.h colvardeps.h \
- lepton/include/Lepton.h lepton/include/lepton/CompiledExpression.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/windowsIncludes.h \
- lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/ExpressionProgram.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/Exception.h \
- lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
- colvarcomp.h colvaratoms.h colvarproxy.h colvarproxy_tcl.h \
- colvarproxy_volmaps.h colvar_arithmeticpath.h colvar_geometricpath.h
+ ../lepton/include/Lepton.h ../lepton/include/lepton/CompiledExpression.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/windowsIncludes.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/ExpressionProgram.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/Operation.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/Exception.h \
+ ../lepton/include/lepton/ParsedExpression.h \
+ ../lepton/include/lepton/Parser.h colvarcomp.h colvaratoms.h \
+ colvarproxy.h colvarproxy_tcl.h colvarproxy_volmaps.h \
+ colvar_arithmeticpath.h colvar_geometricpath.h
 $(COLVARS_OBJ_DIR)colvarcomp_rotations.o: colvarcomp_rotations.cpp \
  colvarmodule.h colvars_version.h colvarvalue.h colvartypes.h \
  colvarparse.h colvarparams.h colvar.h colvardeps.h \
- lepton/include/Lepton.h lepton/include/lepton/CompiledExpression.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/windowsIncludes.h \
- lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/ExpressionProgram.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/Exception.h \
- lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
- colvarcomp.h colvaratoms.h colvarproxy.h colvarproxy_tcl.h \
- colvarproxy_volmaps.h colvar_arithmeticpath.h colvar_geometricpath.h
+ ../lepton/include/Lepton.h ../lepton/include/lepton/CompiledExpression.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/windowsIncludes.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/ExpressionProgram.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/Operation.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/Exception.h \
+ ../lepton/include/lepton/ParsedExpression.h \
+ ../lepton/include/lepton/Parser.h colvarcomp.h colvaratoms.h \
+ colvarproxy.h colvarproxy_tcl.h colvarproxy_volmaps.h \
+ colvar_arithmeticpath.h colvar_geometricpath.h
 $(COLVARS_OBJ_DIR)colvarcomp_volmaps.o: colvarcomp_volmaps.cpp \
  colvarmodule.h colvars_version.h colvarvalue.h colvartypes.h \
  colvarparse.h colvarparams.h colvar.h colvardeps.h \
- lepton/include/Lepton.h lepton/include/lepton/CompiledExpression.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/windowsIncludes.h \
- lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/ExpressionProgram.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/Exception.h \
- lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
- colvarcomp.h colvaratoms.h colvarproxy.h colvarproxy_tcl.h \
- colvarproxy_volmaps.h colvar_arithmeticpath.h colvar_geometricpath.h
+ ../lepton/include/Lepton.h ../lepton/include/lepton/CompiledExpression.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/windowsIncludes.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/ExpressionProgram.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/Operation.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/Exception.h \
+ ../lepton/include/lepton/ParsedExpression.h \
+ ../lepton/include/lepton/Parser.h colvarcomp.h colvaratoms.h \
+ colvarproxy.h colvarproxy_tcl.h colvarproxy_volmaps.h \
+ colvar_arithmeticpath.h colvar_geometricpath.h
 $(COLVARS_OBJ_DIR)colvar.o: colvar.cpp colvarmodule.h colvars_version.h \
  colvarvalue.h colvartypes.h colvarparse.h colvarparams.h colvar.h \
- colvardeps.h lepton/include/Lepton.h \
- lepton/include/lepton/CompiledExpression.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/windowsIncludes.h \
- lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/ExpressionProgram.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/Exception.h \
- lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
- colvarcomp.h colvaratoms.h colvarproxy.h colvarproxy_tcl.h \
- colvarproxy_volmaps.h colvar_arithmeticpath.h colvar_geometricpath.h \
- colvarscript.h colvarbias.h colvarscript_commands.h \
- colvarscript_commands_colvar.h colvarscript_commands_bias.h
+ colvardeps.h ../lepton/include/Lepton.h \
+ ../lepton/include/lepton/CompiledExpression.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/windowsIncludes.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/ExpressionProgram.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/Operation.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/Exception.h \
+ ../lepton/include/lepton/ParsedExpression.h \
+ ../lepton/include/lepton/Parser.h colvarcomp.h colvaratoms.h \
+ colvarproxy.h colvarproxy_tcl.h colvarproxy_volmaps.h \
+ colvar_arithmeticpath.h colvar_geometricpath.h colvarscript.h \
+ colvarbias.h colvarscript_commands.h colvarscript_commands_colvar.h \
+ colvarscript_commands_bias.h
 $(COLVARS_OBJ_DIR)colvardeps.o: colvardeps.cpp colvarmodule.h \
  colvars_version.h colvarproxy.h colvartypes.h colvarvalue.h \
  colvarproxy_tcl.h colvarproxy_volmaps.h colvardeps.h colvarparse.h \
  colvarparams.h
 $(COLVARS_OBJ_DIR)colvargrid.o: colvargrid.cpp colvarmodule.h \
  colvars_version.h colvarvalue.h colvartypes.h colvarparse.h \
- colvarparams.h colvar.h colvardeps.h lepton/include/Lepton.h \
- lepton/include/lepton/CompiledExpression.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/windowsIncludes.h \
- lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/ExpressionProgram.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/Exception.h \
- lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
- colvarcomp.h colvaratoms.h colvarproxy.h colvarproxy_tcl.h \
- colvarproxy_volmaps.h colvar_arithmeticpath.h colvar_geometricpath.h \
- colvargrid.h
+ colvarparams.h colvar.h colvardeps.h ../lepton/include/Lepton.h \
+ ../lepton/include/lepton/CompiledExpression.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/windowsIncludes.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/ExpressionProgram.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/Operation.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/Exception.h \
+ ../lepton/include/lepton/ParsedExpression.h \
+ ../lepton/include/lepton/Parser.h colvarcomp.h colvaratoms.h \
+ colvarproxy.h colvarproxy_tcl.h colvarproxy_volmaps.h \
+ colvar_arithmeticpath.h colvar_geometricpath.h colvargrid.h
 $(COLVARS_OBJ_DIR)colvarmodule.o: colvarmodule.cpp colvarmodule.h \
  colvars_version.h colvarparse.h colvarvalue.h colvartypes.h \
  colvarparams.h colvarproxy.h colvarproxy_tcl.h colvarproxy_volmaps.h \
- colvar.h colvardeps.h lepton/include/Lepton.h \
- lepton/include/lepton/CompiledExpression.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/windowsIncludes.h \
- lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/ExpressionProgram.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/Exception.h \
- lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
- colvarbias.h colvarbias_abf.h colvargrid.h colvar_UIestimator.h \
- colvarbias_alb.h colvarbias_histogram.h \
- colvarbias_histogram_reweight_amd.h colvarbias_meta.h \
- colvarbias_restraint.h colvarscript.h colvarscript_commands.h \
- colvarscript_commands_colvar.h colvarscript_commands_bias.h \
- colvaratoms.h colvarcomp.h colvar_arithmeticpath.h \
- colvar_geometricpath.h colvarmodule_refs.h
+ colvar.h colvardeps.h ../lepton/include/Lepton.h \
+ ../lepton/include/lepton/CompiledExpression.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/windowsIncludes.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/ExpressionProgram.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/Operation.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/Exception.h \
+ ../lepton/include/lepton/ParsedExpression.h \
+ ../lepton/include/lepton/Parser.h colvarbias.h colvarbias_abf.h \
+ colvargrid.h colvar_UIestimator.h colvarbias_alb.h \
+ colvarbias_histogram.h colvarbias_histogram_reweight_amd.h \
+ colvarbias_meta.h colvarbias_restraint.h colvarscript.h \
+ colvarscript_commands.h colvarscript_commands_colvar.h \
+ colvarscript_commands_bias.h colvaratoms.h colvarcomp.h \
+ colvar_arithmeticpath.h colvar_geometricpath.h colvarmodule_refs.h
 $(COLVARS_OBJ_DIR)colvarparams.o: colvarparams.cpp colvarmodule.h \
  colvars_version.h colvarvalue.h colvartypes.h colvarparams.h
 $(COLVARS_OBJ_DIR)colvarparse.o: colvarparse.cpp colvarmodule.h \
@@ -335,17 +375,19 @@ $(COLVARS_OBJ_DIR)colvarproxy.o: colvarproxy.cpp colvarmodule.h \
  colvars_version.h colvarproxy.h colvartypes.h colvarvalue.h \
  colvarproxy_tcl.h colvarproxy_volmaps.h colvarscript.h colvarbias.h \
  colvar.h colvarparse.h colvarparams.h colvardeps.h \
- lepton/include/Lepton.h lepton/include/lepton/CompiledExpression.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/windowsIncludes.h \
- lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/ExpressionProgram.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/Exception.h \
- lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
- colvarscript_commands.h colvarscript_commands_colvar.h \
- colvarscript_commands_bias.h colvaratoms.h colvarmodule_utils.h
+ ../lepton/include/Lepton.h ../lepton/include/lepton/CompiledExpression.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/windowsIncludes.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/ExpressionProgram.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/Operation.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/Exception.h \
+ ../lepton/include/lepton/ParsedExpression.h \
+ ../lepton/include/lepton/Parser.h colvarscript_commands.h \
+ colvarscript_commands_colvar.h colvarscript_commands_bias.h \
+ colvaratoms.h colvarmodule_utils.h
 $(COLVARS_OBJ_DIR)colvarproxy_replicas.o: colvarproxy_replicas.cpp \
  colvarmodule.h colvars_version.h colvarproxy.h colvartypes.h \
  colvarvalue.h colvarproxy_tcl.h colvarproxy_volmaps.h
@@ -360,64 +402,68 @@ $(COLVARS_OBJ_DIR)colvarscript.o: colvarscript.cpp colvarproxy.h \
  colvarmodule.h colvars_version.h colvartypes.h colvarvalue.h \
  colvarproxy_tcl.h colvarproxy_volmaps.h colvardeps.h colvarparse.h \
  colvarparams.h colvarscript.h colvarbias.h colvar.h \
- lepton/include/Lepton.h lepton/include/lepton/CompiledExpression.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/windowsIncludes.h \
- lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/ExpressionProgram.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/Exception.h \
- lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
- colvarscript_commands.h colvarscript_commands_colvar.h \
- colvarscript_commands_bias.h
+ ../lepton/include/Lepton.h ../lepton/include/lepton/CompiledExpression.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/windowsIncludes.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/ExpressionProgram.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/Operation.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/Exception.h \
+ ../lepton/include/lepton/ParsedExpression.h \
+ ../lepton/include/lepton/Parser.h colvarscript_commands.h \
+ colvarscript_commands_colvar.h colvarscript_commands_bias.h
 $(COLVARS_OBJ_DIR)colvarscript_commands.o: colvarscript_commands.cpp \
  colvarproxy.h colvarmodule.h colvars_version.h colvartypes.h \
  colvarvalue.h colvarproxy_tcl.h colvarproxy_volmaps.h colvardeps.h \
  colvarparse.h colvarparams.h colvarscript.h colvarbias.h colvar.h \
- lepton/include/Lepton.h lepton/include/lepton/CompiledExpression.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/windowsIncludes.h \
- lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/ExpressionProgram.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/Exception.h \
- lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
- colvarscript_commands.h colvarscript_commands_colvar.h \
- colvarscript_commands_bias.h
+ ../lepton/include/Lepton.h ../lepton/include/lepton/CompiledExpression.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/windowsIncludes.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/ExpressionProgram.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/Operation.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/Exception.h \
+ ../lepton/include/lepton/ParsedExpression.h \
+ ../lepton/include/lepton/Parser.h colvarscript_commands.h \
+ colvarscript_commands_colvar.h colvarscript_commands_bias.h
 $(COLVARS_OBJ_DIR)colvarscript_commands_bias.o: \
  colvarscript_commands_bias.cpp colvarproxy.h colvarmodule.h \
  colvars_version.h colvartypes.h colvarvalue.h colvarproxy_tcl.h \
  colvarproxy_volmaps.h colvardeps.h colvarparse.h colvarparams.h \
- colvarscript.h colvarbias.h colvar.h lepton/include/Lepton.h \
- lepton/include/lepton/CompiledExpression.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/windowsIncludes.h \
- lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/ExpressionProgram.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/Exception.h \
- lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
- colvarscript_commands.h colvarscript_commands_colvar.h \
- colvarscript_commands_bias.h
+ colvarscript.h colvarbias.h colvar.h ../lepton/include/Lepton.h \
+ ../lepton/include/lepton/CompiledExpression.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/windowsIncludes.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/ExpressionProgram.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/Operation.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/Exception.h \
+ ../lepton/include/lepton/ParsedExpression.h \
+ ../lepton/include/lepton/Parser.h colvarscript_commands.h \
+ colvarscript_commands_colvar.h colvarscript_commands_bias.h
 $(COLVARS_OBJ_DIR)colvarscript_commands_colvar.o: \
  colvarscript_commands_colvar.cpp colvarproxy.h colvarmodule.h \
  colvars_version.h colvartypes.h colvarvalue.h colvarproxy_tcl.h \
  colvarproxy_volmaps.h colvardeps.h colvarparse.h colvarparams.h \
- colvarscript.h colvarbias.h colvar.h lepton/include/Lepton.h \
- lepton/include/lepton/CompiledExpression.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/windowsIncludes.h \
- lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/ExpressionProgram.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/Exception.h \
- lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
- colvarscript_commands.h colvarscript_commands_colvar.h \
- colvarscript_commands_bias.h
+ colvarscript.h colvarbias.h colvar.h ../lepton/include/Lepton.h \
+ ../lepton/include/lepton/CompiledExpression.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/windowsIncludes.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/ExpressionProgram.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/Operation.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/Exception.h \
+ ../lepton/include/lepton/ParsedExpression.h \
+ ../lepton/include/lepton/Parser.h colvarscript_commands.h \
+ colvarscript_commands_colvar.h colvarscript_commands_bias.h
 $(COLVARS_OBJ_DIR)colvartypes.o: colvartypes.cpp colvarmodule.h \
  colvars_version.h colvartypes.h colvarparse.h colvarvalue.h \
  colvarparams.h ../../src/math_eigen_impl.h
@@ -425,14 +471,15 @@ $(COLVARS_OBJ_DIR)colvarvalue.o: colvarvalue.cpp colvarmodule.h \
  colvars_version.h colvarvalue.h colvartypes.h
 $(COLVARS_OBJ_DIR)colvar_neuralnetworkcompute.o: \
  colvar_neuralnetworkcompute.cpp colvar_neuralnetworkcompute.h \
- lepton/include/Lepton.h lepton/include/lepton/CompiledExpression.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/windowsIncludes.h \
- lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/ExpressionProgram.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/Exception.h \
- lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
- colvarparse.h colvarmodule.h colvars_version.h colvarvalue.h \
- colvartypes.h colvarparams.h
+ ../lepton/include/Lepton.h ../lepton/include/lepton/CompiledExpression.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/windowsIncludes.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/ExpressionProgram.h \
+ ../lepton/include/lepton/ExpressionTreeNode.h \
+ ../lepton/include/lepton/Operation.h \
+ ../lepton/include/lepton/CustomFunction.h \
+ ../lepton/include/lepton/Exception.h \
+ ../lepton/include/lepton/ParsedExpression.h \
+ ../lepton/include/lepton/Parser.h colvarparse.h colvarmodule.h \
+ colvars_version.h colvarvalue.h colvartypes.h colvarparams.h
diff --git a/lib/colvars/Makefile.lepton.deps b/lib/colvars/Makefile.lepton.deps
deleted file mode 100644
index 4546339de6..0000000000
--- a/lib/colvars/Makefile.lepton.deps
+++ /dev/null
@@ -1,50 +0,0 @@
-
-$(COLVARS_OBJ_DIR)CompiledExpression.o: lepton/src/CompiledExpression.cpp \
- lepton/include/lepton/CompiledExpression.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/windowsIncludes.h \
- lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/Exception.h \
- lepton/include/lepton/ParsedExpression.h
-$(COLVARS_OBJ_DIR)CompiledVectorExpression.o: \
- lepton/src/CompiledVectorExpression.cpp \
- lepton/include/lepton/CompiledVectorExpression.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/windowsIncludes.h \
- lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/Exception.h \
- lepton/include/lepton/ParsedExpression.h
-$(COLVARS_OBJ_DIR)ExpressionProgram.o: lepton/src/ExpressionProgram.cpp \
- lepton/include/lepton/ExpressionProgram.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/windowsIncludes.h \
- lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/Exception.h \
- lepton/include/lepton/ParsedExpression.h
-$(COLVARS_OBJ_DIR)ExpressionTreeNode.o: lepton/src/ExpressionTreeNode.cpp \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/windowsIncludes.h \
- lepton/include/lepton/Exception.h lepton/include/lepton/Operation.h \
- lepton/include/lepton/CustomFunction.h lepton/include/lepton/Exception.h
-$(COLVARS_OBJ_DIR)Operation.o: lepton/src/Operation.cpp \
- lepton/include/lepton/Operation.h \
- lepton/include/lepton/windowsIncludes.h \
- lepton/include/lepton/CustomFunction.h lepton/include/lepton/Exception.h \
- lepton/include/lepton/ExpressionTreeNode.h lepton/src/MSVC_erfc.h
-$(COLVARS_OBJ_DIR)ParsedExpression.o: lepton/src/ParsedExpression.cpp \
- lepton/include/lepton/ParsedExpression.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/windowsIncludes.h \
- lepton/include/lepton/CompiledExpression.h \
- lepton/include/lepton/CompiledVectorExpression.h \
- lepton/include/lepton/ExpressionProgram.h \
- lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/Exception.h
-$(COLVARS_OBJ_DIR)Parser.o: lepton/src/Parser.cpp \
- lepton/include/lepton/Parser.h lepton/include/lepton/windowsIncludes.h \
- lepton/include/lepton/CustomFunction.h lepton/include/lepton/Exception.h \
- lepton/include/lepton/ExpressionTreeNode.h \
- lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
- lepton/include/lepton/Exception.h \
- lepton/include/lepton/ParsedExpression.h \
- lepton/include/lepton/ExpressionTreeNode.h
diff --git a/lib/colvars/README b/lib/colvars/README
index eeba557ce3..cd1e08b6de 100644
--- a/lib/colvars/README
+++ b/lib/colvars/README
@@ -35,39 +35,32 @@ The reference article is:
 The Colvars library can be built for the most part with all major versions of
 the C++ language.
 
-A few of the most recent features require C++11 support.  In particular, the
-library is optionally built together with the
-"Lepton"_https://simtk.org/projects/lepton library, a copy of which is also
-included in the LAMMPS distribution.  Lepton implements the
-"customFunction"_http://colvars.github.io/colvars-refman-lammps/colvars-refman-lammps.html#colvar|customFunction
-feature, and requires C++11 support.
-
-See "here"_https://colvars.github.io/README-c++11.html for a detailed list of
-C++11-only features.
-
+A few of the most recent features require C++11 support, which is also required
+by LAMMPS, so no additional notes are needed.
 
 ## How to build (CMake)
 
 This is the recommended build recipe: no additional settings are normally
 needed besides "-D PKG_COLVARS=yes".
 
-Building and linking of Lepton (or other C++11-only features) is enabled
-automatically when compilation is carried out with C++11 support, and disabled
-otherwise.  Optionally, Lepton build may be manually controlled with the flag
-"-D COLVARS_LEPTON=yes|no".
-
+Linking to the Lepton library, which is also used by the LEPTON LAMMPS package,
+is enabled automatically.  Optionally, support for Lepton within Colvars may
+be manually controlled with the CMake setting "-D COLVARS_LEPTON=yes|no".
 
 ## How to build (traditional make)
 
-Before building LAMMPS, one must build the Colvars library in lib/colvars.
+Before building LAMMPS, one must build the Colvars library in lib/colvars
+and the Lepton library in lib/lepton.  For building Lepton please see the
+README.md file in the lib/lepton folder.
 
-This can be done manually in the same folder by using or adapting one of the
-provided Makefiles: for example, Makefile.g++ for the GNU compiler.
+Building the Colvars library can be done manually in the respective
+folders by using or adapting one of the provided Makefiles: for example,
+Makefile.g++ for the GNU compiler.
 
 In general, it is safer to use build setting consistent with the rest of
 LAMMPS.  This is best carried out from the LAMMPS src directory using a
-command like these, which simply invoke the lib/colvars/Install.py script with
-the specified args:
+command like these, which simply invoke the lib/colvars/Install.py script
+with the specified args:
 
 make lib-colvars                      # print help message
 make lib-colvars args="-m serial"     # build with GNU g++ compiler (settings as with "make serial")
diff --git a/src/COLVARS/Install.sh b/src/COLVARS/Install.sh
index ebabfd7a1e..6b9b9febe0 100755
--- a/src/COLVARS/Install.sh
+++ b/src/COLVARS/Install.sh
@@ -38,12 +38,12 @@ if (test $1 = 1) then
 
   if (test -e ../Makefile.package) then
     sed -i -e 's/[^ \t]*colvars[^ \t]* //g' ../Makefile.package
-    sed -i -e 's|^PKG_INC =[ \t]*|&-I..\/..\/lib\/colvars |' ../Makefile.package
-    sed -i -e 's|^PKG_PATH =[ \t]*|&-L..\/..\/lib\/colvars$(LIBOBJDIR) |' ../Makefile.package
-    sed -i -e 's|^PKG_LIB =[ \t]*|&-lcolvars |' ../Makefile.package
-    sed -i -e 's|^PKG_SYSINC =[ \t]*|&$(colvars_SYSINC) |' ../Makefile.package
-    sed -i -e 's|^PKG_SYSLIB =[ \t]*|&$(colvars_SYSLIB) |' ../Makefile.package
-    sed -i -e 's|^PKG_SYSPATH =[ \t]*|&$(colvars_SYSPATH) |' ../Makefile.package
+    sed -i -e 's|^PKG_INC =[ \t]*|&-I..\/..\/lib\/colvars -I..\/..\/lib\/lepton\/include |' ../Makefile.package
+    sed -i -e 's|^PKG_PATH =[ \t]*|&-L..\/..\/lib\/colvars$(LIBOBJDIR) -L..\/..\/lib\/lepton$(LIBOBJDIR) |' ../Makefile.package
+    sed -i -e 's|^PKG_LIB =[ \t]*|&-lcolvars -llepton |' ../Makefile.package
+    sed -i -e 's|^PKG_SYSINC =[ \t]*|&$(colvars_SYSINC) $(lepton_SYSINC) |' ../Makefile.package
+    sed -i -e 's|^PKG_SYSLIB =[ \t]*|&$(colvars_SYSLIB) $(lepton_SYSLIB) |' ../Makefile.package
+    sed -i -e 's|^PKG_SYSPATH =[ \t]*|&$(colvars_SYSPATH) $(lepton_SYSPATH) |' ../Makefile.package
   fi
 
   if (test -e ../Makefile.package.settings) then
@@ -51,6 +51,12 @@ if (test $1 = 1) then
     # multiline form needed for BSD sed on Macs
     sed -i -e '4 i \
 include ..\/..\/lib\/colvars\/Makefile.lammps
+' ../Makefile.package.settings
+
+    sed -i -e '/^[ \t]*include.*lepton.*$/d' ../Makefile.package.settings
+    # multiline form needed for BSD sed on Macs
+    sed -i -e '4 i \
+include ..\/..\/lib\/lepton\/Makefile.lammps
 ' ../Makefile.package.settings
 
   fi

From 973dd04c872ded4de596baa290dde400a878b27d Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Tue, 27 Dec 2022 11:23:18 -0500
Subject: [PATCH 44/79] update OPENMP package versions

---
 src/OPENMP/angle_lepton_omp.cpp    | 8 ++++----
 src/OPENMP/bond_lepton_omp.cpp     | 8 ++++----
 src/OPENMP/dihedral_lepton_omp.cpp | 8 ++++----
 src/OPENMP/pair_lepton_omp.cpp     | 8 ++++----
 4 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/src/OPENMP/angle_lepton_omp.cpp b/src/OPENMP/angle_lepton_omp.cpp
index f9a6c6790b..7e86a9e9bb 100644
--- a/src/OPENMP/angle_lepton_omp.cpp
+++ b/src/OPENMP/angle_lepton_omp.cpp
@@ -24,7 +24,7 @@
 
 #include <cmath>
 
-#include "LMP_Lepton.h"
+#include "Lepton.h"
 #include "lepton_utils.h"
 #include "omp_compat.h"
 using namespace LAMMPS_NS;
@@ -89,11 +89,11 @@ void AngleLeptonOMP::compute(int eflag, int vflag)
 template <int EVFLAG, int EFLAG, int NEWTON_BOND>
 void AngleLeptonOMP::eval(int nfrom, int nto, ThrData *const thr)
 {
-  std::vector<LMP_Lepton::CompiledExpression> angleforce;
-  std::vector<LMP_Lepton::CompiledExpression> anglepot;
+  std::vector<Lepton::CompiledExpression> angleforce;
+  std::vector<Lepton::CompiledExpression> anglepot;
   try {
     for (const auto &expr : expressions) {
-      auto parsed = LMP_Lepton::Parser::parse(LeptonUtils::substitute(expr, Pointers::lmp));
+      auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, Pointers::lmp));
       angleforce.emplace_back(parsed.differentiate("theta").createCompiledExpression());
       if (EFLAG) anglepot.emplace_back(parsed.createCompiledExpression());
     }
diff --git a/src/OPENMP/bond_lepton_omp.cpp b/src/OPENMP/bond_lepton_omp.cpp
index 4b578ee73c..0029062366 100644
--- a/src/OPENMP/bond_lepton_omp.cpp
+++ b/src/OPENMP/bond_lepton_omp.cpp
@@ -24,7 +24,7 @@
 
 #include <cmath>
 
-#include "LMP_Lepton.h"
+#include "Lepton.h"
 #include "lepton_utils.h"
 #include "omp_compat.h"
 using namespace LAMMPS_NS;
@@ -87,11 +87,11 @@ void BondLeptonOMP::compute(int eflag, int vflag)
 template <int EVFLAG, int EFLAG, int NEWTON_BOND>
 void BondLeptonOMP::eval(int nfrom, int nto, ThrData *const thr)
 {
-  std::vector<LMP_Lepton::CompiledExpression> bondforce;
-  std::vector<LMP_Lepton::CompiledExpression> bondpot;
+  std::vector<Lepton::CompiledExpression> bondforce;
+  std::vector<Lepton::CompiledExpression> bondpot;
   try {
     for (const auto &expr : expressions) {
-      auto parsed = LMP_Lepton::Parser::parse(LeptonUtils::substitute(expr, Pointers::lmp));
+      auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, Pointers::lmp));
       bondforce.emplace_back(parsed.differentiate("r").createCompiledExpression());
       if (EFLAG) bondpot.emplace_back(parsed.createCompiledExpression());
     }
diff --git a/src/OPENMP/dihedral_lepton_omp.cpp b/src/OPENMP/dihedral_lepton_omp.cpp
index 655d0807d4..13a1328058 100644
--- a/src/OPENMP/dihedral_lepton_omp.cpp
+++ b/src/OPENMP/dihedral_lepton_omp.cpp
@@ -25,7 +25,7 @@
 
 #include <cmath>
 
-#include "LMP_Lepton.h"
+#include "Lepton.h"
 #include "lepton_utils.h"
 #include "omp_compat.h"
 using namespace LAMMPS_NS;
@@ -92,11 +92,11 @@ void DihedralLeptonOMP::compute(int eflag, int vflag)
 template <int EVFLAG, int EFLAG, int NEWTON_BOND>
 void DihedralLeptonOMP::eval(int nfrom, int nto, ThrData *const thr)
 {
-  std::vector<LMP_Lepton::CompiledExpression> dihedralforce;
-  std::vector<LMP_Lepton::CompiledExpression> dihedralpot;
+  std::vector<Lepton::CompiledExpression> dihedralforce;
+  std::vector<Lepton::CompiledExpression> dihedralpot;
   try {
     for (const auto &expr : expressions) {
-      auto parsed = LMP_Lepton::Parser::parse(LeptonUtils::substitute(expr, Pointers::lmp));
+      auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, Pointers::lmp));
       dihedralforce.emplace_back(parsed.differentiate("phi").createCompiledExpression());
       if (EFLAG) dihedralpot.emplace_back(parsed.createCompiledExpression());
     }
diff --git a/src/OPENMP/pair_lepton_omp.cpp b/src/OPENMP/pair_lepton_omp.cpp
index ad10163747..2c96b63a7f 100644
--- a/src/OPENMP/pair_lepton_omp.cpp
+++ b/src/OPENMP/pair_lepton_omp.cpp
@@ -22,7 +22,7 @@
 
 #include <cmath>
 
-#include "LMP_Lepton.h"
+#include "Lepton.h"
 #include "lepton_utils.h"
 #include "omp_compat.h"
 using namespace LAMMPS_NS;
@@ -94,11 +94,11 @@ void PairLeptonOMP::eval(int iifrom, int iito, ThrData *const thr)
   const int *const *const firstneigh = list->firstneigh;
   double fxtmp, fytmp, fztmp;
 
-  std::vector<LMP_Lepton::CompiledExpression> pairforce;
-  std::vector<LMP_Lepton::CompiledExpression> pairpot;
+  std::vector<Lepton::CompiledExpression> pairforce;
+  std::vector<Lepton::CompiledExpression> pairpot;
   try {
     for (const auto &expr : expressions) {
-      auto parsed = LMP_Lepton::Parser::parse(LeptonUtils::substitute(expr, Pointers::lmp));
+      auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, Pointers::lmp));
       pairforce.emplace_back(parsed.differentiate("r").createCompiledExpression());
       if (EFLAG) pairpot.emplace_back(parsed.createCompiledExpression());
     }

From faa2a9ffebf52bfd5ee3352c12286916ffcd6b92 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Tue, 27 Dec 2022 13:59:34 -0500
Subject: [PATCH 45/79] remove Lepton source from lib/colvars folder

---
 lib/colvars/lepton/include/Lepton.h           |   43 -
 .../include/lepton/CompiledExpression.h       |  124 --
 .../include/lepton/CompiledVectorExpression.h |  145 --
 .../lepton/include/lepton/CustomFunction.h    |  109 --
 lib/colvars/lepton/include/lepton/Exception.h |   59 -
 .../lepton/include/lepton/ExpressionProgram.h |  103 --
 .../include/lepton/ExpressionTreeNode.h       |  111 --
 lib/colvars/lepton/include/lepton/Operation.h | 1193 -----------------
 .../lepton/include/lepton/ParsedExpression.h  |  142 --
 lib/colvars/lepton/include/lepton/Parser.h    |   77 --
 .../lepton/include/lepton/windowsIncludes.h   |   41 -
 lib/colvars/lepton/src/CompiledExpression.cpp |  812 -----------
 .../lepton/src/CompiledVectorExpression.cpp   |  933 -------------
 lib/colvars/lepton/src/ExpressionProgram.cpp  |  110 --
 lib/colvars/lepton/src/ExpressionTreeNode.cpp |  153 ---
 lib/colvars/lepton/src/MSVC_erfc.h            |   91 --
 lib/colvars/lepton/src/Operation.cpp          |  425 ------
 lib/colvars/lepton/src/ParsedExpression.cpp   |  422 ------
 lib/colvars/lepton/src/Parser.cpp             |  409 ------
 19 files changed, 5502 deletions(-)
 delete mode 100644 lib/colvars/lepton/include/Lepton.h
 delete mode 100644 lib/colvars/lepton/include/lepton/CompiledExpression.h
 delete mode 100644 lib/colvars/lepton/include/lepton/CompiledVectorExpression.h
 delete mode 100644 lib/colvars/lepton/include/lepton/CustomFunction.h
 delete mode 100644 lib/colvars/lepton/include/lepton/Exception.h
 delete mode 100644 lib/colvars/lepton/include/lepton/ExpressionProgram.h
 delete mode 100644 lib/colvars/lepton/include/lepton/ExpressionTreeNode.h
 delete mode 100644 lib/colvars/lepton/include/lepton/Operation.h
 delete mode 100644 lib/colvars/lepton/include/lepton/ParsedExpression.h
 delete mode 100644 lib/colvars/lepton/include/lepton/Parser.h
 delete mode 100644 lib/colvars/lepton/include/lepton/windowsIncludes.h
 delete mode 100644 lib/colvars/lepton/src/CompiledExpression.cpp
 delete mode 100644 lib/colvars/lepton/src/CompiledVectorExpression.cpp
 delete mode 100644 lib/colvars/lepton/src/ExpressionProgram.cpp
 delete mode 100644 lib/colvars/lepton/src/ExpressionTreeNode.cpp
 delete mode 100644 lib/colvars/lepton/src/MSVC_erfc.h
 delete mode 100644 lib/colvars/lepton/src/Operation.cpp
 delete mode 100644 lib/colvars/lepton/src/ParsedExpression.cpp
 delete mode 100644 lib/colvars/lepton/src/Parser.cpp

diff --git a/lib/colvars/lepton/include/Lepton.h b/lib/colvars/lepton/include/Lepton.h
deleted file mode 100644
index 22edcb3ff9..0000000000
--- a/lib/colvars/lepton/include/Lepton.h
+++ /dev/null
@@ -1,43 +0,0 @@
-#ifndef LEPTON_H_
-#define LEPTON_H_
-
-/* -------------------------------------------------------------------------- *
- *                                   Lepton                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the Lepton expression parser originating from              *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2009 Stanford University and the Authors.           *
- * Authors: Peter Eastman                                                     *
- * Contributors:                                                              *
- *                                                                            *
- * Permission is hereby granted, free of charge, to any person obtaining a    *
- * copy of this software and associated documentation files (the "Software"), *
- * to deal in the Software without restriction, including without limitation  *
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
- * and/or sell copies of the Software, and to permit persons to whom the      *
- * Software is furnished to do so, subject to the following conditions:       *
- *                                                                            *
- * The above copyright notice and this permission notice shall be included in *
- * all copies or substantial portions of the Software.                        *
- *                                                                            *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
- * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
- * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
- * -------------------------------------------------------------------------- */
-
-#include "lepton/CompiledExpression.h"
-#include "lepton/CustomFunction.h"
-#include "lepton/ExpressionProgram.h"
-#include "lepton/ExpressionTreeNode.h"
-#include "lepton/Operation.h"
-#include "lepton/ParsedExpression.h"
-#include "lepton/Parser.h"
-
-#endif /*LEPTON_H_*/
diff --git a/lib/colvars/lepton/include/lepton/CompiledExpression.h b/lib/colvars/lepton/include/lepton/CompiledExpression.h
deleted file mode 100644
index 82d66d5c6a..0000000000
--- a/lib/colvars/lepton/include/lepton/CompiledExpression.h
+++ /dev/null
@@ -1,124 +0,0 @@
-#ifndef LEPTON_COMPILED_EXPRESSION_H_
-#define LEPTON_COMPILED_EXPRESSION_H_
-
-/* -------------------------------------------------------------------------- *
- *                                   Lepton                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the Lepton expression parser originating from              *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2013-2022 Stanford University and the Authors.      *
- * Authors: Peter Eastman                                                     *
- * Contributors:                                                              *
- *                                                                            *
- * Permission is hereby granted, free of charge, to any person obtaining a    *
- * copy of this software and associated documentation files (the "Software"), *
- * to deal in the Software without restriction, including without limitation  *
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
- * and/or sell copies of the Software, and to permit persons to whom the      *
- * Software is furnished to do so, subject to the following conditions:       *
- *                                                                            *
- * The above copyright notice and this permission notice shall be included in *
- * all copies or substantial portions of the Software.                        *
- *                                                                            *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
- * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
- * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
- * -------------------------------------------------------------------------- */
-
-#include "ExpressionTreeNode.h"
-#include "windowsIncludes.h"
-#include <map>
-#include <set>
-#include <string>
-#include <utility>
-#include <vector>
-#ifdef LEPTON_USE_JIT
-#if defined(__ARM__) || defined(__ARM64__)
-#include "asmjit/a64.h"
-#else
-#include "asmjit/x86.h"
-#endif
-#endif
-
-namespace Lepton {
-
-class Operation;
-class ParsedExpression;
-
-/**
- * A CompiledExpression is a highly optimized representation of an expression for cases when you want to evaluate
- * it many times as quickly as possible.  You should treat it as an opaque object; none of the internal representation
- * is visible.
- *
- * A CompiledExpression is created by calling createCompiledExpression() on a ParsedExpression.
- *
- * WARNING: CompiledExpression is NOT thread safe.  You should never access a CompiledExpression from two threads at
- * the same time.
- */
-
-class LEPTON_EXPORT CompiledExpression {
-public:
-    CompiledExpression();
-    CompiledExpression(const CompiledExpression& expression);
-    ~CompiledExpression();
-    CompiledExpression& operator=(const CompiledExpression& expression);
-    /**
-     * Get the names of all variables used by this expression.
-     */
-    const std::set<std::string>& getVariables() const;
-    /**
-     * Get a reference to the memory location where the value of a particular variable is stored.  This can be used
-     * to set the value of the variable before calling evaluate().
-     */
-    double& getVariableReference(const std::string& name);
-    /**
-     * You can optionally specify the memory locations from which the values of variables should be read.
-     * This is useful, for example, when several expressions all use the same variable.  You can then set
-     * the value of that variable in one place, and it will be seen by all of them.
-     */
-    void setVariableLocations(std::map<std::string, double*>& variableLocations);
-    /**
-     * Evaluate the expression.  The values of all variables should have been set before calling this.
-     */
-    double evaluate() const;
-private:
-    friend class ParsedExpression;
-    CompiledExpression(const ParsedExpression& expression);
-    void compileExpression(const ExpressionTreeNode& node, std::vector<std::pair<ExpressionTreeNode, int> >& temps);
-    int findTempIndex(const ExpressionTreeNode& node, std::vector<std::pair<ExpressionTreeNode, int> >& temps);
-    std::map<std::string, double*> variablePointers;
-    std::vector<std::pair<double*, double*> > variablesToCopy;
-    std::vector<std::vector<int> > arguments;
-    std::vector<int> target;
-    std::vector<Operation*> operation;
-    std::map<std::string, int> variableIndices;
-    std::set<std::string> variableNames;
-    mutable std::vector<double> workspace;
-    mutable std::vector<double> argValues;
-    std::map<std::string, double> dummyVariables;
-    double (*jitCode)();
-#ifdef LEPTON_USE_JIT
-    void findPowerGroups(std::vector<std::vector<int> >& groups, std::vector<std::vector<int> >& groupPowers, std::vector<int>& stepGroup);
-    void generateJitCode();
-#if defined(__ARM__) || defined(__ARM64__)
-    void generateSingleArgCall(asmjit::a64::Compiler& c, asmjit::arm::Vec& dest, asmjit::arm::Vec& arg, double (*function)(double));
-    void generateTwoArgCall(asmjit::a64::Compiler& c, asmjit::arm::Vec& dest, asmjit::arm::Vec& arg1, asmjit::arm::Vec& arg2, double (*function)(double, double));
-#else
-    void generateSingleArgCall(asmjit::x86::Compiler& c, asmjit::x86::Xmm& dest, asmjit::x86::Xmm& arg, double (*function)(double));
-    void generateTwoArgCall(asmjit::x86::Compiler& c, asmjit::x86::Xmm& dest, asmjit::x86::Xmm& arg1, asmjit::x86::Xmm& arg2, double (*function)(double, double));
-#endif
-    std::vector<double> constants;
-    asmjit::JitRuntime runtime;
-#endif
-};
-
-} // namespace Lepton
-
-#endif /*LEPTON_COMPILED_EXPRESSION_H_*/
diff --git a/lib/colvars/lepton/include/lepton/CompiledVectorExpression.h b/lib/colvars/lepton/include/lepton/CompiledVectorExpression.h
deleted file mode 100644
index ea3586f1b0..0000000000
--- a/lib/colvars/lepton/include/lepton/CompiledVectorExpression.h
+++ /dev/null
@@ -1,145 +0,0 @@
-#ifndef LEPTON_VECTOR_EXPRESSION_H_
-#define LEPTON_VECTOR_EXPRESSION_H_
-
-/* -------------------------------------------------------------------------- *
- *                                   Lepton                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the Lepton expression parser originating from              *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2013-2022 Stanford University and the Authors.      *
- * Authors: Peter Eastman                                                     *
- * Contributors:                                                              *
- *                                                                            *
- * Permission is hereby granted, free of charge, to any person obtaining a    *
- * copy of this software and associated documentation files (the "Software"), *
- * to deal in the Software without restriction, including without limitation  *
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
- * and/or sell copies of the Software, and to permit persons to whom the      *
- * Software is furnished to do so, subject to the following conditions:       *
- *                                                                            *
- * The above copyright notice and this permission notice shall be included in *
- * all copies or substantial portions of the Software.                        *
- *                                                                            *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
- * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
- * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
- * -------------------------------------------------------------------------- */
-
-#include "ExpressionTreeNode.h"
-#include "windowsIncludes.h"
-#include <array>
-#include <map>
-#include <set>
-#include <string>
-#include <utility>
-#include <vector>
-#ifdef LEPTON_USE_JIT
-#if defined(__ARM__) || defined(__ARM64__)
-#include "asmjit/a64.h"
-#else
-#include "asmjit/x86.h"
-#endif
-#endif
-
-namespace Lepton {
-
-class Operation;
-class ParsedExpression;
-
-/**
- * A CompiledVectorExpression is a highly optimized representation of an expression for cases when you want to evaluate
- * it many times as quickly as possible.  It is similar to CompiledExpression, with the extra feature that it uses the CPU's
- * vector unit (AVX on x86, NEON on ARM) to evaluate the expression for multiple sets of arguments at once.  It also differs
- * from CompiledExpression and ParsedExpression in using single precision rather than double precision to evaluate the expression.
- * You should treat it as an opaque object; none of the internal representation is visible.
- *
- * A CompiledVectorExpression is created by calling createCompiledVectorExpression() on a ParsedExpression.  When you create
- * it, you must specify the width of the vectors on which to compute the expression.  The allowed widths depend on the type of
- * CPU it is running on.  4 is always allowed, and 8 is allowed on x86 processors with AVX.  Call getAllowedWidths() to query
- * the allowed values.
- *
- * WARNING: CompiledVectorExpression is NOT thread safe.  You should never access a CompiledVectorExpression from two threads at
- * the same time.
- */
-
-class LEPTON_EXPORT CompiledVectorExpression {
-public:
-    CompiledVectorExpression();
-    CompiledVectorExpression(const CompiledVectorExpression& expression);
-    ~CompiledVectorExpression();
-    CompiledVectorExpression& operator=(const CompiledVectorExpression& expression);
-    /**
-     * Get the width of the vectors on which the expression is computed.
-     */
-    int getWidth() const;
-    /**
-     * Get the names of all variables used by this expression.
-     */
-    const std::set<std::string>& getVariables() const;
-    /**
-     * Get a pointer to the memory location where the value of a particular variable is stored.  This can be used
-     * to set the value of the variable before calling evaluate().
-     *
-     * @param name    the name of the variable to query
-     * @return a pointer to N floating point values, where N is the vector width
-     */
-    float* getVariablePointer(const std::string& name);
-    /**
-     * You can optionally specify the memory locations from which the values of variables should be read.
-     * This is useful, for example, when several expressions all use the same variable.  You can then set
-     * the value of that variable in one place, and it will be seen by all of them.  The location should
-     * be a pointer to N floating point values, where N is the vector width.
-     */
-    void setVariableLocations(std::map<std::string, float*>& variableLocations);
-    /**
-     * Evaluate the expression.  The values of all variables should have been set before calling this.
-     *
-     * @return a pointer to N floating point values, where N is the vector width
-     */
-    const float* evaluate() const;
-    /**
-     * Get the list of vector widths that are supported on the current processor.
-     */
-    static const std::vector<int>& getAllowedWidths();
-private:
-    friend class ParsedExpression;
-    CompiledVectorExpression(const ParsedExpression& expression, int width);
-    void compileExpression(const ExpressionTreeNode& node, std::vector<std::pair<ExpressionTreeNode, int> >& temps, int& workspaceSize);
-    int findTempIndex(const ExpressionTreeNode& node, std::vector<std::pair<ExpressionTreeNode, int> >& temps);
-    int width;
-    std::map<std::string, float*> variablePointers;
-    std::vector<std::pair<float*, float*> > variablesToCopy;
-    std::vector<std::vector<int> > arguments;
-    std::vector<int> target;
-    std::vector<Operation*> operation;
-    std::map<std::string, int> variableIndices;
-    std::set<std::string> variableNames;
-    mutable std::vector<float> workspace;
-    mutable std::vector<double> argValues;
-    std::map<std::string, double> dummyVariables;
-    void (*jitCode)();
-#ifdef LEPTON_USE_JIT
-    void findPowerGroups(std::vector<std::vector<int> >& groups, std::vector<std::vector<int> >& groupPowers, std::vector<int>& stepGroup);
-    void generateJitCode();
-#if defined(__ARM__) || defined(__ARM64__)
-    void generateSingleArgCall(asmjit::a64::Compiler& c, asmjit::arm::Vec& dest, asmjit::arm::Vec& arg, float (*function)(float));
-    void generateTwoArgCall(asmjit::a64::Compiler& c, asmjit::arm::Vec& dest, asmjit::arm::Vec& arg1, asmjit::arm::Vec& arg2, float (*function)(float, float));
-#else
-    void generateSingleArgCall(asmjit::x86::Compiler& c, asmjit::x86::Ymm& dest, asmjit::x86::Ymm& arg, float (*function)(float));
-    void generateTwoArgCall(asmjit::x86::Compiler& c, asmjit::x86::Ymm& dest, asmjit::x86::Ymm& arg1, asmjit::x86::Ymm& arg2, float (*function)(float, float));
-#endif
-    std::vector<float> constants;
-    asmjit::JitRuntime runtime;
-#endif
-};
-
-} // namespace Lepton
-
-#endif /*LEPTON_VECTOR_EXPRESSION_H_*/
diff --git a/lib/colvars/lepton/include/lepton/CustomFunction.h b/lib/colvars/lepton/include/lepton/CustomFunction.h
deleted file mode 100644
index fbb0ddd52a..0000000000
--- a/lib/colvars/lepton/include/lepton/CustomFunction.h
+++ /dev/null
@@ -1,109 +0,0 @@
-#ifndef LEPTON_CUSTOM_FUNCTION_H_
-#define LEPTON_CUSTOM_FUNCTION_H_
-
-/* -------------------------------------------------------------------------- *
- *                                   Lepton                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the Lepton expression parser originating from              *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2009 Stanford University and the Authors.           *
- * Authors: Peter Eastman                                                     *
- * Contributors:                                                              *
- *                                                                            *
- * Permission is hereby granted, free of charge, to any person obtaining a    *
- * copy of this software and associated documentation files (the "Software"), *
- * to deal in the Software without restriction, including without limitation  *
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
- * and/or sell copies of the Software, and to permit persons to whom the      *
- * Software is furnished to do so, subject to the following conditions:       *
- *                                                                            *
- * The above copyright notice and this permission notice shall be included in *
- * all copies or substantial portions of the Software.                        *
- *                                                                            *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
- * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
- * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
- * -------------------------------------------------------------------------- */
-
-#include "windowsIncludes.h"
-
-namespace Lepton {
-
-/**
- * This class is the interface for defining your own function that may be included in expressions.
- * To use it, create a concrete subclass that implements all of the virtual methods for each new function
- * you want to define.  Then when you call Parser::parse() to parse an expression, pass a map of
- * function names to CustomFunction objects.
- */
-
-class LEPTON_EXPORT CustomFunction {
-public:
-    virtual ~CustomFunction() {
-    }
-    /**
-     * Get the number of arguments this function expects.
-     */
-    virtual int getNumArguments() const = 0;
-    /**
-     * Evaluate the function.
-     *
-     * @param arguments    the array of argument values
-     */
-    virtual double evaluate(const double* arguments) const = 0;
-    /**
-     * Evaluate a derivative of the function.
-     *
-     * @param arguments    the array of argument values
-     * @param derivOrder   an array specifying the number of times the function has been differentiated
-     *                     with respect to each of its arguments.  For example, the array {0, 2} indicates
-     *                     a second derivative with respect to the second argument.
-     */
-    virtual double evaluateDerivative(const double* arguments, const int* derivOrder) const = 0;
-    /**
-     * Create a new duplicate of this object on the heap using the "new" operator.
-     */
-    virtual CustomFunction* clone() const = 0;
-};
-
-/**
- * This class is an implementation of CustomFunction that does no computation.  It just returns
- * 0 for the value and derivatives.  This is useful when using the parser to analyze expressions
- * rather than to evaluate them.  You can just create PlaceholderFunctions to represent any custom
- * functions that may appear in expressions.
- */
-
-class LEPTON_EXPORT PlaceholderFunction : public CustomFunction {
-public:
-    /**
-     * Create a Placeholder function.
-     *
-     * @param numArgs    the number of arguments the function expects
-     */
-    PlaceholderFunction(int numArgs) : numArgs(numArgs) {
-    }
-    int getNumArguments() const {
-        return numArgs;
-    }
-    double evaluate(const double* arguments) const {
-        return 0.0;
-    }
-    double evaluateDerivative(const double* arguments, const int* derivOrder) const {
-        return 0.0;
-    }
-    CustomFunction* clone() const {
-        return new PlaceholderFunction(numArgs);
-    };
-private:
-    int numArgs;
-};
-
-} // namespace Lepton
-
-#endif /*LEPTON_CUSTOM_FUNCTION_H_*/
diff --git a/lib/colvars/lepton/include/lepton/Exception.h b/lib/colvars/lepton/include/lepton/Exception.h
deleted file mode 100644
index 5ad55714d1..0000000000
--- a/lib/colvars/lepton/include/lepton/Exception.h
+++ /dev/null
@@ -1,59 +0,0 @@
-#ifndef LEPTON_EXCEPTION_H_
-#define LEPTON_EXCEPTION_H_
-
-/* -------------------------------------------------------------------------- *
- *                                   Lepton                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the Lepton expression parser originating from              *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2009 Stanford University and the Authors.           *
- * Authors: Peter Eastman                                                     *
- * Contributors:                                                              *
- *                                                                            *
- * Permission is hereby granted, free of charge, to any person obtaining a    *
- * copy of this software and associated documentation files (the "Software"), *
- * to deal in the Software without restriction, including without limitation  *
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
- * and/or sell copies of the Software, and to permit persons to whom the      *
- * Software is furnished to do so, subject to the following conditions:       *
- *                                                                            *
- * The above copyright notice and this permission notice shall be included in *
- * all copies or substantial portions of the Software.                        *
- *                                                                            *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
- * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
- * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
- * -------------------------------------------------------------------------- */
-
-#include <exception>
-#include <string>
-
-namespace Lepton {
-
-/**
- * This class is used for all exceptions thrown by Lepton.
- */
-
-class Exception : public std::exception {
-public:
-    Exception(const std::string& message) : message(message) {
-    }
-    ~Exception() throw() {
-    }
-    const char* what() const throw() {
-        return message.c_str();
-    }
-private:
-    std::string message;
-};
-
-} // namespace Lepton
-
-#endif /*LEPTON_EXCEPTION_H_*/
diff --git a/lib/colvars/lepton/include/lepton/ExpressionProgram.h b/lib/colvars/lepton/include/lepton/ExpressionProgram.h
deleted file mode 100644
index a49a9094d0..0000000000
--- a/lib/colvars/lepton/include/lepton/ExpressionProgram.h
+++ /dev/null
@@ -1,103 +0,0 @@
-#ifndef LEPTON_EXPRESSION_PROGRAM_H_
-#define LEPTON_EXPRESSION_PROGRAM_H_
-
-/* -------------------------------------------------------------------------- *
- *                                   Lepton                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the Lepton expression parser originating from              *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2009-2018 Stanford University and the Authors.      *
- * Authors: Peter Eastman                                                     *
- * Contributors:                                                              *
- *                                                                            *
- * Permission is hereby granted, free of charge, to any person obtaining a    *
- * copy of this software and associated documentation files (the "Software"), *
- * to deal in the Software without restriction, including without limitation  *
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
- * and/or sell copies of the Software, and to permit persons to whom the      *
- * Software is furnished to do so, subject to the following conditions:       *
- *                                                                            *
- * The above copyright notice and this permission notice shall be included in *
- * all copies or substantial portions of the Software.                        *
- *                                                                            *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
- * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
- * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
- * -------------------------------------------------------------------------- */
-
-#include "ExpressionTreeNode.h"
-#include "windowsIncludes.h"
-#include <map>
-#include <string>
-#include <vector>
-
-namespace Lepton {
-
-class ParsedExpression;
-
-/**
- * An ExpressionProgram is a linear sequence of Operations for evaluating an expression.  The evaluation
- * is done with a stack.  The arguments to each Operation are first taken off the stack in order, then it is
- * evaluated and the result is pushed back onto the stack.  At the end, the stack contains a single value,
- * which is the value of the expression.
- *
- * An ExpressionProgram is created by calling createProgram() on a ParsedExpression.
- */
-
-class LEPTON_EXPORT ExpressionProgram {
-public:
-    ExpressionProgram();
-    ExpressionProgram(const ExpressionProgram& program);
-    ~ExpressionProgram();
-    ExpressionProgram& operator=(const ExpressionProgram& program);
-    /**
-     * Get the number of Operations that make up this program.
-     */
-    int getNumOperations() const;
-    /**
-     * Get an Operation in this program.
-     */
-    const Operation& getOperation(int index) const;
-    /**
-     * Change an Operation in this program.
-     *
-     * The Operation must have been allocated on the heap with the "new" operator.
-     * The ExpressionProgram assumes ownership of it and will delete it when it
-     * is no longer needed.
-     */
-    void setOperation(int index, Operation* operation);
-    /**
-     * Get the size of the stack needed to execute this program.  This is the largest number of elements present
-     * on the stack at any point during evaluation.
-     */
-    int getStackSize() const;
-    /**
-     * Evaluate the expression.  If the expression involves any variables, this method will throw an exception.
-     */
-    double evaluate() const;
-    /**
-     * Evaluate the expression.
-     *
-     * @param variables    a map specifying the values of all variables that appear in the expression.  If any
-     *                     variable appears in the expression but is not included in this map, an exception
-     *                     will be thrown.
-     */
-    double evaluate(const std::map<std::string, double>& variables) const;
-private:
-    friend class ParsedExpression;
-    ExpressionProgram(const ParsedExpression& expression);
-    void buildProgram(const ExpressionTreeNode& node);
-    std::vector<Operation*> operations;
-    int maxArgs, stackSize;
-};
-
-} // namespace Lepton
-
-#endif /*LEPTON_EXPRESSION_PROGRAM_H_*/
diff --git a/lib/colvars/lepton/include/lepton/ExpressionTreeNode.h b/lib/colvars/lepton/include/lepton/ExpressionTreeNode.h
deleted file mode 100644
index dde26103cb..0000000000
--- a/lib/colvars/lepton/include/lepton/ExpressionTreeNode.h
+++ /dev/null
@@ -1,111 +0,0 @@
-#ifndef LEPTON_EXPRESSION_TREE_NODE_H_
-#define LEPTON_EXPRESSION_TREE_NODE_H_
-
-/* -------------------------------------------------------------------------- *
- *                                   Lepton                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the Lepton expression parser originating from              *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2009-2021 Stanford University and the Authors.      *
- * Authors: Peter Eastman                                                     *
- * Contributors:                                                              *
- *                                                                            *
- * Permission is hereby granted, free of charge, to any person obtaining a    *
- * copy of this software and associated documentation files (the "Software"), *
- * to deal in the Software without restriction, including without limitation  *
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
- * and/or sell copies of the Software, and to permit persons to whom the      *
- * Software is furnished to do so, subject to the following conditions:       *
- *                                                                            *
- * The above copyright notice and this permission notice shall be included in *
- * all copies or substantial portions of the Software.                        *
- *                                                                            *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
- * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
- * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
- * -------------------------------------------------------------------------- */
-
-#include "windowsIncludes.h"
-#include <string>
-#include <vector>
-
-namespace Lepton {
-
-class Operation;
-class ParsedExpression;
-
-/**
- * This class represents a node in the abstract syntax tree representation of an expression.
- * Each node is defined by an Operation and a set of children.  When the expression is
- * evaluated, each child is first evaluated in order, then the resulting values are passed
- * as the arguments to the Operation's evaluate() method.
- */
-
-class LEPTON_EXPORT ExpressionTreeNode {
-public:
-    /**
-     * Create a new ExpressionTreeNode.
-     *
-     * @param operation    the operation for this node.  The ExpressionTreeNode takes over ownership
-     *                     of this object, and deletes it when the node is itself deleted.
-     * @param children     the children of this node
-     */
-    ExpressionTreeNode(Operation* operation, const std::vector<ExpressionTreeNode>& children);
-    /**
-     * Create a new ExpressionTreeNode with two children.
-     *
-     * @param operation    the operation for this node.  The ExpressionTreeNode takes over ownership
-     *                     of this object, and deletes it when the node is itself deleted.
-     * @param child1       the first child of this node
-     * @param child2       the second child of this node
-     */
-    ExpressionTreeNode(Operation* operation, const ExpressionTreeNode& child1, const ExpressionTreeNode& child2);
-    /**
-     * Create a new ExpressionTreeNode with one child.
-     *
-     * @param operation    the operation for this node.  The ExpressionTreeNode takes over ownership
-     *                     of this object, and deletes it when the node is itself deleted.
-     * @param child        the child of this node
-     */
-    ExpressionTreeNode(Operation* operation, const ExpressionTreeNode& child);
-    /**
-     * Create a new ExpressionTreeNode with no children.
-     *
-     * @param operation    the operation for this node.  The ExpressionTreeNode takes over ownership
-     *                     of this object, and deletes it when the node is itself deleted.
-     */
-    ExpressionTreeNode(Operation* operation);
-    ExpressionTreeNode(const ExpressionTreeNode& node);
-    ExpressionTreeNode(ExpressionTreeNode&& node);
-    ExpressionTreeNode();
-    ~ExpressionTreeNode();
-    bool operator==(const ExpressionTreeNode& node) const;
-    bool operator!=(const ExpressionTreeNode& node) const;
-    ExpressionTreeNode& operator=(const ExpressionTreeNode& node);
-    ExpressionTreeNode& operator=(ExpressionTreeNode&& node);
-    /**
-     * Get the Operation performed by this node.
-     */
-    const Operation& getOperation() const;
-    /**
-     * Get this node's child nodes.
-     */
-    const std::vector<ExpressionTreeNode>& getChildren() const;
-private:
-    friend class ParsedExpression;
-    void assignTags(std::vector<const ExpressionTreeNode*>& examples) const;
-    Operation* operation;
-    std::vector<ExpressionTreeNode> children;
-    mutable int tag;
-};
-
-} // namespace Lepton
-
-#endif /*LEPTON_EXPRESSION_TREE_NODE_H_*/
diff --git a/lib/colvars/lepton/include/lepton/Operation.h b/lib/colvars/lepton/include/lepton/Operation.h
deleted file mode 100644
index 1ddde0b8c0..0000000000
--- a/lib/colvars/lepton/include/lepton/Operation.h
+++ /dev/null
@@ -1,1193 +0,0 @@
-#ifndef LEPTON_OPERATION_H_
-#define LEPTON_OPERATION_H_
-
-/* -------------------------------------------------------------------------- *
- *                                   Lepton                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the Lepton expression parser originating from              *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2009-2019 Stanford University and the Authors.      *
- * Authors: Peter Eastman                                                     *
- * Contributors:                                                              *
- *                                                                            *
- * Permission is hereby granted, free of charge, to any person obtaining a    *
- * copy of this software and associated documentation files (the "Software"), *
- * to deal in the Software without restriction, including without limitation  *
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
- * and/or sell copies of the Software, and to permit persons to whom the      *
- * Software is furnished to do so, subject to the following conditions:       *
- *                                                                            *
- * The above copyright notice and this permission notice shall be included in *
- * all copies or substantial portions of the Software.                        *
- *                                                                            *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
- * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
- * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
- * -------------------------------------------------------------------------- */
-
-#include "windowsIncludes.h"
-#include "CustomFunction.h"
-#include "Exception.h"
-#include <cmath>
-#include <map>
-#include <string>
-#include <vector>
-#include <sstream>
-#include <algorithm>
-
-namespace Lepton {
-
-class ExpressionTreeNode;
-
-/**
- * An Operation represents a single step in the evaluation of an expression, such as a function,
- * an operator, or a constant value.  Each Operation takes some number of values as arguments
- * and produces a single value.
- *
- * This is an abstract class with subclasses for specific operations.
- */
-
-class LEPTON_EXPORT Operation {
-public:
-    virtual ~Operation() {
-    }
-    /**
-     * This enumeration lists all Operation subclasses.  This is provided so that switch statements
-     * can be used when processing or analyzing parsed expressions.
-     */
-    enum Id {CONSTANT, VARIABLE, CUSTOM, ADD, SUBTRACT, MULTIPLY, DIVIDE, POWER, NEGATE, SQRT, EXP, LOG,
-             SIN, COS, SEC, CSC, TAN, COT, ASIN, ACOS, ATAN, ATAN2, SINH, COSH, TANH, ERF, ERFC, STEP, DELTA, SQUARE, CUBE, RECIPROCAL,
-             ADD_CONSTANT, MULTIPLY_CONSTANT, POWER_CONSTANT, MIN, MAX, ABS, FLOOR, CEIL, SELECT};
-    /**
-     * Get the name of this Operation.
-     */
-    virtual std::string getName() const = 0;
-    /**
-     * Get this Operation's ID.
-     */
-    virtual Id getId() const = 0;
-    /**
-     * Get the number of arguments this operation expects.
-     */
-    virtual int getNumArguments() const = 0;
-    /**
-     * Create a clone of this Operation.
-     */
-    virtual Operation* clone() const = 0;
-    /**
-     * Perform the computation represented by this operation.
-     *
-     * @param args        the array of arguments
-     * @param variables   a map containing the values of all variables
-     * @return the result of performing the computation.
-     */
-    virtual double evaluate(double* args, const std::map<std::string, double>& variables) const = 0;
-    /**
-     * Return an ExpressionTreeNode which represents the analytic derivative of this Operation with respect to a variable.
-     *
-     * @param children     the child nodes
-     * @param childDerivs  the derivatives of the child nodes with respect to the variable
-     * @param variable     the variable with respect to which the derivate should be taken
-     */
-    virtual ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const = 0;
-    /**
-     * Get whether this operation should be displayed with infix notation.
-     */
-    virtual bool isInfixOperator() const {
-        return false;
-    }
-    /**
-     * Get whether this is a symmetric binary operation, such that exchanging its arguments
-     * does not affect the result.
-     */
-    virtual bool isSymmetric() const {
-        return false;
-    }
-    virtual bool operator!=(const Operation& op) const {
-        return op.getId() != getId();
-    }
-    virtual bool operator==(const Operation& op) const {
-        return !(*this != op);
-    }
-    class Constant;
-    class Variable;
-    class Custom;
-    class Add;
-    class Subtract;
-    class Multiply;
-    class Divide;
-    class Power;
-    class Negate;
-    class Sqrt;
-    class Exp;
-    class Log;
-    class Sin;
-    class Cos;
-    class Sec;
-    class Csc;
-    class Tan;
-    class Cot;
-    class Asin;
-    class Acos;
-    class Atan;
-    class Atan2;
-    class Sinh;
-    class Cosh;
-    class Tanh;
-    class Erf;
-    class Erfc;
-    class Step;
-    class Delta;
-    class Square;
-    class Cube;
-    class Reciprocal;
-    class AddConstant;
-    class MultiplyConstant;
-    class PowerConstant;
-    class Min;
-    class Max;
-    class Abs;
-    class Floor;
-    class Ceil;
-    class Select;
-};
-
-class LEPTON_EXPORT Operation::Constant : public Operation {
-public:
-    Constant(double value) : value(value) {
-    }
-    std::string getName() const {
-        std::stringstream name;
-        name << value;
-        return name.str();
-    }
-    Id getId() const {
-        return CONSTANT;
-    }
-    int getNumArguments() const {
-        return 0;
-    }
-    Operation* clone() const {
-        return new Constant(value);
-    }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
-        return value;
-    }
-    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
-    double getValue() const {
-        return value;
-    }
-    bool operator!=(const Operation& op) const {
-        const Constant* o = dynamic_cast<const Constant*>(&op);
-        return (o == NULL || o->value != value);
-    }
-private:
-    double value;
-};
-
-class LEPTON_EXPORT Operation::Variable : public Operation {
-public:
-    Variable(const std::string& name) : name(name) {
-    }
-    std::string getName() const {
-        return name;
-    }
-    Id getId() const {
-        return VARIABLE;
-    }
-    int getNumArguments() const {
-        return 0;
-    }
-    Operation* clone() const {
-        return new Variable(name);
-    }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
-        std::map<std::string, double>::const_iterator iter = variables.find(name);
-        if (iter == variables.end())
-            throw Exception("No value specified for variable "+name);
-        return iter->second;
-    }
-    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
-    bool operator!=(const Operation& op) const {
-        const Variable* o = dynamic_cast<const Variable*>(&op);
-        return (o == NULL || o->name != name);
-    }
-private:
-    std::string name;
-};
-
-class LEPTON_EXPORT Operation::Custom : public Operation {
-public:
-    Custom(const std::string& name, CustomFunction* function) : name(name), function(function), isDerivative(false), derivOrder(function->getNumArguments(), 0) {
-    }
-    Custom(const std::string& name, CustomFunction* function, const std::vector<int>& derivOrder) : name(name), function(function), isDerivative(false), derivOrder(derivOrder) {
-        for (int order : derivOrder)
-            if (order != 0)
-                isDerivative = true;
-    }
-    Custom(const Custom& base, int derivIndex) : name(base.name), function(base.function->clone()), isDerivative(true), derivOrder(base.derivOrder) {
-        derivOrder[derivIndex]++;
-    }
-    ~Custom() {
-        delete function;
-    }
-    std::string getName() const {
-        return name;
-    }
-    Id getId() const {
-        return CUSTOM;
-    }
-    int getNumArguments() const {
-        return function->getNumArguments();
-    }
-    Operation* clone() const {
-        Custom* clone = new Custom(name, function->clone());
-        clone->isDerivative = isDerivative;
-        clone->derivOrder = derivOrder;
-        return clone;
-    }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
-        if (isDerivative)
-            return function->evaluateDerivative(args, &derivOrder[0]);
-        return function->evaluate(args);
-    }
-    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
-    const std::vector<int>& getDerivOrder() const {
-        return derivOrder;
-    }
-    bool operator!=(const Operation& op) const {
-        const Custom* o = dynamic_cast<const Custom*>(&op);
-        return (o == NULL || o->name != name || o->isDerivative != isDerivative || o->derivOrder != derivOrder);
-    }
-private:
-    std::string name;
-    CustomFunction* function;
-    bool isDerivative;
-    std::vector<int> derivOrder;
-};
-
-class LEPTON_EXPORT Operation::Add : public Operation {
-public:
-    Add() {
-    }
-    std::string getName() const {
-        return "+";
-    }
-    Id getId() const {
-        return ADD;
-    }
-    int getNumArguments() const {
-        return 2;
-    }
-    Operation* clone() const {
-        return new Add();
-    }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
-        return args[0]+args[1];
-    }
-    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
-    bool isInfixOperator() const {
-        return true;
-    }
-    bool isSymmetric() const {
-        return true;
-    }
-};
-
-class LEPTON_EXPORT Operation::Subtract : public Operation {
-public:
-    Subtract() {
-    }
-    std::string getName() const {
-        return "-";
-    }
-    Id getId() const {
-        return SUBTRACT;
-    }
-    int getNumArguments() const {
-        return 2;
-    }
-    Operation* clone() const {
-        return new Subtract();
-    }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
-        return args[0]-args[1];
-    }
-    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
-    bool isInfixOperator() const {
-        return true;
-    }
-};
-
-class LEPTON_EXPORT Operation::Multiply : public Operation {
-public:
-    Multiply() {
-    }
-    std::string getName() const {
-        return "*";
-    }
-    Id getId() const {
-        return MULTIPLY;
-    }
-    int getNumArguments() const {
-        return 2;
-    }
-    Operation* clone() const {
-        return new Multiply();
-    }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
-        return args[0]*args[1];
-    }
-    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
-    bool isInfixOperator() const {
-        return true;
-    }
-    bool isSymmetric() const {
-        return true;
-    }
-};
-
-class LEPTON_EXPORT Operation::Divide : public Operation {
-public:
-    Divide() {
-    }
-    std::string getName() const {
-        return "/";
-    }
-    Id getId() const {
-        return DIVIDE;
-    }
-    int getNumArguments() const {
-        return 2;
-    }
-    Operation* clone() const {
-        return new Divide();
-    }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
-        return args[0]/args[1];
-    }
-    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
-    bool isInfixOperator() const {
-        return true;
-    }
-};
-
-class LEPTON_EXPORT Operation::Power : public Operation {
-public:
-    Power() {
-    }
-    std::string getName() const {
-        return "^";
-    }
-    Id getId() const {
-        return POWER;
-    }
-    int getNumArguments() const {
-        return 2;
-    }
-    Operation* clone() const {
-        return new Power();
-    }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
-        return std::pow(args[0], args[1]);
-    }
-    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
-    bool isInfixOperator() const {
-        return true;
-    }
-};
-
-class LEPTON_EXPORT Operation::Negate : public Operation {
-public:
-    Negate() {
-    }
-    std::string getName() const {
-        return "-";
-    }
-    Id getId() const {
-        return NEGATE;
-    }
-    int getNumArguments() const {
-        return 1;
-    }
-    Operation* clone() const {
-        return new Negate();
-    }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
-        return -args[0];
-    }
-    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
-};
-
-class LEPTON_EXPORT Operation::Sqrt : public Operation {
-public:
-    Sqrt() {
-    }
-    std::string getName() const {
-        return "sqrt";
-    }
-    Id getId() const {
-        return SQRT;
-    }
-    int getNumArguments() const {
-        return 1;
-    }
-    Operation* clone() const {
-        return new Sqrt();
-    }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
-        return std::sqrt(args[0]);
-    }
-    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
-};
-
-class LEPTON_EXPORT Operation::Exp : public Operation {
-public:
-    Exp() {
-    }
-    std::string getName() const {
-        return "exp";
-    }
-    Id getId() const {
-        return EXP;
-    }
-    int getNumArguments() const {
-        return 1;
-    }
-    Operation* clone() const {
-        return new Exp();
-    }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
-        return std::exp(args[0]);
-    }
-    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
-};
-
-class LEPTON_EXPORT Operation::Log : public Operation {
-public:
-    Log() {
-    }
-    std::string getName() const {
-        return "log";
-    }
-    Id getId() const {
-        return LOG;
-    }
-    int getNumArguments() const {
-        return 1;
-    }
-    Operation* clone() const {
-        return new Log();
-    }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
-        return std::log(args[0]);
-    }
-    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
-};
-
-class LEPTON_EXPORT Operation::Sin : public Operation {
-public:
-    Sin() {
-    }
-    std::string getName() const {
-        return "sin";
-    }
-    Id getId() const {
-        return SIN;
-    }
-    int getNumArguments() const {
-        return 1;
-    }
-    Operation* clone() const {
-        return new Sin();
-    }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
-        return std::sin(args[0]);
-    }
-    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
-};
-
-class LEPTON_EXPORT Operation::Cos : public Operation {
-public:
-    Cos() {
-    }
-    std::string getName() const {
-        return "cos";
-    }
-    Id getId() const {
-        return COS;
-    }
-    int getNumArguments() const {
-        return 1;
-    }
-    Operation* clone() const {
-        return new Cos();
-    }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
-        return std::cos(args[0]);
-    }
-    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
-};
-
-class LEPTON_EXPORT Operation::Sec : public Operation {
-public:
-    Sec() {
-    }
-    std::string getName() const {
-        return "sec";
-    }
-    Id getId() const {
-        return SEC;
-    }
-    int getNumArguments() const {
-        return 1;
-    }
-    Operation* clone() const {
-        return new Sec();
-    }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
-        return 1.0/std::cos(args[0]);
-    }
-    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
-};
-
-class LEPTON_EXPORT Operation::Csc : public Operation {
-public:
-    Csc() {
-    }
-    std::string getName() const {
-        return "csc";
-    }
-    Id getId() const {
-        return CSC;
-    }
-    int getNumArguments() const {
-        return 1;
-    }
-    Operation* clone() const {
-        return new Csc();
-    }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
-        return 1.0/std::sin(args[0]);
-    }
-    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
-};
-
-class LEPTON_EXPORT Operation::Tan : public Operation {
-public:
-    Tan() {
-    }
-    std::string getName() const {
-        return "tan";
-    }
-    Id getId() const {
-        return TAN;
-    }
-    int getNumArguments() const {
-        return 1;
-    }
-    Operation* clone() const {
-        return new Tan();
-    }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
-        return std::tan(args[0]);
-    }
-    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
-};
-
-class LEPTON_EXPORT Operation::Cot : public Operation {
-public:
-    Cot() {
-    }
-    std::string getName() const {
-        return "cot";
-    }
-    Id getId() const {
-        return COT;
-    }
-    int getNumArguments() const {
-        return 1;
-    }
-    Operation* clone() const {
-        return new Cot();
-    }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
-        return 1.0/std::tan(args[0]);
-    }
-    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
-};
-
-class LEPTON_EXPORT Operation::Asin : public Operation {
-public:
-    Asin() {
-    }
-    std::string getName() const {
-        return "asin";
-    }
-    Id getId() const {
-        return ASIN;
-    }
-    int getNumArguments() const {
-        return 1;
-    }
-    Operation* clone() const {
-        return new Asin();
-    }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
-        return std::asin(args[0]);
-    }
-    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
-};
-
-class LEPTON_EXPORT Operation::Acos : public Operation {
-public:
-    Acos() {
-    }
-    std::string getName() const {
-        return "acos";
-    }
-    Id getId() const {
-        return ACOS;
-    }
-    int getNumArguments() const {
-        return 1;
-    }
-    Operation* clone() const {
-        return new Acos();
-    }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
-        return std::acos(args[0]);
-    }
-    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
-};
-
-class LEPTON_EXPORT Operation::Atan : public Operation {
-public:
-    Atan() {
-    }
-    std::string getName() const {
-        return "atan";
-    }
-    Id getId() const {
-        return ATAN;
-    }
-    int getNumArguments() const {
-        return 1;
-    }
-    Operation* clone() const {
-        return new Atan();
-    }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
-        return std::atan(args[0]);
-    }
-    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
-};
-
-class LEPTON_EXPORT Operation::Atan2 : public Operation {
-public:
-    Atan2() {
-    }
-    std::string getName() const {
-        return "atan2";
-    }
-    Id getId() const {
-        return ATAN2;
-    }
-    int getNumArguments() const {
-        return 2;
-    }
-    Operation* clone() const {
-        return new Atan2();
-    }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
-        return std::atan2(args[0], args[1]);
-    }
-    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
-};
-
-class LEPTON_EXPORT Operation::Sinh : public Operation {
-public:
-    Sinh() {
-    }
-    std::string getName() const {
-        return "sinh";
-    }
-    Id getId() const {
-        return SINH;
-    }
-    int getNumArguments() const {
-        return 1;
-    }
-    Operation* clone() const {
-        return new Sinh();
-    }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
-        return std::sinh(args[0]);
-    }
-    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
-};
-
-class LEPTON_EXPORT Operation::Cosh : public Operation {
-public:
-    Cosh() {
-    }
-    std::string getName() const {
-        return "cosh";
-    }
-    Id getId() const {
-        return COSH;
-    }
-    int getNumArguments() const {
-        return 1;
-    }
-    Operation* clone() const {
-        return new Cosh();
-    }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
-        return std::cosh(args[0]);
-    }
-    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
-};
-
-class LEPTON_EXPORT Operation::Tanh : public Operation {
-public:
-    Tanh() {
-    }
-    std::string getName() const {
-        return "tanh";
-    }
-    Id getId() const {
-        return TANH;
-    }
-    int getNumArguments() const {
-        return 1;
-    }
-    Operation* clone() const {
-        return new Tanh();
-    }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
-        return std::tanh(args[0]);
-    }
-    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
-};
-
-class LEPTON_EXPORT Operation::Erf : public Operation {
-public:
-    Erf() {
-    }
-    std::string getName() const {
-        return "erf";
-    }
-    Id getId() const {
-        return ERF;
-    }
-    int getNumArguments() const {
-        return 1;
-    }
-    Operation* clone() const {
-        return new Erf();
-    }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const;
-    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
-};
-
-class LEPTON_EXPORT Operation::Erfc : public Operation {
-public:
-    Erfc() {
-    }
-    std::string getName() const {
-        return "erfc";
-    }
-    Id getId() const {
-        return ERFC;
-    }
-    int getNumArguments() const {
-        return 1;
-    }
-    Operation* clone() const {
-        return new Erfc();
-    }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const;
-    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
-};
-
-class LEPTON_EXPORT Operation::Step : public Operation {
-public:
-    Step() {
-    }
-    std::string getName() const {
-        return "step";
-    }
-    Id getId() const {
-        return STEP;
-    }
-    int getNumArguments() const {
-        return 1;
-    }
-    Operation* clone() const {
-        return new Step();
-    }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
-        return (args[0] >= 0.0 ? 1.0 : 0.0);
-    }
-    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
-};
-
-class LEPTON_EXPORT Operation::Delta : public Operation {
-public:
-    Delta() {
-    }
-    std::string getName() const {
-        return "delta";
-    }
-    Id getId() const {
-        return DELTA;
-    }
-    int getNumArguments() const {
-        return 1;
-    }
-    Operation* clone() const {
-        return new Delta();
-    }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
-        return (args[0] == 0.0 ? 1.0 : 0.0);
-    }
-    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
-};
-
-class LEPTON_EXPORT Operation::Square : public Operation {
-public:
-    Square() {
-    }
-    std::string getName() const {
-        return "square";
-    }
-    Id getId() const {
-        return SQUARE;
-    }
-    int getNumArguments() const {
-        return 1;
-    }
-    Operation* clone() const {
-        return new Square();
-    }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
-        return args[0]*args[0];
-    }
-    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
-};
-
-class LEPTON_EXPORT Operation::Cube : public Operation {
-public:
-    Cube() {
-    }
-    std::string getName() const {
-        return "cube";
-    }
-    Id getId() const {
-        return CUBE;
-    }
-    int getNumArguments() const {
-        return 1;
-    }
-    Operation* clone() const {
-        return new Cube();
-    }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
-        return args[0]*args[0]*args[0];
-    }
-    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
-};
-
-class LEPTON_EXPORT Operation::Reciprocal : public Operation {
-public:
-    Reciprocal() {
-    }
-    std::string getName() const {
-        return "recip";
-    }
-    Id getId() const {
-        return RECIPROCAL;
-    }
-    int getNumArguments() const {
-        return 1;
-    }
-    Operation* clone() const {
-        return new Reciprocal();
-    }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
-        return 1.0/args[0];
-    }
-    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
-};
-
-class LEPTON_EXPORT Operation::AddConstant : public Operation {
-public:
-    AddConstant(double value) : value(value) {
-    }
-    std::string getName() const {
-        std::stringstream name;
-        name << value << "+";
-        return name.str();
-    }
-    Id getId() const {
-        return ADD_CONSTANT;
-    }
-    int getNumArguments() const {
-        return 1;
-    }
-    Operation* clone() const {
-        return new AddConstant(value);
-    }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
-        return args[0]+value;
-    }
-    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
-    double getValue() const {
-        return value;
-    }
-    bool operator!=(const Operation& op) const {
-        const AddConstant* o = dynamic_cast<const AddConstant*>(&op);
-        return (o == NULL || o->value != value);
-    }
-private:
-    double value;
-};
-
-class LEPTON_EXPORT Operation::MultiplyConstant : public Operation {
-public:
-    MultiplyConstant(double value) : value(value) {
-    }
-    std::string getName() const {
-        std::stringstream name;
-        name << value << "*";
-        return name.str();
-    }
-    Id getId() const {
-        return MULTIPLY_CONSTANT;
-    }
-    int getNumArguments() const {
-        return 1;
-    }
-    Operation* clone() const {
-        return new MultiplyConstant(value);
-    }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
-        return args[0]*value;
-    }
-    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
-    double getValue() const {
-        return value;
-    }
-    bool operator!=(const Operation& op) const {
-        const MultiplyConstant* o = dynamic_cast<const MultiplyConstant*>(&op);
-        return (o == NULL || o->value != value);
-    }
-private:
-    double value;
-};
-
-class LEPTON_EXPORT Operation::PowerConstant : public Operation {
-public:
-    PowerConstant(double value) : value(value) {
-        intValue = (int) value;
-        isIntPower = (intValue == value);
-    }
-    std::string getName() const {
-        std::stringstream name;
-        name << "^" << value;
-        return name.str();
-    }
-    Id getId() const {
-        return POWER_CONSTANT;
-    }
-    int getNumArguments() const {
-        return 1;
-    }
-    Operation* clone() const {
-        return new PowerConstant(value);
-    }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
-        if (isIntPower) {
-            // Integer powers can be computed much more quickly by repeated multiplication.
-
-            int exponent = intValue;
-            double base = args[0];
-            if (exponent < 0) {
-                exponent = -exponent;
-                base = 1.0/base;
-            }
-            double result = 1.0;
-            while (exponent != 0) {
-                if ((exponent&1) == 1)
-                    result *= base;
-                base *= base;
-                exponent = exponent>>1;
-           }
-           return result;
-        }
-        else
-        return std::pow(args[0], value);
-    }
-    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
-    double getValue() const {
-        return value;
-    }
-    bool operator!=(const Operation& op) const {
-        const PowerConstant* o = dynamic_cast<const PowerConstant*>(&op);
-        return (o == NULL || o->value != value);
-    }
-    bool isInfixOperator() const {
-        return true;
-    }
-private:
-    double value;
-    int intValue;
-    bool isIntPower;
-};
-
-class LEPTON_EXPORT Operation::Min : public Operation {
-public:
-    Min() {
-    }
-    std::string getName() const {
-        return "min";
-    }
-    Id getId() const {
-        return MIN;
-    }
-    int getNumArguments() const {
-        return 2;
-    }
-    Operation* clone() const {
-        return new Min();
-    }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
-        // parens around (std::min) are workaround for horrible microsoft max/min macro trouble
-        return (std::min)(args[0], args[1]);
-    }
-    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
-};
-
-class LEPTON_EXPORT Operation::Max : public Operation {
-public:
-    Max() {
-    }
-    std::string getName() const {
-        return "max";
-    }
-    Id getId() const {
-        return MAX;
-    }
-    int getNumArguments() const {
-        return 2;
-    }
-    Operation* clone() const {
-        return new Max();
-    }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
-        // parens around (std::min) are workaround for horrible microsoft max/min macro trouble
-        return (std::max)(args[0], args[1]);
-    }
-    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
-};
-
-class LEPTON_EXPORT Operation::Abs : public Operation {
-public:
-    Abs() {
-    }
-    std::string getName() const {
-        return "abs";
-    }
-    Id getId() const {
-        return ABS;
-    }
-    int getNumArguments() const {
-        return 1;
-    }
-    Operation* clone() const {
-        return new Abs();
-    }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
-        return std::abs(args[0]);
-    }
-    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
-};
-
-class LEPTON_EXPORT Operation::Floor : public Operation {
-public:
-
-    Floor() {
-    }
-    std::string getName() const {
-        return "floor";
-    }
-    Id getId() const {
-        return FLOOR;
-    }
-    int getNumArguments() const {
-        return 1;
-    }
-    Operation* clone() const {
-        return new Floor();
-    }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
-        return std::floor(args[0]);
-    }
-    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
-};
-
-class LEPTON_EXPORT Operation::Ceil : public Operation {
-public:
-    Ceil() {
-    }
-    std::string getName() const {
-        return "ceil";
-    }
-    Id getId() const {
-        return CEIL;
-    }
-    int getNumArguments() const {
-        return 1;
-    }
-    Operation* clone() const {
-        return new Ceil();
-    }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
-        return std::ceil(args[0]);
-    }
-    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
-};
-
-class LEPTON_EXPORT Operation::Select : public Operation {
-public:
-    Select() {
-    }
-    std::string getName() const {
-        return "select";
-    }
-    Id getId() const {
-        return SELECT;
-    }
-    int getNumArguments() const {
-        return 3;
-    }
-    Operation* clone() const {
-        return new Select();
-    }
-    double evaluate(double* args, const std::map<std::string, double>& variables) const {
-        return (args[0] != 0.0 ? args[1] : args[2]);
-    }
-    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
-};
-
-} // namespace Lepton
-
-#endif /*LEPTON_OPERATION_H_*/
diff --git a/lib/colvars/lepton/include/lepton/ParsedExpression.h b/lib/colvars/lepton/include/lepton/ParsedExpression.h
deleted file mode 100644
index e2a7572c4a..0000000000
--- a/lib/colvars/lepton/include/lepton/ParsedExpression.h
+++ /dev/null
@@ -1,142 +0,0 @@
-#ifndef LEPTON_PARSED_EXPRESSION_H_
-#define LEPTON_PARSED_EXPRESSION_H_
-
-/* -------------------------------------------------------------------------- *
- *                                   Lepton                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the Lepton expression parser originating from              *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2009-2022 Stanford University and the Authors.      *
- * Authors: Peter Eastman                                                     *
- * Contributors:                                                              *
- *                                                                            *
- * Permission is hereby granted, free of charge, to any person obtaining a    *
- * copy of this software and associated documentation files (the "Software"), *
- * to deal in the Software without restriction, including without limitation  *
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
- * and/or sell copies of the Software, and to permit persons to whom the      *
- * Software is furnished to do so, subject to the following conditions:       *
- *                                                                            *
- * The above copyright notice and this permission notice shall be included in *
- * all copies or substantial portions of the Software.                        *
- *                                                                            *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
- * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
- * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
- * -------------------------------------------------------------------------- */
-
-#include "ExpressionTreeNode.h"
-#include "windowsIncludes.h"
-#include <map>
-#include <string>
-
-namespace Lepton {
-
-class CompiledExpression;
-class ExpressionProgram;
-class CompiledVectorExpression;
-
-/**
- * This class represents the result of parsing an expression.  It provides methods for working with the
- * expression in various ways, such as evaluating it, getting the tree representation of the expresson, etc.
- */
-
-class LEPTON_EXPORT ParsedExpression {
-public:
-    /**
-     * Create an uninitialized ParsedExpression.  This exists so that ParsedExpressions can be put in STL containers.
-     * Doing anything with it will produce an exception.
-     */
-    ParsedExpression();
-    /**
-     * Create a ParsedExpression.  Normally you will not call this directly.  Instead, use the Parser class
-     * to parse expression.
-     */
-    ParsedExpression(const ExpressionTreeNode& rootNode);
-    /**
-     * Get the root node of the expression's abstract syntax tree.
-     */
-    const ExpressionTreeNode& getRootNode() const;
-    /**
-     * Evaluate the expression.  If the expression involves any variables, this method will throw an exception.
-     */
-    double evaluate() const;
-    /**
-     * Evaluate the expression.
-     *
-     * @param variables    a map specifying the values of all variables that appear in the expression.  If any
-     *                     variable appears in the expression but is not included in this map, an exception
-     *                     will be thrown.
-     */
-    double evaluate(const std::map<std::string, double>& variables) const;
-    /**
-     * Create a new ParsedExpression which produces the same result as this one, but is faster to evaluate.
-     */
-    ParsedExpression optimize() const;
-    /**
-     * Create a new ParsedExpression which produces the same result as this one, but is faster to evaluate.
-     *
-     * @param variables    a map specifying values for a subset of variables that appear in the expression.
-     *                     All occurrences of these variables in the expression are replaced with the values
-     *                     specified.
-     */
-    ParsedExpression optimize(const std::map<std::string, double>& variables) const;
-    /**
-     * Create a new ParsedExpression which is the analytic derivative of this expression with respect to a
-     * particular variable.
-     *
-     * @param variable     the variable with respect to which the derivate should be taken
-     */
-    ParsedExpression differentiate(const std::string& variable) const;
-    /**
-     * Create an ExpressionProgram that represents the same calculation as this expression.
-     */
-    ExpressionProgram createProgram() const;
-    /**
-     * Create a CompiledExpression that represents the same calculation as this expression.
-     */
-    CompiledExpression createCompiledExpression() const;
-    /**
-     * Create a CompiledVectorExpression that allows the expression to be evaluated efficiently
-     * using the CPU's vector unit.
-     *
-     * @param width    the width of the vectors to evaluate it on.  The allowed values
-     *                 depend on the CPU.  4 is always allowed, and 8 is allowed on
-     *                 x86 processors with AVX.  Call CompiledVectorExpression::getAllowedWidths()
-     *                 to query the allowed widths on the current processor.
-     */
-    CompiledVectorExpression createCompiledVectorExpression(int width) const;
-    /**
-     * Create a new ParsedExpression which is identical to this one, except that the names of some
-     * variables have been changed.
-     *
-     * @param replacements    a map whose keys are the names of variables, and whose values are the
-     *                        new names to replace them with
-     */
-    ParsedExpression renameVariables(const std::map<std::string, std::string>& replacements) const;
-private:
-    static double evaluate(const ExpressionTreeNode& node, const std::map<std::string, double>& variables);
-    static ExpressionTreeNode preevaluateVariables(const ExpressionTreeNode& node, const std::map<std::string, double>& variables);
-    static ExpressionTreeNode precalculateConstantSubexpressions(const ExpressionTreeNode& node, std::map<int, ExpressionTreeNode>& nodeCache);
-    static ExpressionTreeNode substituteSimplerExpression(const ExpressionTreeNode& node, std::map<int, ExpressionTreeNode>& nodeCache);
-    static ExpressionTreeNode differentiate(const ExpressionTreeNode& node, const std::string& variable, std::map<int, ExpressionTreeNode>& nodeCache);
-    static bool isConstant(const ExpressionTreeNode& node);
-    static double getConstantValue(const ExpressionTreeNode& node);
-    static ExpressionTreeNode renameNodeVariables(const ExpressionTreeNode& node, const std::map<std::string, std::string>& replacements);
-    ExpressionTreeNode rootNode;
-};
-
-LEPTON_EXPORT std::ostream& operator<<(std::ostream& out, const ExpressionTreeNode& node);
-
-LEPTON_EXPORT std::ostream& operator<<(std::ostream& out, const ParsedExpression& exp);
-
-} // namespace Lepton
-
-#endif /*LEPTON_PARSED_EXPRESSION_H_*/
diff --git a/lib/colvars/lepton/include/lepton/Parser.h b/lib/colvars/lepton/include/lepton/Parser.h
deleted file mode 100644
index 63d5988d5f..0000000000
--- a/lib/colvars/lepton/include/lepton/Parser.h
+++ /dev/null
@@ -1,77 +0,0 @@
-#ifndef LEPTON_PARSER_H_
-#define LEPTON_PARSER_H_
-
-/* -------------------------------------------------------------------------- *
- *                                   Lepton                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the Lepton expression parser originating from              *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2009 Stanford University and the Authors.           *
- * Authors: Peter Eastman                                                     *
- * Contributors:                                                              *
- *                                                                            *
- * Permission is hereby granted, free of charge, to any person obtaining a    *
- * copy of this software and associated documentation files (the "Software"), *
- * to deal in the Software without restriction, including without limitation  *
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
- * and/or sell copies of the Software, and to permit persons to whom the      *
- * Software is furnished to do so, subject to the following conditions:       *
- *                                                                            *
- * The above copyright notice and this permission notice shall be included in *
- * all copies or substantial portions of the Software.                        *
- *                                                                            *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
- * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
- * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
- * -------------------------------------------------------------------------- */
-
-#include "windowsIncludes.h"
-#include <map>
-#include <string>
-#include <vector>
-
-namespace Lepton {
-
-class CustomFunction;
-class ExpressionTreeNode;
-class Operation;
-class ParsedExpression;
-class ParseToken;
-
-/**
- * This class provides the main interface for parsing expressions.
- */
-
-class LEPTON_EXPORT Parser {
-public:
-    /**
-     * Parse a mathematical expression and return a representation of it as an abstract syntax tree.
-     */
-    static ParsedExpression parse(const std::string& expression);
-    /**
-     * Parse a mathematical expression and return a representation of it as an abstract syntax tree.
-     *
-     * @param customFunctions   a map specifying user defined functions that may appear in the expression.
-     *                          The key are function names, and the values are corresponding CustomFunction objects.
-     */
-    static ParsedExpression parse(const std::string& expression, const std::map<std::string, CustomFunction*>& customFunctions);
-private:
-    static std::string trim(const std::string& expression);
-    static std::vector<ParseToken> tokenize(const std::string& expression);
-    static ParseToken getNextToken(const std::string& expression, int start);
-    static ExpressionTreeNode parsePrecedence(const std::vector<ParseToken>& tokens, int& pos, const std::map<std::string, CustomFunction*>& customFunctions,
-            const std::map<std::string, ExpressionTreeNode>& subexpressionDefs, int precedence);
-    static Operation* getOperatorOperation(const std::string& name);
-    static Operation* getFunctionOperation(const std::string& name, const std::map<std::string, CustomFunction*>& customFunctions);
-};
-
-} // namespace Lepton
-
-#endif /*LEPTON_PARSER_H_*/
diff --git a/lib/colvars/lepton/include/lepton/windowsIncludes.h b/lib/colvars/lepton/include/lepton/windowsIncludes.h
deleted file mode 100644
index 798229850e..0000000000
--- a/lib/colvars/lepton/include/lepton/windowsIncludes.h
+++ /dev/null
@@ -1,41 +0,0 @@
-#ifndef LEPTON_WINDOW_INCLUDE_H_
-#define LEPTON_WINDOW_INCLUDE_H_
-
-/*
- * Shared libraries are messy in Visual Studio. We have to distinguish three
- * cases:
- *   (1) this header is being used to build the Lepton shared library
- *       (dllexport)
- *   (2) this header is being used by a *client* of the Lepton shared
- *       library (dllimport)
- *   (3) we are building the Lepton static library, or the client is
- *       being compiled with the expectation of linking with the
- *       Lepton static library (nothing special needed)
- * In the CMake script for building this library, we define one of the symbols
- *     Lepton_BUILDING_{SHARED|STATIC}_LIBRARY
- * Client code normally has no special symbol defined, in which case we'll
- * assume it wants to use the shared library. However, if the client defines
- * the symbol LEPTON_USE_STATIC_LIBRARIES we'll suppress the dllimport so
- * that the client code can be linked with static libraries. Note that
- * the client symbol is not library dependent, while the library symbols
- * affect only the Lepton library, meaning that other libraries can
- * be clients of this one. However, we are assuming all-static or all-shared.
- */
-
-#ifdef _MSC_VER
-    // We don't want to hear about how sprintf is "unsafe".
-    #pragma warning(disable:4996)
-    // Keep MS VC++ quiet about lack of dll export of private members.
-    #pragma warning(disable:4251)
-    #if defined(LEPTON_BUILDING_SHARED_LIBRARY)
-        #define LEPTON_EXPORT __declspec(dllexport)
-    #elif defined(LEPTON_BUILDING_STATIC_LIBRARY) || defined(LEPTON_USE_STATIC_LIBRARIES)
-        #define LEPTON_EXPORT
-    #else
-        #define LEPTON_EXPORT __declspec(dllimport)   // i.e., a client of a shared library
-    #endif
-#else
-    #define LEPTON_EXPORT // Linux, Mac
-#endif
-
-#endif // LEPTON_WINDOW_INCLUDE_H_
diff --git a/lib/colvars/lepton/src/CompiledExpression.cpp b/lib/colvars/lepton/src/CompiledExpression.cpp
deleted file mode 100644
index d8b6e112b2..0000000000
--- a/lib/colvars/lepton/src/CompiledExpression.cpp
+++ /dev/null
@@ -1,812 +0,0 @@
-/* -------------------------------------------------------------------------- *
- *                                   Lepton                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the Lepton expression parser originating from              *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2013-2022 Stanford University and the Authors.      *
- * Authors: Peter Eastman                                                     *
- * Contributors:                                                              *
- *                                                                            *
- * Permission is hereby granted, free of charge, to any person obtaining a    *
- * copy of this software and associated documentation files (the "Software"), *
- * to deal in the Software without restriction, including without limitation  *
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
- * and/or sell copies of the Software, and to permit persons to whom the      *
- * Software is furnished to do so, subject to the following conditions:       *
- *                                                                            *
- * The above copyright notice and this permission notice shall be included in *
- * all copies or substantial portions of the Software.                        *
- *                                                                            *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
- * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
- * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
- * -------------------------------------------------------------------------- */
-
-#include "lepton/CompiledExpression.h"
-#include "lepton/Operation.h"
-#include "lepton/ParsedExpression.h"
-#include <utility>
-
-using namespace Lepton;
-using namespace std;
-#ifdef LEPTON_USE_JIT
-    using namespace asmjit;
-#endif
-
-CompiledExpression::CompiledExpression() : jitCode(NULL) {
-}
-
-CompiledExpression::CompiledExpression(const ParsedExpression& expression) : jitCode(NULL) {
-    ParsedExpression expr = expression.optimize(); // Just in case it wasn't already optimized.
-    vector<pair<ExpressionTreeNode, int> > temps;
-    compileExpression(expr.getRootNode(), temps);
-    int maxArguments = 1;
-    for (int i = 0; i < (int) operation.size(); i++)
-        if (operation[i]->getNumArguments() > maxArguments)
-            maxArguments = operation[i]->getNumArguments();
-    argValues.resize(maxArguments);
-#ifdef LEPTON_USE_JIT
-    generateJitCode();
-#endif
-}
-
-CompiledExpression::~CompiledExpression() {
-    for (int i = 0; i < (int) operation.size(); i++)
-        if (operation[i] != NULL)
-            delete operation[i];
-}
-
-CompiledExpression::CompiledExpression(const CompiledExpression& expression) : jitCode(NULL) {
-    *this = expression;
-}
-
-CompiledExpression& CompiledExpression::operator=(const CompiledExpression& expression) {
-    arguments = expression.arguments;
-    target = expression.target;
-    variableIndices = expression.variableIndices;
-    variableNames = expression.variableNames;
-    workspace.resize(expression.workspace.size());
-    argValues.resize(expression.argValues.size());
-    operation.resize(expression.operation.size());
-    for (int i = 0; i < (int) operation.size(); i++)
-        operation[i] = expression.operation[i]->clone();
-    setVariableLocations(variablePointers);
-    return *this;
-}
-
-void CompiledExpression::compileExpression(const ExpressionTreeNode& node, vector<pair<ExpressionTreeNode, int> >& temps) {
-    if (findTempIndex(node, temps) != -1)
-        return; // We have already processed a node identical to this one.
-
-    // Process the child nodes.
-
-    vector<int> args;
-    for (int i = 0; i < node.getChildren().size(); i++) {
-        compileExpression(node.getChildren()[i], temps);
-        args.push_back(findTempIndex(node.getChildren()[i], temps));
-    }
-
-    // Process this node.
-
-    if (node.getOperation().getId() == Operation::VARIABLE) {
-        variableIndices[node.getOperation().getName()] = (int) workspace.size();
-        variableNames.insert(node.getOperation().getName());
-    }
-    else {
-        int stepIndex = (int) arguments.size();
-        arguments.push_back(vector<int>());
-        target.push_back((int) workspace.size());
-        operation.push_back(node.getOperation().clone());
-        if (args.size() == 0)
-            arguments[stepIndex].push_back(0); // The value won't actually be used.  We just need something there.
-        else {
-            // If the arguments are sequential, we can just pass a pointer to the first one.
-
-            bool sequential = true;
-            for (int i = 1; i < args.size(); i++)
-                if (args[i] != args[i-1]+1)
-                    sequential = false;
-            if (sequential)
-                arguments[stepIndex].push_back(args[0]);
-            else
-                arguments[stepIndex] = args;
-        }
-    }
-    temps.push_back(make_pair(node, (int) workspace.size()));
-    workspace.push_back(0.0);
-}
-
-int CompiledExpression::findTempIndex(const ExpressionTreeNode& node, vector<pair<ExpressionTreeNode, int> >& temps) {
-    for (int i = 0; i < (int) temps.size(); i++)
-        if (temps[i].first == node)
-            return i;
-    return -1;
-}
-
-const set<string>& CompiledExpression::getVariables() const {
-    return variableNames;
-}
-
-double& CompiledExpression::getVariableReference(const string& name) {
-    map<string, double*>::iterator pointer = variablePointers.find(name);
-    if (pointer != variablePointers.end())
-        return *pointer->second;
-    map<string, int>::iterator index = variableIndices.find(name);
-    if (index == variableIndices.end())
-        throw Exception("getVariableReference: Unknown variable '"+name+"'");
-    return workspace[index->second];
-}
-
-void CompiledExpression::setVariableLocations(map<string, double*>& variableLocations) {
-    variablePointers = variableLocations;
-#ifdef LEPTON_USE_JIT
-    // Rebuild the JIT code.
-
-    if (workspace.size() > 0)
-        generateJitCode();
-#endif
-    // Make a list of all variables we will need to copy before evaluating the expression.
-
-    variablesToCopy.clear();
-    for (map<string, int>::const_iterator iter = variableIndices.begin(); iter != variableIndices.end(); ++iter) {
-        map<string, double*>::iterator pointer = variablePointers.find(iter->first);
-        if (pointer != variablePointers.end())
-            variablesToCopy.push_back(make_pair(&workspace[iter->second], pointer->second));
-    }
-}
-
-double CompiledExpression::evaluate() const {
-    if (jitCode)
-        return jitCode();
-    for (int i = 0; i < variablesToCopy.size(); i++)
-        *variablesToCopy[i].first = *variablesToCopy[i].second;
-
-    // Loop over the operations and evaluate each one.
-
-    for (int step = 0; step < operation.size(); step++) {
-        const vector<int>& args = arguments[step];
-        if (args.size() == 1)
-            workspace[target[step]] = operation[step]->evaluate(&workspace[args[0]], dummyVariables);
-        else {
-            for (int i = 0; i < args.size(); i++)
-                argValues[i] = workspace[args[i]];
-            workspace[target[step]] = operation[step]->evaluate(&argValues[0], dummyVariables);
-        }
-    }
-    return workspace[workspace.size()-1];
-}
-
-#ifdef LEPTON_USE_JIT
-static double evaluateOperation(Operation* op, double* args) {
-    static map<string, double> dummyVariables;
-    return op->evaluate(args, dummyVariables);
-}
-
-void CompiledExpression::findPowerGroups(vector<vector<int> >& groups, vector<vector<int> >& groupPowers, vector<int>& stepGroup) {
-    // Identify every step that raises an argument to an integer power.
-
-    vector<int> stepPower(operation.size(), 0);
-    vector<int> stepArg(operation.size(), -1);
-    for (int step = 0; step < operation.size(); step++) {
-        Operation& op = *operation[step];
-        int power = 0;
-        if (op.getId() == Operation::SQUARE)
-            power = 2;
-        else if (op.getId() == Operation::CUBE)
-            power = 3;
-        else if (op.getId() == Operation::POWER_CONSTANT) {
-            double realPower = dynamic_cast<const Operation::PowerConstant*>(&op)->getValue();
-            if (realPower == (int) realPower)
-                power = (int) realPower;
-        }
-        if (power != 0) {
-            stepPower[step] = power;
-            stepArg[step] = arguments[step][0];
-        }
-    }
-
-    // Find groups that operate on the same argument and whose powers have the same sign.
-
-    stepGroup.resize(operation.size(), -1);
-    for (int i = 0; i < operation.size(); i++) {
-        if (stepGroup[i] != -1)
-            continue;
-        vector<int> group, power;
-        for (int j = i; j < operation.size(); j++) {
-            if (stepArg[i] == stepArg[j] && stepPower[i]*stepPower[j] > 0) {
-                stepGroup[j] = groups.size();
-                group.push_back(j);
-                power.push_back(stepPower[j]);
-            }
-        }
-        groups.push_back(group);
-        groupPowers.push_back(power);
-    }
-}
-
-#if defined(__ARM__) || defined(__ARM64__)
-void CompiledExpression::generateJitCode() {
-    CodeHolder code;
-    code.init(runtime.environment());
-    a64::Compiler c(&code);
-    c.addFunc(FuncSignatureT<double>());
-    vector<arm::Vec> workspaceVar(workspace.size());
-    for (int i = 0; i < (int) workspaceVar.size(); i++)
-        workspaceVar[i] = c.newVecD();
-    arm::Gp argsPointer = c.newIntPtr();
-    c.mov(argsPointer, imm(&argValues[0]));
-    vector<vector<int> > groups, groupPowers;
-    vector<int> stepGroup;
-    findPowerGroups(groups, groupPowers, stepGroup);
-
-    // Load the arguments into variables.
-
-    for (set<string>::const_iterator iter = variableNames.begin(); iter != variableNames.end(); ++iter) {
-        map<string, int>::iterator index = variableIndices.find(*iter);
-        arm::Gp variablePointer = c.newIntPtr();
-        c.mov(variablePointer, imm(&getVariableReference(index->first)));
-        c.ldr(workspaceVar[index->second], arm::ptr(variablePointer, 0));
-    }
-
-    // Make a list of all constants that will be needed for evaluation.
-
-    vector<int> operationConstantIndex(operation.size(), -1);
-    for (int step = 0; step < (int) operation.size(); step++) {
-        // Find the constant value (if any) used by this operation.
-
-        Operation& op = *operation[step];
-        double value;
-        if (op.getId() == Operation::CONSTANT)
-            value = dynamic_cast<Operation::Constant&>(op).getValue();
-        else if (op.getId() == Operation::ADD_CONSTANT)
-            value = dynamic_cast<Operation::AddConstant&>(op).getValue();
-        else if (op.getId() == Operation::MULTIPLY_CONSTANT)
-            value = dynamic_cast<Operation::MultiplyConstant&>(op).getValue();
-        else if (op.getId() == Operation::RECIPROCAL)
-            value = 1.0;
-        else if (op.getId() == Operation::STEP)
-            value = 1.0;
-        else if (op.getId() == Operation::DELTA)
-            value = 1.0;
-        else if (op.getId() == Operation::POWER_CONSTANT) {
-            if (stepGroup[step] == -1)
-                value = dynamic_cast<Operation::PowerConstant&>(op).getValue();
-            else
-                value = 1.0;
-        }
-        else
-            continue;
-
-        // See if we already have a variable for this constant.
-
-        for (int i = 0; i < (int) constants.size(); i++)
-            if (value == constants[i]) {
-                operationConstantIndex[step] = i;
-                break;
-            }
-        if (operationConstantIndex[step] == -1) {
-            operationConstantIndex[step] = constants.size();
-            constants.push_back(value);
-        }
-    }
-
-    // Load constants into variables.
-
-    vector<arm::Vec> constantVar(constants.size());
-    if (constants.size() > 0) {
-        arm::Gp constantsPointer = c.newIntPtr();
-        c.mov(constantsPointer, imm(&constants[0]));
-        for (int i = 0; i < (int) constants.size(); i++) {
-            constantVar[i] = c.newVecD();
-            c.ldr(constantVar[i], arm::ptr(constantsPointer, 8*i));
-        }
-    }
-
-    // Evaluate the operations.
-
-    vector<bool> hasComputedPower(operation.size(), false);
-    for (int step = 0; step < (int) operation.size(); step++) {
-        if (hasComputedPower[step])
-            continue;
-
-        // When one or more steps involve raising the same argument to multiple integer
-        // powers, we can compute them all together for efficiency.
-
-        if (stepGroup[step] != -1) {
-            vector<int>& group = groups[stepGroup[step]];
-            vector<int>& powers = groupPowers[stepGroup[step]];
-            arm::Vec multiplier = c.newVecD();
-            if (powers[0] > 0)
-                c.fmov(multiplier, workspaceVar[arguments[step][0]]);
-            else {
-                c.fdiv(multiplier, constantVar[operationConstantIndex[step]], workspaceVar[arguments[step][0]]);
-                for (int i = 0; i < powers.size(); i++)
-                    powers[i] = -powers[i];
-            }
-            vector<bool> hasAssigned(group.size(), false);
-            bool done = false;
-            while (!done) {
-                done = true;
-                for (int i = 0; i < group.size(); i++) {
-                    if (powers[i]%2 == 1) {
-                        if (!hasAssigned[i])
-                            c.fmov(workspaceVar[target[group[i]]], multiplier);
-                        else
-                            c.fmul(workspaceVar[target[group[i]]], workspaceVar[target[group[i]]], multiplier);
-                        hasAssigned[i] = true;
-                    }
-                    powers[i] >>= 1;
-                    if (powers[i] != 0)
-                        done = false;
-                }
-                if (!done)
-                    c.fmul(multiplier, multiplier, multiplier);
-            }
-            for (int step : group)
-                hasComputedPower[step] = true;
-            continue;
-        }
-
-        // Evaluate the step.
-
-        Operation& op = *operation[step];
-        vector<int> args = arguments[step];
-        if (args.size() == 1) {
-            // One or more sequential arguments.  Fill out the list.
-
-            for (int i = 1; i < op.getNumArguments(); i++)
-                args.push_back(args[0]+i);
-        }
-
-        // Generate instructions to execute this operation.
-
-        switch (op.getId()) {
-            case Operation::CONSTANT:
-                c.fmov(workspaceVar[target[step]], constantVar[operationConstantIndex[step]]);
-                break;
-            case Operation::ADD:
-                c.fadd(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]]);
-                break;
-            case Operation::SUBTRACT:
-                c.fsub(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]]);
-                break;
-            case Operation::MULTIPLY:
-                c.fmul(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]]);
-                break;
-            case Operation::DIVIDE:
-                c.fdiv(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]]);
-                break;
-            case Operation::POWER:
-                generateTwoArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]], pow);
-                break;
-            case Operation::NEGATE:
-                c.fneg(workspaceVar[target[step]], workspaceVar[args[0]]);
-                break;
-            case Operation::SQRT:
-                c.fsqrt(workspaceVar[target[step]], workspaceVar[args[0]]);
-                break;
-            case Operation::EXP:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], exp);
-                break;
-            case Operation::LOG:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], log);
-                break;
-            case Operation::SIN:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], sin);
-                break;
-            case Operation::COS:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], cos);
-                break;
-            case Operation::TAN:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], tan);
-                break;
-            case Operation::ASIN:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], asin);
-                break;
-            case Operation::ACOS:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], acos);
-                break;
-            case Operation::ATAN:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], atan);
-                break;
-            case Operation::ATAN2:
-                generateTwoArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]], atan2);
-                break;
-            case Operation::SINH:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], sinh);
-                break;
-            case Operation::COSH:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], cosh);
-                break;
-            case Operation::TANH:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], tanh);
-                break;
-            case Operation::STEP:
-                c.cmge(workspaceVar[target[step]], workspaceVar[args[0]], imm(0));
-                c.and_(workspaceVar[target[step]], workspaceVar[target[step]], constantVar[operationConstantIndex[step]]);
-                break;
-            case Operation::DELTA:
-                c.cmeq(workspaceVar[target[step]], workspaceVar[args[0]], imm(0));
-                c.and_(workspaceVar[target[step]], workspaceVar[target[step]], constantVar[operationConstantIndex[step]]);
-                break;
-            case Operation::SQUARE:
-                c.fmul(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[0]]);
-                break;
-            case Operation::CUBE:
-                c.fmul(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[0]]);
-                c.fmul(workspaceVar[target[step]], workspaceVar[target[step]], workspaceVar[args[0]]);
-                break;
-            case Operation::RECIPROCAL:
-                c.fdiv(workspaceVar[target[step]], constantVar[operationConstantIndex[step]], workspaceVar[args[0]]);
-                break;
-            case Operation::ADD_CONSTANT:
-                c.fadd(workspaceVar[target[step]], workspaceVar[args[0]], constantVar[operationConstantIndex[step]]);
-                break;
-            case Operation::MULTIPLY_CONSTANT:
-                c.fmul(workspaceVar[target[step]], workspaceVar[args[0]], constantVar[operationConstantIndex[step]]);
-                break;
-            case Operation::POWER_CONSTANT:
-                generateTwoArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], constantVar[operationConstantIndex[step]], pow);
-                break;
-            case Operation::MIN:
-                c.fmin(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]]);
-                break;
-            case Operation::MAX:
-                c.fmax(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]]);
-                break;
-            case Operation::ABS:
-                c.fabs(workspaceVar[target[step]], workspaceVar[args[0]]);
-                break;
-            case Operation::FLOOR:
-                c.frintm(workspaceVar[target[step]], workspaceVar[args[0]]);
-                break;
-            case Operation::CEIL:
-                c.frintp(workspaceVar[target[step]], workspaceVar[args[0]]);
-                break;
-            case Operation::SELECT:
-                c.fcmeq(workspaceVar[target[step]], workspaceVar[args[0]], imm(0));
-                c.bsl(workspaceVar[target[step]], workspaceVar[args[2]], workspaceVar[args[1]]);
-                break;
-            default:
-                // Just invoke evaluateOperation().
-
-                for (int i = 0; i < (int) args.size(); i++)
-                    c.str(workspaceVar[args[i]], arm::ptr(argsPointer, 8*i));
-                arm::Gp fn = c.newIntPtr();
-                c.mov(fn, imm((void*) evaluateOperation));
-                InvokeNode* invoke;
-                c.invoke(&invoke, fn, FuncSignatureT<double, Operation*, double*>());
-                invoke->setArg(0, imm(&op));
-                invoke->setArg(1, imm(&argValues[0]));
-                invoke->setRet(0, workspaceVar[target[step]]);
-        }
-    }
-    c.ret(workspaceVar[workspace.size()-1]);
-    c.endFunc();
-    c.finalize();
-    runtime.add(&jitCode, &code);
-}
-
-void CompiledExpression::generateSingleArgCall(a64::Compiler& c, arm::Vec& dest, arm::Vec& arg, double (*function)(double)) {
-    arm::Gp fn = c.newIntPtr();
-    c.mov(fn, imm((void*) function));
-    InvokeNode* invoke;
-    c.invoke(&invoke, fn, FuncSignatureT<double, double>());
-    invoke->setArg(0, arg);
-    invoke->setRet(0, dest);
-}
-
-void CompiledExpression::generateTwoArgCall(a64::Compiler& c, arm::Vec& dest, arm::Vec& arg1, arm::Vec& arg2, double (*function)(double, double)) {
-    arm::Gp fn = c.newIntPtr();
-    c.mov(fn, imm((void*) function));
-    InvokeNode* invoke;
-    c.invoke(&invoke, fn, FuncSignatureT<double, double, double>());
-    invoke->setArg(0, arg1);
-    invoke->setArg(1, arg2);
-    invoke->setRet(0, dest);
-}
-#else
-void CompiledExpression::generateJitCode() {
-    const CpuInfo& cpu = CpuInfo::host();
-    if (!cpu.hasFeature(CpuFeatures::X86::kAVX))
-        return;
-    CodeHolder code;
-    code.init(runtime.environment());
-    x86::Compiler c(&code);
-    FuncNode* funcNode = c.addFunc(FuncSignatureT<double>());
-    funcNode->frame().setAvxEnabled();
-    vector<x86::Xmm> workspaceVar(workspace.size());
-    for (int i = 0; i < (int) workspaceVar.size(); i++)
-        workspaceVar[i] = c.newXmmSd();
-    x86::Gp argsPointer = c.newIntPtr();
-    c.mov(argsPointer, imm(&argValues[0]));
-    vector<vector<int> > groups, groupPowers;
-    vector<int> stepGroup;
-    findPowerGroups(groups, groupPowers, stepGroup);
-
-    // Load the arguments into variables.
-
-    x86::Gp variablePointer = c.newIntPtr();
-    for (set<string>::const_iterator iter = variableNames.begin(); iter != variableNames.end(); ++iter) {
-        map<string, int>::iterator index = variableIndices.find(*iter);
-        c.mov(variablePointer, imm(&getVariableReference(index->first)));
-        c.vmovsd(workspaceVar[index->second], x86::ptr(variablePointer, 0, 0));
-    }
-
-    // Make a list of all constants that will be needed for evaluation.
-
-    vector<int> operationConstantIndex(operation.size(), -1);
-    for (int step = 0; step < (int) operation.size(); step++) {
-        // Find the constant value (if any) used by this operation.
-
-        Operation& op = *operation[step];
-        double value;
-        if (op.getId() == Operation::CONSTANT)
-            value = dynamic_cast<Operation::Constant&>(op).getValue();
-        else if (op.getId() == Operation::ADD_CONSTANT)
-            value = dynamic_cast<Operation::AddConstant&>(op).getValue();
-        else if (op.getId() == Operation::MULTIPLY_CONSTANT)
-            value = dynamic_cast<Operation::MultiplyConstant&>(op).getValue();
-        else if (op.getId() == Operation::RECIPROCAL)
-            value = 1.0;
-        else if (op.getId() == Operation::STEP)
-            value = 1.0;
-        else if (op.getId() == Operation::DELTA)
-            value = 1.0;
-        else if (op.getId() == Operation::ABS) {
-            long long mask = 0x7FFFFFFFFFFFFFFF;
-            value = *reinterpret_cast<double*>(&mask);
-        }
-        else if (op.getId() == Operation::POWER_CONSTANT) {
-            if (stepGroup[step] == -1)
-                value = dynamic_cast<Operation::PowerConstant&>(op).getValue();
-            else
-                value = 1.0;
-        }
-        else
-            continue;
-
-        // See if we already have a variable for this constant.
-
-        for (int i = 0; i < (int) constants.size(); i++)
-            if (value == constants[i]) {
-                operationConstantIndex[step] = i;
-                break;
-            }
-        if (operationConstantIndex[step] == -1) {
-            operationConstantIndex[step] = constants.size();
-            constants.push_back(value);
-        }
-    }
-
-    // Load constants into variables.
-
-    vector<x86::Xmm> constantVar(constants.size());
-    if (constants.size() > 0) {
-        x86::Gp constantsPointer = c.newIntPtr();
-        c.mov(constantsPointer, imm(&constants[0]));
-        for (int i = 0; i < (int) constants.size(); i++) {
-            constantVar[i] = c.newXmmSd();
-            c.vmovsd(constantVar[i], x86::ptr(constantsPointer, 8*i, 0));
-        }
-    }
-
-    // Evaluate the operations.
-
-    vector<bool> hasComputedPower(operation.size(), false);
-    for (int step = 0; step < (int) operation.size(); step++) {
-        if (hasComputedPower[step])
-            continue;
-
-        // When one or more steps involve raising the same argument to multiple integer
-        // powers, we can compute them all together for efficiency.
-
-        if (stepGroup[step] != -1) {
-            vector<int>& group = groups[stepGroup[step]];
-            vector<int>& powers = groupPowers[stepGroup[step]];
-            x86::Xmm multiplier = c.newXmmSd();
-            if (powers[0] > 0)
-                c.vmovsd(multiplier, workspaceVar[arguments[step][0]], workspaceVar[arguments[step][0]]);
-            else {
-                c.vdivsd(multiplier, constantVar[operationConstantIndex[step]], workspaceVar[arguments[step][0]]);
-                for (int i = 0; i < powers.size(); i++)
-                    powers[i] = -powers[i];
-            }
-            vector<bool> hasAssigned(group.size(), false);
-            bool done = false;
-            while (!done) {
-                done = true;
-                for (int i = 0; i < group.size(); i++) {
-                    if (powers[i]%2 == 1) {
-                        if (!hasAssigned[i])
-                            c.vmovsd(workspaceVar[target[group[i]]], multiplier, multiplier);
-                        else
-                            c.vmulsd(workspaceVar[target[group[i]]], workspaceVar[target[group[i]]], multiplier);
-                        hasAssigned[i] = true;
-                    }
-                    powers[i] >>= 1;
-                    if (powers[i] != 0)
-                        done = false;
-                }
-                if (!done)
-                    c.vmulsd(multiplier, multiplier, multiplier);
-            }
-            for (int step : group)
-                hasComputedPower[step] = true;
-            continue;
-        }
-
-        // Evaluate the step.
-
-        Operation& op = *operation[step];
-        vector<int> args = arguments[step];
-        if (args.size() == 1) {
-            // One or more sequential arguments.  Fill out the list.
-
-            for (int i = 1; i < op.getNumArguments(); i++)
-                args.push_back(args[0]+i);
-        }
-
-        // Generate instructions to execute this operation.
-
-        switch (op.getId()) {
-            case Operation::CONSTANT:
-                c.vmovsd(workspaceVar[target[step]], constantVar[operationConstantIndex[step]], constantVar[operationConstantIndex[step]]);
-                break;
-            case Operation::ADD:
-                c.vaddsd(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]]);
-                break;
-            case Operation::SUBTRACT:
-                c.vsubsd(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]]);
-                break;
-            case Operation::MULTIPLY:
-                c.vmulsd(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]]);
-                break;
-            case Operation::DIVIDE:
-                c.vdivsd(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]]);
-                break;
-            case Operation::POWER:
-                generateTwoArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]], pow);
-                break;
-            case Operation::NEGATE:
-                c.vxorps(workspaceVar[target[step]], workspaceVar[target[step]], workspaceVar[target[step]]);
-                c.vsubsd(workspaceVar[target[step]], workspaceVar[target[step]], workspaceVar[args[0]]);
-                break;
-            case Operation::SQRT:
-                c.vsqrtsd(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[0]]);
-                break;
-            case Operation::EXP:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], exp);
-                break;
-            case Operation::LOG:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], log);
-                break;
-            case Operation::SIN:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], sin);
-                break;
-            case Operation::COS:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], cos);
-                break;
-            case Operation::TAN:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], tan);
-                break;
-            case Operation::ASIN:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], asin);
-                break;
-            case Operation::ACOS:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], acos);
-                break;
-            case Operation::ATAN:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], atan);
-                break;
-            case Operation::ATAN2:
-                generateTwoArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]], atan2);
-                break;
-            case Operation::SINH:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], sinh);
-                break;
-            case Operation::COSH:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], cosh);
-                break;
-            case Operation::TANH:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], tanh);
-                break;
-            case Operation::STEP:
-                c.vxorps(workspaceVar[target[step]], workspaceVar[target[step]], workspaceVar[target[step]]);
-                c.vcmpsd(workspaceVar[target[step]], workspaceVar[target[step]], workspaceVar[args[0]], imm(18)); // Comparison mode is _CMP_LE_OQ = 18
-                c.vandps(workspaceVar[target[step]], workspaceVar[target[step]], constantVar[operationConstantIndex[step]]);
-                break;
-            case Operation::DELTA:
-                c.vxorps(workspaceVar[target[step]], workspaceVar[target[step]], workspaceVar[target[step]]);
-                c.vcmpsd(workspaceVar[target[step]], workspaceVar[target[step]], workspaceVar[args[0]], imm(16)); // Comparison mode is _CMP_EQ_OS = 16
-                c.vandps(workspaceVar[target[step]], workspaceVar[target[step]], constantVar[operationConstantIndex[step]]);
-                break;
-            case Operation::SQUARE:
-                c.vmulsd(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[0]]);
-                break;
-            case Operation::CUBE:
-                c.vmulsd(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[0]]);
-                c.vmulsd(workspaceVar[target[step]], workspaceVar[target[step]], workspaceVar[args[0]]);
-                break;
-            case Operation::RECIPROCAL:
-                c.vdivsd(workspaceVar[target[step]], constantVar[operationConstantIndex[step]], workspaceVar[args[0]]);
-                break;
-            case Operation::ADD_CONSTANT:
-                c.vaddsd(workspaceVar[target[step]], workspaceVar[args[0]], constantVar[operationConstantIndex[step]]);
-                break;
-            case Operation::MULTIPLY_CONSTANT:
-                c.vmulsd(workspaceVar[target[step]], workspaceVar[args[0]], constantVar[operationConstantIndex[step]]);
-                break;
-            case Operation::POWER_CONSTANT:
-                generateTwoArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], constantVar[operationConstantIndex[step]], pow);
-                break;
-            case Operation::MIN:
-                c.vminsd(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]]);
-                break;
-            case Operation::MAX:
-                c.vmaxsd(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]]);
-                break;
-            case Operation::ABS:
-                c.vandpd(workspaceVar[target[step]], workspaceVar[args[0]], constantVar[operationConstantIndex[step]]);
-                break;
-            case Operation::FLOOR:
-                c.vroundsd(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[0]], imm(1));
-                break;
-            case Operation::CEIL:
-                c.vroundsd(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[0]], imm(2));
-                break;
-            case Operation::SELECT:
-            {
-                x86::Xmm mask = c.newXmmSd();
-                c.vxorps(mask, mask, mask);
-                c.vcmpsd(mask, mask, workspaceVar[args[0]], imm(0)); // Comparison mode is _CMP_EQ_OQ = 0
-                c.vblendvps(workspaceVar[target[step]], workspaceVar[args[1]], workspaceVar[args[2]], mask);
-                break;
-            }
-            default:
-                // Just invoke evaluateOperation().
-
-                for (int i = 0; i < (int) args.size(); i++)
-                    c.vmovsd(x86::ptr(argsPointer, 8*i, 0), workspaceVar[args[i]]);
-                x86::Gp fn = c.newIntPtr();
-                c.mov(fn, imm((void*) evaluateOperation));
-                InvokeNode* invoke;
-                c.invoke(&invoke, fn, FuncSignatureT<double, Operation*, double*>());
-                invoke->setArg(0, imm(&op));
-                invoke->setArg(1, imm(&argValues[0]));
-                invoke->setRet(0, workspaceVar[target[step]]);
-        }
-    }
-    c.ret(workspaceVar[workspace.size()-1]);
-    c.endFunc();
-    c.finalize();
-    runtime.add(&jitCode, &code);
-}
-
-void CompiledExpression::generateSingleArgCall(x86::Compiler& c, x86::Xmm& dest, x86::Xmm& arg, double (*function)(double)) {
-    x86::Gp fn = c.newIntPtr();
-    c.mov(fn, imm((void*) function));
-    InvokeNode* invoke;
-    c.invoke(&invoke, fn, FuncSignatureT<double, double>());
-    invoke->setArg(0, arg);
-    invoke->setRet(0, dest);
-}
-
-void CompiledExpression::generateTwoArgCall(x86::Compiler& c, x86::Xmm& dest, x86::Xmm& arg1, x86::Xmm& arg2, double (*function)(double, double)) {
-    x86::Gp fn = c.newIntPtr();
-    c.mov(fn, imm((void*) function));
-    InvokeNode* invoke;
-    c.invoke(&invoke, fn, FuncSignatureT<double, double, double>());
-    invoke->setArg(0, arg1);
-    invoke->setArg(1, arg2);
-    invoke->setRet(0, dest);
-}
-#endif
-#endif
diff --git a/lib/colvars/lepton/src/CompiledVectorExpression.cpp b/lib/colvars/lepton/src/CompiledVectorExpression.cpp
deleted file mode 100644
index 7c01a986bb..0000000000
--- a/lib/colvars/lepton/src/CompiledVectorExpression.cpp
+++ /dev/null
@@ -1,933 +0,0 @@
-/* -------------------------------------------------------------------------- *
- *                                   Lepton                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the Lepton expression parser originating from              *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2013-2022 Stanford University and the Authors.      *
- * Authors: Peter Eastman                                                     *
- * Contributors:                                                              *
- *                                                                            *
- * Permission is hereby granted, free of charge, to any person obtaining a    *
- * copy of this software and associated documentation files (the "Software"), *
- * to deal in the Software without restriction, including without limitation  *
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
- * and/or sell copies of the Software, and to permit persons to whom the      *
- * Software is furnished to do so, subject to the following conditions:       *
- *                                                                            *
- * The above copyright notice and this permission notice shall be included in *
- * all copies or substantial portions of the Software.                        *
- *                                                                            *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
- * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
- * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
- * -------------------------------------------------------------------------- */
-
-#include "lepton/CompiledVectorExpression.h"
-#include "lepton/Operation.h"
-#include "lepton/ParsedExpression.h"
-#include <algorithm>
-#include <utility>
-
-using namespace Lepton;
-using namespace std;
-#ifdef LEPTON_USE_JIT
-using namespace asmjit;
-#endif
-
-CompiledVectorExpression::CompiledVectorExpression() : jitCode(NULL) {
-}
-
-CompiledVectorExpression::CompiledVectorExpression(const ParsedExpression& expression, int width) : jitCode(NULL), width(width) {
-    const vector<int> allowedWidths = getAllowedWidths();
-    if (find(allowedWidths.begin(), allowedWidths.end(), width) == allowedWidths.end())
-        throw Exception("Unsupported width for vector expression: "+to_string(width));
-    ParsedExpression expr = expression.optimize(); // Just in case it wasn't already optimized.
-    vector<pair<ExpressionTreeNode, int> > temps;
-    int workspaceSize = 0;
-    compileExpression(expr.getRootNode(), temps, workspaceSize);
-    workspace.resize(workspaceSize*width);
-    int maxArguments = 1;
-    for (int i = 0; i < (int) operation.size(); i++)
-        if (operation[i]->getNumArguments() > maxArguments)
-            maxArguments = operation[i]->getNumArguments();
-    argValues.resize(maxArguments);
-#ifdef LEPTON_USE_JIT
-    generateJitCode();
-#endif
-}
-
-CompiledVectorExpression::~CompiledVectorExpression() {
-    for (int i = 0; i < (int) operation.size(); i++)
-        if (operation[i] != NULL)
-            delete operation[i];
-}
-
-CompiledVectorExpression::CompiledVectorExpression(const CompiledVectorExpression& expression) : jitCode(NULL) {
-    *this = expression;
-}
-
-CompiledVectorExpression& CompiledVectorExpression::operator=(const CompiledVectorExpression& expression) {
-    arguments = expression.arguments;
-    width = expression.width;
-    target = expression.target;
-    variableIndices = expression.variableIndices;
-    variableNames = expression.variableNames;
-    workspace.resize(expression.workspace.size());
-    argValues.resize(expression.argValues.size());
-    operation.resize(expression.operation.size());
-    for (int i = 0; i < (int) operation.size(); i++)
-        operation[i] = expression.operation[i]->clone();
-    setVariableLocations(variablePointers);
-    return *this;
-}
-
-const vector<int>& CompiledVectorExpression::getAllowedWidths() {
-    static vector<int> widths;
-    if (widths.size() == 0) {
-        widths.push_back(4);
-#ifdef LEPTON_USE_JIT
-        const CpuInfo& cpu = CpuInfo::host();
-        if (cpu.hasFeature(CpuFeatures::X86::kAVX))
-            widths.push_back(8);
-#endif
-    }
-    return widths;
-}
-
-void CompiledVectorExpression::compileExpression(const ExpressionTreeNode& node, vector<pair<ExpressionTreeNode, int> >& temps, int& workspaceSize) {
-    if (findTempIndex(node, temps) != -1)
-        return; // We have already processed a node identical to this one.
-
-    // Process the child nodes.
-
-    vector<int> args;
-    for (int i = 0; i < node.getChildren().size(); i++) {
-        compileExpression(node.getChildren()[i], temps, workspaceSize);
-        args.push_back(findTempIndex(node.getChildren()[i], temps));
-    }
-
-    // Process this node.
-
-    if (node.getOperation().getId() == Operation::VARIABLE) {
-        variableIndices[node.getOperation().getName()] = workspaceSize;
-        variableNames.insert(node.getOperation().getName());
-    }
-    else {
-        int stepIndex = (int) arguments.size();
-        arguments.push_back(vector<int>());
-        target.push_back(workspaceSize);
-        operation.push_back(node.getOperation().clone());
-        if (args.size() == 0)
-            arguments[stepIndex].push_back(0); // The value won't actually be used.  We just need something there.
-        else {
-            // If the arguments are sequential, we can just pass a pointer to the first one.
-
-            bool sequential = true;
-            for (int i = 1; i < args.size(); i++)
-                if (args[i] != args[i - 1] + 1)
-                    sequential = false;
-            if (sequential)
-                arguments[stepIndex].push_back(args[0]);
-            else
-                arguments[stepIndex] = args;
-        }
-    }
-    temps.push_back(make_pair(node, workspaceSize));
-    workspaceSize++;
-}
-
-int CompiledVectorExpression::findTempIndex(const ExpressionTreeNode& node, vector<pair<ExpressionTreeNode, int> >& temps) {
-    for (int i = 0; i < (int) temps.size(); i++)
-        if (temps[i].first == node)
-            return i;
-    return -1;
-}
-
-int CompiledVectorExpression::getWidth() const {
-    return width;
-}
-
-const set<string>& CompiledVectorExpression::getVariables() const {
-    return variableNames;
-}
-
-float* CompiledVectorExpression::getVariablePointer(const string& name) {
-    map<string, float*>::iterator pointer = variablePointers.find(name);
-    if (pointer != variablePointers.end())
-        return pointer->second;
-    map<string, int>::iterator index = variableIndices.find(name);
-    if (index == variableIndices.end())
-        throw Exception("getVariableReference: Unknown variable '" + name + "'");
-    return &workspace[index->second*width];
-}
-
-void CompiledVectorExpression::setVariableLocations(map<string, float*>& variableLocations) {
-    variablePointers = variableLocations;
-#ifdef LEPTON_USE_JIT
-    // Rebuild the JIT code.
-
-    if (workspace.size() > 0)
-        generateJitCode();
-#endif
-    // Make a list of all variables we will need to copy before evaluating the expression.
-
-    variablesToCopy.clear();
-    for (map<string, int>::const_iterator iter = variableIndices.begin(); iter != variableIndices.end(); ++iter) {
-        map<string, float*>::iterator pointer = variablePointers.find(iter->first);
-        if (pointer != variablePointers.end())
-            variablesToCopy.push_back(make_pair(&workspace[iter->second*width], pointer->second));
-    }
-}
-
-const float* CompiledVectorExpression::evaluate() const {
-    if (jitCode) {
-        jitCode();
-        return &workspace[workspace.size()-width];
-    }
-    for (int i = 0; i < variablesToCopy.size(); i++)
-        for (int j = 0; j < width; j++)
-            variablesToCopy[i].first[j] = variablesToCopy[i].second[j];
-
-    // Loop over the operations and evaluate each one.
-
-    for (int step = 0; step < operation.size(); step++) {
-        const vector<int>& args = arguments[step];
-        if (args.size() == 1) {
-            for (int j = 0; j < width; j++) {
-                for (int i = 0; i < operation[step]->getNumArguments(); i++)
-                    argValues[i] = workspace[(args[0]+i)*width+j];
-                workspace[target[step]*width+j] = operation[step]->evaluate(&argValues[0], dummyVariables);
-            }
-        } else {
-            for (int j = 0; j < width; j++) {
-                for (int i = 0; i < args.size(); i++)
-                    argValues[i] = workspace[args[i]*width+j];
-                workspace[target[step]*width+j] = operation[step]->evaluate(&argValues[0], dummyVariables);
-            }
-        }
-    }
-    return &workspace[workspace.size()-width];
-}
-
-#ifdef LEPTON_USE_JIT
-
-static double evaluateOperation(Operation* op, double* args) {
-    static map<string, double> dummyVariables;
-    return op->evaluate(args, dummyVariables);
-}
-
-void CompiledVectorExpression::findPowerGroups(vector<vector<int> >& groups, vector<vector<int> >& groupPowers, vector<int>& stepGroup) {
-    // Identify every step that raises an argument to an integer power.
-
-    vector<int> stepPower(operation.size(), 0);
-    vector<int> stepArg(operation.size(), -1);
-    for (int step = 0; step < operation.size(); step++) {
-        Operation& op = *operation[step];
-        int power = 0;
-        if (op.getId() == Operation::SQUARE)
-            power = 2;
-        else if (op.getId() == Operation::CUBE)
-            power = 3;
-        else if (op.getId() == Operation::POWER_CONSTANT) {
-            double realPower = dynamic_cast<const Operation::PowerConstant*> (&op)->getValue();
-            if (realPower == (int) realPower)
-                power = (int) realPower;
-        }
-        if (power != 0) {
-            stepPower[step] = power;
-            stepArg[step] = arguments[step][0];
-        }
-    }
-
-    // Find groups that operate on the same argument and whose powers have the same sign.
-
-    stepGroup.resize(operation.size(), -1);
-    for (int i = 0; i < operation.size(); i++) {
-        if (stepGroup[i] != -1)
-            continue;
-        vector<int> group, power;
-        for (int j = i; j < operation.size(); j++) {
-            if (stepArg[i] == stepArg[j] && stepPower[i] * stepPower[j] > 0) {
-                stepGroup[j] = groups.size();
-                group.push_back(j);
-                power.push_back(stepPower[j]);
-            }
-        }
-        groups.push_back(group);
-        groupPowers.push_back(power);
-    }
-}
-
-#if defined(__ARM__) || defined(__ARM64__)
-
-void CompiledVectorExpression::generateJitCode() {
-    CodeHolder code;
-    code.init(runtime.environment());
-    a64::Compiler c(&code);
-    c.addFunc(FuncSignatureT<void>());
-    vector<arm::Vec> workspaceVar(workspace.size()/width);
-    for (int i = 0; i < (int) workspaceVar.size(); i++)
-        workspaceVar[i] = c.newVecQ();
-    arm::Gp argsPointer = c.newIntPtr();
-    c.mov(argsPointer, imm(&argValues[0]));
-    vector<vector<int> > groups, groupPowers;
-    vector<int> stepGroup;
-    findPowerGroups(groups, groupPowers, stepGroup);
-
-    // Load the arguments into variables.
-
-    arm::Gp variablePointer = c.newIntPtr();
-    for (set<string>::const_iterator iter = variableNames.begin(); iter != variableNames.end(); ++iter) {
-        map<string, int>::iterator index = variableIndices.find(*iter);
-        c.mov(variablePointer, imm(getVariablePointer(index->first)));
-        c.ldr(workspaceVar[index->second].s4(), arm::ptr(variablePointer, 0));
-    }
-
-    // Make a list of all constants that will be needed for evaluation.
-
-    vector<int> operationConstantIndex(operation.size(), -1);
-    for (int step = 0; step < (int) operation.size(); step++) {
-        // Find the constant value (if any) used by this operation.
-
-        Operation& op = *operation[step];
-        float value;
-        if (op.getId() == Operation::CONSTANT)
-            value = dynamic_cast<Operation::Constant&> (op).getValue();
-        else if (op.getId() == Operation::ADD_CONSTANT)
-            value = dynamic_cast<Operation::AddConstant&> (op).getValue();
-        else if (op.getId() == Operation::MULTIPLY_CONSTANT)
-            value = dynamic_cast<Operation::MultiplyConstant&> (op).getValue();
-        else if (op.getId() == Operation::RECIPROCAL)
-            value = 1.0;
-        else if (op.getId() == Operation::STEP)
-            value = 1.0;
-        else if (op.getId() == Operation::DELTA)
-            value = 1.0;
-        else if (op.getId() == Operation::POWER_CONSTANT) {
-            if (stepGroup[step] == -1)
-                value = dynamic_cast<Operation::PowerConstant&> (op).getValue();
-            else
-                value = 1.0;
-        } else
-            continue;
-
-        // See if we already have a variable for this constant.
-
-        for (int i = 0; i < (int) constants.size(); i++)
-            if (value == constants[i]) {
-                operationConstantIndex[step] = i;
-                break;
-            }
-        if (operationConstantIndex[step] == -1) {
-            operationConstantIndex[step] = constants.size();
-            constants.push_back(value);
-        }
-    }
-
-    // Load constants into variables.
-
-    vector<arm::Vec> constantVar(constants.size());
-    if (constants.size() > 0) {
-        arm::Gp constantsPointer = c.newIntPtr();
-        for (int i = 0; i < (int) constants.size(); i++) {
-            c.mov(constantsPointer, imm(&constants[i]));
-            constantVar[i] = c.newVecQ();
-            c.ld1r(constantVar[i].s4(), arm::ptr(constantsPointer));
-        }
-    }
-
-    // Evaluate the operations.
-
-    vector<bool> hasComputedPower(operation.size(), false);
-    arm::Vec argReg = c.newVecS();
-    arm::Vec doubleArgReg = c.newVecD();
-    arm::Vec doubleResultReg = c.newVecD();
-    for (int step = 0; step < (int) operation.size(); step++) {
-        if (hasComputedPower[step])
-            continue;
-
-        // When one or more steps involve raising the same argument to multiple integer
-        // powers, we can compute them all together for efficiency.
-
-        if (stepGroup[step] != -1) {
-            vector<int>& group = groups[stepGroup[step]];
-            vector<int>& powers = groupPowers[stepGroup[step]];
-            arm::Vec multiplier = c.newVecQ();
-            if (powers[0] > 0)
-                c.mov(multiplier.s4(), workspaceVar[arguments[step][0]].s4());
-            else {
-                c.fdiv(multiplier.s4(), constantVar[operationConstantIndex[step]].s4(), workspaceVar[arguments[step][0]].s4());
-                for (int i = 0; i < powers.size(); i++)
-                    powers[i] = -powers[i];
-            }
-            vector<bool> hasAssigned(group.size(), false);
-            bool done = false;
-            while (!done) {
-                done = true;
-                for (int i = 0; i < group.size(); i++) {
-                    if (powers[i] % 2 == 1) {
-                        if (!hasAssigned[i])
-                            c.mov(workspaceVar[target[group[i]]].s4(), multiplier.s4());
-                        else
-                            c.fmul(workspaceVar[target[group[i]]].s4(), workspaceVar[target[group[i]]].s4(), multiplier.s4());
-                        hasAssigned[i] = true;
-                    }
-                    powers[i] >>= 1;
-                    if (powers[i] != 0)
-                        done = false;
-                }
-                if (!done)
-                    c.fmul(multiplier.s4(), multiplier.s4(), multiplier.s4());
-            }
-            for (int step : group)
-                hasComputedPower[step] = true;
-            continue;
-        }
-
-        // Evaluate the step.
-
-        Operation& op = *operation[step];
-        vector<int> args = arguments[step];
-        if (args.size() == 1) {
-            // One or more sequential arguments.  Fill out the list.
-
-            for (int i = 1; i < op.getNumArguments(); i++)
-                args.push_back(args[0] + i);
-        }
-
-        // Generate instructions to execute this operation.
-
-        switch (op.getId()) {
-            case Operation::CONSTANT:
-                c.mov(workspaceVar[target[step]].s4(), constantVar[operationConstantIndex[step]].s4());
-                break;
-            case Operation::ADD:
-                c.fadd(workspaceVar[target[step]].s4(), workspaceVar[args[0]].s4(), workspaceVar[args[1]].s4());
-                break;
-            case Operation::SUBTRACT:
-                c.fsub(workspaceVar[target[step]].s4(), workspaceVar[args[0]].s4(), workspaceVar[args[1]].s4());
-                break;
-            case Operation::MULTIPLY:
-                c.fmul(workspaceVar[target[step]].s4(), workspaceVar[args[0]].s4(), workspaceVar[args[1]].s4());
-                break;
-            case Operation::DIVIDE:
-                c.fdiv(workspaceVar[target[step]].s4(), workspaceVar[args[0]].s4(), workspaceVar[args[1]].s4());
-                break;
-            case Operation::POWER:
-                generateTwoArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]], powf);
-                break;
-            case Operation::NEGATE:
-                c.fneg(workspaceVar[target[step]].s4(), workspaceVar[args[0]].s4());
-                break;
-            case Operation::SQRT:
-                c.fsqrt(workspaceVar[target[step]].s4(), workspaceVar[args[0]].s4());
-                break;
-            case Operation::EXP:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], expf);
-                break;
-            case Operation::LOG:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], logf);
-                break;
-            case Operation::SIN:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], sinf);
-                break;
-            case Operation::COS:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], cosf);
-                break;
-            case Operation::TAN:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], tanf);
-                break;
-            case Operation::ASIN:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], asinf);
-                break;
-            case Operation::ACOS:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], acosf);
-                break;
-            case Operation::ATAN:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], atanf);
-                break;
-            case Operation::ATAN2:
-                generateTwoArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]], atan2f);
-                break;
-            case Operation::SINH:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], sinhf);
-                break;
-            case Operation::COSH:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], coshf);
-                break;
-            case Operation::TANH:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], tanhf);
-                break;
-            case Operation::STEP:
-                c.cmge(workspaceVar[target[step]].s4(), workspaceVar[args[0]].s4(), imm(0));
-                c.and_(workspaceVar[target[step]], workspaceVar[target[step]], constantVar[operationConstantIndex[step]]);
-                break;
-            case Operation::DELTA:
-                c.cmeq(workspaceVar[target[step]].s4(), workspaceVar[args[0]].s4(), imm(0));
-                c.and_(workspaceVar[target[step]], workspaceVar[target[step]], constantVar[operationConstantIndex[step]]);
-                break;
-            case Operation::SQUARE:
-                c.fmul(workspaceVar[target[step]].s4(), workspaceVar[args[0]].s4(), workspaceVar[args[0]].s4());
-                break;
-            case Operation::CUBE:
-                c.fmul(workspaceVar[target[step]].s4(), workspaceVar[args[0]].s4(), workspaceVar[args[0]].s4());
-                c.fmul(workspaceVar[target[step]].s4(), workspaceVar[target[step]].s4(), workspaceVar[args[0]].s4());
-                break;
-            case Operation::RECIPROCAL:
-                c.fdiv(workspaceVar[target[step]].s4(), constantVar[operationConstantIndex[step]].s4(), workspaceVar[args[0]].s4());
-                break;
-            case Operation::ADD_CONSTANT:
-                c.fadd(workspaceVar[target[step]].s4(), workspaceVar[args[0]].s4(), constantVar[operationConstantIndex[step]].s4());
-                break;
-            case Operation::MULTIPLY_CONSTANT:
-                c.fmul(workspaceVar[target[step]].s4(), workspaceVar[args[0]].s4(), constantVar[operationConstantIndex[step]].s4());
-                break;
-            case Operation::POWER_CONSTANT:
-                generateTwoArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], constantVar[operationConstantIndex[step]], powf);
-                break;
-            case Operation::MIN:
-                c.fmin(workspaceVar[target[step]].s4(), workspaceVar[args[0]].s4(), workspaceVar[args[1]].s4());
-                break;
-            case Operation::MAX:
-                c.fmax(workspaceVar[target[step]].s4(), workspaceVar[args[0]].s4(), workspaceVar[args[1]].s4());
-                break;
-            case Operation::ABS:
-                c.fabs(workspaceVar[target[step]].s4(), workspaceVar[args[0]].s4());
-                break;
-            case Operation::FLOOR:
-                c.frintm(workspaceVar[target[step]].s4(), workspaceVar[args[0]].s4());
-                break;
-            case Operation::CEIL:
-                c.frintp(workspaceVar[target[step]].s4(), workspaceVar[args[0]].s4());
-                break;
-            case Operation::SELECT:
-                c.fcmeq(workspaceVar[target[step]].s4(), workspaceVar[args[0]].s4(), imm(0));
-                c.bsl(workspaceVar[target[step]], workspaceVar[args[2]], workspaceVar[args[1]]);
-                break;
-            default:
-                // Just invoke evaluateOperation().
-                for (int element = 0; element < width; element++) {
-                    for (int i = 0; i < (int) args.size(); i++) {
-                        c.ins(argReg.s(0), workspaceVar[args[i]].s(element));
-                        c.fcvt(doubleArgReg, argReg);
-                        c.str(doubleArgReg, arm::ptr(argsPointer, 8*i));
-                    }
-                    arm::Gp fn = c.newIntPtr();
-                    c.mov(fn, imm((void*) evaluateOperation));
-                    InvokeNode* invoke;
-                    c.invoke(&invoke, fn, FuncSignatureT<double, Operation*, double*>());
-                    invoke->setArg(0, imm(&op));
-                    invoke->setArg(1, imm(&argValues[0]));
-                    invoke->setRet(0, doubleResultReg);
-                    c.fcvt(argReg, doubleResultReg);
-                    c.ins(workspaceVar[target[step]].s(element), argReg.s(0));
-                }
-        }
-    }
-    arm::Gp resultPointer = c.newIntPtr();
-    c.mov(resultPointer, imm(&workspace[workspace.size()-width]));
-    c.str(workspaceVar.back().s4(), arm::ptr(resultPointer, 0));
-    c.endFunc();
-    c.finalize();
-    runtime.add(&jitCode, &code);
-}
-
-void CompiledVectorExpression::generateSingleArgCall(a64::Compiler& c, arm::Vec& dest, arm::Vec& arg, float (*function)(float)) {
-    arm::Gp fn = c.newIntPtr();
-    c.mov(fn, imm((void*) function));
-    arm::Vec a = c.newVecS();
-    arm::Vec d = c.newVecS();
-    for (int element = 0; element < width; element++) {
-        c.ins(a.s(0), arg.s(element));
-        InvokeNode* invoke;
-        c.invoke(&invoke, fn, FuncSignatureT<float, float>());
-        invoke->setArg(0, a);
-        invoke->setRet(0, d);
-        c.ins(dest.s(element), d.s(0));
-    }
-}
-
-void CompiledVectorExpression::generateTwoArgCall(a64::Compiler& c, arm::Vec& dest, arm::Vec& arg1, arm::Vec& arg2, float (*function)(float, float)) {
-    arm::Gp fn = c.newIntPtr();
-    c.mov(fn, imm((void*) function));
-    arm::Vec a1 = c.newVecS();
-    arm::Vec a2 = c.newVecS();
-    arm::Vec d = c.newVecS();
-    for (int element = 0; element < width; element++) {
-        c.ins(a1.s(0), arg1.s(element));
-        c.ins(a2.s(0), arg2.s(element));
-        InvokeNode* invoke;
-        c.invoke(&invoke, fn, FuncSignatureT<float, float, float>());
-        invoke->setArg(0, a1);
-        invoke->setArg(1, a2);
-        invoke->setRet(0, d);
-        c.ins(dest.s(element), d.s(0));
-    }
-}
-#else
-
-void CompiledVectorExpression::generateJitCode() {
-    const CpuInfo& cpu = CpuInfo::host();
-    if (!cpu.hasFeature(CpuFeatures::X86::kAVX))
-        return;
-    CodeHolder code;
-    code.init(runtime.environment());
-    x86::Compiler c(&code);
-    FuncNode* funcNode = c.addFunc(FuncSignatureT<void>());
-    funcNode->frame().setAvxEnabled();
-    vector<x86::Ymm> workspaceVar(workspace.size()/width);
-    for (int i = 0; i < (int) workspaceVar.size(); i++)
-        workspaceVar[i] = c.newYmmPs();
-    x86::Gp argsPointer = c.newIntPtr();
-    c.mov(argsPointer, imm(&argValues[0]));
-    vector<vector<int> > groups, groupPowers;
-    vector<int> stepGroup;
-    findPowerGroups(groups, groupPowers, stepGroup);
-
-    // Load the arguments into variables.
-
-    for (set<string>::const_iterator iter = variableNames.begin(); iter != variableNames.end(); ++iter) {
-        map<string, int>::iterator index = variableIndices.find(*iter);
-        x86::Gp variablePointer = c.newIntPtr();
-        c.mov(variablePointer, imm(getVariablePointer(index->first)));
-        if (width == 4)
-            c.vmovdqu(workspaceVar[index->second].xmm(), x86::ptr(variablePointer, 0, 0));
-        else
-            c.vmovdqu(workspaceVar[index->second], x86::ptr(variablePointer, 0, 0));
-    }
-
-    // Make a list of all constants that will be needed for evaluation.
-
-    vector<int> operationConstantIndex(operation.size(), -1);
-    for (int step = 0; step < (int) operation.size(); step++) {
-        // Find the constant value (if any) used by this operation.
-
-        Operation& op = *operation[step];
-        double value;
-        if (op.getId() == Operation::CONSTANT)
-            value = dynamic_cast<Operation::Constant&> (op).getValue();
-        else if (op.getId() == Operation::ADD_CONSTANT)
-            value = dynamic_cast<Operation::AddConstant&> (op).getValue();
-        else if (op.getId() == Operation::MULTIPLY_CONSTANT)
-            value = dynamic_cast<Operation::MultiplyConstant&> (op).getValue();
-        else if (op.getId() == Operation::RECIPROCAL)
-            value = 1.0;
-        else if (op.getId() == Operation::STEP)
-            value = 1.0;
-        else if (op.getId() == Operation::DELTA)
-            value = 1.0;
-        else if (op.getId() == Operation::ABS) {
-            int mask = 0x7FFFFFFF;
-            value = *reinterpret_cast<float*>(&mask);
-        }
-        else if (op.getId() == Operation::POWER_CONSTANT) {
-            if (stepGroup[step] == -1)
-                value = dynamic_cast<Operation::PowerConstant&> (op).getValue();
-            else
-                value = 1.0;
-        } else
-            continue;
-
-        // See if we already have a variable for this constant.
-
-        for (int i = 0; i < (int) constants.size(); i++)
-            if (value == constants[i]) {
-                operationConstantIndex[step] = i;
-                break;
-            }
-        if (operationConstantIndex[step] == -1) {
-            operationConstantIndex[step] = constants.size();
-            constants.push_back(value);
-        }
-    }
-
-    // Load constants into variables.
-
-    vector<x86::Ymm> constantVar(constants.size());
-    if (constants.size() > 0) {
-        x86::Gp constantsPointer = c.newIntPtr();
-        c.mov(constantsPointer, imm(&constants[0]));
-        for (int i = 0; i < (int) constants.size(); i++) {
-            constantVar[i] = c.newYmmPs();
-            c.vbroadcastss(constantVar[i], x86::ptr(constantsPointer, 4*i, 0));
-        }
-    }
-
-    // Evaluate the operations.
-
-    vector<bool> hasComputedPower(operation.size(), false);
-    x86::Ymm argReg = c.newYmm();
-    x86::Ymm doubleArgReg = c.newYmm();
-    x86::Ymm doubleResultReg = c.newYmm();
-    for (int step = 0; step < (int) operation.size(); step++) {
-        if (hasComputedPower[step])
-            continue;
-
-        // When one or more steps involve raising the same argument to multiple integer
-        // powers, we can compute them all together for efficiency.
-
-        if (stepGroup[step] != -1) {
-            vector<int>& group = groups[stepGroup[step]];
-            vector<int>& powers = groupPowers[stepGroup[step]];
-            x86::Ymm multiplier = c.newYmmPs();
-            if (powers[0] > 0)
-                c.vmovdqu(multiplier, workspaceVar[arguments[step][0]]);
-            else {
-                c.vdivps(multiplier, constantVar[operationConstantIndex[step]], workspaceVar[arguments[step][0]]);
-                for (int i = 0; i < powers.size(); i++)
-                    powers[i] = -powers[i];
-            }
-            vector<bool> hasAssigned(group.size(), false);
-            bool done = false;
-            while (!done) {
-                done = true;
-                for (int i = 0; i < group.size(); i++) {
-                    if (powers[i] % 2 == 1) {
-                        if (!hasAssigned[i])
-                            c.vmovdqu(workspaceVar[target[group[i]]], multiplier);
-                        else
-                            c.vmulps(workspaceVar[target[group[i]]], workspaceVar[target[group[i]]], multiplier);
-                        hasAssigned[i] = true;
-                    }
-                    powers[i] >>= 1;
-                    if (powers[i] != 0)
-                        done = false;
-                }
-                if (!done)
-                    c.vmulps(multiplier, multiplier, multiplier);
-            }
-            for (int step : group)
-                hasComputedPower[step] = true;
-            continue;
-        }
-
-        // Evaluate the step.
-
-        Operation& op = *operation[step];
-        vector<int> args = arguments[step];
-        if (args.size() == 1) {
-            // One or more sequential arguments.  Fill out the list.
-
-            for (int i = 1; i < op.getNumArguments(); i++)
-                args.push_back(args[0] + i);
-        }
-
-        // Generate instructions to execute this operation.
-
-        switch (op.getId()) {
-            case Operation::CONSTANT:
-                c.vmovdqu(workspaceVar[target[step]], constantVar[operationConstantIndex[step]]);
-                break;
-            case Operation::ADD:
-                c.vaddps(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]]);
-                break;
-            case Operation::SUBTRACT:
-                c.vsubps(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]]);
-                break;
-            case Operation::MULTIPLY:
-                c.vmulps(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]]);
-                break;
-            case Operation::DIVIDE:
-                c.vdivps(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]]);
-                break;
-            case Operation::POWER:
-                generateTwoArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]], powf);
-                break;
-            case Operation::NEGATE:
-                c.vxorps(workspaceVar[target[step]], workspaceVar[target[step]], workspaceVar[target[step]]);
-                c.vsubps(workspaceVar[target[step]], workspaceVar[target[step]], workspaceVar[args[0]]);
-                break;
-            case Operation::SQRT:
-                c.vsqrtps(workspaceVar[target[step]], workspaceVar[args[0]]);
-                break;
-            case Operation::EXP:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], expf);
-                break;
-            case Operation::LOG:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], logf);
-                break;
-            case Operation::SIN:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], sinf);
-                break;
-            case Operation::COS:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], cosf);
-                break;
-            case Operation::TAN:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], tanf);
-                break;
-            case Operation::ASIN:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], asinf);
-                break;
-            case Operation::ACOS:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], acosf);
-                break;
-            case Operation::ATAN:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], atanf);
-                break;
-            case Operation::ATAN2:
-                generateTwoArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]], atan2f);
-                break;
-            case Operation::SINH:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], sinhf);
-                break;
-            case Operation::COSH:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], coshf);
-                break;
-            case Operation::TANH:
-                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], tanhf);
-                break;
-            case Operation::STEP:
-                c.vxorps(workspaceVar[target[step]], workspaceVar[target[step]], workspaceVar[target[step]]);
-                c.vcmpps(workspaceVar[target[step]], workspaceVar[target[step]], workspaceVar[args[0]], imm(18)); // Comparison mode is _CMP_LE_OQ = 18
-                c.vandps(workspaceVar[target[step]], workspaceVar[target[step]], constantVar[operationConstantIndex[step]]);
-                break;
-            case Operation::DELTA:
-                c.vxorps(workspaceVar[target[step]], workspaceVar[target[step]], workspaceVar[target[step]]);
-                c.vcmpps(workspaceVar[target[step]], workspaceVar[target[step]], workspaceVar[args[0]], imm(16)); // Comparison mode is _CMP_EQ_OQ = 0
-                c.vandps(workspaceVar[target[step]], workspaceVar[target[step]], constantVar[operationConstantIndex[step]]);
-                break;
-            case Operation::SQUARE:
-                c.vmulps(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[0]]);
-                break;
-            case Operation::CUBE:
-                c.vmulps(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[0]]);
-                c.vmulps(workspaceVar[target[step]], workspaceVar[target[step]], workspaceVar[args[0]]);
-                break;
-            case Operation::RECIPROCAL:
-                c.vdivps(workspaceVar[target[step]], constantVar[operationConstantIndex[step]], workspaceVar[args[0]]);
-                break;
-            case Operation::ADD_CONSTANT:
-                c.vaddps(workspaceVar[target[step]], workspaceVar[args[0]], constantVar[operationConstantIndex[step]]);
-                break;
-            case Operation::MULTIPLY_CONSTANT:
-                c.vmulps(workspaceVar[target[step]], workspaceVar[args[0]], constantVar[operationConstantIndex[step]]);
-                break;
-            case Operation::POWER_CONSTANT:
-                generateTwoArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], constantVar[operationConstantIndex[step]], powf);
-                break;
-            case Operation::MIN:
-                c.vminps(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]]);
-                break;
-            case Operation::MAX:
-                c.vmaxps(workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]]);
-                break;
-            case Operation::ABS:
-                c.vandps(workspaceVar[target[step]], workspaceVar[args[0]], constantVar[operationConstantIndex[step]]);
-                break;
-            case Operation::FLOOR:
-                c.vroundps(workspaceVar[target[step]], workspaceVar[args[0]], imm(1));
-                break;
-            case Operation::CEIL:
-                c.vroundps(workspaceVar[target[step]], workspaceVar[args[0]], imm(2));
-                break;
-            case Operation::SELECT:
-            {
-                x86::Ymm mask = c.newYmmPs();
-                c.vxorps(mask, mask, mask);
-                c.vcmpps(mask, mask, workspaceVar[args[0]], imm(0)); // Comparison mode is _CMP_EQ_OQ = 0
-                c.vblendvps(workspaceVar[target[step]], workspaceVar[args[1]], workspaceVar[args[2]], mask);
-                break;
-            }
-            default:
-                // Just invoke evaluateOperation().
-
-                for (int element = 0; element < width; element++) {
-                    for (int i = 0; i < (int) args.size(); i++) {
-                        if (element < 4)
-                            c.vshufps(argReg, workspaceVar[args[i]], workspaceVar[args[i]], imm(element));
-                        else {
-                            c.vperm2f128(argReg, workspaceVar[args[i]], workspaceVar[args[i]], imm(1));
-                            c.vshufps(argReg, argReg, argReg, imm(element-4));
-                        }
-                        c.vcvtss2sd(doubleArgReg.xmm(), doubleArgReg.xmm(), argReg.xmm());
-                        c.vmovsd(x86::ptr(argsPointer, 8*i, 0), doubleArgReg.xmm());
-                    }
-                    x86::Gp fn = c.newIntPtr();
-                    c.mov(fn, imm((void*) evaluateOperation));
-                    InvokeNode* invoke;
-                    c.invoke(&invoke, fn, FuncSignatureT<double, Operation*, double*>());
-                    invoke->setArg(0, imm(&op));
-                    invoke->setArg(1, imm(&argValues[0]));
-                    invoke->setRet(0, doubleResultReg);
-                    c.vcvtsd2ss(argReg.xmm(), argReg.xmm(), doubleResultReg.xmm());
-                    if (element > 3)
-                        c.vperm2f128(argReg, argReg, argReg, imm(0));
-                    if (element != 0)
-                        c.vshufps(argReg, argReg, argReg, imm(0));
-                    c.vblendps(workspaceVar[target[step]], workspaceVar[target[step]], argReg, 1<<element);
-                }
-        }
-    }
-    x86::Gp resultPointer = c.newIntPtr();
-    c.mov(resultPointer, imm(&workspace[workspace.size()-width]));
-    if (width == 4)
-        c.vmovdqu(x86::ptr(resultPointer, 0, 0), workspaceVar.back().xmm());
-    else
-        c.vmovdqu(x86::ptr(resultPointer, 0, 0), workspaceVar.back());
-    c.endFunc();
-    c.finalize();
-    runtime.add(&jitCode, &code);
-}
-
-void CompiledVectorExpression::generateSingleArgCall(x86::Compiler& c, x86::Ymm& dest, x86::Ymm& arg, float (*function)(float)) {
-    x86::Gp fn = c.newIntPtr();
-    c.mov(fn, imm((void*) function));
-    x86::Ymm a = c.newYmm();
-    x86::Ymm d = c.newYmm();
-    for (int element = 0; element < width; element++) {
-        if (element < 4)
-            c.vshufps(a, arg, arg, imm(element));
-        else {
-            c.vperm2f128(a, arg, arg, imm(1));
-            c.vshufps(a, a, a, imm(element-4));
-        }
-        InvokeNode* invoke;
-        c.invoke(&invoke, fn, FuncSignatureT<float, float>());
-        invoke->setArg(0, a);
-        invoke->setRet(0, d);
-        if (element > 3)
-            c.vperm2f128(d, d, d, imm(0));
-        if (element != 0)
-            c.vshufps(d, d, d, imm(0));
-        c.vblendps(dest, dest, d, 1<<element);
-    }
-}
-
-void CompiledVectorExpression::generateTwoArgCall(x86::Compiler& c, x86::Ymm& dest, x86::Ymm& arg1, x86::Ymm& arg2, float (*function)(float, float)) {
-    x86::Gp fn = c.newIntPtr();
-    c.mov(fn, imm((void*) function));
-    x86::Ymm a1 = c.newYmm();
-    x86::Ymm a2 = c.newYmm();
-    x86::Ymm d = c.newYmm();
-    for (int element = 0; element < width; element++) {
-        if (element < 4) {
-            c.vshufps(a1, arg1, arg1, imm(element));
-            c.vshufps(a2, arg2, arg2, imm(element));
-        }
-        else {
-            c.vperm2f128(a1, arg1, arg1, imm(1));
-            c.vperm2f128(a2, arg2, arg2, imm(1));
-            c.vshufps(a1, a1, a1, imm(element-4));
-            c.vshufps(a2, a2, a2, imm(element-4));
-        }
-        InvokeNode* invoke;
-        c.invoke(&invoke, fn, FuncSignatureT<float, float, float>());
-        invoke->setArg(0, a1);
-        invoke->setArg(1, a2);
-        invoke->setRet(0, d);
-        if (element > 3)
-            c.vperm2f128(d, d, d, imm(0));
-        if (element != 0)
-            c.vshufps(d, d, d, imm(0));
-        c.vblendps(dest, dest, d, 1<<element);
-    }
-}
-#endif
-#endif
diff --git a/lib/colvars/lepton/src/ExpressionProgram.cpp b/lib/colvars/lepton/src/ExpressionProgram.cpp
deleted file mode 100644
index bbbae8533f..0000000000
--- a/lib/colvars/lepton/src/ExpressionProgram.cpp
+++ /dev/null
@@ -1,110 +0,0 @@
-/* -------------------------------------------------------------------------- *
- *                                   Lepton                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the Lepton expression parser originating from              *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2009-2018 Stanford University and the Authors.      *
- * Authors: Peter Eastman                                                     *
- * Contributors:                                                              *
- *                                                                            *
- * Permission is hereby granted, free of charge, to any person obtaining a    *
- * copy of this software and associated documentation files (the "Software"), *
- * to deal in the Software without restriction, including without limitation  *
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
- * and/or sell copies of the Software, and to permit persons to whom the      *
- * Software is furnished to do so, subject to the following conditions:       *
- *                                                                            *
- * The above copyright notice and this permission notice shall be included in *
- * all copies or substantial portions of the Software.                        *
- *                                                                            *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
- * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
- * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
- * -------------------------------------------------------------------------- */
-
-#include "lepton/ExpressionProgram.h"
-#include "lepton/Operation.h"
-#include "lepton/ParsedExpression.h"
-
-using namespace Lepton;
-using namespace std;
-
-ExpressionProgram::ExpressionProgram() : maxArgs(0), stackSize(0) {
-}
-
-ExpressionProgram::ExpressionProgram(const ParsedExpression& expression) : maxArgs(0), stackSize(0) {
-    buildProgram(expression.getRootNode());
-    int currentStackSize = 0;
-    for (int i = 0; i < (int) operations.size(); i++) {
-        int args = operations[i]->getNumArguments();
-        if (args > maxArgs)
-            maxArgs = args;
-        currentStackSize += 1-args;
-        if (currentStackSize > stackSize)
-            stackSize = currentStackSize;
-    }
-}
-
-ExpressionProgram::~ExpressionProgram() {
-    for (int i = 0; i < (int) operations.size(); i++)
-        delete operations[i];
-}
-
-ExpressionProgram::ExpressionProgram(const ExpressionProgram& program) {
-    *this = program;
-}
-
-ExpressionProgram& ExpressionProgram::operator=(const ExpressionProgram& program) {
-    maxArgs = program.maxArgs;
-    stackSize = program.stackSize;
-    operations.resize(program.operations.size());
-    for (int i = 0; i < (int) operations.size(); i++)
-        operations[i] = program.operations[i]->clone();
-    return *this;
-}
-
-void ExpressionProgram::buildProgram(const ExpressionTreeNode& node) {
-    for (int i = (int) node.getChildren().size()-1; i >= 0; i--)
-        buildProgram(node.getChildren()[i]);
-    operations.push_back(node.getOperation().clone());
-}
-
-int ExpressionProgram::getNumOperations() const {
-    return (int) operations.size();
-}
-
-const Operation& ExpressionProgram::getOperation(int index) const {
-    return *operations[index];
-}
-
-void ExpressionProgram::setOperation(int index, Operation* operation) {
-    delete operations[index];
-    operations[index] = operation;
-}
-
-int ExpressionProgram::getStackSize() const {
-    return stackSize;
-}
-
-double ExpressionProgram::evaluate() const {
-    return evaluate(map<string, double>());
-}
-
-double ExpressionProgram::evaluate(const std::map<std::string, double>& variables) const {
-    vector<double> stack(stackSize+1);
-    int stackPointer = stackSize;
-    for (int i = 0; i < (int) operations.size(); i++) {
-        int numArgs = operations[i]->getNumArguments();
-        double result = operations[i]->evaluate(&stack[stackPointer], variables);
-        stackPointer += numArgs-1;
-        stack[stackPointer] = result;
-    }
-    return stack[stackSize-1];
-}
diff --git a/lib/colvars/lepton/src/ExpressionTreeNode.cpp b/lib/colvars/lepton/src/ExpressionTreeNode.cpp
deleted file mode 100644
index b7a376528d..0000000000
--- a/lib/colvars/lepton/src/ExpressionTreeNode.cpp
+++ /dev/null
@@ -1,153 +0,0 @@
-/* -------------------------------------------------------------------------- *
- *                                   Lepton                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the Lepton expression parser originating from              *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2009-2021 Stanford University and the Authors.      *
- * Authors: Peter Eastman                                                     *
- * Contributors:                                                              *
- *                                                                            *
- * Permission is hereby granted, free of charge, to any person obtaining a    *
- * copy of this software and associated documentation files (the "Software"), *
- * to deal in the Software without restriction, including without limitation  *
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
- * and/or sell copies of the Software, and to permit persons to whom the      *
- * Software is furnished to do so, subject to the following conditions:       *
- *                                                                            *
- * The above copyright notice and this permission notice shall be included in *
- * all copies or substantial portions of the Software.                        *
- *                                                                            *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
- * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
- * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
- * -------------------------------------------------------------------------- */
-
-#include "lepton/ExpressionTreeNode.h"
-#include "lepton/Exception.h"
-#include "lepton/Operation.h"
-#include <utility>
-
-using namespace Lepton;
-using namespace std;
-
-ExpressionTreeNode::ExpressionTreeNode(Operation* operation, const vector<ExpressionTreeNode>& children) : operation(operation), children(children) {
-    if (operation->getNumArguments() != children.size())
-        throw Exception("wrong number of arguments to function: "+operation->getName());
-}
-
-ExpressionTreeNode::ExpressionTreeNode(Operation* operation, const ExpressionTreeNode& child1, const ExpressionTreeNode& child2) : operation(operation) {
-    children.push_back(child1);
-    children.push_back(child2);
-    if (operation->getNumArguments() != children.size())
-        throw Exception("wrong number of arguments to function: "+operation->getName());
-}
-
-ExpressionTreeNode::ExpressionTreeNode(Operation* operation, const ExpressionTreeNode& child) : operation(operation) {
-    children.push_back(child);
-    if (operation->getNumArguments() != children.size())
-        throw Exception("wrong number of arguments to function: "+operation->getName());
-}
-
-ExpressionTreeNode::ExpressionTreeNode(Operation* operation) : operation(operation) {
-    if (operation->getNumArguments() != children.size())
-        throw Exception("wrong number of arguments to function: "+operation->getName());
-}
-
-ExpressionTreeNode::ExpressionTreeNode(const ExpressionTreeNode& node) : operation(node.operation == NULL ? NULL : node.operation->clone()), children(node.getChildren()) {
-}
-
-ExpressionTreeNode::ExpressionTreeNode(ExpressionTreeNode&& node) : operation(node.operation), children(move(node.children)) {
-    node.operation = NULL;
-    node.children.clear();
-}
-
-ExpressionTreeNode::ExpressionTreeNode() : operation(NULL) {
-}
-
-ExpressionTreeNode::~ExpressionTreeNode() {
-    if (operation != NULL)
-        delete operation;
-}
-
-bool ExpressionTreeNode::operator!=(const ExpressionTreeNode& node) const {
-    if (node.getOperation() != getOperation())
-        return true;
-    if (getOperation().isSymmetric() && getChildren().size() == 2) {
-        if (getChildren()[0] == node.getChildren()[0] && getChildren()[1] == node.getChildren()[1])
-            return false;
-        if (getChildren()[0] == node.getChildren()[1] && getChildren()[1] == node.getChildren()[0])
-            return false;
-        return true;
-    }
-    for (int i = 0; i < (int) getChildren().size(); i++)
-        if (getChildren()[i] != node.getChildren()[i])
-            return true;
-    return false;
-}
-
-bool ExpressionTreeNode::operator==(const ExpressionTreeNode& node) const {
-    return !(*this != node);
-}
-
-ExpressionTreeNode& ExpressionTreeNode::operator=(const ExpressionTreeNode& node) {
-    if (operation != NULL)
-        delete operation;
-    operation = node.getOperation().clone();
-    children = node.getChildren();
-    return *this;
-}
-
-ExpressionTreeNode& ExpressionTreeNode::operator=(ExpressionTreeNode&& node) {
-    if (operation != NULL)
-        delete operation;
-    operation = node.operation;
-    children = move(node.children);
-    node.operation = NULL;
-    node.children.clear();
-    return *this;
-}
-
-const Operation& ExpressionTreeNode::getOperation() const {
-    return *operation;
-}
-
-const vector<ExpressionTreeNode>& ExpressionTreeNode::getChildren() const {
-    return children;
-}
-
-void ExpressionTreeNode::assignTags(vector<const ExpressionTreeNode*>& examples) const {
-    // Assign tag values to all nodes in a tree, such that two nodes have the same
-    // tag if and only if they (and all their children) are equal.  This is used to
-    // optimize other operations.
-
-    int numTags = examples.size();
-    for (const ExpressionTreeNode& child : getChildren())
-        child.assignTags(examples);
-    if (numTags == examples.size()) {
-        // All the children matched existing tags, so possibly this node does too.
-
-        for (int i = 0; i < examples.size(); i++) {
-            const ExpressionTreeNode& example = *examples[i];
-            bool matches = (getChildren().size() == example.getChildren().size() && getOperation() == example.getOperation());
-            for (int j = 0; matches && j < getChildren().size(); j++)
-                if (getChildren()[j].tag != example.getChildren()[j].tag)
-                    matches = false;
-            if (matches) {
-                tag = i;
-                return;
-            }
-        }
-    }
-
-    // This node does not match any previous node, so assign a new tag.
-
-    tag = examples.size();
-    examples.push_back(this);
-}
diff --git a/lib/colvars/lepton/src/MSVC_erfc.h b/lib/colvars/lepton/src/MSVC_erfc.h
deleted file mode 100644
index b1cd87a289..0000000000
--- a/lib/colvars/lepton/src/MSVC_erfc.h
+++ /dev/null
@@ -1,91 +0,0 @@
-#ifndef LEPTON_MSVC_ERFC_H_
-#define LEPTON_MSVC_ERFC_H_
-
-/*
- * Up to version 11 (VC++ 2012), Microsoft does not support the
- * standard C99 erf() and erfc() functions so we have to fake them here.
- * These were added in version 12 (VC++ 2013), which sets _MSC_VER=1800
- * (VC11 has _MSC_VER=1700).
- */
-
-#if defined(_MSC_VER) || defined(__MINGW32__)
-#if !defined(M_PI)
-#define M_PI 3.14159265358979323846264338327950288
-#endif
-#endif
-
-#if defined(_MSC_VER)
-#if _MSC_VER <= 1700 // 1700 is VC11, 1800 is VC12
-/***************************
-*   erf.cpp
-*   author:  Steve Strand
-*   written: 29-Jan-04
-***************************/
-
-#include <cmath>
-
-static const double rel_error= 1E-12;        //calculate 12 significant figures
-//you can adjust rel_error to trade off between accuracy and speed
-//but don't ask for > 15 figures (assuming usual 52 bit mantissa in a double)
-
-static double erfc(double x);
-
-static double erf(double x)
-//erf(x) = 2/sqrt(pi)*integral(exp(-t^2),t,0,x)
-//       = 2/sqrt(pi)*[x - x^3/3 + x^5/5*2! - x^7/7*3! + ...]
-//       = 1-erfc(x)
-{
-    static const double two_sqrtpi=  1.128379167095512574;        // 2/sqrt(pi)
-    if (fabs(x) > 2.2) {
-        return 1.0 - erfc(x);        //use continued fraction when fabs(x) > 2.2
-    }
-    double sum= x, term= x, xsqr= x*x;
-    int j= 1;
-    do {
-        term*= xsqr/j;
-        sum-= term/(2*j+1);
-        ++j;
-        term*= xsqr/j;
-        sum+= term/(2*j+1);
-        ++j;
-    } while (fabs(term)/sum > rel_error);
-    return two_sqrtpi*sum;
-}
-
-
-static double erfc(double x)
-//erfc(x) = 2/sqrt(pi)*integral(exp(-t^2),t,x,inf)
-//        = exp(-x^2)/sqrt(pi) * [1/x+ (1/2)/x+ (2/2)/x+ (3/2)/x+ (4/2)/x+ ...]
-//        = 1-erf(x)
-//expression inside [] is a continued fraction so '+' means add to denominator only
-{
-    static const double one_sqrtpi=  0.564189583547756287;        // 1/sqrt(pi)
-    if (fabs(x) < 2.2) {
-        return 1.0 - erf(x);        //use series when fabs(x) < 2.2
-    }
-    // Don't look for x==0 here!
-    if (x < 0) {               //continued fraction only valid for x>0
-        return 2.0 - erfc(-x);
-    }
-    double a=1, b=x;                //last two convergent numerators
-    double c=x, d=x*x+0.5;          //last two convergent denominators
-    double q1, q2= b/d;             //last two convergents (a/c and b/d)
-    double n= 1.0, t;
-    do {
-        t= a*n+b*x;
-        a= b;
-        b= t;
-        t= c*n+d*x;
-        c= d;
-        d= t;
-        n+= 0.5;
-        q1= q2;
-        q2= b/d;
-      } while (fabs(q1-q2)/q2 > rel_error);
-    return one_sqrtpi*exp(-x*x)*q2;
-}
-
-#endif // _MSC_VER <= 1700
-#endif // _MSC_VER
-
-#endif // LEPTON_MSVC_ERFC_H_
diff --git a/lib/colvars/lepton/src/Operation.cpp b/lib/colvars/lepton/src/Operation.cpp
deleted file mode 100644
index b5a958b2f7..0000000000
--- a/lib/colvars/lepton/src/Operation.cpp
+++ /dev/null
@@ -1,425 +0,0 @@
-
-/* -------------------------------------------------------------------------- *
- *                                   Lepton                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the Lepton expression parser originating from              *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2009-2021 Stanford University and the Authors.      *
- * Authors: Peter Eastman                                                     *
- * Contributors:                                                              *
- *                                                                            *
- * Permission is hereby granted, free of charge, to any person obtaining a    *
- * copy of this software and associated documentation files (the "Software"), *
- * to deal in the Software without restriction, including without limitation  *
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
- * and/or sell copies of the Software, and to permit persons to whom the      *
- * Software is furnished to do so, subject to the following conditions:       *
- *                                                                            *
- * The above copyright notice and this permission notice shall be included in *
- * all copies or substantial portions of the Software.                        *
- *                                                                            *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
- * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
- * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
- * -------------------------------------------------------------------------- */
-
-#include "lepton/Operation.h"
-#include "lepton/ExpressionTreeNode.h"
-#include "MSVC_erfc.h"
-
-using namespace Lepton;
-using namespace std;
-
-static bool isZero(const ExpressionTreeNode& node) {
-    if (node.getOperation().getId() != Operation::CONSTANT)
-        return false;
-    return dynamic_cast<const Operation::Constant&>(node.getOperation()).getValue() == 0.0;
-}
-
-double Operation::Erf::evaluate(double* args, const map<string, double>& variables) const {
-    return erf(args[0]);
-}
-
-double Operation::Erfc::evaluate(double* args, const map<string, double>& variables) const {
-    return erfc(args[0]);
-}
-
-ExpressionTreeNode Operation::Constant::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
-    return ExpressionTreeNode(new Operation::Constant(0.0));
-}
-
-ExpressionTreeNode Operation::Variable::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
-    if (variable == name)
-        return ExpressionTreeNode(new Operation::Constant(1.0));
-    return ExpressionTreeNode(new Operation::Constant(0.0));
-}
-
-ExpressionTreeNode Operation::Custom::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
-    if (function->getNumArguments() == 0)
-        return ExpressionTreeNode(new Operation::Constant(0.0));
-    ExpressionTreeNode result;
-    bool foundTerm = false;
-    for (int i = 0; i < getNumArguments(); i++) {
-        if (!isZero(childDerivs[i])) {
-            if (foundTerm)
-                result = ExpressionTreeNode(new Operation::Add(),
-                                            result,
-                                            ExpressionTreeNode(new Operation::Multiply(), ExpressionTreeNode(new Operation::Custom(*this, i), children), childDerivs[i]));
-            else {
-                result = ExpressionTreeNode(new Operation::Multiply(), ExpressionTreeNode(new Operation::Custom(*this, i), children), childDerivs[i]);
-                foundTerm = true;
-            }
-        }
-    }
-    if (foundTerm)
-        return result;
-    return ExpressionTreeNode(new Operation::Constant(0.0));
-}
-
-ExpressionTreeNode Operation::Add::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
-    if (isZero(childDerivs[0]))
-        return childDerivs[1];
-    if (isZero(childDerivs[1]))
-        return childDerivs[0];
-    return ExpressionTreeNode(new Operation::Add(), childDerivs[0], childDerivs[1]);
-}
-
-ExpressionTreeNode Operation::Subtract::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
-    if (isZero(childDerivs[0])) {
-        if (isZero(childDerivs[1]))
-            return ExpressionTreeNode(new Operation::Constant(0.0));
-        return ExpressionTreeNode(new Operation::Negate(), childDerivs[1]);
-    }
-    if (isZero(childDerivs[1]))
-        return childDerivs[0];
-    return ExpressionTreeNode(new Operation::Subtract(), childDerivs[0], childDerivs[1]);
-}
-
-ExpressionTreeNode Operation::Multiply::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
-    if (isZero(childDerivs[0])) {
-        if (isZero(childDerivs[1]))
-            return ExpressionTreeNode(new Operation::Constant(0.0));
-        return ExpressionTreeNode(new Operation::Multiply(), children[0], childDerivs[1]);
-    }
-    if (isZero(childDerivs[1]))
-        return ExpressionTreeNode(new Operation::Multiply(), children[1], childDerivs[0]);
-    return ExpressionTreeNode(new Operation::Add(),
-                              ExpressionTreeNode(new Operation::Multiply(), children[0], childDerivs[1]),
-                              ExpressionTreeNode(new Operation::Multiply(), children[1], childDerivs[0]));
-}
-
-ExpressionTreeNode Operation::Divide::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
-    ExpressionTreeNode subexp;
-    if (isZero(childDerivs[0])) {
-        if (isZero(childDerivs[1]))
-            return ExpressionTreeNode(new Operation::Constant(0.0));
-        subexp = ExpressionTreeNode(new Operation::Negate(), ExpressionTreeNode(new Operation::Multiply(), children[0], childDerivs[1]));
-    }
-    else if (isZero(childDerivs[1]))
-        subexp = ExpressionTreeNode(new Operation::Multiply(), children[1], childDerivs[0]);
-    else
-        subexp = ExpressionTreeNode(new Operation::Subtract(),
-                                                 ExpressionTreeNode(new Operation::Multiply(), children[1], childDerivs[0]),
-                                                 ExpressionTreeNode(new Operation::Multiply(), children[0], childDerivs[1]));
-    return ExpressionTreeNode(new Operation::Divide(), subexp, ExpressionTreeNode(new Operation::Square(), children[1]));
-}
-
-ExpressionTreeNode Operation::Power::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
-    return ExpressionTreeNode(new Operation::Add(),
-                              ExpressionTreeNode(new Operation::Multiply(),
-                                                 ExpressionTreeNode(new Operation::Multiply(),
-                                                                    children[1],
-                                                                    ExpressionTreeNode(new Operation::Power(),
-                                                                                       children[0], ExpressionTreeNode(new Operation::AddConstant(-1.0), children[1]))),
-                                                 childDerivs[0]),
-                              ExpressionTreeNode(new Operation::Multiply(),
-                                                 ExpressionTreeNode(new Operation::Multiply(),
-                                                                    ExpressionTreeNode(new Operation::Log(), children[0]),
-                                                                    ExpressionTreeNode(new Operation::Power(), children[0], children[1])),
-                                                 childDerivs[1]));
-}
-
-ExpressionTreeNode Operation::Negate::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
-    if (isZero(childDerivs[0]))
-        return ExpressionTreeNode(new Operation::Constant(0.0));
-    return ExpressionTreeNode(new Operation::Negate(), childDerivs[0]);
-}
-
-ExpressionTreeNode Operation::Sqrt::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
-    if (isZero(childDerivs[0]))
-        return ExpressionTreeNode(new Operation::Constant(0.0));
-    return ExpressionTreeNode(new Operation::Multiply(),
-                              ExpressionTreeNode(new Operation::MultiplyConstant(0.5),
-                                                 ExpressionTreeNode(new Operation::Reciprocal(),
-                                                                    ExpressionTreeNode(new Operation::Sqrt(), children[0]))),
-                              childDerivs[0]);
-}
-
-ExpressionTreeNode Operation::Exp::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
-    if (isZero(childDerivs[0]))
-        return ExpressionTreeNode(new Operation::Constant(0.0));
-    return ExpressionTreeNode(new Operation::Multiply(),
-                              ExpressionTreeNode(new Operation::Exp(), children[0]),
-                              childDerivs[0]);
-}
-
-ExpressionTreeNode Operation::Log::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
-    if (isZero(childDerivs[0]))
-        return ExpressionTreeNode(new Operation::Constant(0.0));
-    return ExpressionTreeNode(new Operation::Multiply(),
-                              ExpressionTreeNode(new Operation::Reciprocal(), children[0]),
-                              childDerivs[0]);
-}
-
-ExpressionTreeNode Operation::Sin::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
-    if (isZero(childDerivs[0]))
-        return ExpressionTreeNode(new Operation::Constant(0.0));
-    return ExpressionTreeNode(new Operation::Multiply(),
-                              ExpressionTreeNode(new Operation::Cos(), children[0]),
-                              childDerivs[0]);
-}
-
-ExpressionTreeNode Operation::Cos::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
-    if (isZero(childDerivs[0]))
-        return ExpressionTreeNode(new Operation::Constant(0.0));
-    return ExpressionTreeNode(new Operation::Multiply(),
-                              ExpressionTreeNode(new Operation::Negate(),
-                                                 ExpressionTreeNode(new Operation::Sin(), children[0])),
-                              childDerivs[0]);
-}
-
-ExpressionTreeNode Operation::Sec::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
-    if (isZero(childDerivs[0]))
-        return ExpressionTreeNode(new Operation::Constant(0.0));
-    return ExpressionTreeNode(new Operation::Multiply(),
-                              ExpressionTreeNode(new Operation::Multiply(),
-                                                 ExpressionTreeNode(new Operation::Sec(), children[0]),
-                                                 ExpressionTreeNode(new Operation::Tan(), children[0])),
-                              childDerivs[0]);
-}
-
-ExpressionTreeNode Operation::Csc::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
-    if (isZero(childDerivs[0]))
-        return ExpressionTreeNode(new Operation::Constant(0.0));
-    return ExpressionTreeNode(new Operation::Multiply(),
-                              ExpressionTreeNode(new Operation::Negate(),
-                                                 ExpressionTreeNode(new Operation::Multiply(),
-                                                                    ExpressionTreeNode(new Operation::Csc(), children[0]),
-                                                                    ExpressionTreeNode(new Operation::Cot(), children[0]))),
-                              childDerivs[0]);
-}
-
-ExpressionTreeNode Operation::Tan::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
-    if (isZero(childDerivs[0]))
-        return ExpressionTreeNode(new Operation::Constant(0.0));
-    return ExpressionTreeNode(new Operation::Multiply(),
-                              ExpressionTreeNode(new Operation::Square(),
-                                                 ExpressionTreeNode(new Operation::Sec(), children[0])),
-                              childDerivs[0]);
-}
-
-ExpressionTreeNode Operation::Cot::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
-    if (isZero(childDerivs[0]))
-        return ExpressionTreeNode(new Operation::Constant(0.0));
-    return ExpressionTreeNode(new Operation::Multiply(),
-                              ExpressionTreeNode(new Operation::Negate(),
-                                                 ExpressionTreeNode(new Operation::Square(),
-                                                                    ExpressionTreeNode(new Operation::Csc(), children[0]))),
-                              childDerivs[0]);
-}
-
-ExpressionTreeNode Operation::Asin::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
-    if (isZero(childDerivs[0]))
-        return ExpressionTreeNode(new Operation::Constant(0.0));
-    return ExpressionTreeNode(new Operation::Multiply(),
-                              ExpressionTreeNode(new Operation::Reciprocal(),
-                                                 ExpressionTreeNode(new Operation::Sqrt(),
-                                                                    ExpressionTreeNode(new Operation::Subtract(),
-                                                                                       ExpressionTreeNode(new Operation::Constant(1.0)),
-                                                                                       ExpressionTreeNode(new Operation::Square(), children[0])))),
-                              childDerivs[0]);
-}
-
-ExpressionTreeNode Operation::Acos::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
-    if (isZero(childDerivs[0]))
-        return ExpressionTreeNode(new Operation::Constant(0.0));
-    return ExpressionTreeNode(new Operation::Multiply(),
-                              ExpressionTreeNode(new Operation::Negate(),
-                                                 ExpressionTreeNode(new Operation::Reciprocal(),
-                                                                    ExpressionTreeNode(new Operation::Sqrt(),
-                                                                                       ExpressionTreeNode(new Operation::Subtract(),
-                                                                                                          ExpressionTreeNode(new Operation::Constant(1.0)),
-                                                                                                          ExpressionTreeNode(new Operation::Square(), children[0]))))),
-                              childDerivs[0]);
-}
-
-ExpressionTreeNode Operation::Atan::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
-    if (isZero(childDerivs[0]))
-        return ExpressionTreeNode(new Operation::Constant(0.0));
-    return ExpressionTreeNode(new Operation::Multiply(),
-                              ExpressionTreeNode(new Operation::Reciprocal(),
-                                                 ExpressionTreeNode(new Operation::AddConstant(1.0),
-                                                                    ExpressionTreeNode(new Operation::Square(), children[0]))),
-                              childDerivs[0]);
-}
-
-ExpressionTreeNode Operation::Atan2::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
-    return ExpressionTreeNode(new Operation::Divide(),
-                              ExpressionTreeNode(new Operation::Subtract(),
-                                                 ExpressionTreeNode(new Operation::Multiply(), children[1], childDerivs[0]),
-                                                 ExpressionTreeNode(new Operation::Multiply(), children[0], childDerivs[1])),
-                              ExpressionTreeNode(new Operation::Add(),
-                                                 ExpressionTreeNode(new Operation::Square(), children[0]),
-                                                 ExpressionTreeNode(new Operation::Square(), children[1])));
-}
-
-ExpressionTreeNode Operation::Sinh::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
-    if (isZero(childDerivs[0]))
-        return ExpressionTreeNode(new Operation::Constant(0.0));
-    return ExpressionTreeNode(new Operation::Multiply(),
-                              ExpressionTreeNode(new Operation::Cosh(),
-                                                 children[0]),
-                              childDerivs[0]);
-}
-
-ExpressionTreeNode Operation::Cosh::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
-    if (isZero(childDerivs[0]))
-        return ExpressionTreeNode(new Operation::Constant(0.0));
-    return ExpressionTreeNode(new Operation::Multiply(),
-                              ExpressionTreeNode(new Operation::Sinh(),
-                                                 children[0]),
-                              childDerivs[0]);
-}
-
-ExpressionTreeNode Operation::Tanh::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
-    if (isZero(childDerivs[0]))
-        return ExpressionTreeNode(new Operation::Constant(0.0));
-    return ExpressionTreeNode(new Operation::Multiply(),
-                              ExpressionTreeNode(new Operation::Subtract(),
-                                                 ExpressionTreeNode(new Operation::Constant(1.0)),
-                                                 ExpressionTreeNode(new Operation::Square(),
-                                                                    ExpressionTreeNode(new Operation::Tanh(), children[0]))),
-                              childDerivs[0]);
-}
-
-ExpressionTreeNode Operation::Erf::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
-    if (isZero(childDerivs[0]))
-        return ExpressionTreeNode(new Operation::Constant(0.0));
-    return ExpressionTreeNode(new Operation::Multiply(),
-                              ExpressionTreeNode(new Operation::Multiply(),
-                                                 ExpressionTreeNode(new Operation::Constant(2.0/sqrt(M_PI))),
-                                                 ExpressionTreeNode(new Operation::Exp(),
-                                                                    ExpressionTreeNode(new Operation::Negate(),
-                                                                                       ExpressionTreeNode(new Operation::Square(), children[0])))),
-                              childDerivs[0]);
-}
-
-ExpressionTreeNode Operation::Erfc::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
-    if (isZero(childDerivs[0]))
-        return ExpressionTreeNode(new Operation::Constant(0.0));
-    return ExpressionTreeNode(new Operation::Multiply(),
-                              ExpressionTreeNode(new Operation::Multiply(),
-                                                 ExpressionTreeNode(new Operation::Constant(-2.0/sqrt(M_PI))),
-                                                 ExpressionTreeNode(new Operation::Exp(),
-                                                                    ExpressionTreeNode(new Operation::Negate(),
-                                                                                       ExpressionTreeNode(new Operation::Square(), children[0])))),
-                              childDerivs[0]);
-}
-
-ExpressionTreeNode Operation::Step::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
-    return ExpressionTreeNode(new Operation::Constant(0.0));
-}
-
-ExpressionTreeNode Operation::Delta::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
-    return ExpressionTreeNode(new Operation::Constant(0.0));
-}
-
-ExpressionTreeNode Operation::Square::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
-    if (isZero(childDerivs[0]))
-        return ExpressionTreeNode(new Operation::Constant(0.0));
-    return ExpressionTreeNode(new Operation::Multiply(),
-                              ExpressionTreeNode(new Operation::MultiplyConstant(2.0),
-                                                 children[0]),
-                              childDerivs[0]);
-}
-
-ExpressionTreeNode Operation::Cube::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
-    if (isZero(childDerivs[0]))
-        return ExpressionTreeNode(new Operation::Constant(0.0));
-    return ExpressionTreeNode(new Operation::Multiply(),
-                              ExpressionTreeNode(new Operation::MultiplyConstant(3.0),
-                                                 ExpressionTreeNode(new Operation::Square(), children[0])),
-                              childDerivs[0]);
-}
-
-ExpressionTreeNode Operation::Reciprocal::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
-    if (isZero(childDerivs[0]))
-        return ExpressionTreeNode(new Operation::Constant(0.0));
-    return ExpressionTreeNode(new Operation::Multiply(),
-                              ExpressionTreeNode(new Operation::Negate(),
-                                                 ExpressionTreeNode(new Operation::Reciprocal(),
-                                                                    ExpressionTreeNode(new Operation::Square(), children[0]))),
-                              childDerivs[0]);
-}
-
-ExpressionTreeNode Operation::AddConstant::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
-    return childDerivs[0];
-}
-
-ExpressionTreeNode Operation::MultiplyConstant::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
-    if (isZero(childDerivs[0]))
-        return ExpressionTreeNode(new Operation::Constant(0.0));
-    return ExpressionTreeNode(new Operation::MultiplyConstant(value),
-                              childDerivs[0]);
-}
-
-ExpressionTreeNode Operation::PowerConstant::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
-    if (isZero(childDerivs[0]))
-        return ExpressionTreeNode(new Operation::Constant(0.0));
-    return ExpressionTreeNode(new Operation::Multiply(),
-                              ExpressionTreeNode(new Operation::MultiplyConstant(value),
-                                                 ExpressionTreeNode(new Operation::PowerConstant(value-1),
-                                                                    children[0])),
-                              childDerivs[0]);
-}
-
-ExpressionTreeNode Operation::Min::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
-    ExpressionTreeNode step(new Operation::Step(),
-                            ExpressionTreeNode(new Operation::Subtract(), children[0], children[1]));
-    return ExpressionTreeNode(new Operation::Select(), {step, childDerivs[1], childDerivs[0]});
-}
-
-ExpressionTreeNode Operation::Max::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
-    ExpressionTreeNode step(new Operation::Step(),
-                            ExpressionTreeNode(new Operation::Subtract(), children[0], children[1]));
-    return ExpressionTreeNode(new Operation::Select(), {step, childDerivs[0], childDerivs[1]});
-}
-
-ExpressionTreeNode Operation::Abs::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
-    if (isZero(childDerivs[0]))
-        return ExpressionTreeNode(new Operation::Constant(0.0));
-    ExpressionTreeNode step(new Operation::Step(), children[0]);
-    return ExpressionTreeNode(new Operation::Multiply(),
-                              childDerivs[0],
-                              ExpressionTreeNode(new Operation::AddConstant(-1),
-                                                 ExpressionTreeNode(new Operation::MultiplyConstant(2), step)));
-}
-
-ExpressionTreeNode Operation::Floor::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
-    return ExpressionTreeNode(new Operation::Constant(0.0));
-}
-
-ExpressionTreeNode Operation::Ceil::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
-    return ExpressionTreeNode(new Operation::Constant(0.0));
-}
-
-ExpressionTreeNode Operation::Select::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
-    return ExpressionTreeNode(new Operation::Select(), {children[0], childDerivs[1], childDerivs[2]});
-}
diff --git a/lib/colvars/lepton/src/ParsedExpression.cpp b/lib/colvars/lepton/src/ParsedExpression.cpp
deleted file mode 100644
index ea2cf707d6..0000000000
--- a/lib/colvars/lepton/src/ParsedExpression.cpp
+++ /dev/null
@@ -1,422 +0,0 @@
-/* -------------------------------------------------------------------------- *
- *                                   Lepton                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the Lepton expression parser originating from              *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2009-2022 Stanford University and the Authors.      *
- * Authors: Peter Eastman                                                     *
- * Contributors:                                                              *
- *                                                                            *
- * Permission is hereby granted, free of charge, to any person obtaining a    *
- * copy of this software and associated documentation files (the "Software"), *
- * to deal in the Software without restriction, including without limitation  *
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
- * and/or sell copies of the Software, and to permit persons to whom the      *
- * Software is furnished to do so, subject to the following conditions:       *
- *                                                                            *
- * The above copyright notice and this permission notice shall be included in *
- * all copies or substantial portions of the Software.                        *
- *                                                                            *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
- * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
- * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
- * -------------------------------------------------------------------------- */
-
-#include "lepton/ParsedExpression.h"
-#include "lepton/CompiledExpression.h"
-#include "lepton/CompiledVectorExpression.h"
-#include "lepton/ExpressionProgram.h"
-#include "lepton/Operation.h"
-#include <limits>
-#include <vector>
-
-using namespace Lepton;
-using namespace std;
-
-ParsedExpression::ParsedExpression() : rootNode(ExpressionTreeNode()) {
-}
-
-ParsedExpression::ParsedExpression(const ExpressionTreeNode& rootNode) : rootNode(rootNode) {
-}
-
-const ExpressionTreeNode& ParsedExpression::getRootNode() const {
-    if (&rootNode.getOperation() == NULL)
-        throw Exception("Illegal call to an initialized ParsedExpression");
-    return rootNode;
-}
-
-double ParsedExpression::evaluate() const {
-    return evaluate(getRootNode(), map<string, double>());
-}
-
-double ParsedExpression::evaluate(const map<string, double>& variables) const {
-    return evaluate(getRootNode(), variables);
-}
-
-double ParsedExpression::evaluate(const ExpressionTreeNode& node, const map<string, double>& variables) {
-    int numArgs = (int) node.getChildren().size();
-    vector<double> args(max(numArgs, 1));
-    for (int i = 0; i < numArgs; i++)
-        args[i] = evaluate(node.getChildren()[i], variables);
-    return node.getOperation().evaluate(&args[0], variables);
-}
-
-ParsedExpression ParsedExpression::optimize() const {
-    ExpressionTreeNode result = getRootNode();
-    vector<const ExpressionTreeNode*> examples;
-    result.assignTags(examples);
-    map<int, ExpressionTreeNode> nodeCache;
-    result = precalculateConstantSubexpressions(result, nodeCache);
-    while (true) {
-        examples.clear();
-        result.assignTags(examples);
-        nodeCache.clear();
-        ExpressionTreeNode simplified = substituteSimplerExpression(result, nodeCache);
-        if (simplified == result)
-            break;
-        result = simplified;
-    }
-    return ParsedExpression(result);
-}
-
-ParsedExpression ParsedExpression::optimize(const map<string, double>& variables) const {
-    ExpressionTreeNode result = preevaluateVariables(getRootNode(), variables);
-    vector<const ExpressionTreeNode*> examples;
-    result.assignTags(examples);
-    map<int, ExpressionTreeNode> nodeCache;
-    result = precalculateConstantSubexpressions(result, nodeCache);
-    while (true) {
-        examples.clear();
-        result.assignTags(examples);
-        nodeCache.clear();
-        ExpressionTreeNode simplified = substituteSimplerExpression(result, nodeCache);
-        if (simplified == result)
-            break;
-        result = simplified;
-    }
-    return ParsedExpression(result);
-}
-
-ExpressionTreeNode ParsedExpression::preevaluateVariables(const ExpressionTreeNode& node, const map<string, double>& variables) {
-    if (node.getOperation().getId() == Operation::VARIABLE) {
-        const Operation::Variable& var = dynamic_cast<const Operation::Variable&>(node.getOperation());
-        map<string, double>::const_iterator iter = variables.find(var.getName());
-        if (iter == variables.end())
-            return node;
-        return ExpressionTreeNode(new Operation::Constant(iter->second));
-    }
-    vector<ExpressionTreeNode> children(node.getChildren().size());
-    for (int i = 0; i < (int) children.size(); i++)
-        children[i] = preevaluateVariables(node.getChildren()[i], variables);
-    return ExpressionTreeNode(node.getOperation().clone(), children);
-}
-
-ExpressionTreeNode ParsedExpression::precalculateConstantSubexpressions(const ExpressionTreeNode& node, map<int, ExpressionTreeNode>& nodeCache) {
-    auto cached = nodeCache.find(node.tag);
-    if (cached != nodeCache.end())
-        return cached->second;
-    vector<ExpressionTreeNode> children(node.getChildren().size());
-    for (int i = 0; i < (int) children.size(); i++)
-        children[i] = precalculateConstantSubexpressions(node.getChildren()[i], nodeCache);
-    ExpressionTreeNode result = ExpressionTreeNode(node.getOperation().clone(), children);
-    if (node.getOperation().getId() == Operation::VARIABLE || node.getOperation().getId() == Operation::CUSTOM) {
-        nodeCache[node.tag] = result;
-        return result;
-    }
-    for (int i = 0; i < (int) children.size(); i++)
-        if (children[i].getOperation().getId() != Operation::CONSTANT) {
-            nodeCache[node.tag] = result;
-            return result;
-        }
-    result = ExpressionTreeNode(new Operation::Constant(evaluate(result, map<string, double>())));
-    nodeCache[node.tag] = result;
-    return result;
-}
-
-ExpressionTreeNode ParsedExpression::substituteSimplerExpression(const ExpressionTreeNode& node, map<int, ExpressionTreeNode>& nodeCache) {
-    vector<ExpressionTreeNode> children(node.getChildren().size());
-    for (int i = 0; i < (int) children.size(); i++) {
-        const ExpressionTreeNode& child = node.getChildren()[i];
-        auto cached = nodeCache.find(child.tag);
-        if (cached == nodeCache.end()) {
-            children[i] = substituteSimplerExpression(child, nodeCache);
-            nodeCache[child.tag] = children[i];
-        }
-        else
-            children[i] = cached->second;
-    }
-
-    // Collect some info on constant expressions in children
-    bool first_const = children.size() > 0 && isConstant(children[0]); // is first child constant?
-    bool second_const = children.size() > 1 && isConstant(children[1]); ; // is second child constant?
-    double first, second; // if yes, value of first and second child
-    if (first_const)
-        first = getConstantValue(children[0]);
-    if (second_const)
-        second = getConstantValue(children[1]);
-
-    switch (node.getOperation().getId()) {
-        case Operation::ADD:
-        {
-            if (first_const) {
-                if (first == 0.0) { // Add 0
-                    return children[1];
-                } else { // Add a constant
-                    return ExpressionTreeNode(new Operation::AddConstant(first), children[1]);
-                }
-            }
-            if (second_const) {
-                if (second == 0.0) { // Add 0
-                    return children[0];
-                } else { // Add a constant
-                    return ExpressionTreeNode(new Operation::AddConstant(second), children[0]);
-                }
-            }
-            if (children[1].getOperation().getId() == Operation::NEGATE) // a+(-b) = a-b
-                return ExpressionTreeNode(new Operation::Subtract(), children[0], children[1].getChildren()[0]);
-            if (children[0].getOperation().getId() == Operation::NEGATE) // (-a)+b = b-a
-                return ExpressionTreeNode(new Operation::Subtract(), children[1], children[0].getChildren()[0]);
-            break;
-        }
-        case Operation::SUBTRACT:
-        {
-            if (children[0] == children[1])
-                return ExpressionTreeNode(new Operation::Constant(0.0)); // Subtracting anything from itself is 0
-            if (first_const) {
-                if (first == 0.0) // Subtract from 0
-                    return ExpressionTreeNode(new Operation::Negate(), children[1]);
-            }
-            if (second_const) {
-                if (second == 0.0) { // Subtract 0
-                    return children[0];
-                } else { // Subtract a constant
-                    return ExpressionTreeNode(new Operation::AddConstant(-second), children[0]);
-                }
-            }
-            if (children[1].getOperation().getId() == Operation::NEGATE) // a-(-b) = a+b
-                return ExpressionTreeNode(new Operation::Add(), children[0], children[1].getChildren()[0]);
-            break;
-        }
-        case Operation::MULTIPLY:
-        {
-            if ((first_const && first == 0.0) || (second_const && second == 0.0)) // Multiply by 0
-                return ExpressionTreeNode(new Operation::Constant(0.0));
-            if (first_const && first == 1.0) // Multiply by 1
-                return children[1];
-            if (second_const && second == 1.0) // Multiply by 1
-                return children[0];
-            if (first_const) { // Multiply by a constant
-                if (children[1].getOperation().getId() == Operation::MULTIPLY_CONSTANT) // Combine two multiplies into a single one
-                    return ExpressionTreeNode(new Operation::MultiplyConstant(first*dynamic_cast<const Operation::MultiplyConstant*>(&children[1].getOperation())->getValue()), children[1].getChildren()[0]);
-                return ExpressionTreeNode(new Operation::MultiplyConstant(first), children[1]);
-            }
-            if (second_const) { // Multiply by a constant
-                if (children[0].getOperation().getId() == Operation::MULTIPLY_CONSTANT) // Combine two multiplies into a single one
-                    return ExpressionTreeNode(new Operation::MultiplyConstant(second*dynamic_cast<const Operation::MultiplyConstant*>(&children[0].getOperation())->getValue()), children[0].getChildren()[0]);
-                return ExpressionTreeNode(new Operation::MultiplyConstant(second), children[0]);
-            }
-            if (children[0].getOperation().getId() == Operation::NEGATE && children[1].getOperation().getId() == Operation::NEGATE) // The two negations cancel
-                return ExpressionTreeNode(new Operation::Multiply(), children[0].getChildren()[0], children[1].getChildren()[0]);
-            if (children[0].getOperation().getId() == Operation::NEGATE && children[1].getOperation().getId() == Operation::MULTIPLY_CONSTANT) // Negate the constant
-                return ExpressionTreeNode(new Operation::Multiply(), children[0].getChildren()[0], ExpressionTreeNode(new Operation::MultiplyConstant(-dynamic_cast<const Operation::MultiplyConstant*>(&children[1].getOperation())->getValue()), children[1].getChildren()[0]));
-            if (children[1].getOperation().getId() == Operation::NEGATE && children[0].getOperation().getId() == Operation::MULTIPLY_CONSTANT) // Negate the constant
-                return ExpressionTreeNode(new Operation::Multiply(), ExpressionTreeNode(new Operation::MultiplyConstant(-dynamic_cast<const Operation::MultiplyConstant*>(&children[0].getOperation())->getValue()), children[0].getChildren()[0]), children[1].getChildren()[0]);
-            if (children[0].getOperation().getId() == Operation::NEGATE) // Pull the negation out so it can possibly be optimized further
-                return ExpressionTreeNode(new Operation::Negate(), ExpressionTreeNode(new Operation::Multiply(), children[0].getChildren()[0], children[1]));
-            if (children[1].getOperation().getId() == Operation::NEGATE) // Pull the negation out so it can possibly be optimized further
-                return ExpressionTreeNode(new Operation::Negate(), ExpressionTreeNode(new Operation::Multiply(), children[0], children[1].getChildren()[0]));
-            if (children[1].getOperation().getId() == Operation::RECIPROCAL) // a*(1/b) = a/b
-                return ExpressionTreeNode(new Operation::Divide(), children[0], children[1].getChildren()[0]);
-            if (children[0].getOperation().getId() == Operation::RECIPROCAL) // (1/a)*b = b/a
-                return ExpressionTreeNode(new Operation::Divide(), children[1], children[0].getChildren()[0]);
-            if (children[0] == children[1])
-                return ExpressionTreeNode(new Operation::Square(), children[0]); // x*x = square(x)
-            if (children[0].getOperation().getId() == Operation::SQUARE && children[0].getChildren()[0] == children[1])
-                return ExpressionTreeNode(new Operation::Cube(), children[1]); // x*x*x = cube(x)
-            if (children[1].getOperation().getId() == Operation::SQUARE && children[1].getChildren()[0] == children[0])
-                return ExpressionTreeNode(new Operation::Cube(), children[0]); // x*x*x = cube(x)
-            break;
-        }
-        case Operation::DIVIDE:
-        {
-            if (children[0] == children[1])
-                return ExpressionTreeNode(new Operation::Constant(1.0)); // Dividing anything from itself is 0
-            if (first_const && first == 0.0) // 0 divided by something
-                return ExpressionTreeNode(new Operation::Constant(0.0));
-            if (first_const && first == 1.0) // 1 divided by something
-                return ExpressionTreeNode(new Operation::Reciprocal(), children[1]);
-            if (second_const && second == 1.0) // Divide by 1
-                return children[0];
-            if (second_const) {
-                if (children[0].getOperation().getId() == Operation::MULTIPLY_CONSTANT) // Combine a multiply and a divide into one multiply
-                    return ExpressionTreeNode(new Operation::MultiplyConstant(dynamic_cast<const Operation::MultiplyConstant*>(&children[0].getOperation())->getValue()/second), children[0].getChildren()[0]);
-                return ExpressionTreeNode(new Operation::MultiplyConstant(1.0/second), children[0]); // Replace a divide with a multiply
-            }
-            if (children[0].getOperation().getId() == Operation::NEGATE && children[1].getOperation().getId() == Operation::NEGATE) // The two negations cancel
-                return ExpressionTreeNode(new Operation::Divide(), children[0].getChildren()[0], children[1].getChildren()[0]);
-            if (children[1].getOperation().getId() == Operation::NEGATE && children[0].getOperation().getId() == Operation::MULTIPLY_CONSTANT) // Negate the constant
-                return ExpressionTreeNode(new Operation::Divide(), ExpressionTreeNode(new Operation::MultiplyConstant(-dynamic_cast<const Operation::MultiplyConstant*>(&children[0].getOperation())->getValue()), children[0].getChildren()[0]), children[1].getChildren()[0]);
-            if (children[0].getOperation().getId() == Operation::NEGATE) // Pull the negation out so it can possibly be optimized further
-                return ExpressionTreeNode(new Operation::Negate(), ExpressionTreeNode(new Operation::Divide(), children[0].getChildren()[0], children[1]));
-            if (children[1].getOperation().getId() == Operation::NEGATE) // Pull the negation out so it can possibly be optimized further
-                return ExpressionTreeNode(new Operation::Negate(), ExpressionTreeNode(new Operation::Divide(), children[0], children[1].getChildren()[0]));
-            if (children[1].getOperation().getId() == Operation::RECIPROCAL) // a/(1/b) = a*b
-                return ExpressionTreeNode(new Operation::Multiply(), children[0], children[1].getChildren()[0]);
-            break;
-        }
-        case Operation::POWER:
-        {
-            if (first_const && first == 0.0) // 0 to any power is 0
-                return ExpressionTreeNode(new Operation::Constant(0.0));
-            if (first_const && first == 1.0) // 1 to any power is 1
-                return ExpressionTreeNode(new Operation::Constant(1.0));
-            if (second_const) { // Constant exponent
-                if (second == 0.0) // x^0 = 1
-                    return ExpressionTreeNode(new Operation::Constant(1.0));
-                if (second == 1.0) // x^1 = x
-                    return children[0];
-                if (second == -1.0) // x^-1 = recip(x)
-                    return ExpressionTreeNode(new Operation::Reciprocal(), children[0]);
-                if (second == 2.0) // x^2 = square(x)
-                    return ExpressionTreeNode(new Operation::Square(), children[0]);
-                if (second == 3.0) // x^3 = cube(x)
-                    return ExpressionTreeNode(new Operation::Cube(), children[0]);
-                if (second == 0.5) // x^0.5 = sqrt(x)
-                    return ExpressionTreeNode(new Operation::Sqrt(), children[0]);
-                // Constant power
-                return ExpressionTreeNode(new Operation::PowerConstant(second), children[0]);
-            }
-            break;
-        }
-        case Operation::NEGATE:
-        {
-            if (children[0].getOperation().getId() == Operation::MULTIPLY_CONSTANT) // Combine a multiply and a negate into a single multiply
-                return ExpressionTreeNode(new Operation::MultiplyConstant(-dynamic_cast<const Operation::MultiplyConstant*>(&children[0].getOperation())->getValue()), children[0].getChildren()[0]);
-            if (first_const) // Negate a constant
-                return ExpressionTreeNode(new Operation::Constant(-first));
-            if (children[0].getOperation().getId() == Operation::NEGATE) // The two negations cancel
-                return children[0].getChildren()[0];
-            break;
-        }
-        case Operation::MULTIPLY_CONSTANT:
-        {
-            if (children[0].getOperation().getId() == Operation::MULTIPLY_CONSTANT) // Combine two multiplies into a single one
-                return ExpressionTreeNode(new Operation::MultiplyConstant(dynamic_cast<const Operation::MultiplyConstant*>(&node.getOperation())->getValue()*dynamic_cast<const Operation::MultiplyConstant*>(&children[0].getOperation())->getValue()), children[0].getChildren()[0]);
-            if (first_const) // Multiply two constants
-                return ExpressionTreeNode(new Operation::Constant(dynamic_cast<const Operation::MultiplyConstant*>(&node.getOperation())->getValue()*getConstantValue(children[0])));
-            if (children[0].getOperation().getId() == Operation::NEGATE) // Combine a multiply and a negate into a single multiply
-                return ExpressionTreeNode(new Operation::MultiplyConstant(-dynamic_cast<const Operation::MultiplyConstant*>(&node.getOperation())->getValue()), children[0].getChildren()[0]);
-            break;
-        }
-        case Operation::SQRT:
-        {
-            if (children[0].getOperation().getId() == Operation::SQUARE) // sqrt(square(x)) = abs(x)
-                return ExpressionTreeNode(new Operation::Abs(), children[0].getChildren()[0]);
-        }
-        case Operation::SQUARE:
-        {
-            if (children[0].getOperation().getId() == Operation::SQRT) // square(sqrt(x)) = x
-                return children[0].getChildren()[0];
-        }
-        default:
-        {
-            // If operation ID is not one of the above,
-            // we don't substitute a simpler expression.
-            break;
-        }
-
-    }
-    return ExpressionTreeNode(node.getOperation().clone(), children);
-}
-
-ParsedExpression ParsedExpression::differentiate(const string& variable) const {
-    vector<const ExpressionTreeNode*> examples;
-    getRootNode().assignTags(examples);
-    map<int, ExpressionTreeNode> nodeCache;
-    return differentiate(getRootNode(), variable, nodeCache);
-}
-
-ExpressionTreeNode ParsedExpression::differentiate(const ExpressionTreeNode& node, const string& variable, map<int, ExpressionTreeNode>& nodeCache) {
-    auto cached = nodeCache.find(node.tag);
-    if (cached != nodeCache.end())
-        return cached->second;
-    vector<ExpressionTreeNode> childDerivs(node.getChildren().size());
-    for (int i = 0; i < (int) childDerivs.size(); i++)
-        childDerivs[i] = differentiate(node.getChildren()[i], variable, nodeCache);
-    ExpressionTreeNode result = node.getOperation().differentiate(node.getChildren(), childDerivs, variable);
-    nodeCache[node.tag] = result;
-    return result;
-}
-
-bool ParsedExpression::isConstant(const ExpressionTreeNode& node) {
-    return (node.getOperation().getId() == Operation::CONSTANT);
-}
-
-double ParsedExpression::getConstantValue(const ExpressionTreeNode& node) {
-    if (node.getOperation().getId() != Operation::CONSTANT) {
-        throw Exception("getConstantValue called on a non-constant ExpressionNode");
-    }
-    return dynamic_cast<const Operation::Constant&>(node.getOperation()).getValue();
-}
-
-ExpressionProgram ParsedExpression::createProgram() const {
-    return ExpressionProgram(*this);
-}
-
-CompiledExpression ParsedExpression::createCompiledExpression() const {
-    return CompiledExpression(*this);
-}
-
-CompiledVectorExpression ParsedExpression::createCompiledVectorExpression(int width) const {
-    return CompiledVectorExpression(*this, width);
-}
-
-ParsedExpression ParsedExpression::renameVariables(const map<string, string>& replacements) const {
-    return ParsedExpression(renameNodeVariables(getRootNode(), replacements));
-}
-
-ExpressionTreeNode ParsedExpression::renameNodeVariables(const ExpressionTreeNode& node, const map<string, string>& replacements) {
-    if (node.getOperation().getId() == Operation::VARIABLE) {
-        map<string, string>::const_iterator replace = replacements.find(node.getOperation().getName());
-        if (replace != replacements.end())
-            return ExpressionTreeNode(new Operation::Variable(replace->second));
-    }
-    vector<ExpressionTreeNode> children;
-    for (int i = 0; i < (int) node.getChildren().size(); i++)
-        children.push_back(renameNodeVariables(node.getChildren()[i], replacements));
-    return ExpressionTreeNode(node.getOperation().clone(), children);
-}
-
-ostream& Lepton::operator<<(ostream& out, const ExpressionTreeNode& node) {
-    if (node.getOperation().isInfixOperator() && node.getChildren().size() == 2) {
-        out << "(" << node.getChildren()[0] << ")" << node.getOperation().getName() << "(" << node.getChildren()[1] << ")";
-    }
-    else if (node.getOperation().isInfixOperator() && node.getChildren().size() == 1) {
-        out << "(" << node.getChildren()[0] << ")" << node.getOperation().getName();
-    }
-    else {
-        out << node.getOperation().getName();
-        if (node.getChildren().size() > 0) {
-            out << "(";
-            for (int i = 0; i < (int) node.getChildren().size(); i++) {
-                if (i > 0)
-                    out << ", ";
-                out << node.getChildren()[i];
-            }
-            out << ")";
-        }
-    }
-    return out;
-}
-
-ostream& Lepton::operator<<(ostream& out, const ParsedExpression& exp) {
-    out << exp.getRootNode();
-    return out;
-}
diff --git a/lib/colvars/lepton/src/Parser.cpp b/lib/colvars/lepton/src/Parser.cpp
deleted file mode 100644
index e284add258..0000000000
--- a/lib/colvars/lepton/src/Parser.cpp
+++ /dev/null
@@ -1,409 +0,0 @@
-/* -------------------------------------------------------------------------- *
- *                                   Lepton                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the Lepton expression parser originating from              *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2009-2019 Stanford University and the Authors.      *
- * Authors: Peter Eastman                                                     *
- * Contributors:                                                              *
- *                                                                            *
- * Permission is hereby granted, free of charge, to any person obtaining a    *
- * copy of this software and associated documentation files (the "Software"), *
- * to deal in the Software without restriction, including without limitation  *
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
- * and/or sell copies of the Software, and to permit persons to whom the      *
- * Software is furnished to do so, subject to the following conditions:       *
- *                                                                            *
- * The above copyright notice and this permission notice shall be included in *
- * all copies or substantial portions of the Software.                        *
- *                                                                            *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
- * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
- * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
- * -------------------------------------------------------------------------- */
-
-#include "lepton/Parser.h"
-#include "lepton/CustomFunction.h"
-#include "lepton/Exception.h"
-#include "lepton/ExpressionTreeNode.h"
-#include "lepton/Operation.h"
-#include "lepton/ParsedExpression.h"
-#include <cctype>
-#include <iostream>
-
-using namespace Lepton;
-using namespace std;
-
-static const string Digits = "0123456789";
-static const string Operators = "+-*/^";
-static const bool LeftAssociative[] = {true, true, true, true, false};
-static const int Precedence[] = {0, 0, 1, 1, 3};
-static const Operation::Id OperationId[] = {Operation::ADD, Operation::SUBTRACT, Operation::MULTIPLY, Operation::DIVIDE, Operation::POWER};
-
-class Lepton::ParseToken {
-public:
-    enum Type {Number, Operator, Variable, Function, LeftParen, RightParen, Comma, Whitespace};
-
-    ParseToken(string text, Type type) : text(text), type(type) {
-    }
-    const string& getText() const {
-        return text;
-    }
-    Type getType() const {
-        return type;
-    }
-private:
-    string text;
-    Type type;
-};
-
-string Parser::trim(const string& expression) {
-    // Remove leading and trailing spaces.
-
-    int start, end;
-    for (start = 0; start < (int) expression.size() && isspace(expression[start]); start++)
-        ;
-    for (end = (int) expression.size()-1; end > start && isspace(expression[end]); end--)
-        ;
-    if (start == end && isspace(expression[end]))
-        return "";
-    return expression.substr(start, end-start+1);
-}
-
-ParseToken Parser::getNextToken(const string& expression, int start) {
-    char c = expression[start];
-    if (c == '(')
-        return ParseToken("(", ParseToken::LeftParen);
-    if (c == ')')
-        return ParseToken(")", ParseToken::RightParen);
-    if (c == ',')
-        return ParseToken(",", ParseToken::Comma);
-    if (Operators.find(c) != string::npos)
-        return ParseToken(string(1, c), ParseToken::Operator);
-    if (isspace(c)) {
-        // White space
-
-        for (int pos = start+1; pos < (int) expression.size(); pos++) {
-            if (!isspace(expression[pos]))
-                return ParseToken(expression.substr(start, pos-start), ParseToken::Whitespace);
-        }
-        return ParseToken(expression.substr(start, string::npos), ParseToken::Whitespace);
-    }
-    if (c == '.' || Digits.find(c) != string::npos) {
-        // A number
-
-        bool foundDecimal = (c == '.');
-        bool foundExp = false;
-        int pos;
-        for (pos = start+1; pos < (int) expression.size(); pos++) {
-            c = expression[pos];
-            if (Digits.find(c) != string::npos)
-                continue;
-            if (c == '.' && !foundDecimal) {
-                foundDecimal = true;
-                continue;
-            }
-            if ((c == 'e' || c == 'E') && !foundExp) {
-                foundExp = true;
-                if (pos < (int) expression.size()-1 && (expression[pos+1] == '-' || expression[pos+1] == '+'))
-                    pos++;
-                continue;
-            }
-            break;
-        }
-        return ParseToken(expression.substr(start, pos-start), ParseToken::Number);
-    }
-
-    // A variable, function, or left parenthesis
-
-    for (int pos = start; pos < (int) expression.size(); pos++) {
-        c = expression[pos];
-        if (c == '(')
-            return ParseToken(expression.substr(start, pos-start+1), ParseToken::Function);
-        if (Operators.find(c) != string::npos || c == ',' || c == ')' || isspace(c))
-            return ParseToken(expression.substr(start, pos-start), ParseToken::Variable);
-    }
-    return ParseToken(expression.substr(start, string::npos), ParseToken::Variable);
-}
-
-vector<ParseToken> Parser::tokenize(const string& expression) {
-    vector<ParseToken> tokens;
-    int pos = 0;
-    while (pos < (int) expression.size()) {
-        ParseToken token = getNextToken(expression, pos);
-        if (token.getType() != ParseToken::Whitespace)
-            tokens.push_back(token);
-        pos += (int) token.getText().size();
-    }
-    return tokens;
-}
-
-ParsedExpression Parser::parse(const string& expression) {
-    return parse(expression, map<string, CustomFunction*>());
-}
-
-ParsedExpression Parser::parse(const string& expression, const map<string, CustomFunction*>& customFunctions) {
-    try {
-        // First split the expression into subexpressions.
-
-        string primaryExpression = expression;
-        vector<string> subexpressions;
-        while (true) {
-            string::size_type pos = primaryExpression.find_last_of(';');
-            if (pos == string::npos)
-                break;
-            string sub = trim(primaryExpression.substr(pos+1));
-            if (sub.size() > 0)
-                subexpressions.push_back(sub);
-            primaryExpression = primaryExpression.substr(0, pos);
-        }
-
-        // Parse the subexpressions.
-
-        map<string, ExpressionTreeNode> subexpDefs;
-        for (int i = 0; i < (int) subexpressions.size(); i++) {
-            string::size_type equalsPos = subexpressions[i].find('=');
-            if (equalsPos == string::npos)
-                throw Exception("subexpression does not specify a name");
-            string name = trim(subexpressions[i].substr(0, equalsPos));
-            if (name.size() == 0)
-                throw Exception("subexpression does not specify a name");
-            vector<ParseToken> tokens = tokenize(subexpressions[i].substr(equalsPos+1));
-            int pos = 0;
-            subexpDefs[name] = parsePrecedence(tokens, pos, customFunctions, subexpDefs, 0);
-            if (pos != tokens.size())
-                throw Exception("unexpected text at end of subexpression: "+tokens[pos].getText());
-        }
-
-        // Now parse the primary expression.
-
-        vector<ParseToken> tokens = tokenize(primaryExpression);
-        int pos = 0;
-        ExpressionTreeNode result = parsePrecedence(tokens, pos, customFunctions, subexpDefs, 0);
-        if (pos != tokens.size())
-            throw Exception("unexpected text at end of expression: "+tokens[pos].getText());
-        return ParsedExpression(result);
-    }
-    catch (Exception& ex) {
-        throw Exception("Parse error in expression \""+expression+"\": "+ex.what());
-    }
-}
-
-ExpressionTreeNode Parser::parsePrecedence(const vector<ParseToken>& tokens, int& pos, const map<string, CustomFunction*>& customFunctions,
-            const map<string, ExpressionTreeNode>& subexpressionDefs, int precedence) {
-    if (pos == tokens.size())
-        throw Exception("unexpected end of expression");
-
-    // Parse the next value (number, variable, function, parenthesized expression)
-
-    ParseToken token = tokens[pos];
-    ExpressionTreeNode result;
-    if (token.getType() == ParseToken::Number) {
-        double value;
-        stringstream(token.getText()) >> value;
-        result = ExpressionTreeNode(new Operation::Constant(value));
-        pos++;
-    }
-    else if (token.getType() == ParseToken::Variable) {
-        map<string, ExpressionTreeNode>::const_iterator subexp = subexpressionDefs.find(token.getText());
-        if (subexp == subexpressionDefs.end()) {
-            Operation* op = new Operation::Variable(token.getText());
-            result = ExpressionTreeNode(op);
-        }
-        else
-            result = subexp->second;
-        pos++;
-    }
-    else if (token.getType() == ParseToken::LeftParen) {
-        pos++;
-        result = parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, 0);
-        if (pos == tokens.size() || tokens[pos].getType() != ParseToken::RightParen)
-            throw Exception("unbalanced parentheses");
-        pos++;
-    }
-    else if (token.getType() == ParseToken::Function) {
-        pos++;
-        vector<ExpressionTreeNode> args;
-        bool moreArgs;
-        do {
-            args.push_back(parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, 0));
-            moreArgs = (pos < (int) tokens.size() && tokens[pos].getType() == ParseToken::Comma);
-            if (moreArgs)
-                pos++;
-        } while (moreArgs);
-        if (pos == tokens.size() || tokens[pos].getType() != ParseToken::RightParen)
-            throw Exception("unbalanced parentheses");
-        pos++;
-        Operation* op = getFunctionOperation(token.getText(), customFunctions);
-        try {
-            result = ExpressionTreeNode(op, args);
-        }
-        catch (...) {
-            delete op;
-            throw;
-        }
-    }
-    else if (token.getType() == ParseToken::Operator && token.getText() == "-") {
-        pos++;
-        ExpressionTreeNode toNegate = parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, 2);
-        result = ExpressionTreeNode(new Operation::Negate(), toNegate);
-    }
-    else
-        throw Exception("unexpected token: "+token.getText());
-
-    // Now deal with the next binary operator.
-
-    while (pos < (int) tokens.size() && tokens[pos].getType() == ParseToken::Operator) {
-        token = tokens[pos];
-        int opIndex = (int) Operators.find(token.getText());
-        int opPrecedence = Precedence[opIndex];
-        if (opPrecedence < precedence)
-            return result;
-        pos++;
-        ExpressionTreeNode arg = parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, LeftAssociative[opIndex] ? opPrecedence+1 : opPrecedence);
-        Operation* op = getOperatorOperation(token.getText());
-        try {
-            result = ExpressionTreeNode(op, result, arg);
-        }
-        catch (...) {
-            delete op;
-            throw;
-        }
-    }
-    return result;
-}
-
-Operation* Parser::getOperatorOperation(const std::string& name) {
-    switch (OperationId[Operators.find(name)]) {
-        case Operation::ADD:
-            return new Operation::Add();
-        case Operation::SUBTRACT:
-            return new Operation::Subtract();
-        case Operation::MULTIPLY:
-            return new Operation::Multiply();
-        case Operation::DIVIDE:
-            return new Operation::Divide();
-        case Operation::POWER:
-            return new Operation::Power();
-        default:
-            throw Exception("unknown operator");
-    }
-}
-
-Operation* Parser::getFunctionOperation(const std::string& name, const map<string, CustomFunction*>& customFunctions) {
-
-    static map<string, Operation::Id> opMap;
-    if (opMap.size() == 0) {
-        opMap["sqrt"] = Operation::SQRT;
-        opMap["exp"] = Operation::EXP;
-        opMap["log"] = Operation::LOG;
-        opMap["sin"] = Operation::SIN;
-        opMap["cos"] = Operation::COS;
-        opMap["sec"] = Operation::SEC;
-        opMap["csc"] = Operation::CSC;
-        opMap["tan"] = Operation::TAN;
-        opMap["cot"] = Operation::COT;
-        opMap["asin"] = Operation::ASIN;
-        opMap["acos"] = Operation::ACOS;
-        opMap["atan"] = Operation::ATAN;
-        opMap["atan2"] = Operation::ATAN2;
-        opMap["sinh"] = Operation::SINH;
-        opMap["cosh"] = Operation::COSH;
-        opMap["tanh"] = Operation::TANH;
-        opMap["erf"] = Operation::ERF;
-        opMap["erfc"] = Operation::ERFC;
-        opMap["step"] = Operation::STEP;
-        opMap["delta"] = Operation::DELTA;
-        opMap["square"] = Operation::SQUARE;
-        opMap["cube"] = Operation::CUBE;
-        opMap["recip"] = Operation::RECIPROCAL;
-        opMap["min"] = Operation::MIN;
-        opMap["max"] = Operation::MAX;
-        opMap["abs"] = Operation::ABS;
-        opMap["floor"] = Operation::FLOOR;
-        opMap["ceil"] = Operation::CEIL;
-        opMap["select"] = Operation::SELECT;
-    }
-    string trimmed = name.substr(0, name.size()-1);
-
-    // First check custom functions.
-
-    map<string, CustomFunction*>::const_iterator custom = customFunctions.find(trimmed);
-    if (custom != customFunctions.end())
-        return new Operation::Custom(trimmed, custom->second->clone());
-
-    // Now try standard functions.
-
-    map<string, Operation::Id>::const_iterator iter = opMap.find(trimmed);
-    if (iter == opMap.end())
-        throw Exception("unknown function: "+trimmed);
-    switch (iter->second) {
-        case Operation::SQRT:
-            return new Operation::Sqrt();
-        case Operation::EXP:
-            return new Operation::Exp();
-        case Operation::LOG:
-            return new Operation::Log();
-        case Operation::SIN:
-            return new Operation::Sin();
-        case Operation::COS:
-            return new Operation::Cos();
-        case Operation::SEC:
-            return new Operation::Sec();
-        case Operation::CSC:
-            return new Operation::Csc();
-        case Operation::TAN:
-            return new Operation::Tan();
-        case Operation::COT:
-            return new Operation::Cot();
-        case Operation::ASIN:
-            return new Operation::Asin();
-        case Operation::ACOS:
-            return new Operation::Acos();
-        case Operation::ATAN:
-            return new Operation::Atan();
-        case Operation::ATAN2:
-            return new Operation::Atan2();
-        case Operation::SINH:
-            return new Operation::Sinh();
-        case Operation::COSH:
-            return new Operation::Cosh();
-        case Operation::TANH:
-            return new Operation::Tanh();
-        case Operation::ERF:
-            return new Operation::Erf();
-        case Operation::ERFC:
-            return new Operation::Erfc();
-        case Operation::STEP:
-            return new Operation::Step();
-        case Operation::DELTA:
-            return new Operation::Delta();
-        case Operation::SQUARE:
-            return new Operation::Square();
-        case Operation::CUBE:
-            return new Operation::Cube();
-        case Operation::RECIPROCAL:
-            return new Operation::Reciprocal();
-        case Operation::MIN:
-            return new Operation::Min();
-        case Operation::MAX:
-            return new Operation::Max();
-        case Operation::ABS:
-            return new Operation::Abs();
-        case Operation::FLOOR:
-            return new Operation::Floor();
-        case Operation::CEIL:
-            return new Operation::Ceil();
-        case Operation::SELECT:
-            return new Operation::Select();
-        default:
-            throw Exception("unknown function");
-    }
-}

From c68f75492369d3c20226d11efa629f53e9dd0a06 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Tue, 27 Dec 2022 13:59:56 -0500
Subject: [PATCH 46/79] remove access to non-existing option

---
 lib/lepton/Install.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/lepton/Install.py b/lib/lepton/Install.py
index 84ac16b0d0..34e01ad339 100755
--- a/lib/lepton/Install.py
+++ b/lib/lepton/Install.py
@@ -34,7 +34,7 @@ parser.add_argument("-m", "--machine",
 args = parser.parse_args()
 
 # print help message and exit, if neither build nor path options are given
-if not args.machine and not args.extramake:
+if not args.machine:
   parser.print_help()
   sys.exit(HELP)
 

From 4f4f7be9c845c7f27a8a8a088aac34f9c4d85015 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Tue, 27 Dec 2022 14:04:42 -0500
Subject: [PATCH 47/79] must provide list of objects for colvars lib

---
 lib/colvars/Makefile.common | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/colvars/Makefile.common b/lib/colvars/Makefile.common
index f6d6ba84bb..3db2dae495 100644
--- a/lib/colvars/Makefile.common
+++ b/lib/colvars/Makefile.common
@@ -63,17 +63,17 @@ COLVARS_SRCS = \
 
 # Allow to selectively turn off Lepton
 ifeq ($(COLVARS_LEPTON),no)
-LEPTON_INCFLAGS = 
-COLVARS_OBJS = $(COLVARS_SRCS:.cpp=.o)
+LEPTON_INCFLAGS =
 else
 LEPTON_INCFLAGS = -I../lepton/include -DLEPTON
 endif
 
+COLVARS_OBJS = $(COLVARS_SRCS:.cpp=.o)
 
 %.o: %.cpp
 	$(CXX) $(CXXFLAGS) $(COLVARS_INCFLAGS) $(LEPTON_INCFLAGS) -c -o $@ $<
 
-$(COLVARS_LIB):	Makefile.deps $(COLVARS_OBJS)
+$(COLVARS_LIB): Makefile.deps $(COLVARS_OBJS)
 	$(AR) $(ARFLAGS) $(COLVARS_LIB) $(COLVARS_OBJS)
 
 

From 854089ef8dd4f601dadf5c711fe8fbf5435afd09 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Tue, 27 Dec 2022 14:05:16 -0500
Subject: [PATCH 48/79] trigger building Lepton lib when requesting colvars

---
 src/Makefile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/Makefile b/src/Makefile
index 494c64699e..13eeac58e7 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -639,6 +639,8 @@ lib-%:
 	  echo "Install script for lib $(@:lib-%=%) does not exist"; \
 	fi; touch main.cpp
 
+lib-colvars: lib-lepton
+
 # status = list src files that differ from package files
 # installed = list of installed packages
 # update = replace src files with newer package files

From a79a058bce5dd1e92c3ee98863010e9ff1bdf0c8 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Tue, 27 Dec 2022 14:37:56 -0500
Subject: [PATCH 49/79] fix up a few more details for conventional build

---
 src/COLVARS/Install.sh | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/src/COLVARS/Install.sh b/src/COLVARS/Install.sh
index 6b9b9febe0..d5c811e582 100755
--- a/src/COLVARS/Install.sh
+++ b/src/COLVARS/Install.sh
@@ -38,9 +38,12 @@ if (test $1 = 1) then
 
   if (test -e ../Makefile.package) then
     sed -i -e 's/[^ \t]*colvars[^ \t]* //g' ../Makefile.package
-    sed -i -e 's|^PKG_INC =[ \t]*|&-I..\/..\/lib\/colvars -I..\/..\/lib\/lepton\/include |' ../Makefile.package
+    if (test ! -e ../pair_lepton.cpp) then
+      sed -i -e 's/[^ \t]*lepton[^ \t]* //g' ../Makefile.package
+    fi
+    sed -i -e 's|^PKG_INC =[ \t]*|&-I..\/..\/lib\/colvars -I..\/..\/lib\/lepton\/include -I..\/..\/lib\/lepton |' ../Makefile.package
     sed -i -e 's|^PKG_PATH =[ \t]*|&-L..\/..\/lib\/colvars$(LIBOBJDIR) -L..\/..\/lib\/lepton$(LIBOBJDIR) |' ../Makefile.package
-    sed -i -e 's|^PKG_LIB =[ \t]*|&-lcolvars -llepton|' ../Makefile.package
+    sed -i -e 's|^PKG_LIB =[ \t]*|&-lcolvars -llepton |' ../Makefile.package
     sed -i -e 's|^PKG_SYSINC =[ \t]*|&$(colvars_SYSINC) $(lepton_SYSINC) |' ../Makefile.package
     sed -i -e 's|^PKG_SYSLIB =[ \t]*|&$(colvars_SYSLIB) $(lepton_SYSLIB) |' ../Makefile.package
     sed -i -e 's|^PKG_SYSPATH =[ \t]*|&$(colvars_SYSPATH) $(lepton_SYSPATH) |' ../Makefile.package
@@ -48,6 +51,9 @@ if (test $1 = 1) then
 
   if (test -e ../Makefile.package.settings) then
     sed -i -e '/^[ \t]*include.*colvars.*$/d' ../Makefile.package.settings
+    if (test ! -e ../pair_lepton.cpp) then
+      sed -i -e '/^[ \t]*include.*lepton.*$/d' ../Makefile.package.settings
+    fi
     # multiline form needed for BSD sed on Macs
     sed -i -e '4 i \
 include ..\/..\/lib\/colvars\/Makefile.lammps
@@ -65,10 +71,14 @@ elif (test $1 = 0) then
 
   if (test -e ../Makefile.package) then
     sed -i -e 's/[^ \t]*colvars[^ \t]* //g' ../Makefile.package
+    if (test ! -e ../pair_lepton.cpp) then
+      sed -i -e 's/[^ \t]*lepton[^ \t]* //g' ../Makefile.package
+    fi
   fi
-
   if (test -e ../Makefile.package.settings) then
     sed -i -e '/^[ \t]*include.*colvars.*$/d' ../Makefile.package.settings
+    if (test ! -e ../pair_lepton.cpp) then
+      sed -i -e '/^[ \t]*include.*lepton.*$/d' ../Makefile.package.settings
+    fi
   fi
-
 fi

From 307829ad108fed4865ce4fea375fe1eb59013b5b Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Tue, 27 Dec 2022 14:40:43 -0500
Subject: [PATCH 50/79] add unit test for dihedral style lepton

---
 .../force-styles/tests/dihedral-lepton.yaml   | 87 +++++++++++++++++++
 1 file changed, 87 insertions(+)
 create mode 100644 unittest/force-styles/tests/dihedral-lepton.yaml

diff --git a/unittest/force-styles/tests/dihedral-lepton.yaml b/unittest/force-styles/tests/dihedral-lepton.yaml
new file mode 100644
index 0000000000..2e5dee9603
--- /dev/null
+++ b/unittest/force-styles/tests/dihedral-lepton.yaml
@@ -0,0 +1,87 @@
+---
+lammps_version: 22 Dec 2022
+date_generated: Mon Dec 26 15:54:31 2022
+epsilon: 2.5e-13
+skip_tests:
+prerequisites: ! |
+  atom full
+  dihedral lepton
+pre_commands: ! ""
+post_commands: ! ""
+input_file: in.fourmol
+dihedral_style: lepton
+dihedral_coeff: ! |
+  1  "k*(1 + d*cos(n*phi)); k=75.0; d=1; n=2"
+  2  "k*(1 + d*cos(n*phi)); k=45.0; d=-1; n=4"
+  3  "k*(1 + d*cos(n*phi)); k=56.0; d=-1; n=2"
+  4  "k*(1 + d*cos(n*phi)); k=23.0; d=1; n=1"
+  5  "k*(1 + d*cos(n*phi)); k=19.0; d=-1; n=3"
+extract: ! ""
+natoms: 29
+init_energy: 789.1739585864801
+init_stress: ! |-
+  -6.2042484436524084e+01  1.2714037725306235e+02 -6.5097892816538319e+01  2.6648135399224245e+01  1.3495574921305200e+02  1.6236422290928138e+02
+init_forces: ! |2
+    1 -2.1511698742846065e+01  4.0249060564856002e+01 -9.0013321196300723e+01
+    2 -8.1931697051662269e+00  4.2308632119001643e+00 -4.0030670619000830e+00
+    3  9.1213724359021342e+01 -1.3766351447039602e+02  8.1969246558441185e+01
+    4 -4.8202572898596188e+01 -8.0465316960732949e+00  6.4757081520864730e+01
+    5 -6.2252471689207333e+01  2.2804485244022288e+01 -5.3285277341381336e+00
+    6  9.1271091191894925e+01  1.3743691097166172e+02 -3.9344000137592744e+01
+    7 -4.7435622518386914e+01 -5.1206081255886943e+01  8.4101355581705377e+00
+    8  2.2568717344776428e+02  1.6221073825524221e+02  5.7667169753528491e+01
+    9 -2.0794865226207477e+00  5.0314964909956039e+00 -7.5468528100484278e-01
+   10 -4.0476567806811579e+02 -4.7270660984257188e+02 -9.9999223894595502e+01
+   11  3.9909170606249532e+01  2.0810704935563015e+02 -1.3665198019985252e+02
+   12  6.2493704719337998e+01  7.0253447917427536e+01  1.9569964347346638e+02
+   13  2.9234925409867785e+01  6.7200938735330837e+01  1.4104379799580227e+02
+   14  7.2099736490024071e+01 -1.0032854911322354e+02 -3.5674421421421016e+01
+   15 -1.0059762933494233e+02  3.4057372960589930e+01 -1.0291545492293885e+02
+   16 -9.2273705073611552e+01 -1.2566881299602967e+02 -6.3115663814665560e+01
+   17  1.7540250832933313e+02  1.4403773566652492e+02  2.8253270804136410e+01
+   18  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   19  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   20  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   21  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   22  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   23  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   24  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   25  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   26  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   27  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   28  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   29  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+run_energy: 786.1866358550079
+run_stress: ! |-
+  -6.1891690150881558e+01  1.2738938495389718e+02 -6.5497694803015776e+01  2.6197221636385898e+01  1.3475397071042019e+02  1.6145289649182797e+02
+run_forces: ! |2
+    1 -2.2302877929458504e+01  4.0672550312262082e+01 -9.0501596668366645e+01
+    2 -7.6795593796038872e+00  3.9696254383022449e+00 -3.7581780677357592e+00
+    3  9.2113038158654120e+01 -1.3743858583367702e+02  8.2424527694664405e+01
+    4 -4.8297128598674377e+01 -8.1171172512686063e+00  6.4789088490585158e+01
+    5 -6.2249945690217785e+01  2.2813353689490324e+01 -5.3758961093281243e+00
+    6  9.1082266628006437e+01  1.3760435354838000e+02 -3.9497610280357804e+01
+    7 -4.6896902011280702e+01 -5.0626904069869482e+01  8.3785410081477032e+00
+    8  2.2272760695742238e+02  1.5895499756012134e+02  5.7194518287049661e+01
+    9 -1.3424389406807151e+00  5.5961120716833399e+00 -1.0522843139660516e+00
+   10 -4.0569661830987496e+02 -4.7090645706702190e+02 -9.7628440388580344e+01
+   11  4.2260633810406674e+01  2.0874271156158213e+02 -1.3676519733514760e+02
+   12  6.2351939715965081e+01  6.8740733618467345e+01  1.9368291702263957e+02
+   13  2.9034913938879313e+01  6.7392732937882698e+01  1.4128237950589556e+02
+   14  7.1584708215786918e+01 -9.9391162196277406e+01 -3.5112483074387477e+01
+   15 -1.0011391208839510e+02  3.3797184010534494e+01 -1.0280672267359483e+02
+   16 -9.3370884293886760e+01 -1.2693997516553874e+02 -6.3467167983741845e+01
+   17  1.7679515981695175e+02  1.4513584683494716e+02  2.8213604886224367e+01
+   18  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   19  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   20  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   21  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   22  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   23  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   24  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   25  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   26  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   27  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   28  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   29  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+...

From 353f4cb3614eb610da1e6e6587042b1eaf21c388 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Tue, 27 Dec 2022 14:53:53 -0500
Subject: [PATCH 51/79] must not remove settings for lepton library if colvars
 package is installed

---
 src/LEPTON/Install.sh | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/LEPTON/Install.sh b/src/LEPTON/Install.sh
index 6696d0c776..81f50a99fb 100755
--- a/src/LEPTON/Install.sh
+++ b/src/LEPTON/Install.sh
@@ -58,11 +58,15 @@ include ..\/..\/lib\/lepton\/Makefile.lammps
 elif (test $1 = 0) then
 
   if (test -e ../Makefile.package) then
-    sed -i -e 's/[^ \t]*lepton[^ \t]* //g' ../Makefile.package
+    if (test ! -e ../fix_colvars.cpp) then
+      sed -i -e 's/[^ \t]*lepton[^ \t]* //g' ../Makefile.package
+    fi
   fi
 
   if (test -e ../Makefile.package.settings) then
-    sed -i -e '/^[ \t]*include.*lepton.*$/d' ../Makefile.package.settings
+    if (test ! -e ../fix_colvars.cpp) then
+      sed -i -e '/^[ \t]*include.*lepton.*$/d' ../Makefile.package.settings
+    fi
   fi
 
 fi

From 2a3d1a1ba56fbc5db1fb5becbf03b51ec4893698 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Tue, 27 Dec 2022 15:16:58 -0500
Subject: [PATCH 52/79] import JIT settings to colvars library makefile

---
 lib/colvars/Makefile.common | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/lib/colvars/Makefile.common b/lib/colvars/Makefile.common
index 3db2dae495..2a47fc529d 100644
--- a/lib/colvars/Makefile.common
+++ b/lib/colvars/Makefile.common
@@ -63,9 +63,24 @@ COLVARS_SRCS = \
 
 # Allow to selectively turn off Lepton
 ifeq ($(COLVARS_LEPTON),no)
+
 LEPTON_INCFLAGS =
+
 else
-LEPTON_INCFLAGS = -I../lepton/include -DLEPTON
+
+# imported from ../lepton/Common.mk
+ENABLE_JIT=0
+ifeq ($(shell uname -m),x86_64)
+ENABLE_JIT=1
+endif
+ifeq ($(shell uname -m),amd64)
+ENABLE_JIT=1
+endif
+LEPTON_INCFLAGS = -I../lepton/include -DLEPTON -DLEPTON_BUILDING_STATIC_LIBRARY=1
+ifeq ($(ENABLE_JIT),1)
+LEPTON_INCLFLAGS += -I../lepton -DLEPTON_USE_JIT=1 -DASMJIT_BUILD_X86=1 -DASMJIT_STATIC=1 -DASMJIT_BUILD_RELEASE=1
+endif
+
 endif
 
 COLVARS_OBJS = $(COLVARS_SRCS:.cpp=.o)

From efc2e96a9e4fcccc1ce7a67862bdb31509aa7c9a Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Tue, 27 Dec 2022 17:42:59 -0500
Subject: [PATCH 53/79] explicitly share Lepton settings between lepton and
 colvars folders

---
 lib/colvars/Makefile.common | 15 +++------------
 lib/lepton/Common.mk        | 18 ++++++------------
 lib/lepton/Settings.mk      | 17 +++++++++++++++++
 3 files changed, 26 insertions(+), 24 deletions(-)
 create mode 100644 lib/lepton/Settings.mk

diff --git a/lib/colvars/Makefile.common b/lib/colvars/Makefile.common
index 2a47fc529d..356a7f4a91 100644
--- a/lib/colvars/Makefile.common
+++ b/lib/colvars/Makefile.common
@@ -68,18 +68,9 @@ LEPTON_INCFLAGS =
 
 else
 
-# imported from ../lepton/Common.mk
-ENABLE_JIT=0
-ifeq ($(shell uname -m),x86_64)
-ENABLE_JIT=1
-endif
-ifeq ($(shell uname -m),amd64)
-ENABLE_JIT=1
-endif
-LEPTON_INCFLAGS = -I../lepton/include -DLEPTON -DLEPTON_BUILDING_STATIC_LIBRARY=1
-ifeq ($(ENABLE_JIT),1)
-LEPTON_INCLFLAGS += -I../lepton -DLEPTON_USE_JIT=1 -DASMJIT_BUILD_X86=1 -DASMJIT_STATIC=1 -DASMJIT_BUILD_RELEASE=1
-endif
+LEPTON_DIR = ../lepton
+include $(LEPTON_DIR)/Settings.mk
+LEPTON_INCFLAGS = $(LEPTON_INC) $(LEPTON_DEF)
 
 endif
 
diff --git a/lib/lepton/Common.mk b/lib/lepton/Common.mk
index 4dc2b95d94..3eead392a4 100644
--- a/lib/lepton/Common.mk
+++ b/lib/lepton/Common.mk
@@ -77,26 +77,20 @@ JITOBJ=$(JITX86:asmjit/x86/%.cpp=build/x86.%.o) \
        $(JITARM:asmjit/arm/%.cpp=build/arm.%.o) \
        $(JIXCORE:asmjit/core/%.cpp=build/core.%.o)
 
-ENABLE_JIT=0
-ifeq ($(shell uname -m),x86_64)
-ENABLE_JIT=1
-endif
-ifeq ($(shell uname -m),amd64)
-ENABLE_JIT=1
-endif
+LEPTON_DIR=.
+
+include $(LEPTON_DIR)/Settings.mk
 
 EXTRAMAKE=Makefile.lammps.empty
-INC=-I include
-DEF=-DLEPTON_BUILDING_STATIC_LIBRARY=1
-
 LIB=liblepton.a
 
 ifeq ($(ENABLE_JIT),1)
 OBJ += $(JITOBJ)
-INC += -I .
-DEF += -DLEPTON_USE_JIT=1 -DASMJIT_BUILD_X86=1 -DASMJIT_STATIC=1 -DASMJIT_BUILD_RELEASE=1
 endif
 
+INC += $(LEPTON_INC)
+CXXFLAGS += $(LEPTON_DEF)
+
 all: $(LIB) Makefile.lammps
 
 build:
diff --git a/lib/lepton/Settings.mk b/lib/lepton/Settings.mk
new file mode 100644
index 0000000000..d7fd0ac22e
--- /dev/null
+++ b/lib/lepton/Settings.mk
@@ -0,0 +1,17 @@
+# makefile variables and settings related to configuring JIT with Lepton.
+
+ENABLE_JIT=0
+ifeq ($(shell uname -m),x86_64)
+ENABLE_JIT=1
+endif
+ifeq ($(shell uname -m),amd64)
+ENABLE_JIT=1
+endif
+
+LEPTON_INC = -I$(LEPTON_DIR)/include
+LEPTON_DEF = -DLEPTON_BUILDING_STATIC_LIBRARY=1
+
+ifeq ($(ENABLE_JIT),1)
+LEPTON_INC += -I$(LEPTON_DIR)
+LEPTON_DEF += -DLEPTON_USE_JIT=1 -DASMJIT_BUILD_X86=1 -DASMJIT_STATIC=1 -DASMJIT_BUILD_RELEASE=1
+endif

From c63f1647fb2e0a2f316f031d8cf8734d5f3aa344 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Tue, 27 Dec 2022 17:43:31 -0500
Subject: [PATCH 54/79] work around pointer aliasing issue with JIT enabled

---
 lib/lepton/src/CompiledExpression.cpp       | 12 ++++++++++--
 lib/lepton/src/CompiledVectorExpression.cpp | 10 ++++++++--
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/lib/lepton/src/CompiledExpression.cpp b/lib/lepton/src/CompiledExpression.cpp
index 61dd942c62..b3a1fadf41 100644
--- a/lib/lepton/src/CompiledExpression.cpp
+++ b/lib/lepton/src/CompiledExpression.cpp
@@ -38,6 +38,7 @@ using namespace Lepton;
 using namespace std;
 #ifdef LEPTON_USE_JIT
     using namespace asmjit;
+    #include <cinttypes>
 #endif
 
 CompiledExpression::CompiledExpression() : jitCode(NULL) {
@@ -513,6 +514,12 @@ void CompiledExpression::generateTwoArgCall(a64::Compiler& c, arm::Vec& dest, ar
     invoke->setRet(0, dest);
 }
 #else
+
+union int64_vs_double {
+    int64_t i;
+    double  d;
+};
+
 void CompiledExpression::generateJitCode() {
     const CpuInfo& cpu = CpuInfo::host();
     if (!cpu.hasFeature(CpuFeatures::X86::kAVX))
@@ -561,8 +568,9 @@ void CompiledExpression::generateJitCode() {
         else if (op.getId() == Operation::DELTA)
             value = 1.0;
         else if (op.getId() == Operation::ABS) {
-            long long mask = 0x7FFFFFFFFFFFFFFF;
-            value = *reinterpret_cast<double*>(&mask);
+            int64_vs_double mask;
+            mask.i = 0x7FFFFFFFFFFFFFFF;
+            value = mask.d;
         }
         else if (op.getId() == Operation::POWER_CONSTANT) {
             if (stepGroup[step] == -1)
diff --git a/lib/lepton/src/CompiledVectorExpression.cpp b/lib/lepton/src/CompiledVectorExpression.cpp
index c8262b3873..85b492bb3c 100644
--- a/lib/lepton/src/CompiledVectorExpression.cpp
+++ b/lib/lepton/src/CompiledVectorExpression.cpp
@@ -573,6 +573,11 @@ void CompiledVectorExpression::generateTwoArgCall(a64::Compiler& c, arm::Vec& de
 }
 #else
 
+union int_vs_float {
+    int   i;
+    float f;
+};
+
 void CompiledVectorExpression::generateJitCode() {
     const CpuInfo& cpu = CpuInfo::host();
     if (!cpu.hasFeature(CpuFeatures::X86::kAVX))
@@ -624,8 +629,9 @@ void CompiledVectorExpression::generateJitCode() {
         else if (op.getId() == Operation::DELTA)
             value = 1.0;
         else if (op.getId() == Operation::ABS) {
-            int mask = 0x7FFFFFFF;
-            value = *reinterpret_cast<float*>(&mask);
+            int_vs_float mask;
+            mask.i = 0x7FFFFFFF;
+            value = mask.f;
         }
         else if (op.getId() == Operation::POWER_CONSTANT) {
             if (stepGroup[step] == -1)

From 246b25e2edbbf6173ab9066866f1d7eec1c6ac17 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Tue, 27 Dec 2022 17:43:39 -0500
Subject: [PATCH 55/79] silence compiler warning

---
 lib/lepton/src/ParsedExpression.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/lepton/src/ParsedExpression.cpp b/lib/lepton/src/ParsedExpression.cpp
index 6a9df8097a..9ff0b2aaf7 100644
--- a/lib/lepton/src/ParsedExpression.cpp
+++ b/lib/lepton/src/ParsedExpression.cpp
@@ -157,6 +157,7 @@ ExpressionTreeNode ParsedExpression::substituteSimplerExpression(const Expressio
     bool first_const = children.size() > 0 && isConstant(children[0]); // is first child constant?
     bool second_const = children.size() > 1 && isConstant(children[1]); // is second child constant?
     double first, second; // if yes, value of first and second child
+    first = second = 0.0;
     if (first_const)
         first = getConstantValue(children[0]);
     if (second_const)

From 9f15ad4795caddeecc95073120d5a1056ca2bb8a Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Tue, 27 Dec 2022 18:26:29 -0500
Subject: [PATCH 56/79] simplify by using a custom constructor

---
 lib/lepton/src/CompiledExpression.cpp       | 7 +++----
 lib/lepton/src/CompiledVectorExpression.cpp | 7 +++----
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/lib/lepton/src/CompiledExpression.cpp b/lib/lepton/src/CompiledExpression.cpp
index b3a1fadf41..622c1c2ecf 100644
--- a/lib/lepton/src/CompiledExpression.cpp
+++ b/lib/lepton/src/CompiledExpression.cpp
@@ -515,7 +515,8 @@ void CompiledExpression::generateTwoArgCall(a64::Compiler& c, arm::Vec& dest, ar
 }
 #else
 
-union int64_vs_double {
+union int64_to_double {
+    int64_to_double(int64_t _i) { i = _i; }
     int64_t i;
     double  d;
 };
@@ -568,9 +569,7 @@ void CompiledExpression::generateJitCode() {
         else if (op.getId() == Operation::DELTA)
             value = 1.0;
         else if (op.getId() == Operation::ABS) {
-            int64_vs_double mask;
-            mask.i = 0x7FFFFFFFFFFFFFFF;
-            value = mask.d;
+            value = int64_to_double(0x7FFFFFFFFFFFFFFF).d;
         }
         else if (op.getId() == Operation::POWER_CONSTANT) {
             if (stepGroup[step] == -1)
diff --git a/lib/lepton/src/CompiledVectorExpression.cpp b/lib/lepton/src/CompiledVectorExpression.cpp
index 85b492bb3c..2e560d2051 100644
--- a/lib/lepton/src/CompiledVectorExpression.cpp
+++ b/lib/lepton/src/CompiledVectorExpression.cpp
@@ -573,7 +573,8 @@ void CompiledVectorExpression::generateTwoArgCall(a64::Compiler& c, arm::Vec& de
 }
 #else
 
-union int_vs_float {
+union int_to_float {
+    int_to_float(int _i) { i = _i; }
     int   i;
     float f;
 };
@@ -629,9 +630,7 @@ void CompiledVectorExpression::generateJitCode() {
         else if (op.getId() == Operation::DELTA)
             value = 1.0;
         else if (op.getId() == Operation::ABS) {
-            int_vs_float mask;
-            mask.i = 0x7FFFFFFF;
-            value = mask.f;
+            value = int_to_float(0x7FFFFFFF).f;
         }
         else if (op.getId() == Operation::POWER_CONSTANT) {
             if (stepGroup[step] == -1)

From afae6222b0d7297ba87e8db93d02952e457230b8 Mon Sep 17 00:00:00 2001
From: Giacomo Fiorin <giacomo.fiorin@gmail.com>
Date: Tue, 27 Dec 2022 19:57:43 -0500
Subject: [PATCH 57/79] Update build instructions for COLVARS package

---
 cmake/Modules/Packages/COLVARS.cmake |  7 ++--
 doc/src/Build_extras.rst             | 51 +++++++++++++---------------
 doc/src/Packages_details.rst         |  6 ++--
 3 files changed, 30 insertions(+), 34 deletions(-)

diff --git a/cmake/Modules/Packages/COLVARS.cmake b/cmake/Modules/Packages/COLVARS.cmake
index 2a29553a67..3ca48b81fc 100644
--- a/cmake/Modules/Packages/COLVARS.cmake
+++ b/cmake/Modules/Packages/COLVARS.cmake
@@ -2,10 +2,9 @@ set(COLVARS_SOURCE_DIR ${LAMMPS_LIB_SOURCE_DIR}/colvars)
 
 file(GLOB COLVARS_SOURCES ${COLVARS_SOURCE_DIR}/[^.]*.cpp)
 
-option(COLVARS_DEBUG "Debugging messages for Colvars (quite verbose)" OFF)
+option(COLVARS_DEBUG "Enable debugging messages for Colvars (quite verbose)" OFF)
 
-# Build Lepton by default
-option(COLVARS_LEPTON "Build and link the Lepton library" ON)
+option(COLVARS_LEPTON "Use the Lepton library for custom expressions" ON)
 
 if(COLVARS_LEPTON)
   if(NOT LEPTON_SOURCE_DIR)
@@ -26,7 +25,7 @@ target_include_directories(colvars PRIVATE ${LAMMPS_SOURCE_DIR})
 target_link_libraries(lammps PRIVATE colvars)
 
 if(COLVARS_DEBUG)
-  # Need to export the macro publicly to also affect the proxy
+  # Need to export the macro publicly to be valid in interface code
   target_compile_definitions(colvars PUBLIC -DCOLVARS_DEBUG)
 endif()
 
diff --git a/doc/src/Build_extras.rst b/doc/src/Build_extras.rst
index ce67dd40de..7a67b4bc85 100644
--- a/doc/src/Build_extras.rst
+++ b/doc/src/Build_extras.rst
@@ -1321,9 +1321,8 @@ AWPMD package
 COLVARS package
 ---------------------------------------
 
-This package includes the `Colvars library
-<https://colvars.github.io/>`_ into the LAMMPS distribution, which can
-be built for the most part with all major versions of the C++ language.
+This package enables the use of the `Colvars <https://colvars.github.io/>`_
+module included in the LAMMPS source distribution.
 
 
 .. tabs::
@@ -1336,42 +1335,40 @@ be built for the most part with all major versions of the C++ language.
 
    .. tab:: Traditional make
 
-      Before building LAMMPS, one must build the Colvars library in lib/colvars.
+      As with other libraries distributed with LAMMPS, the Colvars library
+      needs to be built before building the LAMMPS program with the COLVARS
+      package enabled.
 
-      This can be done manually in the same folder by using or adapting
-      one of the provided Makefiles: for example, ``Makefile.g++`` for
-      the GNU C++ compiler.  C++11 compatibility may need to be enabled
-      for some older compilers (as is done in the example makefile).
-
-      In general, it is safer to use build setting consistent with the
-      rest of LAMMPS.  This is best carried out from the LAMMPS src
-      directory using a command like these, which simply invokes the
-      ``lib/colvars/Install.py`` script with the specified args:
+      From the LAMMPS ``src`` directory, this is most easily and safely done
+      via one of the following commands, which implicitly rely on the
+      ``lib/colvars/Install.py`` script with optional arguments:
 
       .. code-block:: bash
 
-         $ make lib-colvars                      # print help message
-         $ make lib-colvars args="-m serial"     # build with GNU g++ compiler (settings as with "make serial")
-         $ make lib-colvars args="-m mpi"        # build with default MPI compiler (settings as with "make mpi")
-         $ make lib-colvars args="-m g++-debug"  # build with GNU g++ compiler and colvars debugging enabled
+         $ make lib-colvars # print help message
+         $ make lib-colvars args="-m mpi" # build with default MPI compiler (settings as with "make mpi")
+         $ make lib-colvars args="-m serial" # build with GNU g++ compiler (settings as with "make serial") 
 
       The "machine" argument of the "-m" flag is used to find a
-      Makefile.machine to use as build recipe.  If it does not already
-      exist in ``lib/colvars``, it will be auto-generated by using
-      compiler flags consistent with those parsed from the core LAMMPS
-      makefiles.
+      ``Makefile.machine`` file to use as build recipe.  If such recipe does
+      not already exist in ``lib/colvars``, suitable settings will be
+      auto-generated consistent with those used in the core LAMMPS makefiles.
+
+      Please note that Colvars uses the Lepton library, which is enabled in
+      the LEPTON package; if you use anything other than the ``make
+      lib-colvars`` command, please make sure to build Lepton beforehand.
 
       Optional flags may be specified as environment variables:
 
       .. code-block:: bash
 
-         $ COLVARS_DEBUG=yes make lib-colvars args="-m machine"  # Build with debug code (much slower)
-         $ COLVARS_LEPTON=no make lib-colvars args="-m machine"  # Build without Lepton (included otherwise)
+         $ COLVARS_DEBUG=yes make lib-colvars args="-m g++-debug" # Build with debug code (much slower)
+         $ COLVARS_LEPTON=no make lib-colvars args="-m mpi" # Build without Lepton (included otherwise)
 
-      The build should produce two files: the library ``lib/colvars/libcolvars.a``
-      (which also includes Lepton objects if enabled) and the specification file
-      ``lib/colvars/Makefile.lammps``.  The latter is auto-generated, and normally does
-      not need to be edited.
+      The build should produce two files: the library
+      ``lib/colvars/libcolvars.a`` and the specification file
+      ``lib/colvars/Makefile.lammps``.  The latter is auto-generated, and
+      normally does not need to be edited.
 
 ----------
 
diff --git a/doc/src/Packages_details.rst b/doc/src/Packages_details.rst
index dc99e478fa..ef6ee16c4d 100644
--- a/doc/src/Packages_details.rst
+++ b/doc/src/Packages_details.rst
@@ -493,7 +493,7 @@ COLVARS package
 
 **Contents:**
 
-COLVARS stands for collective variables, which can be used to
+Colvars stands for collective variables, which can be used to
 implement various enhanced sampling methods, including Adaptive
 Biasing Force, Metadynamics, Steered MD, Umbrella Sampling and
 Restraints.  A :doc:`fix colvars <fix_colvars>` command is implemented
@@ -501,8 +501,8 @@ which wraps a COLVARS library, which implements these methods.
 simulations.
 
 **Authors:** The COLVARS library is written and maintained by
-Giacomo Fiorin (ICMS, Temple University, Philadelphia, PA, USA)
-and Jerome Henin (LISM, CNRS, Marseille, France), originally for
+Giacomo Fiorin (NIH, Bethesda, MD, USA)
+and Jerome Henin (CNRS, Paris, France), originally for
 the NAMD MD code, but with portability in mind.  Axel Kohlmeyer
 (Temple U) provided the interface to LAMMPS.
 

From 24b16cf130d1aff80deb2b56bf598e86cb97bb61 Mon Sep 17 00:00:00 2001
From: Giacomo Fiorin <giacomo.fiorin@gmail.com>
Date: Tue, 27 Dec 2022 20:25:53 -0500
Subject: [PATCH 58/79] More updates to fix colvars doc

---
 doc/src/fix_colvars.rst | 100 ++++++++++++++++++++--------------------
 1 file changed, 49 insertions(+), 51 deletions(-)

diff --git a/doc/src/fix_colvars.rst b/doc/src/fix_colvars.rst
index ec7b33ce51..695343107f 100644
--- a/doc/src/fix_colvars.rst
+++ b/doc/src/fix_colvars.rst
@@ -35,68 +35,66 @@ Examples
 Description
 """""""""""
 
-This fix interfaces LAMMPS to the collective variables (Colvars)
-library, which allows to calculate potentials of mean force (PMFs) for
-any set of colvars, using different sampling methods: currently
-implemented are the Adaptive Biasing Force (ABF) method, metadynamics,
-Steered Molecular Dynamics (SMD) and Umbrella Sampling (US) via a
-flexible harmonic restraint bias.
+This fix interfaces LAMMPS to the collective variables (Colvars) library,
+which allows to calculate potentials of mean force (PMFs) for any set of
+colvars, using sampling methods, including but not limited to Adaptive
+Biasing Force (ABF), metadynamics (MtD), Steered Molecular Dynamics (SMD)
+and Umbrella Sampling (US) via a flexible harmonic restraint bias.
 
-This documentation describes only the fix colvars command itself and
-LAMMPS specific parts of the code.  The full documentation of the
-colvars library is available as `this supplementary PDF document <PDF/colvars-refman-lammps.pdf>`_
+This documentation describes only the ``fix colvars`` command itself in a
+LAMMPS script.  The Colvars library is documented via the included
+`PDF manual <PDF/colvars-refman-lammps.pdf>`_ or at the webpage `https://colvars.github.io/colvars-refman-lammps/colvars-refman-lammps.html <https://colvars.github.io/colvars-refman-lammps/colvars-refman-lammps.html>`_.
 
-The Colvars library is developed at `https://github.com/colvars/colvars <https://github.com/colvars/colvars>`_
-A detailed discussion of its implementation is in :ref:`(Fiorin) <Fiorin>`.
+The Colvars library is developed at `https://github.com/Colvars/colvars
+<https://github.com/Colvars/colvars>`_  A detailed discussion of its
+implementation is in :ref:`(Fiorin) <Fiorin>`; additional references are
+printed at runtime based on specific features being used.
 
 There are some example scripts for using this package with LAMMPS in the
-examples/PACKAGES/colvars directory.
+``examples/PACKAGES/colvars`` directory.
 
 ----------
 
-The only mandatory argument to the fix is the filename to the colvars
-input file that contains the input that is independent from the MD
-program in which the colvars library has been integrated.
+The only required argument to ``fix colvars`` is the filename to the Colvars
+configuration file that contains the definition of the variables and any
+biasing methods applied to them.
+from the MD program in which the colvars library has been integrated.
 
-The *group-ID* entry is ignored. The collective variable module will
-always apply to the entire system and there can only be one instance
-of the colvars fix at a time. The colvars fix will only communicate
-the minimum information necessary and the colvars library supports
-multiple, completely independent collective variables, so there is
-no restriction to functionality by limiting the number of colvars fixes.
+The *group-ID* entry is ignored.  ``fix colvars`` will always apply to the
+entire system, but specific atoms will be selected based on selection
+keywords in the Colvars configuration file or files.  There is no need to
+define multiple ``fix colvars`` instances and it is not allowed.
 
-The *input* keyword allows to specify a state file that would contain
-the restart information required in order to continue a calculation from
-a prerecorded state. Fix colvars records it state in :doc:`binary restart <restart>`
-files, so when using the :doc:`read_restart <read_restart>` command,
-this is usually not needed.
+The *output* keyword allows to specify the prefix of output files generated
+by Colvars, for example ``output.colvars.traj`` or ``output.pmf``.
 
-The *output* keyword allows to specify the output prefix. All output
-files generated will use this prefix followed by the ".colvars." and
-a word like "state" or "traj".
+The *input* keyword allows to specify an optional state file that contains
+the restart information needed to continue a previous simulation state.
+Note, however, that ``fix colvars`` records its state in :doc:`binary restart
+<restart>` files, so when using the :doc:`read_restart <read_restart>`
+command, this is usually not needed.
 
-The *seed* keyword contains the seed for the random number generator
-that will be used in the colvars module.
+The *seed* keyword contains the seed for the random number generator used by
+Colvars.
 
 The *unwrap* keyword controls whether wrapped or unwrapped coordinates
-are passed to the colvars library for calculation of the collective
+are passed to the Colvars library for calculation of the collective
 variables and the resulting forces. The default is *yes*, i.e. to use
 the image flags to reconstruct the absolute atom positions.
 Setting this to *no* will use the current local coordinates that are
-wrapped back into the simulation cell at each re-neighboring instead.
+wrapped back into the simulation cell at each re-neighboring instead.  For information about when and how this affects results, please see `https://colvars.github.io/colvars-refman-lammps/colvars-refman-lammps.html#sec:colvar_atom_groups_wrapping <https://colvars.github.io/colvars-refman-lammps/colvars-refman-lammps.html#sec:colvar_atom_groups_wrapping>`_.
 
-The *tstat* keyword can be either NULL or the label of a thermostatting
-fix that thermostats all atoms in the fix colvars group. This will be
-used to provide the colvars module with the current thermostat target
-temperature.
+The *tstat* keyword can be either NULL or the label of a thermostatting fix
+that thermostats all atoms in the fix colvars group.  This will be used to
+let Colvars know what is the current thermostat target temperature.
 
 Restart, fix_modify, output, run start/stop, minimize info
 """""""""""""""""""""""""""""""""""""""""""""""""""""""""""
 
-This fix writes the current status of the colvars module into
-:doc:`binary restart files <restart>`. This is in addition to the text
-mode status file that is written by the colvars module itself and the
-kind of information in both files is identical.
+This fix writes the current status of the colvars module into :doc:`binary
+restart files <restart>`. This is in addition to the text mode
+``.colvars.state`` written by Colvars itself and the information in both
+files is identical.
 
 The :doc:`fix_modify <fix_modify>` *energy* option is supported by
 this fix to add the energy change from the biasing force added by
@@ -104,15 +102,14 @@ Colvars to the global potential energy of the system as part of
 :doc:`thermodynamic output <thermo_style>`.  The default setting for
 this fix is :doc:`fix_modify energy no <fix_modify>`.
 
-The *fix_modify configfile <config file>* option allows to add settings
-from an additional config file to the colvars module. This option can
-only be used, after the system has been initialized with a :doc:`run <run>`
-command.
+The *fix_modify configfile <config file>* option loads Colvars configuration
+from an additional file. This option can only be used, after the system has
+been initialized with a :doc:`run <run>` command.
 
 The *fix_modify config <quoted string>* option allows to add settings
 from inline strings. Those have to fit on a single line when enclosed
 in a pair of double quotes ("), or can span multiple lines when bracketed
-by a pair of triple double quotes (""", like python embedded documentation).
+by a pair of triple double quotes (""", like Python embedded documentation).
 
 This fix computes a global scalar which can be accessed by various
 :doc:`output commands <Howto_output>`.  The scalar is the Colvars
@@ -122,13 +119,14 @@ energy mentioned above.  The scalar value calculated by this fix is
 Restrictions
 """"""""""""
 
-This fix is part of the COLVARS package.  It is only enabled if
-LAMMPS was built with that package.  See the :doc:`Build package
+``fix colvars`` is provided by the COLVARS package and is only available if
+LAMMPS was built with that package.  Some of the features also require code
+available from the LEPTON package.  See the :doc:`Build package
 <Build_package>` page for more info.
 
-There can only be one colvars fix active at a time. Since the interface
-communicates only the minimum amount of information and colvars module
-itself can handle an arbitrary number of collective variables, this is
+There can only be one Colvars instance defined at a time.  Since the
+interface communicates only the minimum amount of information and the Colvars
+module itself can handle an arbitrary number of collective variables, this is
 not a limitation of functionality.
 
 Related commands

From d10e7195dc9b44a1828683ed7c6ac406cb57995f Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Tue, 27 Dec 2022 21:13:34 -0500
Subject: [PATCH 59/79] add missing entries for dihedral style lepton

---
 doc/src/Packages_details.rst | 1 +
 doc/src/dihedral_style.rst   | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/doc/src/Packages_details.rst b/doc/src/Packages_details.rst
index ef6ee16c4d..d1297cc2d8 100644
--- a/doc/src/Packages_details.rst
+++ b/doc/src/Packages_details.rst
@@ -1421,6 +1421,7 @@ the :doc:`Build extras <Build_extras>` page.
 * :doc:`pair_style lepton <pair_lepton>`
 * :doc:`bond_style lepton <bond_lepton>`
 * :doc:`angle_style lepton <angle_lepton>`
+* :doc:`dihedral_style lepton <dihedral_lepton>`
 
 ----------
 
diff --git a/doc/src/dihedral_style.rst b/doc/src/dihedral_style.rst
index 4e56d1f787..45dd66e750 100644
--- a/doc/src/dihedral_style.rst
+++ b/doc/src/dihedral_style.rst
@@ -10,7 +10,7 @@ Syntax
 
    dihedral_style style
 
-* style = *none* or *zero* or *hybrid* or *charmm* or *charmmfsw* or *class2* or *osine/shift/exp* or *fourier* or *harmonic* or *helix* or *multi/harmonic* or *nharmonic* or *opls* or *spherical* or *table* or *table/cut*
+* style = *none* or *zero* or *hybrid* or *charmm* or *charmmfsw* or *class2* or *cosine/shift/exp* or *fourier* or *harmonic* or *helix* or *lepton* or *multi/harmonic* or *nharmonic* or *opls* or *spherical* or *table* or *table/cut*
 
 Examples
 """"""""
@@ -108,6 +108,7 @@ exist.
 * :doc:`fourier <dihedral_fourier>` - dihedral with multiple cosine terms
 * :doc:`harmonic <dihedral_harmonic>` - harmonic dihedral
 * :doc:`helix <dihedral_helix>` - helix dihedral
+* :doc:`lepton <dihedral_lepton>` - dihedral potential from evaluating a string
 * :doc:`multi/harmonic <dihedral_multi_harmonic>` - dihedral with 5 harmonic terms
 * :doc:`nharmonic <dihedral_nharmonic>` - same as multi-harmonic with N terms
 * :doc:`opls <dihedral_opls>` - OPLS dihedral

From 52fcd08e1c9493d1715c6fa672b51c1c115dd370 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Tue, 27 Dec 2022 21:15:33 -0500
Subject: [PATCH 60/79] reformat colvars related docs. add false positive for
 spellchecker

---
 doc/src/Build_extras.rst                    | 16 ++++++++++------
 doc/utils/sphinx-config/false_positives.txt |  2 +-
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/doc/src/Build_extras.rst b/doc/src/Build_extras.rst
index 7a67b4bc85..3e0939c745 100644
--- a/doc/src/Build_extras.rst
+++ b/doc/src/Build_extras.rst
@@ -1347,16 +1347,20 @@ module included in the LAMMPS source distribution.
 
          $ make lib-colvars # print help message
          $ make lib-colvars args="-m mpi" # build with default MPI compiler (settings as with "make mpi")
-         $ make lib-colvars args="-m serial" # build with GNU g++ compiler (settings as with "make serial") 
+         $ make lib-colvars args="-m serial" # build with GNU g++ compiler (settings as with "make serial")
 
       The "machine" argument of the "-m" flag is used to find a
       ``Makefile.machine`` file to use as build recipe.  If such recipe does
       not already exist in ``lib/colvars``, suitable settings will be
       auto-generated consistent with those used in the core LAMMPS makefiles.
 
-      Please note that Colvars uses the Lepton library, which is enabled in
-      the LEPTON package; if you use anything other than the ``make
-      lib-colvars`` command, please make sure to build Lepton beforehand.
+
+      .. versionchanged:: TBD
+
+      Please note that Colvars uses the Lepton library, which is now
+      included with the LEPTON package; if you use anything other than
+      the ``make lib-colvars`` command, please make sure to :ref:`build
+      Lepton beforehand <lepton>`.
 
       Optional flags may be specified as environment variables:
 
@@ -1367,8 +1371,8 @@ module included in the LAMMPS source distribution.
 
       The build should produce two files: the library
       ``lib/colvars/libcolvars.a`` and the specification file
-      ``lib/colvars/Makefile.lammps``.  The latter is auto-generated, and
-      normally does not need to be edited.
+      ``lib/colvars/Makefile.lammps``.  The latter is auto-generated,
+      and normally does not need to be edited.
 
 ----------
 
diff --git a/doc/utils/sphinx-config/false_positives.txt b/doc/utils/sphinx-config/false_positives.txt
index c879bdb244..54c475ba45 100644
--- a/doc/utils/sphinx-config/false_positives.txt
+++ b/doc/utils/sphinx-config/false_positives.txt
@@ -2235,7 +2235,7 @@ msm
 msmflag
 msse
 msst
-Mtchell
+MtD
 Mth
 mtk
 Mtotal

From b28607234eb362d80d1c9307453fc1f37952327a Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Tue, 27 Dec 2022 21:29:37 -0500
Subject: [PATCH 61/79] update rst file formatting. work around duplicate
 target issue.

---
 doc/src/Build_extras.rst     |   8 +--
 doc/src/Build_package.rst    |   2 +-
 doc/src/Packages_details.rst |  25 +++----
 doc/src/fix_colvars.rst      | 122 +++++++++++++++++++----------------
 4 files changed, 84 insertions(+), 73 deletions(-)

diff --git a/doc/src/Build_extras.rst b/doc/src/Build_extras.rst
index 3e0939c745..605f2291a8 100644
--- a/doc/src/Build_extras.rst
+++ b/doc/src/Build_extras.rst
@@ -34,7 +34,7 @@ This is the list of packages that may require additional steps.
    * :ref:`ADIOS <adios>`
    * :ref:`ATC <atc>`
    * :ref:`AWPMD <awpmd>`
-   * :ref:`COLVARS <colvars>`
+   * :ref:`COLVARS <colvar>`
    * :ref:`COMPRESS <compress>`
    * :ref:`ELECTRODE <electrode>`
    * :ref:`GPU <gpu>`
@@ -1268,7 +1268,7 @@ The ATC package requires the MANYBODY package also be installed.
 .. _awpmd:
 
 AWPMD package
-------------------
+-------------
 
 .. tabs::
 
@@ -1316,10 +1316,10 @@ AWPMD package
 
 ----------
 
-.. _colvars:
+.. _colvar:
 
 COLVARS package
----------------------------------------
+---------------
 
 This package enables the use of the `Colvars <https://colvars.github.io/>`_
 module included in the LAMMPS source distribution.
diff --git a/doc/src/Build_package.rst b/doc/src/Build_package.rst
index 9eeda4d8d4..6e2e729471 100644
--- a/doc/src/Build_package.rst
+++ b/doc/src/Build_package.rst
@@ -36,7 +36,7 @@ packages:
    * :ref:`ADIOS <adios>`
    * :ref:`ATC <atc>`
    * :ref:`AWPMD <awpmd>`
-   * :ref:`COLVARS <colvars>`
+   * :ref:`COLVARS <colvar>`
    * :ref:`COMPRESS <compress>`
    * :ref:`GPU <gpu>`
    * :ref:`H5MD <h5md>`
diff --git a/doc/src/Packages_details.rst b/doc/src/Packages_details.rst
index d1297cc2d8..6c7395ddab 100644
--- a/doc/src/Packages_details.rst
+++ b/doc/src/Packages_details.rst
@@ -493,22 +493,21 @@ COLVARS package
 
 **Contents:**
 
-Colvars stands for collective variables, which can be used to
-implement various enhanced sampling methods, including Adaptive
-Biasing Force, Metadynamics, Steered MD, Umbrella Sampling and
-Restraints.  A :doc:`fix colvars <fix_colvars>` command is implemented
-which wraps a COLVARS library, which implements these methods.
-simulations.
+Colvars stands for collective variables, which can be used to implement
+various enhanced sampling methods, including Adaptive Biasing Force,
+Metadynamics, Steered MD, Umbrella Sampling and Restraints.  A :doc:`fix
+colvars <fix_colvars>` command is implemented which wraps a COLVARS
+library, which implements these methods.
 
-**Authors:** The COLVARS library is written and maintained by
-Giacomo Fiorin (NIH, Bethesda, MD, USA)
-and Jerome Henin (CNRS, Paris, France), originally for
-the NAMD MD code, but with portability in mind.  Axel Kohlmeyer
-(Temple U) provided the interface to LAMMPS.
+**Authors:** The COLVARS library is written and maintained by Giacomo
+Fiorin (NIH, Bethesda, MD, USA) and Jerome Henin (CNRS, Paris, France),
+originally for the NAMD MD code, but with portability in mind.  Axel
+Kohlmeyer (Temple U) provided the interface to LAMMPS.
 
 **Install:**
 
-This package has :ref:`specific installation instructions <colvars>` on the :doc:`Build extras <Build_extras>` page.
+This package has :ref:`specific installation instructions <colvar>` on
+the :doc:`Build extras <Build_extras>` page.
 
 **Supporting info:**
 
@@ -517,6 +516,8 @@ This package has :ref:`specific installation instructions <colvars>` on the :doc
 * src/COLVARS/README
 * lib/colvars/README
 * :doc:`fix colvars <fix_colvars>`
+* :doc:`group2ndx <group2ndx>`
+* :doc:`ndx2group <group2ndx>`
 * examples/PACKAGES/colvars
 
 ----------
diff --git a/doc/src/fix_colvars.rst b/doc/src/fix_colvars.rst
index 695343107f..77a90cc54f 100644
--- a/doc/src/fix_colvars.rst
+++ b/doc/src/fix_colvars.rst
@@ -35,18 +35,21 @@ Examples
 Description
 """""""""""
 
-This fix interfaces LAMMPS to the collective variables (Colvars) library,
-which allows to calculate potentials of mean force (PMFs) for any set of
-colvars, using sampling methods, including but not limited to Adaptive
-Biasing Force (ABF), metadynamics (MtD), Steered Molecular Dynamics (SMD)
-and Umbrella Sampling (US) via a flexible harmonic restraint bias.
+This fix interfaces LAMMPS to the collective variables (Colvars)
+library, which allows calculating potentials of mean force (PMFs) for
+any set of colvars, using sampling methods, including but not limited to
+Adaptive Biasing Force (ABF), metadynamics (MtD), Steered Molecular
+Dynamics (SMD) and Umbrella Sampling (US) via a flexible harmonic
+restraint bias.
 
-This documentation describes only the ``fix colvars`` command itself in a
-LAMMPS script.  The Colvars library is documented via the included
-`PDF manual <PDF/colvars-refman-lammps.pdf>`_ or at the webpage `https://colvars.github.io/colvars-refman-lammps/colvars-refman-lammps.html <https://colvars.github.io/colvars-refman-lammps/colvars-refman-lammps.html>`_.
+This documentation describes only the ``fix colvars`` command itself in
+a LAMMPS script.  The Colvars library is documented via the included
+`PDF manual <PDF/colvars-refman-lammps.pdf>`_ or at the webpage
+`https://colvars.github.io/colvars-refman-lammps/colvars-refman-lammps.html
+<https://colvars.github.io/colvars-refman-lammps/colvars-refman-lammps.html>`_.
 
 The Colvars library is developed at `https://github.com/Colvars/colvars
-<https://github.com/Colvars/colvars>`_  A detailed discussion of its
+<https://github.com/Colvars/colvars>`_.  A detailed discussion of its
 implementation is in :ref:`(Fiorin) <Fiorin>`; additional references are
 printed at runtime based on specific features being used.
 
@@ -55,79 +58,86 @@ There are some example scripts for using this package with LAMMPS in the
 
 ----------
 
-The only required argument to ``fix colvars`` is the filename to the Colvars
-configuration file that contains the definition of the variables and any
-biasing methods applied to them.
-from the MD program in which the colvars library has been integrated.
+The only required argument to ``fix colvars`` is the filename to the
+Colvars configuration file that contains the definition of the variables
+and any biasing methods applied to them.  This input is independent from
+the MD program in which the colvars library has been integrated.
 
-The *group-ID* entry is ignored.  ``fix colvars`` will always apply to the
-entire system, but specific atoms will be selected based on selection
-keywords in the Colvars configuration file or files.  There is no need to
-define multiple ``fix colvars`` instances and it is not allowed.
+The *group-ID* entry is ignored.  ``fix colvars`` will always apply to
+the entire system, but specific atoms will be selected based on
+selection keywords in the Colvars configuration file or files.  There is
+no need to define multiple ``fix colvars`` instances and it is not
+allowed.
 
-The *output* keyword allows to specify the prefix of output files generated
-by Colvars, for example ``output.colvars.traj`` or ``output.pmf``.
+The *output* keyword allows specifying the prefix of output files
+generated by Colvars, for example ``output.colvars.traj`` or
+``output.pmf``.
 
-The *input* keyword allows to specify an optional state file that contains
-the restart information needed to continue a previous simulation state.
-Note, however, that ``fix colvars`` records its state in :doc:`binary restart
-<restart>` files, so when using the :doc:`read_restart <read_restart>`
-command, this is usually not needed.
+The *input* keyword allows specifying an optional state file that
+contains the restart information needed to continue a previous
+simulation state.  Note, however, that ``fix colvars`` records its state
+in :doc:`binary restart <restart>` files, so when using the
+:doc:`read_restart <read_restart>` command, this is usually not needed.
 
-The *seed* keyword contains the seed for the random number generator used by
-Colvars.
+The *seed* keyword contains the seed for the random number generator
+used by Colvars.
 
 The *unwrap* keyword controls whether wrapped or unwrapped coordinates
 are passed to the Colvars library for calculation of the collective
 variables and the resulting forces. The default is *yes*, i.e. to use
-the image flags to reconstruct the absolute atom positions.
-Setting this to *no* will use the current local coordinates that are
-wrapped back into the simulation cell at each re-neighboring instead.  For information about when and how this affects results, please see `https://colvars.github.io/colvars-refman-lammps/colvars-refman-lammps.html#sec:colvar_atom_groups_wrapping <https://colvars.github.io/colvars-refman-lammps/colvars-refman-lammps.html#sec:colvar_atom_groups_wrapping>`_.
+the image flags to reconstruct the absolute atom positions.  Setting
+this to *no* will use the current local coordinates that are wrapped
+back into the simulation cell at each re-neighboring instead.  For
+information about when and how this affects results, please see
+`https://colvars.github.io/colvars-refman-lammps/colvars-refman-lammps.html#sec:colvar_atom_groups_wrapping
+<https://colvars.github.io/colvars-refman-lammps/colvars-refman-lammps.html#sec:colvar_atom_groups_wrapping>`_.
 
-The *tstat* keyword can be either NULL or the label of a thermostatting fix
-that thermostats all atoms in the fix colvars group.  This will be used to
-let Colvars know what is the current thermostat target temperature.
+The *tstat* keyword can be either NULL or the label of a thermostatting
+fix that thermostats all atoms in the fix colvars group.  This will be
+used to let Colvars know what is the current thermostat target
+temperature.
 
 Restart, fix_modify, output, run start/stop, minimize info
 """""""""""""""""""""""""""""""""""""""""""""""""""""""""""
 
-This fix writes the current status of the colvars module into :doc:`binary
-restart files <restart>`. This is in addition to the text mode
-``.colvars.state`` written by Colvars itself and the information in both
-files is identical.
+This fix writes the current status of the colvars module into
+:doc:`binary restart files <restart>`. This is in addition to the text
+mode ``.colvars.state`` written by Colvars itself and the information in
+both files is identical.
 
-The :doc:`fix_modify <fix_modify>` *energy* option is supported by
-this fix to add the energy change from the biasing force added by
-Colvars to the global potential energy of the system as part of
-:doc:`thermodynamic output <thermo_style>`.  The default setting for
-this fix is :doc:`fix_modify energy no <fix_modify>`.
+The :doc:`fix_modify <fix_modify>` *energy* option is supported by this
+fix to add the energy change from the biasing force added by Colvars to
+the global potential energy of the system as part of :doc:`thermodynamic
+output <thermo_style>`.  The default setting for this fix is
+:doc:`fix_modify energy no <fix_modify>`.
 
-The *fix_modify configfile <config file>* option loads Colvars configuration
-from an additional file. This option can only be used, after the system has
-been initialized with a :doc:`run <run>` command.
+The *fix_modify configfile <config file>* option loads Colvars
+configuration from an additional file. This option can only be used
+after the system has been initialized with a :doc:`run <run>` command.
 
 The *fix_modify config <quoted string>* option allows to add settings
-from inline strings. Those have to fit on a single line when enclosed
-in a pair of double quotes ("), or can span multiple lines when bracketed
-by a pair of triple double quotes (""", like Python embedded documentation).
+from inline strings. Those have to fit on a single line when enclosed in
+a pair of double quotes ("), or can span multiple lines when bracketed
+by a pair of triple double quotes (""", like Python embedded
+documentation).
 
 This fix computes a global scalar which can be accessed by various
-:doc:`output commands <Howto_output>`.  The scalar is the Colvars
-energy mentioned above.  The scalar value calculated by this fix is
+:doc:`output commands <Howto_output>`.  The scalar is the Colvars energy
+mentioned above.  The scalar value calculated by this fix is
 "extensive".
 
 Restrictions
 """"""""""""
 
-``fix colvars`` is provided by the COLVARS package and is only available if
-LAMMPS was built with that package.  Some of the features also require code
-available from the LEPTON package.  See the :doc:`Build package
-<Build_package>` page for more info.
+``fix colvars`` is provided by the COLVARS package and is only available
+if LAMMPS was built with that package.  Some of the features also
+require code available from the LEPTON package.  See the :doc:`Build
+package <Build_package>` page for more info.
 
 There can only be one Colvars instance defined at a time.  Since the
-interface communicates only the minimum amount of information and the Colvars
-module itself can handle an arbitrary number of collective variables, this is
-not a limitation of functionality.
+interface communicates only the minimum amount of information and the
+Colvars module itself can handle an arbitrary number of collective
+variables, this is not a limitation of functionality.
 
 Related commands
 """"""""""""""""

From 50a370c4a5b27133b1b177838cb21cb7c07f3555 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Tue, 27 Dec 2022 21:39:35 -0500
Subject: [PATCH 62/79] use memcpy instead of union to avoid pointer aliasing

---
 lib/lepton/src/CompiledExpression.cpp       | 11 +++--------
 lib/lepton/src/CompiledVectorExpression.cpp | 10 +++-------
 2 files changed, 6 insertions(+), 15 deletions(-)

diff --git a/lib/lepton/src/CompiledExpression.cpp b/lib/lepton/src/CompiledExpression.cpp
index 622c1c2ecf..a253368d53 100644
--- a/lib/lepton/src/CompiledExpression.cpp
+++ b/lib/lepton/src/CompiledExpression.cpp
@@ -32,13 +32,13 @@
 #include "lepton/CompiledExpression.h"
 #include "lepton/Operation.h"
 #include "lepton/ParsedExpression.h"
+#include <cstring>
 #include <utility>
 
 using namespace Lepton;
 using namespace std;
 #ifdef LEPTON_USE_JIT
     using namespace asmjit;
-    #include <cinttypes>
 #endif
 
 CompiledExpression::CompiledExpression() : jitCode(NULL) {
@@ -515,12 +515,6 @@ void CompiledExpression::generateTwoArgCall(a64::Compiler& c, arm::Vec& dest, ar
 }
 #else
 
-union int64_to_double {
-    int64_to_double(int64_t _i) { i = _i; }
-    int64_t i;
-    double  d;
-};
-
 void CompiledExpression::generateJitCode() {
     const CpuInfo& cpu = CpuInfo::host();
     if (!cpu.hasFeature(CpuFeatures::X86::kAVX))
@@ -569,7 +563,8 @@ void CompiledExpression::generateJitCode() {
         else if (op.getId() == Operation::DELTA)
             value = 1.0;
         else if (op.getId() == Operation::ABS) {
-            value = int64_to_double(0x7FFFFFFFFFFFFFFF).d;
+            long long mask = 0x7FFFFFFFFFFFFFFF;
+            memcpy(&value, &mask, sizeof(value));
         }
         else if (op.getId() == Operation::POWER_CONSTANT) {
             if (stepGroup[step] == -1)
diff --git a/lib/lepton/src/CompiledVectorExpression.cpp b/lib/lepton/src/CompiledVectorExpression.cpp
index 2e560d2051..fdbba8b172 100644
--- a/lib/lepton/src/CompiledVectorExpression.cpp
+++ b/lib/lepton/src/CompiledVectorExpression.cpp
@@ -33,6 +33,7 @@
 #include "lepton/Operation.h"
 #include "lepton/ParsedExpression.h"
 #include <algorithm>
+#include <cstring>
 #include <utility>
 
 using namespace Lepton;
@@ -573,12 +574,6 @@ void CompiledVectorExpression::generateTwoArgCall(a64::Compiler& c, arm::Vec& de
 }
 #else
 
-union int_to_float {
-    int_to_float(int _i) { i = _i; }
-    int   i;
-    float f;
-};
-
 void CompiledVectorExpression::generateJitCode() {
     const CpuInfo& cpu = CpuInfo::host();
     if (!cpu.hasFeature(CpuFeatures::X86::kAVX))
@@ -630,7 +625,8 @@ void CompiledVectorExpression::generateJitCode() {
         else if (op.getId() == Operation::DELTA)
             value = 1.0;
         else if (op.getId() == Operation::ABS) {
-            value = int_to_float(0x7FFFFFFF).f;
+            int mask = 0x7FFFFFFF;
+            memcpy(&value, &mask, sizeof(value));
         }
         else if (op.getId() == Operation::POWER_CONSTANT) {
             if (stepGroup[step] == -1)

From 67156420d48d97d3bbaf00ae8bd8a22aab9cba60 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Wed, 28 Dec 2022 00:12:45 -0500
Subject: [PATCH 63/79] avoid out-of-range read

---
 lib/lepton/src/CompiledExpression.cpp       | 4 ++--
 lib/lepton/src/CompiledVectorExpression.cpp | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/lepton/src/CompiledExpression.cpp b/lib/lepton/src/CompiledExpression.cpp
index a253368d53..f69ee1130d 100644
--- a/lib/lepton/src/CompiledExpression.cpp
+++ b/lib/lepton/src/CompiledExpression.cpp
@@ -549,7 +549,7 @@ void CompiledExpression::generateJitCode() {
         // Find the constant value (if any) used by this operation.
 
         Operation& op = *operation[step];
-        double value;
+        double value = 0.0;
         if (op.getId() == Operation::CONSTANT)
             value = dynamic_cast<Operation::Constant&>(op).getValue();
         else if (op.getId() == Operation::ADD_CONSTANT)
@@ -564,7 +564,7 @@ void CompiledExpression::generateJitCode() {
             value = 1.0;
         else if (op.getId() == Operation::ABS) {
             long long mask = 0x7FFFFFFFFFFFFFFF;
-            memcpy(&value, &mask, sizeof(value));
+            memcpy(&value, &mask, sizeof(mask));
         }
         else if (op.getId() == Operation::POWER_CONSTANT) {
             if (stepGroup[step] == -1)
diff --git a/lib/lepton/src/CompiledVectorExpression.cpp b/lib/lepton/src/CompiledVectorExpression.cpp
index fdbba8b172..d2859085d9 100644
--- a/lib/lepton/src/CompiledVectorExpression.cpp
+++ b/lib/lepton/src/CompiledVectorExpression.cpp
@@ -611,7 +611,7 @@ void CompiledVectorExpression::generateJitCode() {
         // Find the constant value (if any) used by this operation.
 
         Operation& op = *operation[step];
-        double value;
+        double value = 0.0;
         if (op.getId() == Operation::CONSTANT)
             value = dynamic_cast<Operation::Constant&> (op).getValue();
         else if (op.getId() == Operation::ADD_CONSTANT)
@@ -626,7 +626,7 @@ void CompiledVectorExpression::generateJitCode() {
             value = 1.0;
         else if (op.getId() == Operation::ABS) {
             int mask = 0x7FFFFFFF;
-            memcpy(&value, &mask, sizeof(value));
+            memcpy(&value, &mask, sizeof(mask));
         }
         else if (op.getId() == Operation::POWER_CONSTANT) {
             if (stepGroup[step] == -1)

From 49eb9ca5fd9b59d2ac30728cf8ce70bb3d4dfa2b Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Thu, 29 Dec 2022 19:10:15 -0500
Subject: [PATCH 64/79] revert to using the unions. looks nicer and passes the
 tests.

---
 lib/lepton/src/CompiledExpression.cpp       | 17 +++++++++++------
 lib/lepton/src/CompiledVectorExpression.cpp | 15 +++++++++------
 2 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/lib/lepton/src/CompiledExpression.cpp b/lib/lepton/src/CompiledExpression.cpp
index f69ee1130d..bdc5350ce1 100644
--- a/lib/lepton/src/CompiledExpression.cpp
+++ b/lib/lepton/src/CompiledExpression.cpp
@@ -32,7 +32,8 @@
 #include "lepton/CompiledExpression.h"
 #include "lepton/Operation.h"
 #include "lepton/ParsedExpression.h"
-#include <cstring>
+
+#include <cinttypes>
 #include <utility>
 
 using namespace Lepton;
@@ -515,6 +516,12 @@ void CompiledExpression::generateTwoArgCall(a64::Compiler& c, arm::Vec& dest, ar
 }
 #else
 
+union int64_to_double { // reinterprets the bit pattern of an int64_t as a double
+  int64_to_double(const int64_t &_i) { i = _i; }
+  int64_t i;
+  double  d; // NOTE(review): read after writing i; relies on union type punning -- confirm compiler support
+};
+
 void CompiledExpression::generateJitCode() {
     const CpuInfo& cpu = CpuInfo::host();
     if (!cpu.hasFeature(CpuFeatures::X86::kAVX))
@@ -549,7 +556,7 @@ void CompiledExpression::generateJitCode() {
         // Find the constant value (if any) used by this operation.
 
         Operation& op = *operation[step];
-        double value = 0.0;
+        double value;
         if (op.getId() == Operation::CONSTANT)
             value = dynamic_cast<Operation::Constant&>(op).getValue();
         else if (op.getId() == Operation::ADD_CONSTANT)
@@ -562,10 +569,8 @@ void CompiledExpression::generateJitCode() {
             value = 1.0;
         else if (op.getId() == Operation::DELTA)
             value = 1.0;
-        else if (op.getId() == Operation::ABS) {
-            long long mask = 0x7FFFFFFFFFFFFFFF;
-            memcpy(&value, &mask, sizeof(mask));
-        }
+        else if (op.getId() == Operation::ABS)
+          value = int64_to_double(0x7FFFFFFFFFFFFFFF).d;
         else if (op.getId() == Operation::POWER_CONSTANT) {
             if (stepGroup[step] == -1)
                 value = dynamic_cast<Operation::PowerConstant&>(op).getValue();
diff --git a/lib/lepton/src/CompiledVectorExpression.cpp b/lib/lepton/src/CompiledVectorExpression.cpp
index d2859085d9..302cefaabc 100644
--- a/lib/lepton/src/CompiledVectorExpression.cpp
+++ b/lib/lepton/src/CompiledVectorExpression.cpp
@@ -33,7 +33,6 @@
 #include "lepton/Operation.h"
 #include "lepton/ParsedExpression.h"
 #include <algorithm>
-#include <cstring>
 #include <utility>
 
 using namespace Lepton;
@@ -574,6 +573,12 @@ void CompiledVectorExpression::generateTwoArgCall(a64::Compiler& c, arm::Vec& de
 }
 #else
 
+union int_to_float { // reinterprets the bit pattern of an int as a float
+  int_to_float(const int &_i) { i = _i; }
+  int i;
+  float  f; // NOTE(review): read after writing i; relies on union type punning -- confirm compiler support
+};
+
 void CompiledVectorExpression::generateJitCode() {
     const CpuInfo& cpu = CpuInfo::host();
     if (!cpu.hasFeature(CpuFeatures::X86::kAVX))
@@ -611,7 +616,7 @@ void CompiledVectorExpression::generateJitCode() {
         // Find the constant value (if any) used by this operation.
 
         Operation& op = *operation[step];
-        double value = 0.0;
+        double value;
         if (op.getId() == Operation::CONSTANT)
             value = dynamic_cast<Operation::Constant&> (op).getValue();
         else if (op.getId() == Operation::ADD_CONSTANT)
@@ -624,10 +629,8 @@ void CompiledVectorExpression::generateJitCode() {
             value = 1.0;
         else if (op.getId() == Operation::DELTA)
             value = 1.0;
-        else if (op.getId() == Operation::ABS) {
-            int mask = 0x7FFFFFFF;
-            memcpy(&value, &mask, sizeof(mask));
-        }
+        else if (op.getId() == Operation::ABS)
+            value = int_to_float(0x7FFFFFFF).f;
         else if (op.getId() == Operation::POWER_CONSTANT) {
             if (stepGroup[step] == -1)
                 value = dynamic_cast<Operation::PowerConstant&> (op).getValue();

From cae18d01a3cfaae9def3e173cb8e329c0a29ee79 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Thu, 29 Dec 2022 19:10:46 -0500
Subject: [PATCH 65/79] add unit tests for Lepton lib and LeptonUtils functions

---
 unittest/utils/CMakeLists.txt  |   6 +
 unittest/utils/test_lepton.cpp | 523 +++++++++++++++++++++++++++++++++
 2 files changed, 529 insertions(+)
 create mode 100644 unittest/utils/test_lepton.cpp

diff --git a/unittest/utils/CMakeLists.txt b/unittest/utils/CMakeLists.txt
index c01313ad8d..a6d5545873 100644
--- a/unittest/utils/CMakeLists.txt
+++ b/unittest/utils/CMakeLists.txt
@@ -19,6 +19,12 @@ add_executable(test_platform test_platform.cpp)
 target_link_libraries(test_platform PRIVATE lammps GTest::GMockMain)
 add_test(NAME Platform COMMAND test_platform)
 
+if(PKG_LEPTON)  # build and register the Lepton unit tests only when PKG_LEPTON is set
+  add_executable(test_lepton test_lepton.cpp)
+  target_link_libraries(test_lepton PRIVATE lepton lammps GTest::GMockMain)
+  add_test(NAME Lepton COMMAND test_lepton)  # registered with CTest under the name "Lepton"
+endif()
+
 set_tests_properties(Utils Platform PROPERTIES
   ENVIRONMENT "LAMMPS_POTENTIALS=${LAMMPS_POTENTIALS_DIR}")
 
diff --git a/unittest/utils/test_lepton.cpp b/unittest/utils/test_lepton.cpp
new file mode 100644
index 0000000000..5f3de06aba
--- /dev/null
+++ b/unittest/utils/test_lepton.cpp
@@ -0,0 +1,523 @@
+
+// Adapted for GoogleTest from TestParser.cpp from OpenMM
+
+#include "lammps.h"
+
+#include "info.h"
+#include "input.h"
+#include "update.h"
+#include "variable.h"
+
+#include "../../src/LEPTON/lepton_utils.h"
+#include "Lepton.h"
+#include "lepton/CompiledVectorExpression.h"
+#include "utils.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+#include "../testing/core.h"
+
+#include <exception>
+#include <iostream>
+#include <limits>
+#include <map>
+#include <sstream>
+#include <string>
+
+using LAMMPS_NS::utils::split_words;
+using ::testing::StrEq;
+
+bool verbose = false;
+
+class LeptonUtilsTest : public LAMMPSTest { // fixture for LeptonUtils tests that need a LAMMPS instance
+protected:
+    LAMMPS_NS::Variable *variable; // set to lmp->input->variable in SetUp()
+
+    void SetUp() override
+    {
+        testbinary = "LeptonUtilsTest";
+        args       = {"-log", "none", "-echo", "screen", "-nocite", "-v", "num", "1"};
+        LAMMPSTest::SetUp(); // NOTE(review): presumably creates lmp from testbinary/args -- see ../testing/core.h
+        command("region box block 0 1 0 1 0 1");
+        command("create_box 1 box");
+        variable = lmp->input->variable;
+    }
+};
+
+// condense() strips quote characters and all whitespace from an expression
+
+TEST(LeptonUtils, condense)
+{
+    ASSERT_THAT(LeptonUtils::condense("\"k*r^2; k=250.0\""), StrEq("k*r^2;k=250.0"));
+    ASSERT_THAT(LeptonUtils::condense("'k2*r^2 + k3*r^3 + k4*r^4; k2=300.0; k3=-100.0; k4=50.0'"),
+                StrEq("k2*r^2+k3*r^3+k4*r^4;k2=300.0;k3=-100.0;k4=50.0"));
+    ASSERT_THAT(LeptonUtils::condense("k*(r-0.2)^2;k=500.0"), StrEq("k*(r-0.2)^2;k=500.0"));
+    ASSERT_THAT(LeptonUtils::condense("\"xx' \"'xx"), StrEq("xxxx")); // mixed, unbalanced quotes
+    ASSERT_THAT(LeptonUtils::condense("\t \"x\n\r"), StrEq("x"));     // tabs and line breaks too
+}
+
+// substitute() replaces variable references (v_<name>) with the variable's current value
+
+TEST_F(LeptonUtilsTest, substitute)
+{
+    BEGIN_HIDE_OUTPUT();
+    command("variable val1 index 100.0");
+    command("variable pre equal 0.001*step");
+    END_HIDE_OUTPUT();
+    ASSERT_THAT(LeptonUtils::substitute("v_num", lmp), StrEq("1")); // "num" is set via "-v num 1"
+    ASSERT_THAT(LeptonUtils::substitute("eps*v_val1*k", lmp), StrEq("eps*100.0*k"));
+    ASSERT_THAT(LeptonUtils::substitute("(2.5/v_pre)", lmp), StrEq("(2.5/0)"));
+    lmp->update->reset_timestep(100, false); // equal-style v_pre = 0.001*step is re-evaluated below
+    ASSERT_THAT(LeptonUtils::substitute("(2.5/v_pre)", lmp), StrEq("(2.5/0.1)"));
+
+    if (LAMMPS_NS::Info::has_exceptions()) { // the error path is only checked when exceptions are available
+        bool caught = false;
+        try {
+            LeptonUtils::substitute("v_none", lmp);
+        } catch (std::exception &e) {
+            ASSERT_THAT(e.what(), StrEq("Variable none in expression v_none does not exist"));
+            caught = true;
+        }
+        ASSERT_TRUE(caught);
+    }
+}
+
+/**
+ * Custom test function f(x,y) = 2*x*y; df/dx = 2*y, df/dy = 2*x, d2f/dxdy = 2.
+ */
+
+class ExampleFunction : public Lepton::CustomFunction {
+    int getNumArguments() const { return 2; }
+    double evaluate(const double *arguments) const { return 2.0 * arguments[0] * arguments[1]; }
+    double evaluateDerivative(const double *arguments, const int *derivOrder) const
+    {
+        if (derivOrder[0] == 1) {
+            if (derivOrder[1] == 0)
+                return 2.0 * arguments[1];
+            else if (derivOrder[1] == 1)
+                return 2.0;
+        }
+        if (derivOrder[1] == 1 && derivOrder[0] == 0) return 2.0 * arguments[0];
+        return 0.0; // every other order combination, including (0,0), yields 0
+    }
+    Lepton::CustomFunction *clone() const { return new ExampleFunction(); }
+};
+
+/**
+ * Verify that a variable-free expression gives expectedValue on every evaluation path.
+ */
+
+void verifyEvaluation(const std::string &expression, double expectedValue)
+{
+    std::map<std::string, Lepton::CustomFunction *> customFunctions; // empty: no custom functions
+    Lepton::ParsedExpression parsed = Lepton::Parser::parse(expression, customFunctions);
+    double value                    = parsed.evaluate();
+    ASSERT_NEAR(expectedValue, value, 1e-10);
+
+    // Try optimizing it and make sure the result is still correct.
+
+    value = parsed.optimize().evaluate();
+    ASSERT_NEAR(expectedValue, value, 1e-10);
+
+    // Create an ExpressionProgram and see if that also gives the same result.
+
+    Lepton::ExpressionProgram program = parsed.createProgram();
+    value                             = program.evaluate();
+    ASSERT_NEAR(expectedValue, value, 1e-10);
+
+    // Create a CompiledExpression and see if that also gives the same result.
+
+    Lepton::CompiledExpression compiled = parsed.createCompiledExpression();
+    value                               = compiled.evaluate();
+    ASSERT_NEAR(expectedValue, value, 1e-10);
+}
+
+/**
+ * Verify that an expression in the variables x and y gives expectedValue on every evaluation path.
+ */
+
+void verifyEvaluation(const std::string &expression, double x, double y, double expectedValue)
+{
+    if (verbose) std::cout << "Checking expression: " << expression << "\n";
+    std::map<std::string, double> variables;
+    variables["x"]                  = x;
+    variables["y"]                  = y;
+    Lepton::ParsedExpression parsed = Lepton::Parser::parse(expression);
+    double value                    = parsed.evaluate(variables);
+    ASSERT_NEAR(expectedValue, value, 1e-10);
+
+    // Try optimizing it and make sure the result is still correct.
+
+    value = parsed.optimize().evaluate(variables);
+    ASSERT_NEAR(expectedValue, value, 1e-10);
+
+    // Try optimizing with predefined values for the variables.
+
+    value = parsed.optimize(variables).evaluate();
+    ASSERT_NEAR(expectedValue, value, 1e-10);
+
+    // Create an ExpressionProgram and see if that also gives the same result.
+
+    Lepton::ExpressionProgram program = parsed.createProgram();
+    value                             = program.evaluate(variables);
+    ASSERT_NEAR(expectedValue, value, 1e-10);
+
+    // Create a CompiledExpression and see if that also gives the same result.
+
+    Lepton::CompiledExpression compiled = parsed.createCompiledExpression();
+    if (compiled.getVariables().find("x") != compiled.getVariables().end())
+        compiled.getVariableReference("x") = x;
+    if (compiled.getVariables().find("y") != compiled.getVariables().end())
+        compiled.getVariableReference("y") = y;
+    value = compiled.evaluate();
+    ASSERT_NEAR(expectedValue, value, 1e-10);
+
+    // Try specifying memory locations for the compiled expression.
+
+    std::map<std::string, double *> variablePointers;
+    variablePointers["x"]                = &x;
+    variablePointers["y"]                = &y;
+    Lepton::CompiledExpression compiled2 = parsed.createCompiledExpression();
+    compiled2.setVariableLocations(variablePointers);
+    value = compiled2.evaluate();
+    ASSERT_NEAR(expectedValue, value, 1e-10);
+    ASSERT_EQ(&x, &compiled2.getVariableReference("x"));
+    ASSERT_EQ(&y, &compiled2.getVariableReference("y"));
+
+    // Try evaluating it as a vector.
+
+    for (int width : Lepton::CompiledVectorExpression::getAllowedWidths()) {
+        Lepton::CompiledVectorExpression vector = parsed.createCompiledVectorExpression(width);
+        for (int i = 0; i < width; i++) { // lane i gets the real inputs, every other lane -100
+            if (vector.getVariables().find("x") != vector.getVariables().end())
+                for (int j = 0; j < width; j++)
+                    vector.getVariablePointer("x")[j] = (i == j ? x : -100.0);
+            if (vector.getVariables().find("y") != vector.getVariables().end())
+                for (int j = 0; j < width; j++)
+                    vector.getVariablePointer("y")[j] = (i == j ? y : -100.0);
+            const float *result = vector.evaluate();
+            ASSERT_NEAR(expectedValue, result[i], 1e-6); // looser tolerance: results are float
+        }
+    }
+
+    // Specify memory locations for the vector expression.
+
+    float xvec[8], yvec[8]; // NOTE(review): assumes no allowed width exceeds 8 -- confirm
+    std::map<std::string, float *> vecVariablePointers;
+    vecVariablePointers["x"] = xvec;
+    vecVariablePointers["y"] = yvec;
+    for (int width : Lepton::CompiledVectorExpression::getAllowedWidths()) {
+        Lepton::CompiledVectorExpression vector2 = parsed.createCompiledVectorExpression(width);
+        vector2.setVariableLocations(vecVariablePointers);
+        for (int i = 0; i < width; i++) {
+            for (int j = 0; j < width; j++) {
+                xvec[j] = (i == j ? x : -100.0);
+                yvec[j] = (i == j ? y : -100.0);
+            }
+            const float *result = vector2.evaluate();
+            ASSERT_NEAR(expectedValue, result[i], 1e-6);
+        }
+    }
+
+    // Make sure that variable renaming works.
+
+    variables.clear();
+    variables["w"] = x;
+    variables["y"] = y;
+    std::map<std::string, std::string> replacements;
+    replacements["x"] = "w";
+    value             = parsed.renameVariables(replacements).evaluate(variables);
+    ASSERT_NEAR(expectedValue, value, 1e-10);
+}
+
+/**
+ * Confirm that parsing the expression throws; if it parses cleanly, throw std::exception.
+ */
+
+void verifyInvalidExpression(const std::string &expression)
+{
+    if (verbose) std::cout << "Checking invalid expression: " << expression << "\n";
+    try {
+        Lepton::Parser::parse(expression);
+    } catch (const std::exception &ex) {
+        return; // expected outcome: the parser rejected the expression
+    }
+    throw std::exception(); // parse succeeded unexpectedly; callers see this as the failure signal
+}
+
+/**
+ * Verify that two numbers agree within tol; NaN/NaN, inf/inf and -inf/-inf pairs also pass.
+ */
+
+void assertNumbersEqual(double val1, double val2, double tol = 1e-10)
+{
+    const double inf = std::numeric_limits<double>::infinity();
+    if (val1 == val1 || val2 == val2)           // If both are NaN, that's fine.
+        if (val1 != inf || val2 != inf)         // Both infinity is also fine.
+            if (val1 != -inf || val2 != -inf) { // Same for -infinity.
+                ASSERT_NEAR(val1, val2, tol);
+            }
+}
+
+/**
+ * Verify that two expressions give the same value at (x, y): interpreted, compiled, vectorized.
+ */
+
+void verifySameValue(const Lepton::ParsedExpression &exp1, const Lepton::ParsedExpression &exp2,
+                     double x, double y)
+{
+    std::map<std::string, double> variables;
+    variables["x"] = x;
+    variables["y"] = y;
+    double val1    = exp1.evaluate(variables);
+    double val2    = exp2.evaluate(variables);
+    assertNumbersEqual(val1, val2);
+
+    // Now create CompiledExpressions from them and see if those also match.
+
+    Lepton::CompiledExpression compiled1 = exp1.createCompiledExpression();
+    Lepton::CompiledExpression compiled2 = exp2.createCompiledExpression();
+    if (compiled1.getVariables().find("x") != compiled1.getVariables().end())
+        compiled1.getVariableReference("x") = x;
+    if (compiled1.getVariables().find("y") != compiled1.getVariables().end())
+        compiled1.getVariableReference("y") = y;
+    if (compiled2.getVariables().find("x") != compiled2.getVariables().end())
+        compiled2.getVariableReference("x") = x;
+    if (compiled2.getVariables().find("y") != compiled2.getVariables().end())
+        compiled2.getVariableReference("y") = y;
+    assertNumbersEqual(val1, compiled1.evaluate()); // each compiled result vs. its interpreted value
+    assertNumbersEqual(val2, compiled2.evaluate());
+
+    // Now check CompiledVectorExpressions.
+
+    for (int width : Lepton::CompiledVectorExpression::getAllowedWidths()) {
+        Lepton::CompiledVectorExpression vector1 = exp1.createCompiledVectorExpression(width);
+        Lepton::CompiledVectorExpression vector2 = exp2.createCompiledVectorExpression(width);
+        for (int i = 0; i < width; i++) { // lane i gets the real inputs, every other lane -100
+            if (vector1.getVariables().find("x") != vector1.getVariables().end())
+                for (int j = 0; j < width; j++)
+                    vector1.getVariablePointer("x")[j] = (i == j ? x : -100.0);
+            if (vector1.getVariables().find("y") != vector1.getVariables().end())
+                for (int j = 0; j < width; j++)
+                    vector1.getVariablePointer("y")[j] = (i == j ? y : -100.0);
+            if (vector2.getVariables().find("x") != vector2.getVariables().end())
+                for (int j = 0; j < width; j++)
+                    vector2.getVariablePointer("x")[j] = (i == j ? x : -100.0);
+            if (vector2.getVariables().find("y") != vector2.getVariables().end())
+                for (int j = 0; j < width; j++)
+                    vector2.getVariablePointer("y")[j] = (i == j ? y : -100.0);
+            const float *result1 = vector1.evaluate();
+            const float *result2 = vector2.evaluate();
+            assertNumbersEqual(val1, result1[i], 5e-6); // looser tolerance: results are float
+            assertNumbersEqual(val2, result2[i], 5e-6);
+        }
+    }
+}
+
+/**
+ * Verify that d(expression)/dx, after optimize(), evaluates like expectedDeriv at six (x, y) points.
+ */
+
+void verifyDerivative(const std::string &expression, const std::string &expectedDeriv)
+{
+    if (verbose) std::cout << "Checking derivative of: " << expression << "\n";
+    Lepton::ParsedExpression computed =
+        Lepton::Parser::parse(expression).differentiate("x").optimize();
+    Lepton::ParsedExpression expected = Lepton::Parser::parse(expectedDeriv);
+    verifySameValue(computed, expected, 1.0, 2.0);
+    verifySameValue(computed, expected, 2.0, 3.0);
+    verifySameValue(computed, expected, -2.0, 3.0);
+    verifySameValue(computed, expected, 2.0, -3.0);
+    verifySameValue(computed, expected, 0.0, -3.0);
+    verifySameValue(computed, expected, 2.0, 0.0);
+}
+
+/**
+ * Compare an expression using "custom" (an ExampleFunction) with a plain equivalent: value, d/dx, d2/dxdy.
+ */
+
+void testCustomFunction(const std::string &expression, const std::string &equivalent)
+{
+    if (verbose) std::cout << "Checking custom function expression: " << expression << "\n";
+    std::map<std::string, Lepton::CustomFunction *> functions;
+    ExampleFunction exp;
+    functions["custom"]           = &exp;
+    Lepton::ParsedExpression exp1 = Lepton::Parser::parse(expression, functions);
+    Lepton::ParsedExpression exp2 = Lepton::Parser::parse(equivalent);
+    verifySameValue(exp1, exp2, 1.0, 2.0);
+    verifySameValue(exp1, exp2, 2.0, 3.0);
+    verifySameValue(exp1, exp2, -2.0, 3.0);
+    verifySameValue(exp1, exp2, 2.0, -3.0);
+    Lepton::ParsedExpression deriv1 = exp1.differentiate("x").optimize();
+    Lepton::ParsedExpression deriv2 = exp2.differentiate("x").optimize();
+    verifySameValue(deriv1, deriv2, 1.0, 2.0);
+    verifySameValue(deriv1, deriv2, 2.0, 3.0);
+    verifySameValue(deriv1, deriv2, -2.0, 3.0);
+    verifySameValue(deriv1, deriv2, 2.0, -3.0);
+    Lepton::ParsedExpression deriv3 = deriv1.differentiate("y").optimize();
+    Lepton::ParsedExpression deriv4 = deriv2.differentiate("y").optimize();
+    verifySameValue(deriv3, deriv4, 1.0, 2.0);
+    verifySameValue(deriv3, deriv4, 2.0, 3.0);
+    verifySameValue(deriv3, deriv4, -2.0, 3.0);
+    verifySameValue(deriv3, deriv4, 2.0, -3.0);
+}
+
+TEST(Lepton, Evaluation)
+{
+    verifyEvaluation("5", 5.0);
+    verifyEvaluation("5*2", 10.0);
+    verifyEvaluation("2*3+4*5", 26.0);
+    verifyEvaluation("2^-3", 0.125);
+    verifyEvaluation("1e+2", 100.0);
+    verifyEvaluation("-x", 2.0, 3.0, -2.0);
+    verifyEvaluation("y^-x", 3.0, 2.0, 0.125);
+    verifyEvaluation("1/-x", 3.0, 2.0, -1.0 / 3.0);
+    verifyEvaluation("2.1e-4*x*(y+1)", 3.0, 1.0, 1.26e-3);
+    verifyEvaluation("sin(2.5)", std::sin(2.5));
+    verifyEvaluation("cot(x)", 3.0, 1.0, 1.0 / std::tan(3.0));
+    verifyEvaluation("log(x)", 3.0, 1.0, std::log(3.0));
+    verifyEvaluation("x^2+y^3+x^-1+y^(1/2)", 1.0, 1.0, 4.0);
+    verifyEvaluation("(2*x)*3", 4.0, 4.0, 24.0);
+    verifyEvaluation("(x*2)*3", 4.0, 4.0, 24.0);
+    verifyEvaluation("2*(x*3)", 4.0, 4.0, 24.0);
+    verifyEvaluation("2*(3*x)", 4.0, 4.0, 24.0);
+    verifyEvaluation("2*x/3", 1.0, 4.0, 2.0 / 3.0);
+    verifyEvaluation("x*2/3", 1.0, 4.0, 2.0 / 3.0);
+    verifyEvaluation("5*(-x)*(-y)", 1.0, 4.0, 20.0);
+    verifyEvaluation("5*(-x)*(y)", 1.0, 4.0, -20.0);
+    verifyEvaluation("5*(x)*(-y)", 1.0, 4.0, -20.0);
+    verifyEvaluation("5*(-x)/(-y)", 1.0, 4.0, 1.25);
+    verifyEvaluation("5*(-x)/(y)", 1.0, 4.0, -1.25);
+    verifyEvaluation("5*(x)/(-y)", 1.0, 4.0, -1.25);
+    verifyEvaluation("x+(-y)", 1.0, 4.0, -3.0);
+    verifyEvaluation("(-x)+y", 1.0, 4.0, 3.0);
+    verifyEvaluation("x/(1/y)", 1.0, 4.0, 4.0);
+    verifyEvaluation("x*w; w = 5", 3.0, 1.0, 15.0);
+    verifyEvaluation("a+b^2;a=x-b;b=3*y", 2.0, 3.0, 74.0);
+    verifyEvaluation("erf(x)+erfc(x)", 2.0, 3.0, 1.0);
+    verifyEvaluation("min(3, x)", 2.0, 3.0, 2.0);
+    verifyEvaluation("min(y, 5)", 2.0, 3.0, 3.0);
+    verifyEvaluation("max(x, y)", 2.0, 3.0, 3.0);
+    verifyEvaluation("max(x, -1)", 2.0, 3.0, 2.0);
+    verifyEvaluation("abs(x-y)", 2.0, 3.0, 1.0);
+    verifyEvaluation("delta(x)+3*delta(y-1.5)", 2.0, 1.5, 3.0);
+    verifyEvaluation("step(x-3)+y*step(x)", 2.0, 3.0, 3.0);
+    verifyEvaluation("floor(x)", -2.1, 3.0, -3.0);
+    verifyEvaluation("ceil(x)", -2.1, 3.0, -2.0);
+    verifyEvaluation("select(x, 1.0, y)", 0.3, 2.0, 1.0);
+    verifyEvaluation("select(x, 1.0, y)", 0.0, 2.0, 2.0);
+    verifyEvaluation("atan2(x, y)", 3.0, 1.5, std::atan(2.0));
+    verifyEvaluation("sqrt(x^2)", -2.2, 0.0, 2.2);
+    verifyEvaluation("sqrt(x)^2", 2.2, 0.0, 2.2);
+    verifyEvaluation("x^2+x^4", 2.0, 0.0, 20.0);
+    verifyEvaluation("x^-2+x^-3", 2.0, 0.0, 0.375);
+    verifyEvaluation("x^1.8", 2.2, 0.0, std::pow(2.2, 1.8));
+}
+
+TEST(Lepton, InvalidEvaluation)
+{    // verifyInvalidExpression() returns when parsing throws and itself throws if parsing succeeds
+    ASSERT_NO_THROW(verifyInvalidExpression("1..2"));
+    ASSERT_NO_THROW(verifyInvalidExpression("1*(2+3"));
+    ASSERT_NO_THROW(verifyInvalidExpression("5++4"));
+    ASSERT_NO_THROW(verifyInvalidExpression("1+2)"));
+    ASSERT_NO_THROW(verifyInvalidExpression("cos(2,3)"));
+}
+
+TEST(Lepton, VerifyDerivative)
+{
+    verifyDerivative("x", "1");
+    verifyDerivative("x^2+x", "2*x+1");
+    verifyDerivative("y^x-x", "log(y)*(y^x)-1");
+    verifyDerivative("sin(x)", "cos(x)");
+    verifyDerivative("cos(x)", "-sin(x)");
+    verifyDerivative("tan(x)", "square(sec(x))");
+    verifyDerivative("cot(x)", "-square(csc(x))");
+    verifyDerivative("sec(x)", "sec(x)*tan(x)");
+    verifyDerivative("csc(x)", "-csc(x)*cot(x)");
+    verifyDerivative("exp(2*x)", "2*exp(2*x)");
+    verifyDerivative("log(x)", "1/x");
+    verifyDerivative("sqrt(x)", "0.5/sqrt(x)");
+    verifyDerivative("asin(x)", "1/sqrt(1-x^2)");
+    verifyDerivative("acos(x)", "-1/sqrt(1-x^2)");
+    verifyDerivative("atan(x)", "1/(1+x^2)");
+    verifyDerivative("atan2(2*x,y)", "2*y/(4*x^2+y^2)");
+    verifyDerivative("sinh(x)", "cosh(x)");
+    verifyDerivative("cosh(x)", "sinh(x)");
+    verifyDerivative("tanh(x)", "1/(cosh(x)^2)");
+    verifyDerivative("erf(x)", "1.12837916709551*exp(-x^2)");
+    verifyDerivative("erfc(x)", "-1.12837916709551*exp(-x^2)");
+    verifyDerivative("step(x)*x+step(1-x)*2*x", "step(x)+step(1-x)*2");
+    verifyDerivative("recip(x)", "-1/x^2");
+    verifyDerivative("square(x)", "2*x");
+    verifyDerivative("cube(x)", "3*x^2");
+    verifyDerivative("min(x, 2*x)", "step(x-2*x)*2+(1-step(x-2*x))*1");
+    verifyDerivative("max(5, x^2)", "(1-step(5-x^2))*2*x");
+    verifyDerivative("abs(3*x)", "step(3*x)*3+(1-step(3*x))*-3");
+    verifyDerivative("floor(x)+0.5*x*ceil(x)", "0.5*ceil(x)");
+    verifyDerivative("select(x, x^2, 3*x)", "select(x, 2*x, 3)");
+}
+
+TEST(Lepton, CustomFunction)
+{
+    testCustomFunction("custom(x, y)/2", "x*y");
+    testCustomFunction("custom(x^2, 1)+custom(2, y-1)", "2*x^2+4*(y-1)");
+}
+
+TEST(Lepton, Optimize)
+{
+    std::string buffer;
+    std::stringstream out(buffer);
+
+    out << Lepton::Parser::parse("x*x").optimize();
+    ASSERT_THAT(out.str(), StrEq("square(x)"));
+    out.str("");
+
+    out << Lepton::Parser::parse("x*x*x").optimize();
+    ASSERT_THAT(out.str(), StrEq("cube(x)"));
+    out.str("");
+
+    out << Lepton::Parser::parse("x*(x*x)").optimize();
+    ASSERT_THAT(out.str(), StrEq("cube(x)"));
+    out.str("");
+
+    out << Lepton::Parser::parse("(x*x)*x").optimize();
+    ASSERT_THAT(out.str(), StrEq("cube(x)"));
+    out.str("");
+
+    out << Lepton::Parser::parse("2*3*x").optimize();
+    ASSERT_THAT(out.str(), StrEq("6*(x)"));
+    out.str("");
+
+    out << Lepton::Parser::parse("1/(1+x)").optimize();
+    ASSERT_THAT(out.str(), StrEq("recip(1+(x))"));
+    out.str("");
+
+    out << Lepton::Parser::parse("x^(1/2)").optimize();
+    ASSERT_THAT(out.str(), StrEq("sqrt(x)"));
+    out.str("");
+    out << Lepton::Parser::parse("log(3*cos(x))^(sqrt(4)-2)").optimize();
+    ASSERT_THAT(out.str(), StrEq("1"));
+    out.str("");
+}
+
+int main(int argc, char **argv)
+{
+    MPI_Init(&argc, &argv);
+    ::testing::InitGoogleMock(&argc, argv);
+
+    // handle arguments passed via the TEST_ARGS environment variable; only "-v" (verbose) is acted on
+    if (const char *var = getenv("TEST_ARGS")) {
+        std::vector<std::string> env = split_words(var);
+        for (auto arg : env) {
+            if (arg == "-v") {
+                verbose = true;
+            }
+        }
+    }
+    if ((argc > 1) && (strcmp(argv[1], "-v") == 0)) verbose = true;
+
+    int rv = RUN_ALL_TESTS();
+    MPI_Finalize();
+    return rv;
+}

From ec244dbad32fd6244df52f4b4c861bc30b6847b7 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Thu, 29 Dec 2022 20:09:42 -0500
Subject: [PATCH 66/79] get lepton compiler flags without having to link its
 library twice

---
 unittest/utils/CMakeLists.txt | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/unittest/utils/CMakeLists.txt b/unittest/utils/CMakeLists.txt
index a6d5545873..1881113dde 100644
--- a/unittest/utils/CMakeLists.txt
+++ b/unittest/utils/CMakeLists.txt
@@ -20,8 +20,12 @@ target_link_libraries(test_platform PRIVATE lammps GTest::GMockMain)
 add_test(NAME Platform COMMAND test_platform)
 
 if(PKG_LEPTON)
+  get_target_property(LEPTON_DEF lepton COMPILE_DEFINITONS)
+  get_target_property(LEPTON_INC lepton INCLUDE_DIRECTORIES)
   add_executable(test_lepton test_lepton.cpp)
-  target_link_libraries(test_lepton PRIVATE lepton lammps GTest::GMockMain)
+  target_link_libraries(test_lepton PRIVATE lammps GTest::GMockMain)
+  target_compile_definitions(test_lepton PRIVATE ${LEPTON_DEF})
+  target_include_directories(test_lepton PRIVATE ${LEPTON_INC})
   add_test(NAME Lepton COMMAND test_lepton)
 endif()
 

From 6c318b5e8e24dae87e80350c292633c2f70e8513 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Thu, 29 Dec 2022 21:17:36 -0500
Subject: [PATCH 67/79] fix typo

---
 unittest/utils/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unittest/utils/CMakeLists.txt b/unittest/utils/CMakeLists.txt
index 1881113dde..5384bae6ec 100644
--- a/unittest/utils/CMakeLists.txt
+++ b/unittest/utils/CMakeLists.txt
@@ -20,7 +20,7 @@ target_link_libraries(test_platform PRIVATE lammps GTest::GMockMain)
 add_test(NAME Platform COMMAND test_platform)
 
 if(PKG_LEPTON)
-  get_target_property(LEPTON_DEF lepton COMPILE_DEFINITONS)
+  get_target_property(LEPTON_DEF lepton COMPILE_DEFINITIONS)
   get_target_property(LEPTON_INC lepton INCLUDE_DIRECTORIES)
   add_executable(test_lepton test_lepton.cpp)
   target_link_libraries(test_lepton PRIVATE lammps GTest::GMockMain)

From 064e1abd5b3cfc8c5803b5f727aaaeaa7c6c20e0 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Thu, 29 Dec 2022 22:22:45 -0500
Subject: [PATCH 68/79] Small tweaks to make Lepton test compile/link with MSVC

---
 unittest/utils/CMakeLists.txt  | 6 +-----
 unittest/utils/test_lepton.cpp | 6 ++++--
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/unittest/utils/CMakeLists.txt b/unittest/utils/CMakeLists.txt
index 5384bae6ec..a6d5545873 100644
--- a/unittest/utils/CMakeLists.txt
+++ b/unittest/utils/CMakeLists.txt
@@ -20,12 +20,8 @@ target_link_libraries(test_platform PRIVATE lammps GTest::GMockMain)
 add_test(NAME Platform COMMAND test_platform)
 
 if(PKG_LEPTON)
-  get_target_property(LEPTON_DEF lepton COMPILE_DEFINITIONS)
-  get_target_property(LEPTON_INC lepton INCLUDE_DIRECTORIES)
   add_executable(test_lepton test_lepton.cpp)
-  target_link_libraries(test_lepton PRIVATE lammps GTest::GMockMain)
-  target_compile_definitions(test_lepton PRIVATE ${LEPTON_DEF})
-  target_include_directories(test_lepton PRIVATE ${LEPTON_INC})
+  target_link_libraries(test_lepton PRIVATE lepton lammps GTest::GMockMain)
   add_test(NAME Lepton COMMAND test_lepton)
 endif()
 
diff --git a/unittest/utils/test_lepton.cpp b/unittest/utils/test_lepton.cpp
index 5f3de06aba..91532b385a 100644
--- a/unittest/utils/test_lepton.cpp
+++ b/unittest/utils/test_lepton.cpp
@@ -39,8 +39,10 @@ protected:
         testbinary = "LeptonUtilsTest";
         args       = {"-log", "none", "-echo", "screen", "-nocite", "-v", "num", "1"};
         LAMMPSTest::SetUp();
+        BEGIN_HIDE_OUTPUT();
         command("region box block 0 1 0 1 0 1");
         command("create_box 1 box");
+        END_HIDE_OUTPUT();
         variable = lmp->input->variable;
     }
 };
@@ -68,7 +70,7 @@ TEST_F(LeptonUtilsTest, substitute)
     ASSERT_THAT(LeptonUtils::substitute("v_num", lmp), StrEq("1"));
     ASSERT_THAT(LeptonUtils::substitute("eps*v_val1*k", lmp), StrEq("eps*100.0*k"));
     ASSERT_THAT(LeptonUtils::substitute("(2.5/v_pre)", lmp), StrEq("(2.5/0)"));
-    lmp->update->reset_timestep(100, false);
+    lmp->update->reset_timestep(100LL, false);
     ASSERT_THAT(LeptonUtils::substitute("(2.5/v_pre)", lmp), StrEq("(2.5/0.1)"));
 
     if (LAMMPS_NS::Info::has_exceptions()) {
@@ -240,7 +242,7 @@ void verifyInvalidExpression(const std::string &expression)
     if (verbose) std::cout << "Checking invalid expression: " << expression << "\n";
     try {
         Lepton::Parser::parse(expression);
-    } catch (const std::exception &ex) {
+    } catch (const std::exception &) {
         return;
     }
     throw std::exception();

From 93689f40ddad1f4597611c7c40ee8a33a0ba671c Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Fri, 30 Dec 2022 00:39:23 -0500
Subject: [PATCH 69/79] fix compiler flags issue on Ubuntu18.04

---
 unittest/CMakeLists.txt | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/unittest/CMakeLists.txt b/unittest/CMakeLists.txt
index 9819a53e10..45501d92ae 100644
--- a/unittest/CMakeLists.txt
+++ b/unittest/CMakeLists.txt
@@ -34,6 +34,14 @@ foreach(_FLAG ${CMAKE_TUNE_FLAGS})
   add_compile_options(${_FLAG})
 endforeach()
 
+# must repeat handling coverage for the unit tests (GNU compiler only)
+if((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") AND ENABLE_COVERAGE)
+  # Always append to CMAKE_CXX_FLAGS rather than replacing it, so that flags
+  # supplied by the user are kept. "CMAKE_CXX_${CMAKE_BUILD_TYPE}_FLAGS" is
+  # not a CMake variable (the per-configuration one is named
+  # CMAKE_CXX_FLAGS_<CONFIG>), so expanding it only produced an empty string.
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --coverage")
+endif()
 
 ########################################
 # General tests using the LAMMPS executable itself

From f9a398c9a8477534687f5767e3e0f40cc0fd21b0 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Fri, 30 Dec 2022 06:33:10 -0500
Subject: [PATCH 70/79] add to list

---
 doc/src/Build_package.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/src/Build_package.rst b/doc/src/Build_package.rst
index 6e2e729471..6ff1d837f1 100644
--- a/doc/src/Build_package.rst
+++ b/doc/src/Build_package.rst
@@ -44,6 +44,7 @@ packages:
    * :ref:`KIM <kim>`
    * :ref:`KOKKOS <kokkos>`
    * :ref:`LATTE <latte>`
+   * :ref:`LEPTON <lepton>`
    * :ref:`MACHDYN <machdyn>`
    * :ref:`ML-HDNNP <ml-hdnnp>`
    * :ref:`ML-PACE <ml-pace>`

From 9a0fd9d2375bf6f478c895f8c2ed68b729970373 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Thu, 5 Jan 2023 17:47:47 -0500
Subject: [PATCH 71/79] clarifications and documentation additions

---
 doc/src/pair_lepton.rst | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/doc/src/pair_lepton.rst b/doc/src/pair_lepton.rst
index 5cf11b6f91..ecd90a0b61 100644
--- a/doc/src/pair_lepton.rst
+++ b/doc/src/pair_lepton.rst
@@ -37,12 +37,15 @@ Description
 
 .. versionadded:: TBD
 
-Pair style *lepton* computes spherical pairwise interactions based on
-evaluating strings between neighboring atoms within the given cutoff.
-The potential function must be provided as an expression string using
-"r" as the distance variable. For example `"200.0*(r-1.5)^2"` represents
-a harmonic potential around the distance :math:`r_0` of 1.5 distance
-units and a force constant *K* of 200.0 energy units:
+Pair style *lepton* computes pairwise interactions between particles
+which depend solely on the distance and have a cutoff.  The potential
+function must be provided as an expression string using "r" as the
+distance variable. Note that additional constants in the expression can
+be defined in the same string as additional expressions separated by
+semi-colons as shown in the examples above.  The expression
+`"200.0*(r-1.5)^2"`, for instance, represents a harmonic potential
+around the pairwise distance :math:`r_0` of 1.5 distance units and a
+force constant *K* of 200.0 energy units:
 
 .. math::
 
@@ -50,10 +53,10 @@ units and a force constant *K* of 200.0 energy units:
 
 The `Lepton library <https://simtk.org/projects/lepton>`_, that the
 *lepton* pair style interfaces with, evaluates this expression string at
-run time to compute the pairwise energy.  It also creates an
-analytical representation of the first derivative of this expression with
-respect to "r" and then uses that to compute the force between the pairs
-of particles within the given cutoff.
+run time to compute the pairwise energy.  It also creates an analytical
+representation of the first derivative of this expression with respect
+to "r" and then uses that to compute the force between the pairs of
+particles within the given cutoff.
 
 The following coefficients must be defined for each pair of atoms types
 via the :doc:`pair_coeff <pair_coeff>` command as in the examples above,

From da9e117e474ecf100037c726264ee8898bbe36cd Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Fri, 6 Jan 2023 01:05:39 -0500
Subject: [PATCH 72/79] remove bogus comment

---
 src/LEPTON/pair_lepton.h | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/src/LEPTON/pair_lepton.h b/src/LEPTON/pair_lepton.h
index 02105d1f27..e8454ce80e 100644
--- a/src/LEPTON/pair_lepton.h
+++ b/src/LEPTON/pair_lepton.h
@@ -9,16 +9,6 @@
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
-
-   Pair zero is a dummy pair interaction useful for requiring a
-   force cutoff distance in the absence of pair-interactions or
-   with hybrid/overlay if a larger force cutoff distance is required.
-
-   This can be used in conjunction with bond/create to create bonds
-   that are longer than the cutoff of a given force field, or to
-   calculate radial distribution functions for models without
-   pair interactions.
-
 ------------------------------------------------------------------------- */
 
 #ifdef PAIR_CLASS

From bf63cccda4517516db96212833fe7b5a9d53b3ac Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Fri, 6 Jan 2023 01:06:58 -0500
Subject: [PATCH 73/79] implement pair style lepton/coul and lepton/coul/omp

---
 src/LEPTON/pair_lepton.cpp          |   3 +-
 src/LEPTON/pair_lepton_coul.cpp     | 250 ++++++++++++++++++++++++++++
 src/LEPTON/pair_lepton_coul.h       |  44 +++++
 src/OPENMP/pair_lepton_coul_omp.cpp | 184 ++++++++++++++++++++
 src/OPENMP/pair_lepton_coul_omp.h   |  44 +++++
 5 files changed, 524 insertions(+), 1 deletion(-)
 create mode 100644 src/LEPTON/pair_lepton_coul.cpp
 create mode 100644 src/LEPTON/pair_lepton_coul.h
 create mode 100644 src/OPENMP/pair_lepton_coul_omp.cpp
 create mode 100644 src/OPENMP/pair_lepton_coul_omp.h

diff --git a/src/LEPTON/pair_lepton.cpp b/src/LEPTON/pair_lepton.cpp
index ab23d18368..d2e5f91337 100644
--- a/src/LEPTON/pair_lepton.cpp
+++ b/src/LEPTON/pair_lepton.cpp
@@ -12,7 +12,7 @@
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
-   Contributing authors: Axel Kohlmeyer (Temple U)
+   Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "pair_lepton.h"
@@ -105,6 +105,7 @@ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> void PairLepton::eval()
     for (const auto &expr : expressions) {
       auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp));
       pairforce.emplace_back(parsed.differentiate("r").createCompiledExpression());
+      pairforce.back().getVariableReference("r");
       if (EFLAG) pairpot.emplace_back(parsed.createCompiledExpression());
     }
   } catch (std::exception &e) {
diff --git a/src/LEPTON/pair_lepton_coul.cpp b/src/LEPTON/pair_lepton_coul.cpp
new file mode 100644
index 0000000000..4481a07ac0
--- /dev/null
+++ b/src/LEPTON/pair_lepton_coul.cpp
@@ -0,0 +1,250 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "pair_lepton_coul.h"
+
+#include "atom.h"
+#include "comm.h"
+#include "error.h"
+#include "force.h"
+#include "memory.h"
+#include "neigh_list.h"
+#include "neighbor.h"
+#include "update.h"
+
+#include "Lepton.h"
+#include "lepton_utils.h"
+#include <cmath>
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+void PairLeptonCoul::compute(int eflag, int vflag)
+{
+  ev_init(eflag, vflag);
+  if (evflag) {    // dispatch to the eval<EVFLAG, EFLAG, NEWTON_PAIR>() instantiation matching the flags
+    if (eflag) {
+      if (force->newton_pair)
+        eval<1, 1, 1>();
+      else
+        eval<1, 1, 0>();
+    } else {
+      if (force->newton_pair)
+        eval<1, 0, 1>();
+      else
+        eval<1, 0, 0>();
+    }
+  } else {
+    if (force->newton_pair)
+      eval<0, 0, 1>();
+    else
+      eval<0, 0, 0>();
+  }
+  if (vflag_fdotr) virial_fdotr_compute();
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR> void PairLeptonCoul::eval()
+{
+  const double *const *const x = atom->x;
+  double *const *const f = atom->f;
+  const double *const q = atom->q;
+  const int *const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double *const special_coul = force->special_coul;
+
+  const int inum = list->inum;
+  const int *const ilist = list->ilist;
+  const int *const numneigh = list->numneigh;
+  const int *const *const firstneigh = list->firstneigh;
+  double fxtmp, fytmp, fztmp;
+
+  const double q2e = sqrt(force->qqrd2e);    // applied to both charges, so qi*qj carries qqrd2e
+
+  std::vector<Lepton::CompiledExpression> pairforce;
+  std::vector<Lepton::CompiledExpression> pairpot;
+  try {
+    for (const auto &expr : expressions) {
+      auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp));
+      pairforce.emplace_back(parsed.differentiate("r").createCompiledExpression());
+      if (EFLAG) pairpot.emplace_back(parsed.createCompiledExpression());
+      pairforce.back().getVariableReference("r");     // NOTE(review): results are unused; presumably
+      pairforce.back().getVariableReference("qi");    // these lookups make a missing variable fail
+      pairforce.back().getVariableReference("qj");    // inside this try block -- confirm
+    }
+  } catch (std::exception &e) {
+    error->all(FLERR, e.what());
+  }
+
+  // loop over neighbors of my atoms
+
+  for (int ii = 0; ii < inum; ii++) {
+    const int i = ilist[ii];
+    const double xtmp = x[i][0];
+    const double ytmp = x[i][1];
+    const double ztmp = x[i][2];
+    const int itype = type[i];
+    const int *jlist = firstneigh[i];
+    const int jnum = numneigh[i];
+    fxtmp = fytmp = fztmp = 0.0;
+
+    for (int jj = 0; jj < jnum; jj++) {
+      int j = jlist[jj];
+      const double factor_coul = special_coul[sbmask(j)];
+      j &= NEIGHMASK;
+      const int jtype = type[j];
+
+      const double delx = xtmp - x[j][0];
+      const double dely = ytmp - x[j][1];
+      const double delz = ztmp - x[j][2];
+      const double rsq = delx * delx + dely * dely + delz * delz;
+
+      if (rsq < cutsq[itype][jtype]) {
+        const double r = sqrt(rsq);
+        const int idx = type2expression[itype][jtype];
+        pairforce[idx].getVariableReference("r") = r;
+        pairforce[idx].getVariableReference("qi") = q2e * q[i];
+        pairforce[idx].getVariableReference("qj") = q2e * q[j];
+        const double fpair = -pairforce[idx].evaluate() / r * factor_coul;    // -dE/dr over r, scaled
+
+        fxtmp += delx * fpair;
+        fytmp += dely * fpair;
+        fztmp += delz * fpair;
+        if (NEWTON_PAIR || (j < nlocal)) {
+          f[j][0] -= delx * fpair;
+          f[j][1] -= dely * fpair;
+          f[j][2] -= delz * fpair;
+        }
+
+        double evdwl = 0.0;    // NOTE(review): passed to ev_tally() after a literal 0.0 -- confirm slot
+        if (EFLAG) {
+          pairpot[idx].getVariableReference("r") = r;
+          pairpot[idx].getVariableReference("qi") = q2e * q[i];
+          pairpot[idx].getVariableReference("qj") = q2e * q[j];
+          evdwl = pairpot[idx].evaluate();
+          evdwl *= factor_coul;
+        }
+
+        if (EVFLAG) ev_tally(i, j, nlocal, NEWTON_PAIR, 0.0, evdwl, fpair, delx, dely, delz);
+      }
+    }
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   global settings: global cutoff, then optional solver compatibility keywords
+------------------------------------------------------------------------- */
+
+void PairLeptonCoul::settings(int narg, char **arg)
+{
+  if (narg < 1) utils::missing_cmd_args(FLERR, "pair_style lepton/coul", error);
+  cut_global = utils::numeric(FLERR, arg[0], false, lmp);
+
+  // optional keywords
+  // assert the pair style is compatible with a specific long-range solver
+
+  int iarg = 1;
+  while (iarg < narg) {
+    if (strcmp(arg[iarg], "ewald") == 0)
+      ewaldflag = 1;
+    else if (strcmp(arg[iarg], "pppm") == 0)
+      pppmflag = 1;
+    else if (strcmp(arg[iarg], "msm") == 0)
+      msmflag = 1;
+    else if (strcmp(arg[iarg], "dispersion") == 0)
+      dispersionflag = 1;
+    else if (strcmp(arg[iarg], "tip4p") == 0)
+      tip4pflag = 1;
+    else
+      error->all(FLERR, "Unknown pair_style lepton/coul keyword: {}", arg[iarg]);
+    iarg++;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairLeptonCoul::init_style()
+{    // reject unsupported setups (no per-atom charge, pair_modify shift), then request a neighbor list
+  if (!atom->q_flag) error->all(FLERR, "Pair style lepton/coul requires atom attribute q");
+  if (offset_flag) error->all(FLERR, "Pair style lepton/coul does not support pair_modify shift");
+  neighbor->add_request(this);
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes global cutoff and the five solver flags to restart file
+------------------------------------------------------------------------- */
+
+void PairLeptonCoul::write_restart_settings(FILE *fp)
+{
+  fwrite(&cut_global, sizeof(double), 1, fp);
+  fwrite(&ewaldflag, sizeof(int), 1, fp);
+  fwrite(&pppmflag, sizeof(int), 1, fp);
+  fwrite(&msmflag, sizeof(int), 1, fp);
+  fwrite(&dispersionflag, sizeof(int), 1, fp);
+  fwrite(&tip4pflag, sizeof(int), 1, fp);
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 reads global cutoff and the five solver flags from restart file, bcasts
+------------------------------------------------------------------------- */
+
+void PairLeptonCoul::read_restart_settings(FILE *fp)
+{
+  if (comm->me == 0) {
+    utils::sfread(FLERR, &cut_global, sizeof(double), 1, fp, nullptr, error);
+    utils::sfread(FLERR, &ewaldflag, sizeof(int), 1, fp, nullptr, error);
+    utils::sfread(FLERR, &pppmflag, sizeof(int), 1, fp, nullptr, error);
+    utils::sfread(FLERR, &msmflag, sizeof(int), 1, fp, nullptr, error);
+    utils::sfread(FLERR, &dispersionflag, sizeof(int), 1, fp, nullptr, error);
+    utils::sfread(FLERR, &tip4pflag, sizeof(int), 1, fp, nullptr, error);
+  }
+  MPI_Bcast(&cut_global, 1, MPI_DOUBLE, 0, world);
+  MPI_Bcast(&ewaldflag, 1, MPI_INT, 0, world);
+  MPI_Bcast(&pppmflag, 1, MPI_INT, 0, world);
+  MPI_Bcast(&msmflag, 1, MPI_INT, 0, world);
+  MPI_Bcast(&dispersionflag, 1, MPI_INT, 0, world);
+  MPI_Bcast(&tip4pflag, 1, MPI_INT, 0, world);
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairLeptonCoul::single(int i, int j, int itype, int jtype, double rsq, double factor_coul,
+                              double /* factor_lj */, double &fforce)
+{
+  auto expr = expressions[type2expression[itype][jtype]];
+  auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp));    // re-parsed on every call
+  auto pairpot = parsed.createCompiledExpression();
+  auto pairforce = parsed.differentiate("r").createCompiledExpression();
+
+  const double r = sqrt(rsq);
+  const double q2e = sqrt(force->qqrd2e);    // applied to both charges, so qi*qj carries qqrd2e
+  pairpot.getVariableReference("r") = r;
+  pairpot.getVariableReference("qi") = q2e * atom->q[i];
+  pairpot.getVariableReference("qj") = q2e * atom->q[j];
+
+  pairforce.getVariableReference("r") = r;
+  pairforce.getVariableReference("qi") = q2e * atom->q[i];
+  pairforce.getVariableReference("qj") = q2e * atom->q[j];
+
+  fforce = -pairforce.evaluate() / r * factor_coul;    // -dE/dr over r, scaled by factor_coul
+  return pairpot.evaluate() * factor_coul;
+}
diff --git a/src/LEPTON/pair_lepton_coul.h b/src/LEPTON/pair_lepton_coul.h
new file mode 100644
index 0000000000..23677667bf
--- /dev/null
+++ b/src/LEPTON/pair_lepton_coul.h
@@ -0,0 +1,44 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+// clang-format off
+PairStyle(lepton/coul,PairLeptonCoul);
+// clang-format on
+#else
+
+#ifndef LMP_PAIR_LEPTON_COUL_H
+#define LMP_PAIR_LEPTON_COUL_H
+
+#include "pair_lepton.h"
+
+namespace LAMMPS_NS {
+
+class PairLeptonCoul : public PairLepton {
+ public:
+  PairLeptonCoul(class LAMMPS *_lmp) : PairLepton(_lmp){};
+  ~PairLeptonCoul() override{};
+  void compute(int, int) override;
+  void settings(int, char **) override;
+  void init_style() override;
+  void write_restart_settings(FILE *) override;
+  void read_restart_settings(FILE *) override;
+  double single(int, int, int, int, double, double, double, double &) override;
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR> void eval();    // kernel called by compute()
+};
+}    // namespace LAMMPS_NS
+#endif
+#endif
diff --git a/src/OPENMP/pair_lepton_coul_omp.cpp b/src/OPENMP/pair_lepton_coul_omp.cpp
new file mode 100644
index 0000000000..632128bdd9
--- /dev/null
+++ b/src/OPENMP/pair_lepton_coul_omp.cpp
@@ -0,0 +1,184 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "pair_lepton_coul_omp.h"
+
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neigh_list.h"
+#include "suffix.h"
+
+#include <cmath>
+
+#include "Lepton.h"
+#include "lepton_utils.h"
+#include "omp_compat.h"
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairLeptonCoulOMP::PairLeptonCoulOMP(LAMMPS *lmp) : PairLeptonCoul(lmp), ThrOMP(lmp, THR_PAIR)
+{
+  suffix_flag |= Suffix::OMP;
+  respa_enable = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairLeptonCoulOMP::compute(int eflag, int vflag)
+{
+  ev_init(eflag, vflag);
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag, vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, nullptr, thr);
+
+    if (evflag) {
+      if (eflag) {
+        if (force->newton_pair)
+          eval<1, 1, 1>(ifrom, ito, thr);
+        else
+          eval<1, 1, 0>(ifrom, ito, thr);
+      } else {
+        if (force->newton_pair)
+          eval<1, 0, 1>(ifrom, ito, thr);
+        else
+          eval<1, 0, 0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_pair)
+        eval<0, 0, 1>(ifrom, ito, thr);
+      else
+        eval<0, 0, 0>(ifrom, ito, thr);
+    }
+
+    thr->timer(Timer::PAIR);
+    reduce_thr(this, eflag, vflag, thr);
+  }    // end of omp parallel region
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairLeptonCoulOMP::eval(int iifrom, int iito, ThrData *const thr)
+{
+  const auto *_noalias const x = (dbl3_t *) atom->x[0];
+  auto *_noalias const f = (dbl3_t *) thr->get_f()[0];
+  const auto *_noalias const q = atom->q;
+  const int *_noalias const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double *_noalias const special_coul = force->special_coul;
+
+  const int *const ilist = list->ilist;
+  const int *const numneigh = list->numneigh;
+  const int *const *const firstneigh = list->firstneigh;
+  double fxtmp, fytmp, fztmp;
+
+  const double q2e = sqrt(force->qqrd2e);
+
+  std::vector<Lepton::CompiledExpression> pairforce;
+  std::vector<Lepton::CompiledExpression> pairpot;
+  try {
+    for (const auto &expr : expressions) {
+      auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, Pointers::lmp));
+      pairforce.emplace_back(parsed.differentiate("r").createCompiledExpression());
+      if (EFLAG) pairpot.emplace_back(parsed.createCompiledExpression());
+      pairforce.back().getVariableReference("r");
+      pairforce.back().getVariableReference("qi");
+      pairforce.back().getVariableReference("qj");
+    }
+  } catch (std::exception &e) {
+    error->all(FLERR, e.what());
+  }
+
+  // loop over neighbors of my atoms
+
+  for (int ii = iifrom; ii < iito; ++ii) {
+    const int i = ilist[ii];
+    const double xtmp = x[i].x;
+    const double ytmp = x[i].y;
+    const double ztmp = x[i].z;
+    const int itype = type[i];
+    const int *jlist = firstneigh[i];
+    const int jnum = numneigh[i];
+    fxtmp = fytmp = fztmp = 0.0;
+
+    for (int jj = 0; jj < jnum; jj++) {
+      int j = jlist[jj];
+      const double factor_coul = special_coul[sbmask(j)];
+      j &= NEIGHMASK;
+      const int jtype = type[j];
+
+      const double delx = xtmp - x[j].x;
+      const double dely = ytmp - x[j].y;
+      const double delz = ztmp - x[j].z;
+      const double rsq = delx * delx + dely * dely + delz * delz;
+
+      if (rsq < cutsq[itype][jtype]) {
+        const double r = sqrt(rsq);
+        const int idx = type2expression[itype][jtype];
+        pairforce[idx].getVariableReference("r") = r;
+        pairforce[idx].getVariableReference("qi") = q2e * q[i];
+        pairforce[idx].getVariableReference("qj") = q2e * q[j];
+        const double fpair = -pairforce[idx].evaluate() / r * factor_coul;
+
+        fxtmp += delx * fpair;
+        fytmp += dely * fpair;
+        fztmp += delz * fpair;
+        if (NEWTON_PAIR || j < nlocal) {
+          f[j].x -= delx * fpair;
+          f[j].y -= dely * fpair;
+          f[j].z -= delz * fpair;
+        }
+
+        double evdwl = 0.0;
+        if (EFLAG) {
+          pairpot[idx].getVariableReference("r") = r;
+          pairpot[idx].getVariableReference("qi") = q2e * q[i];
+          pairpot[idx].getVariableReference("qj") = q2e * q[j];
+          evdwl = pairpot[idx].evaluate();
+          evdwl *= factor_coul;
+        }
+
+        if (EVFLAG)
+          ev_tally_thr(this, i, j, nlocal, NEWTON_PAIR, evdwl, 0.0, fpair, delx, dely, delz, thr);
+      }
+    }
+    f[i].x += fxtmp;
+    f[i].y += fytmp;
+    f[i].z += fztmp;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairLeptonCoulOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairLeptonCoul::memory_usage();
+
+  return bytes;
+}
diff --git a/src/OPENMP/pair_lepton_coul_omp.h b/src/OPENMP/pair_lepton_coul_omp.h
new file mode 100644
index 0000000000..b6d04e7e02
--- /dev/null
+++ b/src/OPENMP/pair_lepton_coul_omp.h
@@ -0,0 +1,44 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+// clang-format off
+PairStyle(lepton/coul/omp,PairLeptonCoulOMP);
+// clang-format on
+#else
+
+#ifndef LMP_PAIR_LEPTON_COUL_OMP_H
+#define LMP_PAIR_LEPTON_COUL_OMP_H
+
+#include "pair_lepton_coul.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairLeptonCoulOMP : public PairLeptonCoul, public ThrOMP {
+
+ public:
+  PairLeptonCoulOMP(class LAMMPS *);
+
+  void compute(int, int) override;
+  double memory_usage() override;
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(int ifrom, int ito, ThrData *const thr);
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif

From 8813a65fe890bd69a5b4041522e4683eb7a4d3f4 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Fri, 6 Jan 2023 08:01:26 -0500
Subject: [PATCH 74/79] make use of charges in Lepton expressions optional

---
 src/LEPTON/pair_lepton_coul.cpp     | 41 +++++++++++++++++++++--------
 src/OPENMP/pair_lepton_coul_omp.cpp | 24 ++++++++++++-----
 2 files changed, 48 insertions(+), 17 deletions(-)

diff --git a/src/LEPTON/pair_lepton_coul.cpp b/src/LEPTON/pair_lepton_coul.cpp
index 4481a07ac0..0f492d0c9d 100644
--- a/src/LEPTON/pair_lepton_coul.cpp
+++ b/src/LEPTON/pair_lepton_coul.cpp
@@ -79,14 +79,26 @@ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> void PairLeptonCoul::eval()
 
   std::vector<Lepton::CompiledExpression> pairforce;
   std::vector<Lepton::CompiledExpression> pairpot;
+  std::vector<std::pair<bool, bool>> have_q;
   try {
     for (const auto &expr : expressions) {
       auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp));
       pairforce.emplace_back(parsed.differentiate("r").createCompiledExpression());
       if (EFLAG) pairpot.emplace_back(parsed.createCompiledExpression());
       pairforce.back().getVariableReference("r");
-      pairforce.back().getVariableReference("qi");
-      pairforce.back().getVariableReference("qj");
+      have_q.emplace_back(std::make_pair(true, true));
+
+      // check if there are references to charges
+      try {
+        pairforce.back().getVariableReference("qi");
+      } catch (std::exception &) {
+        have_q.back().first = false;
+      }
+      try {
+        pairforce.back().getVariableReference("qj");
+      } catch (std::exception &) {
+        have_q.back().second = false;
+      }
     }
   } catch (std::exception &e) {
     error->all(FLERR, e.what());
@@ -119,8 +131,8 @@ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> void PairLeptonCoul::eval()
         const double r = sqrt(rsq);
         const int idx = type2expression[itype][jtype];
         pairforce[idx].getVariableReference("r") = r;
-        pairforce[idx].getVariableReference("qi") = q2e * q[i];
-        pairforce[idx].getVariableReference("qj") = q2e * q[j];
+        if (have_q[idx].first) pairforce[idx].getVariableReference("qi") = q2e * q[i];
+        if (have_q[idx].second) pairforce[idx].getVariableReference("qj") = q2e * q[j];
         const double fpair = -pairforce[idx].evaluate() / r * factor_coul;
 
         fxtmp += delx * fpair;
@@ -135,8 +147,8 @@ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> void PairLeptonCoul::eval()
         double evdwl = 0.0;
         if (EFLAG) {
           pairpot[idx].getVariableReference("r") = r;
-          pairpot[idx].getVariableReference("qi") = q2e * q[i];
-          pairpot[idx].getVariableReference("qj") = q2e * q[j];
+          if (have_q[idx].first) pairpot[idx].getVariableReference("qi") = q2e * q[i];
+          if (have_q[idx].second) pairpot[idx].getVariableReference("qj") = q2e * q[j];
           evdwl = pairpot[idx].evaluate();
           evdwl *= factor_coul;
         }
@@ -238,12 +250,19 @@ double PairLeptonCoul::single(int i, int j, int itype, int jtype, double rsq, do
   const double r = sqrt(rsq);
   const double q2e = sqrt(force->qqrd2e);
   pairpot.getVariableReference("r") = r;
-  pairpot.getVariableReference("qi") = q2e * atom->q[i];
-  pairpot.getVariableReference("qj") = q2e * atom->q[j];
-
   pairforce.getVariableReference("r") = r;
-  pairforce.getVariableReference("qi") = q2e * atom->q[i];
-  pairforce.getVariableReference("qj") = q2e * atom->q[j];
+  try {
+    pairpot.getVariableReference("qi") = q2e * atom->q[i];
+    pairforce.getVariableReference("qi") = q2e * atom->q[i];
+  } catch (std::exception &) {
+    /* ignore */
+  }
+  try {
+    pairpot.getVariableReference("qj") = q2e * atom->q[j];
+    pairforce.getVariableReference("qj") = q2e * atom->q[j];
+  } catch (std::exception &) {
+    /* ignore */
+  }
 
   fforce = -pairforce.evaluate() / r * factor_coul;
   return pairpot.evaluate() * factor_coul;
diff --git a/src/OPENMP/pair_lepton_coul_omp.cpp b/src/OPENMP/pair_lepton_coul_omp.cpp
index 632128bdd9..f70e3d8002 100644
--- a/src/OPENMP/pair_lepton_coul_omp.cpp
+++ b/src/OPENMP/pair_lepton_coul_omp.cpp
@@ -101,14 +101,26 @@ void PairLeptonCoulOMP::eval(int iifrom, int iito, ThrData *const thr)
 
   std::vector<Lepton::CompiledExpression> pairforce;
   std::vector<Lepton::CompiledExpression> pairpot;
+  std::vector<std::pair<bool, bool>> have_q;
   try {
     for (const auto &expr : expressions) {
       auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, Pointers::lmp));
       pairforce.emplace_back(parsed.differentiate("r").createCompiledExpression());
       if (EFLAG) pairpot.emplace_back(parsed.createCompiledExpression());
       pairforce.back().getVariableReference("r");
-      pairforce.back().getVariableReference("qi");
-      pairforce.back().getVariableReference("qj");
+      have_q.emplace_back(std::make_pair(true, true));
+
+      // check if there are references to charges
+      try {
+        pairforce.back().getVariableReference("qi");
+      } catch (std::exception &) {
+        have_q.back().first = false;
+      }
+      try {
+        pairforce.back().getVariableReference("qj");
+      } catch (std::exception &) {
+        have_q.back().second = false;
+      }
     }
   } catch (std::exception &e) {
     error->all(FLERR, e.what());
@@ -141,8 +153,8 @@ void PairLeptonCoulOMP::eval(int iifrom, int iito, ThrData *const thr)
         const double r = sqrt(rsq);
         const int idx = type2expression[itype][jtype];
         pairforce[idx].getVariableReference("r") = r;
-        pairforce[idx].getVariableReference("qi") = q2e * q[i];
-        pairforce[idx].getVariableReference("qj") = q2e * q[j];
+        if (have_q[idx].first) pairforce[idx].getVariableReference("qi") = q2e * q[i];
+        if (have_q[idx].second) pairforce[idx].getVariableReference("qj") = q2e * q[j];
         const double fpair = -pairforce[idx].evaluate() / r * factor_coul;
 
         fxtmp += delx * fpair;
@@ -157,8 +169,8 @@ void PairLeptonCoulOMP::eval(int iifrom, int iito, ThrData *const thr)
         double evdwl = 0.0;
         if (EFLAG) {
           pairpot[idx].getVariableReference("r") = r;
-          pairpot[idx].getVariableReference("qi") = q2e * q[i];
-          pairpot[idx].getVariableReference("qj") = q2e * q[j];
+          if (have_q[idx].first) pairpot[idx].getVariableReference("qi") = q2e * q[i];
+          if (have_q[idx].second) pairpot[idx].getVariableReference("qj") = q2e * q[j];
           evdwl = pairpot[idx].evaluate();
           evdwl *= factor_coul;
         }

From 523821d83ede3af404aa194f1bc75f63373e0715 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Fri, 6 Jan 2023 08:43:10 -0500
Subject: [PATCH 75/79] add extract() function to pair style lepton/coul for
 kspace compatibility

---
 src/LEPTON/pair_lepton_coul.cpp | 18 ++++++++++++++++++
 src/LEPTON/pair_lepton_coul.h   |  1 +
 2 files changed, 19 insertions(+)

diff --git a/src/LEPTON/pair_lepton_coul.cpp b/src/LEPTON/pair_lepton_coul.cpp
index 0f492d0c9d..374e655b7e 100644
--- a/src/LEPTON/pair_lepton_coul.cpp
+++ b/src/LEPTON/pair_lepton_coul.cpp
@@ -267,3 +267,21 @@ double PairLeptonCoul::single(int i, int j, int itype, int jtype, double rsq, do
   fforce = -pairforce.evaluate() / r * factor_coul;
   return pairpot.evaluate() * factor_coul;
 }
+
+/* ---------------------------------------------------------------------- */
+
+void *PairLeptonCoul::extract(const char *str, int &dim)
+{
+  if (pppmflag || ewaldflag || msmflag || dispersionflag || tip4pflag) {
+    if (strcmp(str, "cut_coul") == 0) {
+      dim = 0;
+      return (void *) &cut_global;
+    }
+  } else {
+    if (strcmp(str, "cut_coul") == 0) {
+      dim = 2;
+      return (void *) &cut;
+    }
+  }
+  return nullptr;
+}
diff --git a/src/LEPTON/pair_lepton_coul.h b/src/LEPTON/pair_lepton_coul.h
index 23677667bf..8153792bd5 100644
--- a/src/LEPTON/pair_lepton_coul.h
+++ b/src/LEPTON/pair_lepton_coul.h
@@ -35,6 +35,7 @@ class PairLeptonCoul : public PairLepton {
   void write_restart_settings(FILE *) override;
   void read_restart_settings(FILE *) override;
   double single(int, int, int, int, double, double, double, double &) override;
+  void *extract(const char *, int &) override;
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR> void eval();

From 92df9f1c7153d2a727375aa2c54f7e6cf9eed464 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Fri, 6 Jan 2023 08:43:44 -0500
Subject: [PATCH 76/79] update docs

---
 doc/src/Commands_pair.rst |  1 +
 doc/src/pair_lepton.rst   | 66 ++++++++++++++++++++++++++++-----------
 doc/src/pair_style.rst    |  1 +
 3 files changed, 49 insertions(+), 19 deletions(-)

diff --git a/doc/src/Commands_pair.rst b/doc/src/Commands_pair.rst
index 8a5f05d095..b95cbd79c6 100644
--- a/doc/src/Commands_pair.rst
+++ b/doc/src/Commands_pair.rst
@@ -135,6 +135,7 @@ OPT.
    * :doc:`lebedeva/z <pair_lebedeva_z>`
    * :doc:`lennard/mdf <pair_mdf>`
    * :doc:`lepton (o) <pair_lepton>`
+   * :doc:`lepton/coul (o) <pair_lepton>`
    * :doc:`line/lj <pair_line_lj>`
    * :doc:`lj/charmm/coul/charmm (giko) <pair_charmm>`
    * :doc:`lj/charmm/coul/charmm/implicit (ko) <pair_charmm>`
diff --git a/doc/src/pair_lepton.rst b/doc/src/pair_lepton.rst
index ecd90a0b61..ae02735ffc 100644
--- a/doc/src/pair_lepton.rst
+++ b/doc/src/pair_lepton.rst
@@ -1,10 +1,12 @@
 .. index:: pair_style lepton
 .. index:: pair_style lepton/omp
+.. index:: pair_style lepton/coul
+.. index:: pair_style lepton/coul/omp
 
 pair_style lepton command
 =========================
 
-Accelerator Variants: *lepton/omp*
+Accelerator Variants: *lepton/omp*, *lepton/coul/omp*
 
 Syntax
 """"""
@@ -13,13 +15,17 @@ Syntax
 
    pair_style style args
 
-* style = *lepton*
+* style = *lepton* or *lepton/coul*
 * args = list of arguments for a particular style
 
 .. parsed-literal::
 
     *lepton* args = cutoff
       cutoff = global cutoff for the interactions (distance units)
+    *lepton/coul* args = cutoff keyword
+      cutoff = global cutoff for the interactions (distance units)
+      zero or more keywords may be appended
+      keyword = *ewald* or *pppm* or *msm* or *dispersion* or *tip4p*
 
 Examples
 """"""""
@@ -32,18 +38,26 @@ Examples
    pair_coeff  1 2  "4.0*eps*((sig/r)^12 - (sig/r)^6);eps=1.0;sig=1.0" 1.12246204830937
    pair_coeff  2 2  "eps*(2.0*(sig/r)^9 - 3.0*(sig/r)^6);eps=1.0;sig=1.0"
 
+   pair_style lepton/coul 2.5
+   pair_coeff 1 1 "qi*qj/r" 4.0
+   pair_coeff 1 2 "lj+coul; lj=4.0*eps*((sig/r)^12 - (sig/r)^6); eps=1.0; sig=1.0; coul=qi*qj/r"
+
+
 Description
 """""""""""
 
 .. versionadded:: TBD
 
-Pair style *lepton* computes pairwise interactions between particles
-which depend solely on the distance and have a cutoff.  The potential
-function must be provided as an expression string using "r" as the
-distance variable. Note that additional constants in the expression can
-be defined in the same string as additional expressions separated by
-semi-colons as shown in the examples above.  The expression
-`"200.0*(r-1.5)^2"`, for instance, represents a harmonic potential
+Pair styles *lepton* and *lepton/coul* compute pairwise interactions
+between particles which depend solely on the distance and have a cutoff.
+The potential function must be provided as an expression string using
+"r" as the distance variable.  With pair style *lepton/coul* one may
+additionally reference the charges of the two atoms of the pair with
+"qi" and "qj", respectively.  Note that further constants in the
+expression can be defined in the same string as additional expressions
+separated by semi-colons as shown in the examples above.
+
+The expression `"200.0*(r-1.5)^2"` represents a harmonic potential
 around the pairwise distance :math:`r_0` of 1.5 distance units and a
 force constant *K* of 200.0 energy units:
 
@@ -51,6 +65,12 @@ force constant *K* of 200.0 energy units:
 
    U_{ij} = K (r-r_0)^2
 
+The expression `"qi*qj/r"` represents a regular Coulombic potential with cutoff:
+
+.. math::
+
+   U_{ij} = \frac{C q_i q_j}{\epsilon  r} \qquad r < r_c
+
 The `Lepton library <https://simtk.org/projects/lepton>`_, that the
 *lepton* pair style interfaces with, evaluates this expression string at
 run time to compute the pairwise energy.  It also creates an analytical
@@ -72,6 +92,14 @@ More on valid Lepton expressions below.  The last coefficient is
 optional; it allows to set the cutoff for a pair of atom types to a
 different value than the global cutoff.
 
+For pair style *lepton* only the "lj" value of the :doc:`special_bonds <special_bonds>`
+settings applies in case the interacting pair is also connected with a bond.
+The potential energy will *only* be added to the "evdwl" property.
+
+For pair style *lepton/coul* only the "coul" value of the :doc:`special_bonds <special_bonds>`
+settings applies in case the interacting pair is also connected with a bond.
+The potential energy will *only* be added to the "ecoul" property.
+
 ----------
 
 .. include:: lepton_expression.rst
@@ -85,26 +113,26 @@ different value than the global cutoff.
 Mixing, shift, table, tail correction, restart, rRESPA info
 """""""""""""""""""""""""""""""""""""""""""""""""""""""""""
 
-Pair style *lepton* does not support mixing.  Thus, expressions for
-*all* I,J pairs must be specified explicitly.
+Pair styles *lepton* and *lepton/coul* do not support mixing.  Thus,
+expressions for *all* I,J pairs must be specified explicitly.
 
-This pair style does supports the :doc:`pair_modify shift <pair_modify>`
+Only pair style *lepton* supports the :doc:`pair_modify shift <pair_modify>`
 option for shifting the energy of the pair interaction so that it is
-0 at the cutoff.
+0 at the cutoff; pair style *lepton/coul* does *not*.
 
 The :doc:`pair_modify table <pair_modify>` options are not relevant for
-the this pair style.
+these pair styles.
 
-This pair style does not support the :doc:`pair_modify tail
+These pair styles do not support the :doc:`pair_modify tail
 <pair_modify>` option for adding long-range tail corrections to energy
 and pressure.
 
-This pair style writes its information to :doc:`binary restart files
+These pair styles write their information to :doc:`binary restart files
 <restart>`, so pair_style and pair_coeff commands do not need to be
 specified in an input script that reads a restart file.
 
-This pair style can only be used via the *pair* keyword of the
-:doc:`run_style respa <run_style>` command.  It does not support the
+These pair styles can only be used via the *pair* keyword of the
+:doc:`run_style respa <run_style>` command.  They do not support the
 *inner*, *middle*, *outer* keywords.
 
 ----------
@@ -112,7 +140,7 @@ This pair style can only be used via the *pair* keyword of the
 Restrictions
 """"""""""""
 
-This pair style is part of the LEPTON package and only enabled if
+These pair styles are part of the LEPTON package and only enabled if
 LAMMPS was built with this package.  See the :doc:`Build package
 <Build_package>` page for more info.
 
diff --git a/doc/src/pair_style.rst b/doc/src/pair_style.rst
index ac8888f8ad..3f91bfc0b4 100644
--- a/doc/src/pair_style.rst
+++ b/doc/src/pair_style.rst
@@ -213,6 +213,7 @@ accelerated styles exist.
 * :doc:`lebedeva/z <pair_lebedeva_z>` - Lebedeva interlayer potential for graphene with normals along z-axis
 * :doc:`lennard/mdf <pair_mdf>` - LJ potential in A/B form with a taper function
 * :doc:`lepton <pair_lepton>` - pair potential from evaluating a string
+* :doc:`lepton/coul <pair_lepton>` - pair potential from evaluating a string with support for charges
 * :doc:`line/lj <pair_line_lj>` - LJ potential between line segments
 * :doc:`list <pair_list>` - potential between pairs of atoms explicitly listed in an input file
 * :doc:`lj/charmm/coul/charmm <pair_charmm>` - CHARMM potential with cutoff Coulomb

From 909bbcfdbdc03a056214454fefb7bbf26cad61f9 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Fri, 6 Jan 2023 09:03:00 -0500
Subject: [PATCH 77/79] a few more tweaks for consistency

---
 doc/src/pair_lepton.rst             | 3 +++
 src/LEPTON/pair_lepton.cpp          | 8 +++++++-
 src/LEPTON/pair_lepton_coul.cpp     | 8 ++++----
 src/OPENMP/pair_lepton_coul_omp.cpp | 8 ++++----
 4 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/doc/src/pair_lepton.rst b/doc/src/pair_lepton.rst
index ae02735ffc..303bc13bb9 100644
--- a/doc/src/pair_lepton.rst
+++ b/doc/src/pair_lepton.rst
@@ -42,6 +42,9 @@ Examples
    pair_coeff 1 1 "qi*qj/r" 4.0
    pair_coeff 1 2 "lj+coul; lj=4.0*eps*((sig/r)^12 - (sig/r)^6); eps=1.0; sig=1.0; coul=qi*qj/r"
 
+   pair_style lepton/coul 2.5 pppm
+   kspace_style pppm 1.0e-4
+   pair_coeff 1 1 "qi*qj/r*erfc(alpha*r); alpha=1.067"
 
 Description
 """""""""""
diff --git a/src/LEPTON/pair_lepton.cpp b/src/LEPTON/pair_lepton.cpp
index d2e5f91337..9e3381d32c 100644
--- a/src/LEPTON/pair_lepton.cpp
+++ b/src/LEPTON/pair_lepton.cpp
@@ -210,7 +210,13 @@ void PairLepton::coeff(int narg, char **arg)
   utils::bounds(FLERR, arg[1], 1, atom->ntypes, jlo, jhi, error);
 
   double cut_one = cut_global;
-  if (narg == 4) cut_one = utils::numeric(FLERR, arg[3], false, lmp);
+  if (narg == 4) {
+    if (pppmflag || ewaldflag || msmflag || dispersionflag || tip4pflag) {
+      error->all(FLERR, "Only a global cutoff is allowed with Kspace compatibility enabled");
+    } else {
+      cut_one = utils::numeric(FLERR, arg[3], false, lmp);
+    }
+  }
 
   // remove whitespace and quotes from expression string and then
   // check if the expression can be parsed and evaluated without error
diff --git a/src/LEPTON/pair_lepton_coul.cpp b/src/LEPTON/pair_lepton_coul.cpp
index 374e655b7e..81d2ff7a81 100644
--- a/src/LEPTON/pair_lepton_coul.cpp
+++ b/src/LEPTON/pair_lepton_coul.cpp
@@ -144,16 +144,16 @@ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> void PairLeptonCoul::eval()
           f[j][2] -= delz * fpair;
         }
 
-        double evdwl = 0.0;
+        double ecoul = 0.0;
         if (EFLAG) {
           pairpot[idx].getVariableReference("r") = r;
           if (have_q[idx].first) pairpot[idx].getVariableReference("qi") = q2e * q[i];
           if (have_q[idx].second) pairpot[idx].getVariableReference("qj") = q2e * q[j];
-          evdwl = pairpot[idx].evaluate();
-          evdwl *= factor_coul;
+          ecoul = pairpot[idx].evaluate();
+          ecoul *= factor_coul;
         }
 
-        if (EVFLAG) ev_tally(i, j, nlocal, NEWTON_PAIR, 0.0, evdwl, fpair, delx, dely, delz);
+        if (EVFLAG) ev_tally(i, j, nlocal, NEWTON_PAIR, 0.0, ecoul, fpair, delx, dely, delz);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/OPENMP/pair_lepton_coul_omp.cpp b/src/OPENMP/pair_lepton_coul_omp.cpp
index f70e3d8002..a67779aeb8 100644
--- a/src/OPENMP/pair_lepton_coul_omp.cpp
+++ b/src/OPENMP/pair_lepton_coul_omp.cpp
@@ -166,17 +166,17 @@ void PairLeptonCoulOMP::eval(int iifrom, int iito, ThrData *const thr)
           f[j].z -= delz * fpair;
         }
 
-        double evdwl = 0.0;
+        double ecoul = 0.0;
         if (EFLAG) {
           pairpot[idx].getVariableReference("r") = r;
           if (have_q[idx].first) pairpot[idx].getVariableReference("qi") = q2e * q[i];
           if (have_q[idx].second) pairpot[idx].getVariableReference("qj") = q2e * q[j];
-          evdwl = pairpot[idx].evaluate();
-          evdwl *= factor_coul;
+          ecoul = pairpot[idx].evaluate();
+          ecoul *= factor_coul;
         }
 
         if (EVFLAG)
-          ev_tally_thr(this, i, j, nlocal, NEWTON_PAIR, evdwl, 0.0, fpair, delx, dely, delz, thr);
+          ev_tally_thr(this, i, j, nlocal, NEWTON_PAIR, 0.0, ecoul, fpair, delx, dely, delz, thr);
       }
     }
     f[i].x += fxtmp;

From ce1e997de074aba9950c4aec0631547fbe13535d Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Fri, 6 Jan 2023 12:12:13 -0500
Subject: [PATCH 78/79] do not write out per-type pair cutoff with kspace
 enabled

---
 src/LEPTON/pair_lepton.cpp | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/src/LEPTON/pair_lepton.cpp b/src/LEPTON/pair_lepton.cpp
index 9e3381d32c..f0ccebbb44 100644
--- a/src/LEPTON/pair_lepton.cpp
+++ b/src/LEPTON/pair_lepton.cpp
@@ -388,8 +388,13 @@ void PairLepton::read_restart_settings(FILE *fp)
 
 void PairLepton::write_data(FILE *fp)
 {
-  for (int i = 1; i <= atom->ntypes; i++)
-    fprintf(fp, "%d %s %g\n", i, expressions[type2expression[i][i]].c_str(), cut[i][i]);
+  if (pppmflag || ewaldflag || msmflag || dispersionflag || tip4pflag) {
+    for (int i = 1; i <= atom->ntypes; i++)
+      fprintf(fp, "%d %s\n", i, expressions[type2expression[i][i]].c_str());
+  } else {
+    for (int i = 1; i <= atom->ntypes; i++)
+      fprintf(fp, "%d %s %g\n", i, expressions[type2expression[i][i]].c_str(), cut[i][i]);
+  }
 }
 
 /* ----------------------------------------------------------------------
@@ -398,9 +403,15 @@ void PairLepton::write_data(FILE *fp)
 
 void PairLepton::write_data_all(FILE *fp)
 {
-  for (int i = 1; i <= atom->ntypes; i++)
-    for (int j = i; j <= atom->ntypes; j++)
-      fprintf(fp, "%d %d %s %g\n", i, j, expressions[type2expression[i][j]].c_str(), cut[i][j]);
+  if (pppmflag || ewaldflag || msmflag || dispersionflag || tip4pflag) {
+    for (int i = 1; i <= atom->ntypes; i++)
+      for (int j = i; j <= atom->ntypes; j++)
+        fprintf(fp, "%d %d %s\n", i, j, expressions[type2expression[i][j]].c_str());
+  } else {
+    for (int i = 1; i <= atom->ntypes; i++)
+      for (int j = i; j <= atom->ntypes; j++)
+        fprintf(fp, "%d %d %s %g\n", i, j, expressions[type2expression[i][j]].c_str(), cut[i][j]);
+  }
 }
 
 /* ---------------------------------------------------------------------- */

From f6d8df5706ed91d77a6ef86029d77bad985ffede Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Fri, 6 Jan 2023 12:12:45 -0500
Subject: [PATCH 79/79] add unit tests for lepton/coul

---
 .../tests/mol-pair-lepton_coul.yaml           | 98 +++++++++++++++++++
 .../tests/mol-pair-lepton_coul_long.yaml      | 91 +++++++++++++++++
 2 files changed, 189 insertions(+)
 create mode 100644 unittest/force-styles/tests/mol-pair-lepton_coul.yaml
 create mode 100644 unittest/force-styles/tests/mol-pair-lepton_coul_long.yaml

diff --git a/unittest/force-styles/tests/mol-pair-lepton_coul.yaml b/unittest/force-styles/tests/mol-pair-lepton_coul.yaml
new file mode 100644
index 0000000000..06dba1ebb1
--- /dev/null
+++ b/unittest/force-styles/tests/mol-pair-lepton_coul.yaml
@@ -0,0 +1,98 @@
+---
+lammps_version: 22 Dec 2022
+date_generated: Fri Jan  6 13:07:07 2023
+epsilon: 5e-13
+skip_tests:
+prerequisites: ! |
+  atom full
+  pair lepton/coul
+pre_commands: ! |
+  variable write_data_pair index ij
+post_commands: ! ""
+input_file: in.fourmol
+pair_style: lepton/coul 8.0
+pair_coeff: ! |
+  * *    "lj+coul;lj=4.0*eps*((sig/r)^12-(sig/r)^6);eps=0.015;sig=3.1;coul=qi*qj/r"
+  1 1    "lj+coul;lj=4.0*eps*((sig/r)^12 - (sig/r)^6);eps=0.02;sig=2.5; coul=qi*qj/r"
+  1 2    "lj+coul;lj=4.0*eps*((sig/r)^12 - (sig/r)^6);eps=0.01;sig=1.75; coul=qi*qj/r"
+  1 3    "lj+coul;lj=4.0*eps*((sig/r)^12-(sig/r)^6);coul=qi*qj/r*exp(-kappa*r); eps=0.02;sig=2.85; kappa=1.4"
+  1 4*5  'lj+coul;lj=4.0*eps*((sig/r)^12-(sig/r)^6);eps=0.0173205;\tsig=2.8; coul=qi*qj/r'
+  2 2    'lj+coul;lj=4.0*eps*((sig/r)^12-(sig/r)^6);eps=0.005;sig=1.0; coul=qi*qj/r'
+  2 3    'lj+qi*qj/r;lj=4.0*eps*((sig/r)^12-(sig/r)^6);eps=0.01;sig=2.1' 7.0
+  2 4    'qi*qj*recip(r)*4.0*eps*((sig/r)^12-(sig/r)^6);eps=0.005;sig=0.5' 6.0
+  2 5    'lj+coul;lj=4.0*eps*((sig/r)^12-(sig/r)^6);eps=0.00866025;sig=2.05;  coul=qi*qj/r'
+  3 3    'lj+coul;lj=4.0*eps*((sig/r)^12-(sig/r)^6);eps=0.02;sig=3.2; coul=qi*qj/r'
+  3 4    'lj+coul;lj=4.0*eps*((sig/r)^12-(sig/r)^6);eps=0.0173205;sig=3.15; coul=qi*qj/r'
+  3 5    'lj+coul;lj=4.0*eps*((sig/r)^12-(sig/r)^6);eps=0.0173205;sig=3.15; coul=qi*qj/r'
+extract: ! |
+  cut_coul 2
+natoms: 29
+init_vdwl: 0
+init_coul: 821.5981589340703
+init_stress: ! |2-
+   2.0717392629239480e+03  2.1978008748795769e+03  1.4328942671383857e+03 -6.2083752020563338e+02  5.6994144907661041e+02 -1.2054987450344272e+02
+init_forces: ! |2
+    1 -2.1428108724791500e+01  2.7457780352136433e+02  3.3334818906196404e+02
+    2  1.5327322515896745e+02  1.3335315481349363e+02 -1.7964855973984820e+02
+    3 -1.3507218366871652e+02 -3.8727773775819475e+02 -1.4586697469323420e+02
+    4 -9.8073889503850040e+00  2.6059292292027516e+00 -5.0509854554658213e+00
+    5 -4.2334210982020188e+00 -2.9433448862036782e+00  1.3212067069335845e+01
+    6 -7.6465683110851501e+02  6.1860839968068080e+02 -5.5436850502685866e+02
+    7 -2.1067686349905152e+01 -1.7826316587547311e+00 -2.1071937979630817e+01
+    8  1.4439804357757154e+02 -1.0417049227521566e+02  4.0200948231534448e+02
+    9  7.7448575647692280e+01  8.6035840224653199e+01  3.5312911264774755e+02
+   10  5.3137889943602158e+02 -6.1094612504122324e+02 -1.8285534289458627e+02
+   11 -2.1359563768760506e+00 -7.4922123224296424e+00 -1.0168300107889717e+01
+   12  1.7648416505999929e+01  1.1117487405968582e+01 -5.2080419426096007e+00
+   13  9.7461134016771993e+00 -3.9653981312508977e+00 -4.6169582725063507e-01
+   14 -1.4629046448090548e+00 -9.2708173720626008e-01 -9.3160815172733855e+00
+   15  2.3142847651270890e+00  7.9712516289727775e+00  3.4231594444107696e+00
+   16  1.1245251835221486e+01 -1.8917897333687616e+01  1.8382792004905447e+01
+   17  7.7245233417667762e+00 -9.2706355613052622e+00  3.5649985260544508e+00
+   18  4.8507682207052847e+00  1.4770190051651220e+01 -1.5417992172051264e+01
+   19 -3.0569015588084558e+00 -2.4557623214724408e-01 -6.7586483983164003e+00
+   20  5.6849158422160961e+00  8.1460775328385413e+00 -2.5267301017183690e+00
+   21 -6.9068952751966862e+01 -8.0138116375988176e+01  2.1538477896980069e+02
+   22 -1.0415407422776596e+02 -2.8946831224326232e+01 -1.6527204322769362e+02
+   23  1.7850115075306337e+02  1.0085334232387973e+02 -5.3915259879043830e+01
+   24  3.4173068949839234e+01 -2.0194449586908371e+02  1.0982812303394940e+02
+   25 -1.4543956338306600e+02  2.4174388095258362e+01 -1.1555961263369524e+02
+   26  1.1050717698955798e+02  1.8300044636662693e+02  1.6322926420751710e+01
+   27  4.8960638929347951e+01 -2.1594451942422438e+02  8.6425489362011888e+01
+   28 -1.7915626333180091e+02  6.9115187531736083e+01 -1.1640128162803138e+02
+   29  1.2288518282083334e+02  1.4058359742491464e+02  3.4836874368921215e+01
+run_vdwl: 0
+run_coul: 811.4544702033843
+run_stress: ! |2-
+   2.0254673611730052e+03  2.1546399059317710e+03  1.4004786862056685e+03 -5.9753131395499736e+02  5.5083571091239537e+02 -1.0835693140693489e+02
+run_forces: ! |2
+    1 -1.8521335843758965e+01  2.7133645764220114e+02  3.2433574995927717e+02
+    2  1.4809392027442047e+02  1.2917749741377241e+02 -1.7310196073526532e+02
+    3 -1.3304520923162895e+02 -3.7968494911049333e+02 -1.4319959366292466e+02
+    4 -9.7736308287090505e+00  2.5987154015188216e+00 -5.0400007952335040e+00
+    5 -4.2154589459913527e+00 -2.9161272367460431e+00  1.3160135247239504e+01
+    6 -7.2967769309854623e+02  5.9046852573724129e+02 -5.3051467856382442e+02
+    7 -2.0997425589654178e+01 -1.8160544107113790e+00 -2.0891534666943986e+01
+    8  1.2188724749342416e+02 -8.4551328253319213e+01  3.8615306756463031e+02
+    9  7.5011955012730112e+01  8.3255606934348421e+01  3.4219458836850811e+02
+   10  5.2148704316806834e+02 -5.9979324430671443e+02 -1.8043491013717136e+02
+   11 -2.1401078493346208e+00 -7.4442612787591065e+00 -1.0110204992858977e+01
+   12  1.7636474354482754e+01  1.1112699270001475e+01 -5.3437599490543386e+00
+   13  9.6958143635276102e+00 -3.9319928346575428e+00 -4.5785318797926611e-01
+   14 -1.4220607754262848e+00 -9.5007647196679845e-01 -9.1977567175041592e+00
+   15  2.2926582075298532e+00  7.9855204374638635e+00  3.4455597854291540e+00
+   16  1.1272775987202634e+01 -1.8982953731926997e+01  1.8482663994119836e+01
+   17  7.7322649716681617e+00 -9.2832395767254834e+00  3.5764268171001139e+00
+   18  4.8381507413706926e+00  1.4767160007230055e+01 -1.5415297255213202e+01
+   19 -3.0301967571908386e+00 -2.3380675365092468e-01 -6.7543372757881919e+00
+   20  5.6680537800775825e+00  8.1303039215636126e+00 -2.5343304205297250e+00
+   21 -6.8044494839094128e+01 -7.8355487189544334e+01  2.1140553129180978e+02
+   22 -1.0252397670436311e+02 -2.8706370645813312e+01 -1.6229775211078731e+02
+   23  1.7584423056701743e+02  9.8828824413269928e+01 -5.2908452768081467e+01
+   24  3.5846101050381414e+01 -2.0052956252896166e+02  1.1035850811135379e+02
+   25 -1.4625407226107674e+02  2.4065311783305738e+01 -1.1630437467893353e+02
+   26  1.0964729150033426e+02  1.8169458378109391e+02  1.6534914020055329e+01
+   27  4.7966516737697603e+01 -2.1211441650201505e+02  8.4284596173292087e+01
+   28 -1.7587014571064992e+02  6.7684463683262237e+01 -1.1397174911908617e+02
+   29  1.2059531022549152e+02  1.3818820040573286e+02  3.4546805704364580e+01
+...
diff --git a/unittest/force-styles/tests/mol-pair-lepton_coul_long.yaml b/unittest/force-styles/tests/mol-pair-lepton_coul_long.yaml
new file mode 100644
index 0000000000..e9e6e12ec4
--- /dev/null
+++ b/unittest/force-styles/tests/mol-pair-lepton_coul_long.yaml
@@ -0,0 +1,91 @@
+---
+lammps_version: 22 Dec 2022
+date_generated: Fri Jan  6 11:43:19 2023
+epsilon: 5e-14
+skip_tests:
+prerequisites: ! |
+  atom full
+  pair lepton/coul
+  kspace ewald
+pre_commands: ! |
+  variable write_data_pair index ij
+post_commands: ! |
+  kspace_style ewald 1.0e-6
+  kspace_modify gewald 0.3
+  kspace_modify compute no
+input_file: in.fourmol
+pair_style: lepton/coul 8.0 ewald
+pair_coeff: ! |
+  * *   "qi*qj*recip(r)*erfc(alpha*r); alpha=0.3"
+extract: ! |
+  cut_coul 0
+natoms: 29
+init_vdwl: 0
+init_coul: -70.38250510640493
+init_stress: ! |-
+  -2.5008210217356783e+01 -3.8816639759691036e+01 -4.2034333823535029e+01  1.1409444941634803e+00 -4.1295997638172759e+00  8.4201568655309131e+00
+init_forces: ! |2
+    1  2.4892982161789945e+00  5.4110514108668406e-02 -4.4566809618096226e-02
+    2 -5.4331464474966795e-01 -2.1216561683169477e+00 -6.4422859548063194e-01
+    3 -6.0146768249533106e-03 -8.1326388986176695e-02  3.7065361416437584e-02
+    4  8.6681021081257373e-02  1.5580526332113424e-02 -2.0670607018885634e-01
+    5 -3.8522709006299222e-01  7.0133082015759385e-01 -1.0434991847387730e-01
+    6  1.4434447556206305e+00 -2.8440303025280831e+00 -4.6179150068498664e+00
+    7 -4.5864147114531395e-01  5.9024955376048638e-01  4.1553218572882535e+00
+    8 -8.6081518152315473e-01  3.0956066377879825e+00  3.4033032982271463e+00
+    9  1.5261523156304517e+00 -5.2211589506142237e+00  6.4640754802415712e-01
+   10 -3.2801240413423194e-01  4.4014940340050213e-01 -2.4197018970780454e-01
+   11 -9.1061593846995292e-01  1.0375459413421175e+00 -6.1908492313390162e-01
+   12  2.5338520559714999e+00 -8.7929983601815476e-01  1.4596729132964696e+00
+   13 -1.4071672888191875e-01  2.1411576396316268e-01 -2.6081010297459956e-01
+   14 -1.1829747209994355e+00  5.5630332040276298e-01 -1.2672751438959518e-01
+   15  2.0670863023413566e-01 -1.5738997078396350e-02 -1.0480439260753627e+00
+   16 -8.5610959855133872e-01 -2.9498750855107880e-01  3.6594115342400819e+00
+   17 -1.4898051785401010e+00  4.5112516824383366e+00 -7.0891179993525872e+00
+   18  4.2098013755298869e-01  5.3779573566440924e+00 -1.0259427207235309e+01
+   19  2.2670790003068690e+00 -7.5228898694897461e-01  6.7833651123358383e+00
+   20 -3.2376742045417646e+00 -4.5336415466466278e+00  5.2386297902208794e+00
+   21  2.5070017147159147e+00  5.1306536772315816e+00 -1.1181953169079817e+01
+   22  1.8409085017557936e+00 -7.9710212673754954e-01  7.1963402788166793e+00
+   23 -4.7557630605535390e+00 -3.9276462944101285e+00  4.4810840896139208e+00
+   24 -2.0947896827942634e+00  1.1192949743826818e+01 -6.2310311947608952e+00
+   25  3.6287203008748117e+00 -3.7687374993637501e+00  4.1890995206612853e+00
+   26 -2.0869274982695774e+00 -7.6713243106409479e+00  1.4445429367653724e+00
+   27 -2.2885314331888633e+00  1.2284277786306042e+01 -4.4417851629928391e+00
+   28  4.8138126021013559e+00 -5.2552567477954808e+00  4.0949271840163188e+00
+   29 -2.1387057387936306e+00 -7.0378870630657362e+00  3.2854636539119964e-01
+run_vdwl: 0
+run_coul: -70.44917655348034
+run_stress: ! |-
+  -2.5113123634782742e+01 -3.8829080724625797e+01 -4.1974866539740219e+01  1.0992234948320725e+00 -4.2133088504442426e+00  8.3411051648873880e+00
+run_forces: ! |2
+    1  2.4901412551649300e+00  5.7578653428409510e-02 -4.1371607623628126e-02
+    2 -5.5016258001424978e-01 -2.1253709224208284e+00 -6.4675464562515428e-01
+    3 -6.1181321977963887e-03 -8.1603298903751717e-02  3.6960785600450803e-02
+    4  8.7664325692039677e-02  1.5421259781568622e-02 -2.0732825200733390e-01
+    5 -3.8426968316648857e-01  7.0133255072202327e-01 -1.0476754203354563e-01
+    6  1.4397341598006732e+00 -2.8402394782115676e+00 -4.6100516195557670e+00
+    7 -4.5653174471706293e-01  5.8740258024502268e-01  4.1428488591886587e+00
+    8 -8.5296361579479296e-01  3.0913766100847191e+00  3.4036983616153718e+00
+    9  1.5243103951275745e+00 -5.2263379168195243e+00  6.5393457865716798e-01
+   10 -3.2821334506572997e-01  4.3971066168353573e-01 -2.4286942093472491e-01
+   11 -9.1028816015853564e-01  1.0378115705103634e+00 -6.1764845972882765e-01
+   12  2.5331296442141347e+00 -8.7784221234396187e-01  1.4615298895650435e+00
+   13 -1.4156045923285593e-01  2.1289742571056952e-01 -2.6069154431898300e-01
+   14 -1.1831640746902548e+00  5.5740094013927854e-01 -1.2578321992777053e-01
+   15  2.0841553748238612e-01 -1.7735699273721650e-02 -1.0507998538736403e+00
+   16 -8.5592937232497124e-01 -2.9499330116027700e-01  3.6596869057608190e+00
+   17 -1.4921853960121267e+00  4.5201460939997453e+00 -7.0955732968497722e+00
+   18  3.7454832859071807e-01  5.3289765761099757e+00 -1.0216517192599614e+01
+   19  2.3040416130164512e+00 -7.2163590974331215e-01  6.7964873033426567e+00
+   20 -3.2272895016107213e+00 -4.5149041691827723e+00  5.1825710348837140e+00
+   21  2.5158885590673008e+00  5.1066184546650719e+00 -1.1178503591543096e+01
+   22  1.8668709182252012e+00 -7.7491832605292943e-01  7.2018200873121181e+00
+   23 -4.7901034321003175e+00 -3.9269711077380931e+00  4.4714696806765986e+00
+   24 -2.1181643059791524e+00  1.1212571247691631e+01 -6.2484485049803284e+00
+   25  3.6682989527010301e+00 -3.7627024244034528e+00  4.2233692491713928e+00
+   26 -2.1013736492934769e+00 -7.6947415821246645e+00  1.4312211135584418e+00
+   27 -2.3073378850139967e+00  1.2300051223668088e+01 -4.4256454159786633e+00
+   28  4.8331575574345518e+00 -5.2611718528271307e+00  4.0937238161728597e+00
+   29 -2.1405459091444592e+00 -7.0481276472340166e+00  3.1343250207555534e-01
+...