add support for JIT compilation
This commit is contained in:
148
lib/lepton/asmjit/x86/x86archtraits_p.h
Normal file
148
lib/lepton/asmjit/x86/x86archtraits_p.h
Normal file
@ -0,0 +1,148 @@
|
||||
// This file is part of AsmJit project <https://asmjit.com>
|
||||
//
|
||||
// See asmjit.h or LICENSE.md for license and copyright information
|
||||
// SPDX-License-Identifier: Zlib
|
||||
|
||||
#ifndef ASMJIT_X86_X86ARCHTRAITS_P_H_INCLUDED
|
||||
#define ASMJIT_X86_X86ARCHTRAITS_P_H_INCLUDED
|
||||
|
||||
#include "../core/archtraits.h"
|
||||
#include "../core/misc_p.h"
|
||||
#include "../x86/x86operand.h"
|
||||
|
||||
ASMJIT_BEGIN_SUB_NAMESPACE(x86)
|
||||
|
||||
//! \cond INTERNAL
|
||||
//! \addtogroup asmjit_x86
|
||||
//! \{
|
||||
|
||||
//! X86 architecture traits (internal).
//!
//! Positional aggregate initializer of `ArchTraits` for the X86 target (the 64-bit counterpart is
//! `x64ArchTraits`). Nothing here is named, so the order of the initializers must follow the member
//! order of `ArchTraits` (presumably declared in `../core/archtraits.h` - confirm before reordering).
|
||||
static const constexpr ArchTraits x86ArchTraits = {
|
||||
// SP/FP/LR/PC register ids. LR and PC are given as 0xFF - presumably the "no such register"
// marker on X86; confirm against the `ArchTraits` declaration.
|
||||
Gp::kIdSp, Gp::kIdBp, 0xFF, 0xFF,
|
||||
|
||||
// Reserved (zero-filled).
|
||||
{ 0, 0, 0 },
|
||||
|
||||
// HW stack alignment.
|
||||
1,
|
||||
|
||||
// Min/Max stack offset (both limits are 0x7FFFFFFF).
|
||||
0x7FFFFFFFu, 0x7FFFFFFFu,
|
||||
|
||||
// ISA features [Gp, Vec, Other0, Other1] - only the first (Gp) slot carries hints.
|
||||
{{
|
||||
InstHints::kRegSwap | InstHints::kPushPop,
|
||||
InstHints::kNoHints,
|
||||
InstHints::kNoHints,
|
||||
InstHints::kNoHints
|
||||
}},
|
||||
|
||||
// Register signatures - `V(index)` yields the signature of `x86::RegTraits<RegType(index)>`.
// `ASMJIT_LOOKUP_TABLE_32(V, 0)` presumably expands `V` for 32 consecutive indexes starting
// at 0 - confirm against the macro definition.
|
||||
#define V(index) OperandSignature{x86::RegTraits<RegType(index)>::kSignature}
|
||||
{{ ASMJIT_LOOKUP_TABLE_32(V, 0) }},
|
||||
#undef V
|
||||
|
||||
// RegTypeToTypeId - `V(index)` yields the `TypeId` stored in `x86::RegTraits<RegType(index)>`.
|
||||
#define V(index) TypeId(x86::RegTraits<RegType(index)>::kTypeId)
|
||||
{{ ASMJIT_LOOKUP_TABLE_32(V, 0) }},
|
||||
#undef V
|
||||
|
||||
// TypeIdToRegType - entry `index` describes the type id `index + TypeId::_kBaseStart`. Type ids
// that have no explicit case in the chain below (this table has none for kInt64/kUInt64) map to
// `RegType::kNone`. kIntPtr/kUIntPtr map to the 32-bit `kX86_Gpd` here.
|
||||
#define V(index) (index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kInt8) ? RegType::kX86_GpbLo : \
|
||||
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUInt8) ? RegType::kX86_GpbLo : \
|
||||
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kInt16) ? RegType::kX86_Gpw : \
|
||||
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUInt16) ? RegType::kX86_Gpw : \
|
||||
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kInt32) ? RegType::kX86_Gpd : \
|
||||
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUInt32) ? RegType::kX86_Gpd : \
|
||||
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kIntPtr) ? RegType::kX86_Gpd : \
|
||||
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUIntPtr) ? RegType::kX86_Gpd : \
|
||||
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kFloat32) ? RegType::kX86_Xmm : \
|
||||
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kFloat64) ? RegType::kX86_Xmm : \
|
||||
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kMask8) ? RegType::kX86_KReg : \
|
||||
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kMask16) ? RegType::kX86_KReg : \
|
||||
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kMask32) ? RegType::kX86_KReg : \
|
||||
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kMask64) ? RegType::kX86_KReg : \
|
||||
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kMmx32) ? RegType::kX86_Mm : \
|
||||
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kMmx64) ? RegType::kX86_Mm : RegType::kNone)
|
||||
{{ ASMJIT_LOOKUP_TABLE_32(V, 0) }},
|
||||
#undef V
|
||||
|
||||
// Word names of 8-bit, 16-bit, 32-bit, and 64-bit quantities (DB, DW, DD, DQ).
|
||||
{
|
||||
ArchTypeNameId::kDB,
|
||||
ArchTypeNameId::kDW,
|
||||
ArchTypeNameId::kDD,
|
||||
ArchTypeNameId::kDQ
|
||||
}
|
||||
};
|
||||
|
||||
//! X64 architecture traits (internal).
//!
//! Positional aggregate initializer of `ArchTraits` for the X64 target. It is identical to
//! `x86ArchTraits` except that the TypeIdToRegType table additionally maps kInt64/kUInt64 to
//! `RegType::kX86_Gpq`. Nothing here is named, so the order of the initializers must follow the
//! member order of `ArchTraits` (presumably declared in `../core/archtraits.h` - confirm before
//! reordering).
|
||||
static const constexpr ArchTraits x64ArchTraits = {
|
||||
// SP/FP/LR/PC register ids. LR and PC are given as 0xFF - presumably the "no such register"
// marker on X86/X64; confirm against the `ArchTraits` declaration.
|
||||
Gp::kIdSp, Gp::kIdBp, 0xFF, 0xFF,
|
||||
|
||||
// Reserved (zero-filled).
|
||||
{ 0, 0, 0 },
|
||||
|
||||
// HW stack alignment.
|
||||
1,
|
||||
|
||||
// Min/Max stack offset (both limits are 0x7FFFFFFF).
|
||||
0x7FFFFFFFu, 0x7FFFFFFFu,
|
||||
|
||||
// ISA features [Gp, Vec, Other0, Other1] - only the first (Gp) slot carries hints.
|
||||
{{
|
||||
InstHints::kRegSwap | InstHints::kPushPop,
|
||||
InstHints::kNoHints,
|
||||
InstHints::kNoHints,
|
||||
InstHints::kNoHints
|
||||
}},
|
||||
|
||||
// Register signatures - `V(index)` yields the signature of `x86::RegTraits<RegType(index)>`.
// `ASMJIT_LOOKUP_TABLE_32(V, 0)` presumably expands `V` for 32 consecutive indexes starting
// at 0 - confirm against the macro definition.
|
||||
#define V(index) OperandSignature{x86::RegTraits<RegType(index)>::kSignature}
|
||||
{{ ASMJIT_LOOKUP_TABLE_32(V, 0) }},
|
||||
#undef V
|
||||
|
||||
// RegTypeToTypeId - `V(index)` yields the `TypeId` stored in `x86::RegTraits<RegType(index)>`.
|
||||
#define V(index) TypeId(x86::RegTraits<RegType(index)>::kTypeId)
|
||||
{{ ASMJIT_LOOKUP_TABLE_32(V, 0) }},
|
||||
#undef V
|
||||
|
||||
// TypeIdToRegType - entry `index` describes the type id `index + TypeId::_kBaseStart`. Type ids
// that have no explicit case in the chain below map to `RegType::kNone`.
// NOTE(review): kIntPtr/kUIntPtr map to the 32-bit `kX86_Gpd` even in this 64-bit table, while
// kInt64/kUInt64 map to `kX86_Gpq`. Possibly harmless if abstract type ids are resolved to a
// concrete width before this table is consulted - confirm against the callers.
|
||||
#define V(index) (index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kInt8) ? RegType::kX86_GpbLo : \
|
||||
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUInt8) ? RegType::kX86_GpbLo : \
|
||||
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kInt16) ? RegType::kX86_Gpw : \
|
||||
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUInt16) ? RegType::kX86_Gpw : \
|
||||
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kInt32) ? RegType::kX86_Gpd : \
|
||||
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUInt32) ? RegType::kX86_Gpd : \
|
||||
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kInt64) ? RegType::kX86_Gpq : \
|
||||
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUInt64) ? RegType::kX86_Gpq : \
|
||||
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kIntPtr) ? RegType::kX86_Gpd : \
|
||||
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUIntPtr) ? RegType::kX86_Gpd : \
|
||||
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kFloat32) ? RegType::kX86_Xmm : \
|
||||
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kFloat64) ? RegType::kX86_Xmm : \
|
||||
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kMask8) ? RegType::kX86_KReg : \
|
||||
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kMask16) ? RegType::kX86_KReg : \
|
||||
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kMask32) ? RegType::kX86_KReg : \
|
||||
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kMask64) ? RegType::kX86_KReg : \
|
||||
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kMmx32) ? RegType::kX86_Mm : \
|
||||
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kMmx64) ? RegType::kX86_Mm : RegType::kNone)
|
||||
{{ ASMJIT_LOOKUP_TABLE_32(V, 0) }},
|
||||
#undef V
|
||||
|
||||
// Word names of 8-bit, 16-bit, 32-bit, and 64-bit quantities (DB, DW, DD, DQ).
|
||||
{
|
||||
ArchTypeNameId::kDB,
|
||||
ArchTypeNameId::kDW,
|
||||
ArchTypeNameId::kDD,
|
||||
ArchTypeNameId::kDQ
|
||||
}
|
||||
};
|
||||
|
||||
//! \}
|
||||
//! \endcond
|
||||
|
||||
ASMJIT_END_SUB_NAMESPACE
|
||||
|
||||
#endif // ASMJIT_X86_X86ARCHTRAITS_P_H_INCLUDED
|
||||
5110
lib/lepton/asmjit/x86/x86assembler.cpp
Normal file
5110
lib/lepton/asmjit/x86/x86assembler.cpp
Normal file
File diff suppressed because it is too large
Load Diff
685
lib/lepton/asmjit/x86/x86assembler.h
Normal file
685
lib/lepton/asmjit/x86/x86assembler.h
Normal file
@ -0,0 +1,685 @@
|
||||
// This file is part of AsmJit project <https://asmjit.com>
|
||||
//
|
||||
// See asmjit.h or LICENSE.md for license and copyright information
|
||||
// SPDX-License-Identifier: Zlib
|
||||
|
||||
#ifndef ASMJIT_X86_X86ASSEMBLER_H_INCLUDED
|
||||
#define ASMJIT_X86_X86ASSEMBLER_H_INCLUDED
|
||||
|
||||
#include "../core/assembler.h"
|
||||
#include "../x86/x86emitter.h"
|
||||
#include "../x86/x86operand.h"
|
||||
|
||||
ASMJIT_BEGIN_SUB_NAMESPACE(x86)
|
||||
|
||||
//! \addtogroup asmjit_x86
|
||||
//! \{
|
||||
|
||||
//! X86/X64 assembler implementation.
|
||||
//!
|
||||
//! x86::Assembler is a code emitter that emits machine code directly into the \ref CodeBuffer. The assembler is capable
|
||||
//! of targeting both 32-bit and 64-bit instruction sets, the instruction set can be configured through \ref CodeHolder.
|
||||
//!
|
||||
//! ### Basics
|
||||
//!
|
||||
//! The following example shows a basic use of `x86::Assembler`, how to generate a function that works in both 32-bit
|
||||
//! and 64-bit modes, and how to connect \ref JitRuntime, \ref CodeHolder, and `x86::Assembler`.
|
||||
//!
|
||||
//! ```
|
||||
//! #include <asmjit/x86.h>
|
||||
//! #include <stdio.h>
|
||||
//!
|
||||
//! using namespace asmjit;
|
||||
//!
|
||||
//! // Signature of the generated function.
|
||||
//! typedef int (*SumFunc)(const int* arr, size_t count);
|
||||
//!
|
||||
//! int main() {
|
||||
//! JitRuntime rt; // Create a runtime specialized for JIT.
|
||||
//! CodeHolder code; // Create a CodeHolder.
|
||||
//!
|
||||
//! code.init(rt.environment()); // Initialize code to match the JIT environment.
|
||||
//! x86::Assembler a(&code); // Create and attach x86::Assembler to code.
|
||||
//!
|
||||
//! // Decide between 32-bit CDECL, WIN64, and SysV64 calling conventions:
|
||||
//! // 32-BIT - passes all arguments by stack.
|
||||
//! // WIN64 - passes first 4 arguments by RCX, RDX, R8, and R9.
|
||||
//! // UNIX64 - passes first 6 arguments by RDI, RSI, RDX, RCX, R8, and R9.
|
||||
//! x86::Gp arr, cnt;
|
||||
//! x86::Gp sum = x86::eax; // Use EAX as 'sum' as it's a return register.
|
||||
//!
|
||||
//! if (ASMJIT_ARCH_BITS == 64) {
|
||||
//! #if defined(_WIN32)
|
||||
//! arr = x86::rcx; // First argument (array ptr).
|
||||
//! cnt = x86::rdx; // Second argument (number of elements)
|
||||
//! #else
|
||||
//! arr = x86::rdi; // First argument (array ptr).
|
||||
//! cnt = x86::rsi; // Second argument (number of elements)
|
||||
//! #endif
|
||||
//! }
|
||||
//! else {
|
||||
//! arr = x86::edx; // Use EDX to hold the array pointer.
|
||||
//! cnt = x86::ecx; // Use ECX to hold the counter.
|
||||
//! // Fetch first and second arguments from [ESP + 4] and [ESP + 8].
|
||||
//! a.mov(arr, x86::ptr(x86::esp, 4));
|
||||
//! a.mov(cnt, x86::ptr(x86::esp, 8));
|
||||
//! }
|
||||
//!
|
||||
//! Label Loop = a.newLabel(); // To construct the loop, we need some labels.
|
||||
//! Label Exit = a.newLabel();
|
||||
//!
|
||||
//! a.xor_(sum, sum); // Clear 'sum' register (shorter than 'mov').
|
||||
//! a.test(cnt, cnt); // Border case:
|
||||
//! a.jz(Exit); // If 'cnt' is zero jump to 'Exit' now.
|
||||
//!
|
||||
//! a.bind(Loop); // Start of a loop iteration.
|
||||
//! a.add(sum, x86::dword_ptr(arr)); // Add int at [arr] to 'sum'.
|
||||
//! a.add(arr, 4); // Increment 'arr' pointer.
|
||||
//! a.dec(cnt); // Decrease 'cnt'.
|
||||
//! a.jnz(Loop); // If not zero jump to 'Loop'.
|
||||
//!
|
||||
//! a.bind(Exit); // Exit to handle the border case.
|
||||
//! a.ret(); // Return from function ('sum' == 'eax').
|
||||
//! // ----> x86::Assembler is no longer needed from here and can be destroyed <----
|
||||
//!
|
||||
//! SumFunc fn;
|
||||
//! Error err = rt.add(&fn, &code); // Add the generated code to the runtime.
|
||||
//!
|
||||
//! if (err) return 1; // Handle a possible error returned by AsmJit.
|
||||
//! // ----> CodeHolder is no longer needed from here and can be destroyed <----
|
||||
//!
|
||||
//! static const int array[6] = { 4, 8, 15, 16, 23, 42 };
|
||||
//!
|
||||
//! int result = fn(array, 6); // Execute the generated code.
|
||||
//! printf("%d\n", result); // Print sum of array (108).
|
||||
//!
|
||||
//! rt.release(fn); // Explicitly remove the function from the runtime
|
||||
//! return 0; // Everything successful...
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! The example should be self-explanatory. It shows how to work with labels, how to use operands, and how to emit
|
||||
//! instructions that can use different registers based on runtime selection. It implements 32-bit CDECL, WIN64,
|
||||
//! and SysV64 calling conventions and will work on most X86/X64 environments.
|
||||
//!
|
||||
//! Although functions prologs / epilogs can be implemented manually, AsmJit provides utilities that can be used
|
||||
//! to create function prologs and epilogs automatically, see \ref asmjit_function for more details.
|
||||
//!
|
||||
//! ### Instruction Validation
|
||||
//!
|
||||
//! Assembler prefers speed over strictness by default. The implementation checks the type of operands and fails
|
||||
//! if the signature of types is invalid, however, it does only basic checks regarding registers and their groups
|
||||
//! used in instructions. It's possible to pass operands that don't form any valid signature to the implementation
|
||||
//! and succeed. This is usually not a problem as Assembler provides typed API so operand types are normally checked
|
||||
//! by C++ compiler at compile time, however, Assembler is fully dynamic and its \ref emit() function can be called
|
||||
//! with any instruction id, options, and operands. Moreover, it's also possible to form instructions that will be
|
||||
//! accepted by the typed API, for example by calling `mov(x86::eax, x86::al)` - the C++ compiler won't see a problem
|
||||
//! as both EAX and AL are \ref Gp registers.
|
||||
//!
|
||||
//! To help with common mistakes AsmJit allows to activate instruction validation. This feature instruments
|
||||
//! the Assembler to call \ref InstAPI::validate() before it attempts to encode any instruction.
|
||||
//!
|
||||
//! The example below illustrates how validation can be turned on:
|
||||
//!
|
||||
//! ```
|
||||
//! #include <asmjit/x86.h>
|
||||
//! #include <stdio.h>
|
||||
//!
|
||||
//! using namespace asmjit;
|
||||
//!
|
||||
//! int main(int argc, char* argv[]) {
|
||||
//! JitRuntime rt; // Create a runtime specialized for JIT.
|
||||
//! CodeHolder code; // Create a CodeHolder.
|
||||
//!
|
||||
//! code.init(rt.environment()); // Initialize code to match the JIT environment.
|
||||
//! x86::Assembler a(&code); // Create and attach x86::Assembler to code.
|
||||
//!
|
||||
//! // Enable strict validation.
|
||||
//! a.addDiagnosticOptions(DiagnosticOptions::kValidateAssembler);
|
||||
//!
|
||||
//! // Try to encode invalid or ill-formed instructions.
|
||||
//! Error err;
|
||||
//!
|
||||
//! // Invalid instruction.
|
||||
//! err = a.mov(x86::eax, x86::al);
|
||||
//! printf("Status: %s\n", DebugUtils::errorAsString(err));
|
||||
//!
|
||||
//! // Invalid instruction.
|
||||
//! err = a.emit(x86::Inst::kIdMovss, x86::eax, x86::xmm0);
|
||||
//! printf("Status: %s\n", DebugUtils::errorAsString(err));
|
||||
//!
|
||||
//! // Ambiguous operand size - the pointer requires size.
|
||||
//! err = a.inc(x86::ptr(x86::rax));
|
||||
//! printf("Status: %s\n", DebugUtils::errorAsString(err));
|
||||
//!
|
||||
//! return 0;
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! ### Native Registers
|
||||
//!
|
||||
//! All emitters provide functions to construct machine-size registers depending on the target. This feature is
|
||||
//! for users that want to write code targeting both 32-bit and 64-bit architectures at the same time. In AsmJit
|
||||
//! terminology such registers have prefix `z`, so for example on X86 architecture the following native registers
|
||||
//! are provided:
|
||||
//!
|
||||
//! - `zax` - mapped to either `eax` or `rax`
|
||||
//! - `zbx` - mapped to either `ebx` or `rbx`
|
||||
//! - `zcx` - mapped to either `ecx` or `rcx`
|
||||
//! - `zdx` - mapped to either `edx` or `rdx`
|
||||
//! - `zsp` - mapped to either `esp` or `rsp`
|
||||
//! - `zbp` - mapped to either `ebp` or `rbp`
|
||||
//! - `zsi` - mapped to either `esi` or `rsi`
|
||||
//! - `zdi` - mapped to either `edi` or `rdi`
|
||||
//!
|
||||
//! They are accessible through \ref x86::Assembler, \ref x86::Builder, and \ref x86::Compiler. The example below
|
||||
//! illustrates how to use this feature:
|
||||
//!
|
||||
//! ```
|
||||
//! #include <asmjit/x86.h>
|
||||
//! #include <stdio.h>
|
||||
//!
|
||||
//! using namespace asmjit;
|
||||
//!
|
||||
//! typedef int (*Func)(void);
|
||||
//!
|
||||
//! int main(int argc, char* argv[]) {
|
||||
//! JitRuntime rt; // Create a runtime specialized for JIT.
|
||||
//! CodeHolder code; // Create a CodeHolder.
|
||||
//!
|
||||
//! code.init(rt.environment()); // Initialize code to match the JIT environment.
|
||||
//! x86::Assembler a(&code); // Create and attach x86::Assembler to code.
|
||||
//!
|
||||
//! // Let's get these registers from x86::Assembler.
|
||||
//! x86::Gp zbp = a.zbp();
|
||||
//! x86::Gp zsp = a.zsp();
|
||||
//!
|
||||
//! int stackSize = 32;
|
||||
//!
|
||||
//! // Function prolog.
|
||||
//! a.push(zbp);
|
||||
//! a.mov(zbp, zsp);
|
||||
//! a.sub(zsp, stackSize);
|
||||
//!
|
||||
//! // ... emit some code (this just sets return value to zero) ...
|
||||
//! a.xor_(x86::eax, x86::eax);
|
||||
//!
|
||||
//! // Function epilog and return.
|
||||
//! a.mov(zsp, zbp);
|
||||
//! a.pop(zbp);
|
||||
//! a.ret();
|
||||
//!
|
||||
//! // To make the example complete let's call it.
|
||||
//! Func fn;
|
||||
//! Error err = rt.add(&fn, &code); // Add the generated code to the runtime.
|
||||
//! if (err) return 1; // Handle a possible error returned by AsmJit.
|
||||
//!
|
||||
//! int result = fn(); // Execute the generated code.
|
||||
//! printf("%d\n", result); // Print the resulting "0".
|
||||
//!
|
||||
//! rt.release(fn); // Remove the function from the runtime.
|
||||
//! return 0;
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! The example just returns `0`, but the function generated contains a standard prolog and epilog sequence and the
|
||||
//! function itself reserves 32 bytes of local stack. The advantage is clear - a single code-base can handle multiple
|
||||
//! targets easily. If you want to create a register of native size dynamically by specifying its id it's also possible:
|
||||
//!
|
||||
//! ```
|
||||
//! void example(x86::Assembler& a) {
|
||||
//! x86::Gp zax = a.gpz(x86::Gp::kIdAx);
|
||||
//! x86::Gp zbx = a.gpz(x86::Gp::kIdBx);
|
||||
//! x86::Gp zcx = a.gpz(x86::Gp::kIdCx);
|
||||
//! x86::Gp zdx = a.gpz(x86::Gp::kIdDx);
|
||||
//!
|
||||
//! // You can also change register's id easily.
|
||||
//! x86::Gp zsp = zax;
|
||||
//! zsp.setId(4); // or x86::Gp::kIdSp.
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! ### Data Embedding
|
||||
//!
|
||||
//! x86::Assembler extends the standard \ref BaseAssembler with X86/X64 specific conventions that are often used by
|
||||
//! assemblers to embed data next to the code. The following functions can be used to embed data:
|
||||
//!
|
||||
//! - \ref BaseAssembler::embedInt8() - embeds int8_t (portable naming).
|
||||
//! - \ref BaseAssembler::embedUInt8() - embeds uint8_t (portable naming).
|
||||
//! - \ref BaseAssembler::embedInt16() - embeds int16_t (portable naming).
|
||||
//! - \ref BaseAssembler::embedUInt16() - embeds uint16_t (portable naming).
|
||||
//! - \ref BaseAssembler::embedInt32() - embeds int32_t (portable naming).
|
||||
//! - \ref BaseAssembler::embedUInt32() - embeds uint32_t (portable naming).
|
||||
//! - \ref BaseAssembler::embedInt64() - embeds int64_t (portable naming).
|
||||
//! - \ref BaseAssembler::embedUInt64() - embeds uint64_t (portable naming).
|
||||
//! - \ref BaseAssembler::embedFloat() - embeds float (portable naming).
|
||||
//! - \ref BaseAssembler::embedDouble() - embeds double (portable naming).
|
||||
//!
|
||||
//! - \ref x86::Assembler::db() - embeds byte (8 bits) (x86 naming).
|
||||
//! - \ref x86::Assembler::dw() - embeds word (16 bits) (x86 naming).
|
||||
//! - \ref x86::Assembler::dd() - embeds dword (32 bits) (x86 naming).
|
||||
//! - \ref x86::Assembler::dq() - embeds qword (64 bits) (x86 naming).
|
||||
//!
|
||||
//! The following example illustrates how embed works:
|
||||
//!
|
||||
//! ```
|
||||
//! #include <asmjit/x86.h>
|
||||
//! using namespace asmjit;
|
||||
//!
|
||||
//! void embedData(x86::Assembler& a) {
|
||||
//! a.db(0xFF); // Embeds 0xFF byte.
|
||||
//! a.dw(0xFF00); // Embeds 0xFF00 word (little-endian).
|
||||
//! a.dd(0xFF000000); // Embeds 0xFF000000 dword (little-endian).
|
||||
//! a.embedFloat(0.4f); // Embeds 0.4f (32-bit float, little-endian).
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! Sometimes it's required to read data that is embedded after the code. This can be done through
|
||||
//! \ref Label as shown below:
|
||||
//!
|
||||
//! ```
|
||||
//! #include <asmjit/x86.h>
|
||||
//! using namespace asmjit;
|
||||
//!
|
||||
//! void embedData(x86::Assembler& a, const Label& L_Data) {
|
||||
//! x86::Gp addr = a.zax(); // EAX or RAX.
|
||||
//! x86::Gp val = x86::edi; // Where to store some value...
|
||||
//!
|
||||
//! // Approach 1 - Load the address to register through LEA. This approach
|
||||
//! // is flexible as the address can be then manipulated, for
|
||||
//! // example if you have a data array, which would need index.
|
||||
//! a.lea(addr, L_Data); // Loads the address of the label to EAX or RAX.
|
||||
//! a.mov(val, x86::dword_ptr(addr));
|
||||
//!
|
||||
//! // Approach 2 - Load the data directly by using L_Data in address. It's
|
||||
//! // worth noting that this doesn't work with indexes in X64
|
||||
//! // mode. It will use absolute address in 32-bit mode and
|
||||
//! // relative address (RIP) in 64-bit mode.
|
||||
//! a.mov(val, x86::dword_ptr(L_Data));
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! ### Label Embedding
|
||||
//!
|
||||
//! It's also possible to embed labels. In general AsmJit provides the following options:
|
||||
//!
|
||||
//! - \ref BaseEmitter::embedLabel() - Embeds absolute address of a label. This is target dependent and would
|
||||
//! embed either 32-bit or 64-bit data that embeds absolute label address. This kind of embedding cannot be
|
||||
//! used in a position independent code.
|
||||
//!
|
||||
//! - \ref BaseEmitter::embedLabelDelta() - Embeds a difference between two labels. The size of the difference
|
||||
//! can be specified so it's possible to embed 8-bit, 16-bit, 32-bit, and 64-bit difference, which is sufficient
|
||||
//! for most purposes.
|
||||
//!
|
||||
//! The following example demonstrates how to embed labels and their differences:
|
||||
//!
|
||||
//! ```
|
||||
//! #include <asmjit/x86.h>
|
||||
//! using namespace asmjit;
|
||||
//!
|
||||
//! void embedLabel(x86::Assembler& a, const Label& L_Data) {
|
||||
//! // [1] Embed L_Data - the size of the data will be dependent on the target.
|
||||
//! a.embedLabel(L_Data);
|
||||
//!
|
||||
//! // [2] Embed a 32-bit difference of two labels.
|
||||
//! Label L_Here = a.newLabel();
|
||||
//! a.bind(L_Here);
|
||||
//! // Embeds int32_t(L_Data - L_Here).
|
||||
//! a.embedLabelDelta(L_Data, L_Here, 4);
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! ### Using FuncFrame and FuncDetail with x86::Assembler
|
||||
//!
|
||||
//! The example below demonstrates how \ref FuncFrame and \ref FuncDetail can be used together with \ref x86::Assembler
|
||||
//! to generate a function that will use platform dependent calling conventions automatically depending on the target:
|
||||
//!
|
||||
//! ```
|
||||
//! #include <asmjit/x86.h>
|
||||
//! #include <stdio.h>
|
||||
//!
|
||||
//! using namespace asmjit;
|
||||
//!
|
||||
//! typedef void (*SumIntsFunc)(int* dst, const int* a, const int* b);
|
||||
//!
|
||||
//! int main(int argc, char* argv[]) {
|
||||
//! JitRuntime rt; // Create JIT Runtime.
|
||||
//! CodeHolder code; // Create a CodeHolder.
|
||||
//!
|
||||
//! code.init(rt.environment()); // Initialize code to match the JIT environment.
|
||||
//! x86::Assembler a(&code); // Create and attach x86::Assembler to code.
|
||||
//!
|
||||
//! // Decide which registers will be mapped to function arguments. Try changing
|
||||
//! // registers of dst, src_a, and src_b and see what happens in function's
|
||||
//! // prolog and epilog.
|
||||
//! x86::Gp dst = a.zax();
|
||||
//! x86::Gp src_a = a.zcx();
|
||||
//! x86::Gp src_b = a.zdx();
|
||||
//!
|
||||
//! x86::Xmm vec0 = x86::xmm0;
|
||||
//! x86::Xmm vec1 = x86::xmm1;
|
||||
//!
|
||||
//! // Create/initialize FuncDetail and FuncFrame.
|
||||
//! FuncDetail func;
|
||||
//! func.init(FuncSignatureT<void, int*, const int*, const int*>(CallConvId::kHost));
|
||||
//!
|
||||
//! FuncFrame frame;
|
||||
//! frame.init(func);
|
||||
//!
|
||||
//! // Make XMM0 and XMM1 dirty - RegGroup::kVec describes XMM|YMM|ZMM registers.
|
||||
//! frame.setDirtyRegs(RegGroup::kVec, IntUtils::mask(0, 1));
|
||||
//!
|
||||
//! // Alternatively, if you don't want to use register masks you can pass BaseReg
|
||||
//! // to addDirtyRegs(). The following code would add both xmm0 and xmm1.
|
||||
//! frame.addDirtyRegs(x86::xmm0, x86::xmm1);
|
||||
//!
|
||||
//! FuncArgsAssignment args(&func); // Create arguments assignment context.
|
||||
//! args.assignAll(dst, src_a, src_b);// Assign our registers to arguments.
|
||||
//! args.updateFrameInfo(frame); // Reflect our args in FuncFrame.
|
||||
//! frame.finalize(); // Finalize the FuncFrame (updates it).
|
||||
//!
|
||||
//! a.emitProlog(frame); // Emit function prolog.
|
||||
//! a.emitArgsAssignment(frame, args);// Assign arguments to registers.
|
||||
//! a.movdqu(vec0, x86::ptr(src_a)); // Load 4 ints from [src_a] to XMM0.
|
||||
//! a.movdqu(vec1, x86::ptr(src_b)); // Load 4 ints from [src_b] to XMM1.
|
||||
//! a.paddd(vec0, vec1); // Add 4 ints in XMM1 to XMM0.
|
||||
//! a.movdqu(x86::ptr(dst), vec0); // Store the result to [dst].
|
||||
//! a.emitEpilog(frame); // Emit function epilog and return.
|
||||
//!
|
||||
//! SumIntsFunc fn;
|
||||
//! Error err = rt.add(&fn, &code); // Add the generated code to the runtime.
|
||||
//! if (err) return 1; // Handle a possible error case.
|
||||
//!
|
||||
//! // Execute the generated function.
|
||||
//! int inA[4] = { 4, 3, 2, 1 };
|
||||
//! int inB[4] = { 1, 5, 2, 8 };
|
||||
//! int out[4];
|
||||
//! fn(out, inA, inB);
|
||||
//!
|
||||
//! // Prints {5 8 4 9}
|
||||
//! printf("{%d %d %d %d}\n", out[0], out[1], out[2], out[3]);
|
||||
//!
|
||||
//! rt.release(fn);
|
||||
//! return 0;
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! ### Using x86::Assembler as Code-Patcher
|
||||
//!
|
||||
//! This is an advanced topic that is sometimes unavoidable. AsmJit by default appends machine code it generates
|
||||
//! into a \ref CodeBuffer, however, it also allows to set the offset in \ref CodeBuffer explicitly and to overwrite
|
||||
//! its content. This technique is extremely dangerous as X86 instructions have variable length (see below), so you
|
||||
//! should in general only patch code to change instruction's immediate values or some other details not known
|
||||
//! at the time the instruction was emitted. A typical scenario that requires code-patching is when you start emitting
|
||||
//! function and you don't know how much stack you want to reserve for it.
|
||||
//!
|
||||
//! Before we go further it's important to introduce instruction options, because they can help with code-patching
|
||||
//! (and not only patching, but that will be explained in AVX-512 section):
|
||||
//!
|
||||
//! - Many general-purpose instructions (especially arithmetic ones) on X86 have multiple encodings - in AsmJit
|
||||
//! this is usually called 'short form' and 'long form'.
|
||||
//!
|
||||
//! - AsmJit always tries to use 'short form' as it makes the resulting machine-code smaller, which is always
|
||||
//! good - this decision is used by majority of assemblers out there.
|
||||
//!
|
||||
//! - AsmJit allows to override the default decision by using `short_()` and `long_()` instruction options to force
|
||||
//! short or long form, respectively. The most useful is `long_()` as it basically forces AsmJit to always emit
|
||||
//! the longest form. The `short_()` is not that useful as it's automatic (except jumps to non-bound labels). Note
|
||||
//! that the underscore after each function name avoids collision with built-in C++ types.
|
||||
//!
|
||||
//! To illustrate what short form and long form means in binary let's assume we want to emit "add esp, 16" instruction,
|
||||
//! which has two possible binary encodings:
|
||||
//!
|
||||
//! - `83C410` - This is a short form aka `short add esp, 16` - You can see opcode byte (0x83), MOD/RM byte (0xC4)
|
||||
//! and an 8-bit immediate value representing `16`.
|
||||
//!
|
||||
//! - `81C410000000` - This is a long form aka `long add esp, 16` - You can see a different opcode byte (0x81), the
|
||||
//! same Mod/RM byte (0xC4) and a 32-bit immediate in little-endian representing `16`.
|
||||
//!
|
||||
//! It should be obvious that patching an existing instruction into an instruction having a different size may create
|
||||
//! various problems. So it's recommended to be careful and to only patch instructions into instructions having the
|
||||
//! same size. The example below demonstrates how instruction options can be used to guarantee the size of an
|
||||
//! instruction by forcing the assembler to use long-form encoding:
|
||||
//!
|
||||
//! ```
|
||||
//! #include <asmjit/x86.h>
|
||||
//! #include <stdio.h>
|
||||
//!
|
||||
//! using namespace asmjit;
|
||||
//!
|
||||
//! typedef int (*Func)(void);
|
||||
//!
|
||||
//! int main(int argc, char* argv[]) {
|
||||
//! JitRuntime rt; // Create a runtime specialized for JIT.
|
||||
//! CodeHolder code; // Create a CodeHolder.
|
||||
//!
|
||||
//! code.init(rt.environment()); // Initialize code to match the JIT environment.
|
||||
//! x86::Assembler a(&code); // Create and attach x86::Assembler to code.
|
||||
//!
|
||||
//! // Let's get these registers from x86::Assembler.
|
||||
//! x86::Gp zbp = a.zbp();
|
||||
//! x86::Gp zsp = a.zsp();
|
||||
//!
|
||||
//! // Function prolog.
|
||||
//! a.push(zbp);
|
||||
//! a.mov(zbp, zsp);
|
||||
//!
|
||||
//! // This is where we are gonna patch the code later, so let's get the offset
|
||||
//! // (the current location) from the beginning of the code-buffer.
|
||||
//! size_t patchOffset = a.offset();
|
||||
//! // Let's just emit 'sub zsp, 0' for now, but don't forget to use LONG form.
|
||||
//! a.long_().sub(zsp, 0);
|
||||
//!
|
||||
//! // ... emit some code (this just sets return value to zero) ...
|
||||
//! a.xor_(x86::eax, x86::eax);
|
||||
//!
|
||||
//! // Function epilog and return.
|
||||
//! a.mov(zsp, zbp);
|
||||
//! a.pop(zbp);
|
||||
//! a.ret();
|
||||
//!
|
||||
//! // Now we know how much stack size we want to reserve. I have chosen 128
|
||||
//! // bytes on purpose as it's encodable only in long form that we have used.
|
||||
//!
|
||||
//! int stackSize = 128; // Number of bytes to reserve on the stack.
|
||||
//! a.setOffset(patchOffset); // Move the current cursor to `patchOffset`.
|
||||
//! a.long_().sub(zsp, stackSize); // Patch the code; don't forget to use LONG form.
|
||||
//!
|
||||
//! // Now the code is ready to be called
|
||||
//! Func fn;
|
||||
//! Error err = rt.add(&fn, &code); // Add the generated code to the runtime.
|
||||
//! if (err) return 1; // Handle a possible error returned by AsmJit.
|
||||
//!
|
||||
//! int result = fn(); // Execute the generated code.
|
||||
//! printf("%d\n", result); // Print the resulting "0".
|
||||
//!
|
||||
//! rt.release(fn); // Remove the function from the runtime.
|
||||
//! return 0;
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! If you run the example it will just work, because both instructions have the same size. As an experiment you can
|
||||
//! try removing `long_()` form to see what happens when wrong code is generated.
|
||||
//!
|
||||
//! ### Code Patching and REX Prefix
|
||||
//!
|
||||
//! In 64-bit mode there is one more thing to worry about when patching code: REX prefix. It's a single byte prefix
|
||||
//! designed to address registers with ids from 8 to 15 and to override the default width of operation from 32 to 64
|
||||
//! bits. AsmJit, like other assemblers, only emits REX prefix when it's necessary. If the patched code only changes
|
||||
//! the immediate value as shown in the previous example then there is nothing to worry about as it doesn't change
|
||||
//! the logic behind emitting REX prefix, however, if the patched code changes register id or overrides the operation
|
||||
//! width then it's important to take care of REX prefix as well.
|
||||
//!
|
||||
//! AsmJit contains another instruction option that controls (forces) REX prefix - `rex()`. If you use it the
|
||||
//! instruction emitted will always use REX prefix even when it's encodable without it. The following list contains
|
||||
//! some instructions and their binary representations to illustrate when it's emitted:
|
||||
//!
|
||||
//! - `__83C410` - `add esp, 16` - 32-bit operation in 64-bit mode doesn't require REX prefix.
|
||||
//! - `4083C410` - `rex add esp, 16` - 32-bit operation in 64-bit mode with forced REX prefix (0x40).
|
||||
//! - `4883C410` - `add rsp, 16` - 64-bit operation in 64-bit mode requires REX prefix (0x48).
|
||||
//! - `4183C410` - `add r12d, 16` - 32-bit operation in 64-bit mode using R12D requires REX prefix (0x41).
|
||||
//! - `4983C410` - `add r12, 16` - 64-bit operation in 64-bit mode using R12 requires REX prefix (0x49).
|
||||
//!
|
||||
//! ### More Prefixes
|
||||
//!
|
||||
//! X86 architecture is known for its prefixes. AsmJit supports all prefixes
|
||||
//! that can affect how the instruction is encoded:
|
||||
//!
|
||||
//! ```
|
||||
//! #include <asmjit/x86.h>
|
||||
//!
|
||||
//! using namespace asmjit;
|
||||
//!
|
||||
//! void prefixesExample(x86::Assembler& a) {
|
||||
//! // Lock prefix for implementing atomics:
|
||||
//! // lock add dword ptr [dst], 1
|
||||
//! a.lock().add(x86::dword_ptr(dst), 1);
|
||||
//!
|
||||
//! // Similarly, XAcquire/XRelease prefixes are also available:
|
||||
//! // xacquire add dword ptr [dst], 1
|
||||
//! a.xacquire().add(x86::dword_ptr(dst), 1);
|
||||
//!
|
||||
//! // Rep prefix (see also repe/repz and repne/repnz):
|
||||
//! // rep movs byte ptr [dst], byte ptr [src]
|
||||
//! a.rep().movs(x86::byte_ptr(dst), x86::byte_ptr(src));
|
||||
//!
|
||||
//! // Forcing REX prefix in 64-bit mode.
|
||||
//! // rex mov eax, 1
|
||||
//! a.rex().mov(x86::eax, 1);
|
||||
//!
|
||||
//! // AVX instruction without forced prefix uses the shortest encoding:
|
||||
//! // vaddpd xmm0, xmm1, xmm2 -> [C5|F1|58|C2]
|
||||
//! a.vaddpd(x86::xmm0, x86::xmm1, x86::xmm2);
|
||||
//!
|
||||
//! // Forcing VEX3 prefix (AVX):
|
||||
//! // vex3 vaddpd xmm0, xmm1, xmm2 -> [C4|E1|71|58|C2]
|
||||
//! a.vex3().vaddpd(x86::xmm0, x86::xmm1, x86::xmm2);
|
||||
//!
|
||||
//! // Forcing EVEX prefix (AVX512):
|
||||
//! // evex vaddpd xmm0, xmm1, xmm2 -> [62|F1|F5|08|58|C2]
|
||||
//! a.evex().vaddpd(x86::xmm0, x86::xmm1, x86::xmm2);
|
||||
//!
|
||||
//! // Some instructions accept prefixes that were not originally intended for them:
|
||||
//! // rep ret
|
||||
//! a.rep().ret();
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! It's important to understand that prefixes are part of instruction options. When a member function that involves
|
||||
//! adding a prefix is called the prefix is combined with existing instruction options, which will affect the next
|
||||
//! instruction generated.
|
||||
//!
|
||||
//! ### Generating AVX512 code.
|
||||
//!
|
||||
//! x86::Assembler can generate AVX512+ code including the use of opmask registers. Opmask can be specified through
|
||||
//! \ref x86::Assembler::k() function, which stores it as an extra register, which will be used by the next
|
||||
//! instruction. AsmJit uses the same concept for manipulating instruction options as well.
|
||||
//!
|
||||
//! The following AVX512 features are supported:
|
||||
//!
|
||||
//! - Opmask selector {k} and zeroing {z}.
|
||||
//! - Rounding modes {rn|rd|ru|rz} and suppress-all-exceptions {sae} option.
|
||||
//! - AVX512 broadcasts {1toN}.
|
||||
//!
|
||||
//! The following example demonstrates how AVX512 features can be used:
|
||||
//!
|
||||
//! ```
|
||||
//! #include <asmjit/x86.h>
|
||||
//!
|
||||
//! using namespace asmjit;
|
||||
//!
|
||||
//! void generateAVX512Code(x86::Assembler& a) {
|
||||
//! using namespace x86;
|
||||
//!
|
||||
//! // Opmask Selectors
|
||||
//! // ----------------
|
||||
//! //
|
||||
//! // - Opmask / zeroing is part of the instruction options / extraReg.
|
||||
//! // - k(reg) is like {kreg} in Intel syntax.
|
||||
//! // - z() is like {z} in Intel syntax.
|
||||
//!
|
||||
//! // vaddpd zmm0 {k1} {z}, zmm1, zmm2
|
||||
//! a.k(k1).z().vaddpd(zmm0, zmm1, zmm2);
|
||||
//!
|
||||
//! // Memory Broadcasts
|
||||
//! // -----------------
|
||||
//! //
|
||||
//! // - Broadcast data is part of memory operand.
|
||||
//! // - Use x86::Mem::_1toN(), which returns a new x86::Mem operand.
|
||||
//!
|
||||
//! // vaddpd zmm0 {k1} {z}, zmm1, [rcx] {1to8}
|
||||
//! a.k(k1).z().vaddpd(zmm0, zmm1, x86::mem(rcx)._1to8());
|
||||
//!
|
||||
//! // Embedded Rounding & Suppress-All-Exceptions
|
||||
//! // -------------------------------------------
|
||||
//! //
|
||||
//! // - Rounding mode and {sae} are part of instruction options.
|
||||
//! // - Use sae() to enable exception suppression.
|
||||
//! // - Use rn_sae(), rd_sae(), ru_sae(), and rz_sae() - to enable rounding.
|
||||
//! // - Embedded rounding implicitly sets {sae} as well, that's why the API
|
||||
//! // also has sae() suffix, to make it clear.
|
||||
//!
|
||||
//! // vcmppd k1, zmm1, zmm2, 0x00 {sae}
|
||||
//! a.sae().vcmppd(k1, zmm1, zmm2, 0);
|
||||
//!
|
||||
//! // vaddpd zmm0, zmm1, zmm2 {rz}
|
||||
//! a.rz_sae().vaddpd(zmm0, zmm1, zmm2);
|
||||
//! }
|
||||
//! ```
|
||||
class ASMJIT_VIRTAPI Assembler
|
||||
: public BaseAssembler,
|
||||
public EmitterImplicitT<Assembler> {
|
||||
public:
|
||||
ASMJIT_NONCOPYABLE(Assembler)
|
||||
typedef BaseAssembler Base;
|
||||
|
||||
//! \name Construction & Destruction
|
||||
//! \{
|
||||
|
||||
ASMJIT_API explicit Assembler(CodeHolder* code = nullptr) noexcept;
|
||||
ASMJIT_API virtual ~Assembler() noexcept;
|
||||
|
||||
//! \}
|
||||
|
||||
//! \cond INTERNAL
|
||||
//! \name Internal
|
||||
//! \{
|
||||
|
||||
// NOTE: x86::Assembler uses _privateData to store 'address-override' bit that is used to decide whether to emit
|
||||
// address-override (67H) prefix based on the memory BASE+INDEX registers. It's either `kX86MemInfo_67H_X86` or
|
||||
// `kX86MemInfo_67H_X64`.
|
||||
inline uint32_t _addressOverrideMask() const noexcept { return _privateData; }
|
||||
inline void _setAddressOverrideMask(uint32_t m) noexcept { _privateData = m; }
|
||||
|
||||
//! \}
|
||||
//! \endcond
|
||||
|
||||
//! \name Emit
|
||||
//! \{
|
||||
|
||||
ASMJIT_API Error _emit(InstId instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_* opExt) override;
|
||||
|
||||
//! \}
|
||||
//! \endcond
|
||||
|
||||
//! \name Align
|
||||
//! \{
|
||||
|
||||
ASMJIT_API Error align(AlignMode alignMode, uint32_t alignment) override;
|
||||
|
||||
//! \}
|
||||
|
||||
//! \name Events
|
||||
//! \{
|
||||
|
||||
ASMJIT_API Error onAttach(CodeHolder* code) noexcept override;
|
||||
ASMJIT_API Error onDetach(CodeHolder* code) noexcept override;
|
||||
|
||||
//! \}
|
||||
};
|
||||
|
||||
//! \}
|
||||
|
||||
ASMJIT_END_SUB_NAMESPACE
|
||||
|
||||
#endif // ASMJIT_X86_X86ASSEMBLER_H_INCLUDED
|
||||
52
lib/lepton/asmjit/x86/x86builder.cpp
Normal file
52
lib/lepton/asmjit/x86/x86builder.cpp
Normal file
@ -0,0 +1,52 @@
|
||||
// This file is part of AsmJit project <https://asmjit.com>
|
||||
//
|
||||
// See asmjit.h or LICENSE.md for license and copyright information
|
||||
// SPDX-License-Identifier: Zlib
|
||||
|
||||
#include "../core/api-build_p.h"
|
||||
#if !defined(ASMJIT_NO_X86) && !defined(ASMJIT_NO_BUILDER)
|
||||
|
||||
#include "../x86/x86assembler.h"
|
||||
#include "../x86/x86builder.h"
|
||||
#include "../x86/x86emithelper_p.h"
|
||||
|
||||
ASMJIT_BEGIN_SUB_NAMESPACE(x86)
|
||||
|
||||
// x86::Builder - Construction & Destruction
|
||||
// =========================================
|
||||
|
||||
// Sets up an X86/X64 builder and attaches it to `code` when one is given.
Builder::Builder(CodeHolder* code) noexcept : BaseBuilder() {
  // One bit per architecture this emitter accepts.
  const uint64_t x86Bit = uint64_t(1) << uint32_t(Arch::kX86);
  const uint64_t x64Bit = uint64_t(1) << uint32_t(Arch::kX64);

  _archMask = x86Bit | x64Bit;
  assignEmitterFuncs(this);

  // Attaching is optional - a null `code` leaves the builder detached.
  if (code != nullptr) {
    code->attach(this);
  }
}

// Nothing to release here.
Builder::~Builder() noexcept {}
|
||||
|
||||
// x86::Builder - Events
|
||||
// =====================
|
||||
|
||||
// Attach event - forwarded unchanged to the base builder.
Error Builder::onAttach(CodeHolder* code) noexcept {
  const Error result = Base::onAttach(code);
  return result;
}
|
||||
|
||||
// Detach event - forwarded unchanged to the base builder.
Error Builder::onDetach(CodeHolder* code) noexcept {
  const Error result = Base::onDetach(code);
  return result;
}
|
||||
|
||||
// x86::Builder - Finalize
|
||||
// =======================
|
||||
|
||||
// Runs all registered passes and then serializes the node list through a temporary
// x86::Assembler created on the same code object.
Error Builder::finalize() {
  // Stop early when any pass fails.
  ASMJIT_PROPAGATE(runPasses());

  // The temporary assembler inherits this builder's encoding and diagnostic options.
  Assembler assembler(_code);
  assembler.addEncodingOptions(encodingOptions());
  assembler.addDiagnosticOptions(diagnosticOptions());

  const Error serializeResult = serializeTo(&assembler);
  return serializeResult;
}
|
||||
|
||||
ASMJIT_END_SUB_NAMESPACE
|
||||
|
||||
#endif // !ASMJIT_NO_X86 && !ASMJIT_NO_BUILDER
|
||||
351
lib/lepton/asmjit/x86/x86builder.h
Normal file
351
lib/lepton/asmjit/x86/x86builder.h
Normal file
@ -0,0 +1,351 @@
|
||||
// This file is part of AsmJit project <https://asmjit.com>
|
||||
//
|
||||
// See asmjit.h or LICENSE.md for license and copyright information
|
||||
// SPDX-License-Identifier: Zlib
|
||||
|
||||
#ifndef ASMJIT_X86_X86BUILDER_H_INCLUDED
|
||||
#define ASMJIT_X86_X86BUILDER_H_INCLUDED
|
||||
|
||||
#include "../core/api-config.h"
|
||||
#ifndef ASMJIT_NO_BUILDER
|
||||
|
||||
#include "../core/builder.h"
|
||||
#include "../x86/x86emitter.h"
|
||||
|
||||
ASMJIT_BEGIN_SUB_NAMESPACE(x86)
|
||||
|
||||
//! \addtogroup asmjit_x86
|
||||
//! \{
|
||||
|
||||
//! X86/X64 builder implementation.
|
||||
//!
|
||||
//! The code representation used by \ref BaseBuilder is compatible with everything AsmJit provides. Each instruction
|
||||
//! is stored as \ref InstNode, which contains instruction id, options, and operands. Each instruction emitted will
|
||||
//! create a new \ref InstNode instance and add it to the current cursor in the double-linked list of nodes. Since
|
||||
//! the instruction stream used by \ref BaseBuilder can be manipulated, we can rewrite the SumInts example from
|
||||
//! \ref asmjit_assembler into the following:
|
||||
//!
|
||||
//! ```
|
||||
//! #include <asmjit/x86.h>
|
||||
//! #include <stdio.h>
|
||||
//!
|
||||
//! using namespace asmjit;
|
||||
//!
|
||||
//! typedef void (*SumIntsFunc)(int* dst, const int* a, const int* b);
|
||||
//!
|
||||
//! // Small helper function to print the current content of `builder`.
|
||||
//! static void dumpCode(BaseBuilder& builder, const char* phase) {
|
||||
//! String sb;
|
||||
//! builder.dump(sb);
|
||||
//! printf("%s:\n%s\n", phase, sb.data());
|
||||
//! }
|
||||
//!
|
||||
//! int main() {
|
||||
//! JitRuntime rt; // Create JIT Runtime.
|
||||
//! CodeHolder code; // Create a CodeHolder.
|
||||
//!
|
||||
//! code.init(rt.environment()); // Initialize code to match the JIT environment.
|
||||
//! x86::Builder cb(&code); // Create and attach x86::Builder to `code`.
|
||||
//!
|
||||
//! // Decide which registers will be mapped to function arguments. Try changing registers
|
||||
//! // of `dst`, `srcA`, and `srcB` and see what happens in function's prolog and epilog.
|
||||
//! x86::Gp dst = cb.zax();
|
||||
//! x86::Gp srcA = cb.zcx();
|
||||
//! x86::Gp srcB = cb.zdx();
|
||||
//!
|
||||
//! x86::Xmm vec0 = x86::xmm0;
|
||||
//! x86::Xmm vec1 = x86::xmm1;
|
||||
//!
|
||||
//! // Create and initialize `FuncDetail`.
|
||||
//! FuncDetail func;
|
||||
//! func.init(FuncSignatureT<void, int*, const int*, const int*>(CallConvId::kHost));
|
||||
//!
|
||||
//! // Remember prolog insertion point.
|
||||
//! BaseNode* prologInsertionPoint = cb.cursor();
|
||||
//!
|
||||
//! // Emit function body:
|
||||
//! cb.movdqu(vec0, x86::ptr(srcA)); // Load 4 ints from [srcA] to XMM0.
|
||||
//! cb.movdqu(vec1, x86::ptr(srcB)); // Load 4 ints from [srcB] to XMM1.
|
||||
//! cb.paddd(vec0, vec1); // Add 4 ints in XMM1 to XMM0.
|
||||
//! cb.movdqu(x86::ptr(dst), vec0); // Store the result to [dst].
|
||||
//!
|
||||
//! // Remember epilog insertion point.
|
||||
//! BaseNode* epilogInsertionPoint = cb.cursor();
|
||||
//!
|
||||
//! // Let's see what we have now.
|
||||
//! dumpCode(cb, "Raw Function");
|
||||
//!
|
||||
//! // Now, after we emitted the function body, we can insert the prolog, arguments
|
||||
//! // allocation, and epilog. This is not possible with using pure x86::Assembler.
|
||||
//! FuncFrame frame;
|
||||
//! frame.init(func);
|
||||
//!
|
||||
//! // Make XMM0 and XMM1 dirty; RegGroup::kVec describes XMM|YMM|ZMM registers.
|
||||
//! frame.setDirtyRegs(RegGroup::kVec, Support::bitMask(0, 1));
|
||||
//!
|
||||
//! FuncArgsAssignment args(&func); // Create arguments assignment context.
|
||||
//! args.assignAll(dst, srcA, srcB); // Assign our registers to arguments.
|
||||
//! args.updateFrame(frame); // Reflect our args in FuncFrame.
|
||||
//! frame.finalize(); // Finalize the FuncFrame (updates it).
|
||||
//!
|
||||
//! // Insert function prolog and allocate arguments to registers.
|
||||
//! cb.setCursor(prologInsertionPoint);
|
||||
//! cb.emitProlog(frame);
|
||||
//! cb.emitArgsAssignment(frame, args);
|
||||
//!
|
||||
//! // Insert function epilog.
|
||||
//! cb.setCursor(epilogInsertionPoint);
|
||||
//! cb.emitEpilog(frame);
|
||||
//!
|
||||
//! // Let's see how the function's prolog and epilog looks.
|
||||
//! dumpCode(cb, "Prolog & Epilog");
|
||||
//!
|
||||
//! // IMPORTANT: Builder requires finalize() to be called to serialize its
|
||||
//! // code to the Assembler (it automatically creates one if not attached).
|
||||
//! cb.finalize();
|
||||
//!
|
||||
//! SumIntsFunc fn;
|
||||
//! Error err = rt.add(&fn, &code); // Add the generated code to the runtime.
|
||||
//! if (err) return 1; // Handle a possible error case.
|
||||
//!
|
||||
//! // Execute the generated function.
|
||||
//! int inA[4] = { 4, 3, 2, 1 };
|
||||
//! int inB[4] = { 1, 5, 2, 8 };
|
||||
//! int out[4];
|
||||
//! fn(out, inA, inB);
|
||||
//!
|
||||
//! // Prints {5 8 4 9}
|
||||
//! printf("{%d %d %d %d}\n", out[0], out[1], out[2], out[3]);
|
||||
//!
|
||||
//! rt.release(fn); // Explicitly remove the function from the runtime.
|
||||
//! return 0;
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! When the example is executed it should output the following (this one using AMD64-SystemV ABI):
|
||||
//!
|
||||
//! ```
|
||||
//! Raw Function:
|
||||
//! movdqu xmm0, [rcx]
|
||||
//! movdqu xmm1, [rdx]
|
||||
//! paddd xmm0, xmm1
|
||||
//! movdqu [rax], xmm0
|
||||
//!
|
||||
//! Prolog & Epilog:
|
||||
//! mov rax, rdi
|
||||
//! mov rcx, rsi
|
||||
//! movdqu xmm0, [rcx]
|
||||
//! movdqu xmm1, [rdx]
|
||||
//! paddd xmm0, xmm1
|
||||
//! movdqu [rax], xmm0
|
||||
//! ret
|
||||
//!
|
||||
//! {5 8 4 9}
|
||||
//! ```
|
||||
//!
|
||||
//! The number of use-cases of \ref BaseBuilder is not limited and highly depends on your creativity and experience.
|
||||
//! The previous example can be easily improved to collect all dirty registers inside the function programmatically
|
||||
//! and to pass them to \ref FuncFrame::setDirtyRegs().
|
||||
//!
|
||||
//! ```
|
||||
//! #include <asmjit/x86.h>
|
||||
//!
|
||||
//! using namespace asmjit;
|
||||
//!
|
||||
//! // NOTE: This function doesn't cover all possible constructs. It ignores instructions that write
|
||||
//! // to implicit registers that are not part of the operand list. It also counts read-only registers.
|
||||
//! // Real implementation would be a bit more complicated, but still relatively easy to implement.
|
||||
//! static void collectDirtyRegs(const BaseNode* first,
|
||||
//! const BaseNode* last,
|
||||
//! Support::Array<RegMask, Globals::kNumVirtGroups>& regMask) {
|
||||
//! const BaseNode* node = first;
|
||||
//! while (node) {
|
||||
//! if (node->actsAsInst()) {
|
||||
//! const InstNode* inst = node->as<InstNode>();
|
||||
//! const Operand* opArray = inst->operands();
|
||||
//!
|
||||
//! for (uint32_t i = 0, opCount = inst->opCount(); i < opCount; i++) {
|
||||
//! const Operand& op = opArray[i];
|
||||
//! if (op.isReg()) {
|
||||
//! const x86::Reg& reg = op.as<x86::Reg>();
|
||||
//! if (reg.group() <= RegGroup::kMaxVirt) {
|
||||
//! regMask[reg.group()] |= 1u << reg.id();
|
||||
//! }
|
||||
//! }
|
||||
//! }
|
||||
//! }
|
||||
//!
|
||||
//! if (node == last)
|
||||
//! break;
|
||||
//! node = node->next();
|
||||
//! }
|
||||
//! }
|
||||
//!
|
||||
//! static void setDirtyRegsOfFuncFrame(const x86::Builder& builder, FuncFrame& frame) {
|
||||
//! Support::Array<RegMask, Globals::kNumVirtGroups> regMask {};
|
||||
//! collectDirtyRegs(builder.firstNode(), builder.lastNode(), regMask);
|
||||
//!
|
||||
//! // X86/X64 ABIs only require saving GP/XMM registers:
|
||||
//! frame.setDirtyRegs(RegGroup::kGp, regMask[RegGroup::kGp]);
|
||||
//! frame.setDirtyRegs(RegGroup::kVec, regMask[RegGroup::kVec]);
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! ### Casting Between Various Emitters
|
||||
//!
|
||||
//! Even when \ref BaseAssembler and \ref BaseBuilder provide the same interface as defined by \ref BaseEmitter their
|
||||
//! platform dependent variants like \ref x86::Assembler and \ref x86::Builder cannot be interchanged or cast to each
|
||||
//! other by using a C++ `static_cast<>`. The main reason is the inheritance graph of these classes is different and
|
||||
//! cast-incompatible, as illustrated below:
|
||||
//!
|
||||
//! ```
|
||||
//! +--------------+ +=========================+
|
||||
//! +----------------------->| x86::Emitter |<--+--# x86::EmitterImplicitT<> #<--+
|
||||
//! | +--------------+ | +=========================+ |
|
||||
//! | (abstract) | (mixin) |
|
||||
//! | +--------------+ +~~~~~~~~~~~~~~+ | |
|
||||
//! +-->| BaseAssembler|---->|x86::Assembler|<--+ |
|
||||
//! | +--------------+ +~~~~~~~~~~~~~~+ | |
|
||||
//! | (abstract) (final) | |
|
||||
//! +===============+ | +--------------+ +~~~~~~~~~~~~~~+ | |
|
||||
//! # BaseEmitter #--+-->| BaseBuilder |--+->| x86::Builder |<--+ |
|
||||
//! +===============+ +--------------+ | +~~~~~~~~~~~~~~+ |
|
||||
//! (abstract) (abstract) | (final) |
|
||||
//! +---------------------+ |
|
||||
//! | |
|
||||
//! | +--------------+ +~~~~~~~~~~~~~~+ +=========================+ |
|
||||
//! +-->| BaseCompiler |---->| x86::Compiler|<-----# x86::EmitterExplicitT<> #---+
|
||||
//! +--------------+ +~~~~~~~~~~~~~~+ +=========================+
|
||||
//! (abstract) (final) (mixin)
|
||||
//! ```
|
||||
//!
|
||||
//! The graph basically shows that it's not possible to cast between \ref x86::Assembler and \ref x86::Builder.
|
||||
//! However, since both share the base interface (\ref BaseEmitter) it's possible to cast them to a class that
|
||||
//! cannot be instantiated, but defines the same interface - the class is called \ref x86::Emitter and was
|
||||
//! introduced to make it possible to write a function that can emit to both \ref x86::Assembler and \ref
|
||||
//! x86::Builder. Note that \ref x86::Emitter cannot be created, it's abstract and has private constructors and
|
||||
//! destructors; it was only designed to be cast to and used as an interface.
|
||||
//!
|
||||
//! Each architecture-specific emitter implements a member function called
|
||||
//! `as<arch::Emitter>()`, which casts the instance to the architecture
|
||||
//! specific emitter as illustrated below:
|
||||
//!
|
||||
//! ```
|
||||
//! #include <asmjit/x86.h>
|
||||
//!
|
||||
//! using namespace asmjit;
|
||||
//!
|
||||
//! static void emitSomething(x86::Emitter* e) {
|
||||
//! e->mov(x86::eax, x86::ebx);
|
||||
//! }
|
||||
//!
|
||||
//! static void assemble(CodeHolder& code, bool useAsm) {
|
||||
//! if (useAsm) {
|
||||
//! x86::Assembler assembler(&code);
|
||||
//! emitSomething(assembler.as<x86::Emitter>());
|
||||
//! }
|
||||
//! else {
|
||||
//! x86::Builder builder(&code);
|
||||
//! emitSomething(builder.as<x86::Emitter>());
|
||||
//!
|
||||
//! // NOTE: Builder requires `finalize()` to be called to serialize its
|
||||
//! // content to Assembler (it automatically creates one if not attached).
|
||||
//! builder.finalize();
|
||||
//! }
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! The example above shows how to create a function that can emit code to either \ref x86::Assembler or \ref
|
||||
//! x86::Builder through \ref x86::Emitter, which provides emitter-neutral functionality. \ref x86::Emitter,
|
||||
//! however, doesn't provide any emitter-specific functionality like `setCursor()`.
|
||||
//!
|
||||
//! ### Code Injection and Manipulation
|
||||
//!
|
||||
//! \ref BaseBuilder emitter stores its nodes in a double-linked list, which makes it easy to manipulate that
|
||||
//! list during the code generation or afterwards. Each node is always emitted next to the current cursor and
|
||||
//! the cursor is advanced to that newly emitted node. The cursor can be retrieved and changed by \ref
|
||||
//! BaseBuilder::cursor() and \ref BaseBuilder::setCursor(), respectively.
|
||||
//!
|
||||
//! The example below demonstrates how to remember a node and inject something
|
||||
//! next to it.
|
||||
//!
|
||||
//! ```
|
||||
//! static void example(x86::Builder& builder) {
|
||||
//! // Emit something, after it returns the cursor would point at the last
|
||||
//! // emitted node.
|
||||
//! builder.mov(x86::rax, x86::rdx); // [1]
|
||||
//!
|
||||
//! // We can retrieve the node.
|
||||
//! BaseNode* node = builder.cursor();
|
||||
//!
|
||||
//! // Change the instruction we just emitted, just for fun...
|
||||
//! if (node->isInst()) {
|
||||
//! InstNode* inst = node->as<InstNode>();
|
||||
//! // Changes the operand at index [1] to RCX.
|
||||
//! inst->setOp(1, x86::rcx);
|
||||
//! }
|
||||
//!
|
||||
//! // ------------------------- Generate Some Code -------------------------
|
||||
//! builder.add(x86::rax, x86::rdx); // [2]
|
||||
//! builder.shr(x86::rax, 3); // [3]
|
||||
//! // ----------------------------------------------------------------------
|
||||
//!
|
||||
//! // Now, we know where our node is, and we can simply change the cursor
|
||||
//! // and start emitting something after it. The setCursor() function
|
||||
//! // returns the previous cursor, and it's always a good practice to remember
|
||||
//! // it, because you never know if you are not already injecting the code
|
||||
//! // somewhere else...
|
||||
//! BaseNode* oldCursor = builder.setCursor(node);
|
||||
//!
|
||||
//! builder.mul(x86::rax, 8); // [4]
|
||||
//!
|
||||
//! // Restore the cursor
|
||||
//! builder.setCursor(oldCursor);
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! The function above would actually emit the following:
|
||||
//!
|
||||
//! ```
|
||||
//! mov rax, rcx ; [1] Patched at the beginning.
|
||||
//! mul rax, 8 ; [4] Injected.
|
||||
//! add rax, rdx ; [2] Followed [1] initially.
|
||||
//! shr rax, 3 ; [3] Follows [2].
|
||||
//! ```
|
||||
class ASMJIT_VIRTAPI Builder
|
||||
: public BaseBuilder,
|
||||
public EmitterImplicitT<Builder> {
|
||||
public:
|
||||
ASMJIT_NONCOPYABLE(Builder)
|
||||
typedef BaseBuilder Base;
|
||||
|
||||
//! \name Construction & Destruction
|
||||
//! \{
|
||||
|
||||
ASMJIT_API explicit Builder(CodeHolder* code = nullptr) noexcept;
|
||||
ASMJIT_API virtual ~Builder() noexcept;
|
||||
|
||||
//! \}
|
||||
|
||||
//! \name Events
|
||||
//! \{
|
||||
|
||||
ASMJIT_API Error onAttach(CodeHolder* code) noexcept override;
|
||||
ASMJIT_API Error onDetach(CodeHolder* code) noexcept override;
|
||||
|
||||
//! \}
|
||||
|
||||
//! \name Finalize
|
||||
//! \{
|
||||
|
||||
ASMJIT_API Error finalize() override;
|
||||
|
||||
//! \}
|
||||
};
|
||||
|
||||
//! \}
|
||||
|
||||
ASMJIT_END_SUB_NAMESPACE
|
||||
|
||||
#endif // !ASMJIT_NO_BUILDER
|
||||
#endif // ASMJIT_X86_X86BUILDER_H_INCLUDED
|
||||
61
lib/lepton/asmjit/x86/x86compiler.cpp
Normal file
61
lib/lepton/asmjit/x86/x86compiler.cpp
Normal file
@ -0,0 +1,61 @@
|
||||
// This file is part of AsmJit project <https://asmjit.com>
|
||||
//
|
||||
// See asmjit.h or LICENSE.md for license and copyright information
|
||||
// SPDX-License-Identifier: Zlib
|
||||
|
||||
#include "../core/api-build_p.h"
|
||||
#if !defined(ASMJIT_NO_X86) && !defined(ASMJIT_NO_COMPILER)
|
||||
|
||||
#include "../x86/x86assembler.h"
|
||||
#include "../x86/x86compiler.h"
|
||||
#include "../x86/x86instapi_p.h"
|
||||
#include "../x86/x86rapass_p.h"
|
||||
|
||||
ASMJIT_BEGIN_SUB_NAMESPACE(x86)
|
||||
|
||||
// x86::Compiler - Construction & Destruction
|
||||
// ==========================================
|
||||
|
||||
// Sets up an X86/X64 compiler and attaches it to `code` when one is given.
Compiler::Compiler(CodeHolder* code) noexcept : BaseCompiler() {
  // One bit per architecture this emitter accepts.
  const uint64_t x86Bit = uint64_t(1) << uint32_t(Arch::kX86);
  const uint64_t x64Bit = uint64_t(1) << uint32_t(Arch::kX64);

  _archMask = x86Bit | x64Bit;
  assignEmitterFuncs(this);

  // Attaching is optional - a null `code` leaves the compiler detached.
  if (code != nullptr) {
    code->attach(this);
  }
}

// Nothing to release here.
Compiler::~Compiler() noexcept {}
|
||||
|
||||
// x86::Compiler - Events
|
||||
// ======================
|
||||
|
||||
// Attach event: lets the base compiler attach first, then registers the X86 register
// allocation pass. If the pass cannot be added the compiler is detached again.
Error Compiler::onAttach(CodeHolder* code) noexcept {
  ASMJIT_PROPAGATE(Base::onAttach(code));

  const Error passError = addPassT<X86RAPass>();
  if (!passError)
    return kErrorOk;

  // Roll back the attach and report why the pass could not be added.
  onDetach(code);
  return passError;
}
|
||||
|
||||
// Detach event - forwarded unchanged to the base compiler.
Error Compiler::onDetach(CodeHolder* code) noexcept {
  const Error result = Base::onDetach(code);
  return result;
}
|
||||
|
||||
// x86::Compiler - Finalize
|
||||
// ========================
|
||||
|
||||
// Runs all registered passes and then serializes the result through a temporary
// x86::Assembler created on the same code object.
Error Compiler::finalize() {
  // Stop early when any pass fails.
  ASMJIT_PROPAGATE(runPasses());

  // The temporary assembler inherits this compiler's encoding and diagnostic options.
  Assembler assembler(_code);
  assembler.addEncodingOptions(encodingOptions());
  assembler.addDiagnosticOptions(diagnosticOptions());

  const Error serializeResult = serializeTo(&assembler);
  return serializeResult;
}
|
||||
|
||||
ASMJIT_END_SUB_NAMESPACE
|
||||
|
||||
#endif // !ASMJIT_NO_X86 && !ASMJIT_NO_COMPILER
|
||||
721
lib/lepton/asmjit/x86/x86compiler.h
Normal file
721
lib/lepton/asmjit/x86/x86compiler.h
Normal file
@ -0,0 +1,721 @@
|
||||
// This file is part of AsmJit project <https://asmjit.com>
|
||||
//
|
||||
// See asmjit.h or LICENSE.md for license and copyright information
|
||||
// SPDX-License-Identifier: Zlib
|
||||
|
||||
#ifndef ASMJIT_X86_X86COMPILER_H_INCLUDED
|
||||
#define ASMJIT_X86_X86COMPILER_H_INCLUDED
|
||||
|
||||
#include "../core/api-config.h"
|
||||
#ifndef ASMJIT_NO_COMPILER
|
||||
|
||||
#include "../core/compiler.h"
|
||||
#include "../core/type.h"
|
||||
#include "../x86/x86emitter.h"
|
||||
|
||||
ASMJIT_BEGIN_SUB_NAMESPACE(x86)
|
||||
|
||||
//! \addtogroup asmjit_x86
|
||||
//! \{
|
||||
|
||||
//! X86/X64 compiler implementation.
|
||||
//!
|
||||
//! ### Compiler Basics
|
||||
//!
|
||||
//! The first \ref x86::Compiler example shows how to generate a function that simply returns an integer value. It's
|
||||
//! an analogy to the first Assembler example:
|
||||
//!
|
||||
//! ```
|
||||
//! #include <asmjit/x86.h>
|
||||
//! #include <stdio.h>
|
||||
//!
|
||||
//! using namespace asmjit;
|
||||
//!
|
||||
//! // Signature of the generated function.
|
||||
//! typedef int (*Func)(void);
|
||||
//!
|
||||
//! int main() {
|
||||
//! JitRuntime rt; // Runtime specialized for JIT code execution.
|
||||
//! CodeHolder code; // Holds code and relocation information.
|
||||
//!
|
||||
//! code.init(rt.environment()); // Initialize code to match the JIT environment.
|
||||
//! x86::Compiler cc(&code); // Create and attach x86::Compiler to code.
|
||||
//!
|
||||
//! cc.addFunc(FuncSignatureT<int>());// Begin a function of `int fn(void)` signature.
|
||||
//!
|
||||
//! x86::Gp vReg = cc.newGpd(); // Create a 32-bit general purpose register.
|
||||
//! cc.mov(vReg, 1); // Move one to our virtual register `vReg`.
|
||||
//! cc.ret(vReg); // Return `vReg` from the function.
|
||||
//!
|
||||
//! cc.endFunc(); // End of the function body.
|
||||
//! cc.finalize(); // Translate and assemble the whole 'cc' content.
|
||||
//! // ----> x86::Compiler is no longer needed from here and can be destroyed <----
|
||||
//!
|
||||
//! Func fn;
|
||||
//! Error err = rt.add(&fn, &code); // Add the generated code to the runtime.
|
||||
//! if (err) return 1; // Handle a possible error returned by AsmJit.
|
||||
//! // ----> CodeHolder is no longer needed from here and can be destroyed <----
|
||||
//!
|
||||
//! int result = fn(); // Execute the generated code.
|
||||
//! printf("%d\n", result); // Print the resulting "1".
|
||||
//!
|
||||
//! rt.release(fn); // Explicitly remove the function from the runtime.
|
||||
//! return 0;
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! The \ref BaseCompiler::addFunc() and \ref BaseCompiler::endFunc() functions are used to define the function and
|
||||
//! its end. Both must be called per function, but the body doesn't have to be generated in sequence. An example of
|
||||
//! generating two functions will be shown later. The next example shows more complicated code that contains a loop
|
||||
//! and generates a simple memory copy function that uses `uint32_t` items:
|
||||
//!
|
||||
//! ```
|
||||
//! #include <asmjit/x86.h>
|
||||
//! #include <stdio.h>
|
||||
//!
|
||||
//! using namespace asmjit;
|
||||
//!
|
||||
//! // Signature of the generated function.
|
||||
//! typedef void (*MemCpy32)(uint32_t* dst, const uint32_t* src, size_t count);
|
||||
//!
|
||||
//! int main() {
|
||||
//! JitRuntime rt; // Runtime specialized for JIT code execution.
|
||||
//! CodeHolder code; // Holds code and relocation information.
|
||||
//!
|
||||
//! code.init(rt.environment()); // Initialize code to match the JIT environment.
|
||||
//! x86::Compiler cc(&code); // Create and attach x86::Compiler to code.
|
||||
//!
|
||||
//! FuncNode* funcNode = cc.addFunc( // Begin the function of the following signature:
|
||||
//! FuncSignatureT<void, // Return value - void (no return value).
|
||||
//! uint32_t*, // 1st argument - uint32_t* (machine reg-size).
|
||||
//! const uint32_t*, // 2nd argument - const uint32_t* (machine reg-size).
|
||||
//! size_t>()); // 3rd argument - size_t (machine reg-size).
|
||||
//!
|
||||
//! Label L_Loop = cc.newLabel(); // Start of the loop.
|
||||
//! Label L_Exit = cc.newLabel(); // Used to exit early.
|
||||
//!
|
||||
//! x86::Gp dst = cc.newIntPtr("dst");// Create `dst` register (destination pointer).
|
||||
//! x86::Gp src = cc.newIntPtr("src");// Create `src` register (source pointer).
|
||||
//! x86::Gp i = cc.newUIntPtr("i"); // Create `i` register (loop counter).
|
||||
//!
|
||||
//! funcNode->setArg(0, dst); // Assign `dst` argument.
|
||||
//! funcNode->setArg(1, src); // Assign `src` argument.
|
||||
//! funcNode->setArg(2, i); // Assign `i` argument.
|
||||
//!
|
||||
//! cc.test(i, i); // Early exit if length is zero.
|
||||
//! cc.jz(L_Exit);
|
||||
//!
|
||||
//! cc.bind(L_Loop); // Bind the beginning of the loop here.
|
||||
//!
|
||||
//! x86::Gp tmp = cc.newInt32("tmp"); // Copy a single dword (4 bytes).
|
||||
//! cc.mov(tmp, x86::dword_ptr(src)); // Load DWORD from [src] address.
|
||||
//! cc.mov(x86::dword_ptr(dst), tmp); // Store DWORD to [dst] address.
|
||||
//!
|
||||
//! cc.add(src, 4); // Increment `src`.
|
||||
//! cc.add(dst, 4); // Increment `dst`.
|
||||
//!
|
||||
//! cc.dec(i); // Loop while `i` is non-zero.
|
||||
//! cc.jnz(L_Loop);
|
||||
//!
|
||||
//! cc.bind(L_Exit); // Label used by early exit.
|
||||
//! cc.endFunc(); // End of the function body.
|
||||
//!
|
||||
//! cc.finalize(); // Translate and assemble the whole 'cc' content.
|
||||
//! // ----> x86::Compiler is no longer needed from here and can be destroyed <----
|
||||
//!
|
||||
//! // Add the generated code to the runtime.
|
||||
//! MemCpy32 memcpy32;
|
||||
//! Error err = rt.add(&memcpy32, &code);
|
||||
//!
|
||||
//! // Handle a possible error returned by AsmJit.
|
||||
//! if (err)
|
||||
//! return 1;
|
||||
//! // ----> CodeHolder is no longer needed from here and can be destroyed <----
|
||||
//!
|
||||
//! // Test the generated code.
|
||||
//! uint32_t input[6] = { 1, 2, 3, 5, 8, 13 };
|
||||
//! uint32_t output[6];
|
||||
//! memcpy32(output, input, 6);
|
||||
//!
|
||||
//! for (uint32_t i = 0; i < 6; i++)
|
||||
//! printf("%u\n", output[i]);
|
||||
//!
|
||||
//! rt.release(memcpy32);
|
||||
//! return 0;
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! ### AVX and AVX-512
|
||||
//!
|
||||
//! AVX and AVX-512 code generation must be explicitly enabled via \ref FuncFrame to work properly. If it's not setup
|
||||
//! correctly then Prolog & Epilog would use SSE instead of AVX instructions to work with SIMD registers. In addition,
|
||||
//! Compiler requires AVX-512 to be explicitly enabled via \ref FuncFrame in order to use all 32 SIMD registers.
|
||||
//!
|
||||
//! ```
|
||||
//! #include <asmjit/x86.h>
|
||||
//! #include <stdio.h>
|
||||
//!
|
||||
//! using namespace asmjit;
|
||||
//!
|
||||
//! // Signature of the generated function.
|
||||
//! typedef void (*Func)(void*);
|
||||
//!
|
||||
//! int main() {
|
||||
//! JitRuntime rt; // Runtime specialized for JIT code execution.
|
||||
//! CodeHolder code; // Holds code and relocation information.
|
||||
//!
|
||||
//! code.init(rt.environment()); // Initialize code to match the JIT environment.
|
||||
//! x86::Compiler cc(&code); // Create and attach x86::Compiler to code.
|
||||
//!
|
||||
//! FuncNode* funcNode = cc.addFunc(FuncSignatureT<void, void*>());
|
||||
//!
|
||||
//! // Use the following to enable AVX and/or AVX-512.
|
||||
//! funcNode->frame().setAvxEnabled();
|
||||
//! funcNode->frame().setAvx512Enabled();
|
||||
//!
|
||||
//! // Do something with the input pointer.
|
||||
//! x86::Gp addr = cc.newIntPtr("addr");
|
||||
//! x86::Zmm vreg = cc.newZmm("vreg");
|
||||
//!
|
||||
//! funcNode->setArg(0, addr);
|
||||
//!
|
||||
//! cc.vmovdqu32(vreg, x86::ptr(addr));
|
||||
//! cc.vpaddq(vreg, vreg, vreg);
|
||||
//! cc.vmovdqu32(x86::ptr(addr), vreg);
|
||||
//!
|
||||
//! cc.endFunc(); // End of the function body.
|
||||
//! cc.finalize(); // Translate and assemble the whole 'cc' content.
|
||||
//! // ----> x86::Compiler is no longer needed from here and can be destroyed <----
|
||||
//!
|
||||
//! Func fn;
|
||||
//! Error err = rt.add(&fn, &code); // Add the generated code to the runtime.
|
||||
//! if (err) return 1; // Handle a possible error returned by AsmJit.
|
||||
//! // ----> CodeHolder is no longer needed from here and can be destroyed <----
|
||||
//!
|
||||
//! // Execute the generated code and print some output.
|
||||
//! uint64_t data[] = { 1, 2, 3, 4, 5, 6, 7, 8 };
|
||||
//! fn(data);
|
||||
//! printf("%llu\n", (unsigned long long)data[0]);
|
||||
//!
|
||||
//! rt.release(fn); // Explicitly remove the function from the runtime.
|
||||
//! return 0;
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! ### Recursive Functions
|
||||
//!
|
||||
//! It's possible to create more functions by using the same \ref x86::Compiler instance and make links between them.
|
||||
//! In such case it's important to keep the pointer to \ref FuncNode.
|
||||
//!
|
||||
//! The example below creates a simple Fibonacci function that calls itself recursively:
|
||||
//!
|
||||
//! ```
|
||||
//! #include <asmjit/x86.h>
|
||||
//! #include <stdio.h>
|
||||
//!
|
||||
//! using namespace asmjit;
|
||||
//!
|
||||
//! // Signature of the generated function.
|
||||
//! typedef uint32_t (*Fibonacci)(uint32_t x);
|
||||
//!
|
||||
//! int main() {
|
||||
//! JitRuntime rt; // Runtime specialized for JIT code execution.
|
||||
//! CodeHolder code; // Holds code and relocation information.
|
||||
//!
|
||||
//! code.init(rt.environment()); // Initialize code to match the JIT environment.
|
||||
//! x86::Compiler cc(&code); // Create and attach x86::Compiler to code.
|
||||
//!
|
||||
//! FuncNode* funcNode = cc.addFunc( // Begin of the Fibonacci function, addFunc()
|
||||
//! FuncSignatureT<int, int>()); // Returns a pointer to the FuncNode node.
|
||||
//!
|
||||
//! Label L_Exit = cc.newLabel(); // Exit label.
|
||||
//! x86::Gp x = cc.newUInt32(); // Function x argument.
|
||||
//! x86::Gp y = cc.newUInt32(); // Temporary.
|
||||
//!
|
||||
//! funcNode->setArg(0, x);
|
||||
//!
|
||||
//! cc.cmp(x, 3); // Return x if less than 3.
|
||||
//! cc.jb(L_Exit);
|
||||
//!
|
||||
//! cc.mov(y, x); // Make copy of the original x.
|
||||
//! cc.dec(x); // Decrease x.
|
||||
//!
|
||||
//! InvokeNode* invokeNode; // Function invocation:
|
||||
//! cc.invoke(&invokeNode, // - InvokeNode (output).
|
||||
//! funcNode->label(), // - Function address or Label.
|
||||
//! FuncSignatureT<int, int>()); // - Function signature.
|
||||
//!
|
||||
//! invokeNode->setArg(0, x); // Assign x as the first argument.
|
||||
//! invokeNode->setRet(0, x); // Assign x as a return value as well.
|
||||
//!
|
||||
//! cc.add(x, y); // Combine the return value with y.
|
||||
//!
|
||||
//! cc.bind(L_Exit);
|
||||
//! cc.ret(x); // Return x.
|
||||
//! cc.endFunc(); // End of the function body.
|
||||
//!
|
||||
//! cc.finalize(); // Translate and assemble the whole 'cc' content.
|
||||
//! // ----> x86::Compiler is no longer needed from here and can be destroyed <----
|
||||
//!
|
||||
//! Fibonacci fib;
|
||||
//! Error err = rt.add(&fib, &code); // Add the generated code to the runtime.
|
||||
//! if (err) return 1; // Handle a possible error returned by AsmJit.
|
||||
//! // ----> CodeHolder is no longer needed from here and can be destroyed <----
|
||||
//!
|
||||
//! // Test the generated code.
|
||||
//! printf("Fib(%u) -> %u\n", 8, fib(8));
|
||||
//!
|
||||
//! rt.release(fib);
|
||||
//! return 0;
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! ### Stack Management
|
||||
//!
|
||||
//! Function's stack-frame is managed automatically, which is used by the register allocator to spill virtual
|
||||
//! registers. It also provides an interface to allocate user-defined block of the stack, which can be used as
|
||||
//! a temporary storage by the generated function. In the following example a stack of 256 bytes size is allocated,
|
||||
//! filled by bytes starting from 0 to 255 and then iterated again to sum all the values.
|
||||
//!
|
||||
//! ```
|
||||
//! #include <asmjit/x86.h>
|
||||
//! #include <stdio.h>
|
||||
//!
|
||||
//! using namespace asmjit;
|
||||
//!
|
||||
//! // Signature of the generated function.
|
||||
//! typedef int (*Func)(void);
|
||||
//!
|
||||
//! int main() {
|
||||
//! JitRuntime rt; // Runtime specialized for JIT code execution.
|
||||
//! CodeHolder code; // Holds code and relocation information.
|
||||
//!
|
||||
//! code.init(rt.environment()); // Initialize code to match the JIT environment.
|
||||
//! x86::Compiler cc(&code); // Create and attach x86::Compiler to code.
|
||||
//!
|
||||
//! cc.addFunc(FuncSignatureT<int>());// Create a function that returns int.
|
||||
//!
|
||||
//! x86::Gp p = cc.newIntPtr("p");
|
||||
//! x86::Gp i = cc.newIntPtr("i");
|
||||
//!
|
||||
//! // Allocate 256 bytes on the stack aligned to 4 bytes.
|
||||
//! x86::Mem stack = cc.newStack(256, 4);
|
||||
//!
|
||||
//! x86::Mem stackIdx(stack); // Copy of stack with i added.
|
||||
//! stackIdx.setIndex(i); // stackIdx <- stack[i].
|
||||
//! stackIdx.setSize(1); // stackIdx <- byte ptr stack[i].
|
||||
//!
|
||||
//! // Load a stack address to `p`. This step is purely optional and shows
|
||||
//! // that `lea` is useful to load a memory operand's address (even absolute)
|
||||
//! // to a general purpose register.
|
||||
//! cc.lea(p, stack);
|
||||
//!
|
||||
//! // Clear i (xor is a C++ keyword, hence 'xor_' is used instead).
|
||||
//! cc.xor_(i, i);
|
||||
//!
|
||||
//! Label L1 = cc.newLabel();
|
||||
//! Label L2 = cc.newLabel();
|
||||
//!
|
||||
//! cc.bind(L1); // First loop, fill the stack.
|
||||
//! cc.mov(stackIdx, i.r8()); // stack[i] = uint8_t(i).
|
||||
//!
|
||||
//! cc.inc(i); // i++;
|
||||
//! cc.cmp(i, 256); // if (i < 256)
|
||||
//! cc.jb(L1); // goto L1;
|
||||
//!
|
||||
//! // Second loop, sum all bytes stored in `stack`.
|
||||
//! x86::Gp sum = cc.newInt32("sum");
|
||||
//! x86::Gp val = cc.newInt32("val");
|
||||
//!
|
||||
//! cc.xor_(i, i);
|
||||
//! cc.xor_(sum, sum);
|
||||
//!
|
||||
//! cc.bind(L2);
|
||||
//!
|
||||
//! cc.movzx(val, stackIdx); // val = uint32_t(stack[i]);
|
||||
//! cc.add(sum, val); // sum += val;
|
||||
//!
|
||||
//! cc.inc(i); // i++;
|
||||
//! cc.cmp(i, 256); // if (i < 256)
|
||||
//! cc.jb(L2); // goto L2;
|
||||
//!
|
||||
//! cc.ret(sum); // Return the `sum` of all values.
|
||||
//! cc.endFunc(); // End of the function body.
|
||||
//!
|
||||
//! cc.finalize(); // Translate and assemble the whole 'cc' content.
|
||||
//! // ----> x86::Compiler is no longer needed from here and can be destroyed <----
|
||||
//!
|
||||
//! Func func;
|
||||
//! Error err = rt.add(&func, &code); // Add the generated code to the runtime.
|
||||
//! if (err) return 1; // Handle a possible error returned by AsmJit.
|
||||
//! // ----> CodeHolder is no longer needed from here and can be destroyed <----
|
||||
//!
|
||||
//! printf("Func() -> %d\n", func()); // Test the generated code.
|
||||
//!
|
||||
//! rt.release(func);
|
||||
//! return 0;
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! ### Constant Pool
|
||||
//!
|
||||
//! Compiler provides two constant pools for a general purpose code generation:
|
||||
//!
|
||||
//! - Local constant pool - Part of \ref FuncNode, can be only used by a single function and added after the
|
||||
//! function epilog sequence (after `ret` instruction).
|
||||
//!
|
||||
//! - Global constant pool - Part of \ref BaseCompiler, flushed at the end of the generated code by \ref
|
||||
//! BaseEmitter::finalize().
|
||||
//!
|
||||
//! The example below illustrates how a built-in constant pool can be used:
|
||||
//!
|
||||
//! ```
|
||||
//! #include <asmjit/x86.h>
|
||||
//!
|
||||
//! using namespace asmjit;
|
||||
//!
|
||||
//! static void exampleUseOfConstPool(x86::Compiler& cc) {
|
||||
//! cc.addFunc(FuncSignatureT<int>());
|
||||
//!
|
||||
//! x86::Gp v0 = cc.newGpd("v0");
|
||||
//! x86::Gp v1 = cc.newGpd("v1");
|
||||
//!
|
||||
//! x86::Mem c0 = cc.newInt32Const(ConstPoolScope::kLocal, 200);
|
||||
//! x86::Mem c1 = cc.newInt32Const(ConstPoolScope::kLocal, 33);
|
||||
//!
|
||||
//! cc.mov(v0, c0);
|
||||
//! cc.mov(v1, c1);
|
||||
//! cc.add(v0, v1);
|
||||
//!
|
||||
//! cc.ret(v0);
|
||||
//! cc.endFunc();
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! ### Jump Tables
|
||||
//!
|
||||
//! x86::Compiler supports `jmp` instruction with reg/mem operand, which is a commonly used pattern to implement
|
||||
//! indirect jumps within a function, for example to implement a `switch()` statement in programming languages.
|
||||
//! By default AsmJit assumes that every basic block can be a possible jump target as it's unable to deduce targets
|
||||
//! from instruction's operands. This is a very pessimistic default that should be avoided if possible as it's costly
|
||||
//! and very unfriendly to liveness analysis and register allocation.
|
||||
//!
|
||||
//! Instead of relying on such pessimistic default behavior, let's use \ref JumpAnnotation to annotate a jump where
|
||||
//! all targets are known:
|
||||
//!
|
||||
//! ```
|
||||
//! #include <asmjit/x86.h>
|
||||
//!
|
||||
//! using namespace asmjit;
|
||||
//!
|
||||
//! static void exampleUseOfIndirectJump(x86::Compiler& cc) {
|
||||
//! FuncNode* funcNode = cc.addFunc(FuncSignatureT<float, float, float, uint32_t>(CallConvId::kHost));
|
||||
//!
|
||||
//! // Function arguments
|
||||
//! x86::Xmm a = cc.newXmmSs("a");
|
||||
//! x86::Xmm b = cc.newXmmSs("b");
|
||||
//! x86::Gp op = cc.newUInt32("op");
|
||||
//!
|
||||
//! x86::Gp target = cc.newIntPtr("target");
|
||||
//! x86::Gp offset = cc.newIntPtr("offset");
|
||||
//!
|
||||
//! Label L_Table = cc.newLabel();
|
||||
//! Label L_Add = cc.newLabel();
|
||||
//! Label L_Sub = cc.newLabel();
|
||||
//! Label L_Mul = cc.newLabel();
|
||||
//! Label L_Div = cc.newLabel();
|
||||
//! Label L_End = cc.newLabel();
|
||||
//!
|
||||
//! funcNode->setArg(0, a);
|
||||
//! funcNode->setArg(1, b);
|
||||
//! funcNode->setArg(2, op);
|
||||
//!
|
||||
//! // Jump annotation is a building block that allows to annotate all possible targets where `jmp()` can
|
||||
//! // jump. It then drives the CFG construction and liveness analysis, which impacts register allocation.
|
||||
//! JumpAnnotation* annotation = cc.newJumpAnnotation();
|
||||
//! annotation->addLabel(L_Add);
|
||||
//! annotation->addLabel(L_Sub);
|
||||
//! annotation->addLabel(L_Mul);
|
||||
//! annotation->addLabel(L_Div);
|
||||
//!
|
||||
//! // Most likely not the common indirect jump approach, but it
|
||||
//! // doesn't really matter how final address is calculated. The
|
||||
//! // most important part is using JumpAnnotation with `jmp()`.
|
||||
//! cc.lea(offset, x86::ptr(L_Table));
|
||||
//! if (cc.is64Bit())
|
||||
//! cc.movsxd(target, x86::dword_ptr(offset, op.cloneAs(offset), 2));
|
||||
//! else
|
||||
//! cc.mov(target, x86::dword_ptr(offset, op.cloneAs(offset), 2));
|
||||
//! cc.add(target, offset);
|
||||
//! cc.jmp(target, annotation);
|
||||
//!
|
||||
//! // Acts like a switch() statement in C.
|
||||
//! cc.bind(L_Add);
|
||||
//! cc.addss(a, b);
|
||||
//! cc.jmp(L_End);
|
||||
//!
|
||||
//! cc.bind(L_Sub);
|
||||
//! cc.subss(a, b);
|
||||
//! cc.jmp(L_End);
|
||||
//!
|
||||
//! cc.bind(L_Mul);
|
||||
//! cc.mulss(a, b);
|
||||
//! cc.jmp(L_End);
|
||||
//!
|
||||
//! cc.bind(L_Div);
|
||||
//! cc.divss(a, b);
|
||||
//!
|
||||
//! cc.bind(L_End);
|
||||
//! cc.ret(a);
|
||||
//!
|
||||
//! cc.endFunc();
|
||||
//!
|
||||
//! // Relative int32_t offsets of `L_XXX - L_Table`.
|
||||
//! cc.bind(L_Table);
|
||||
//! cc.embedLabelDelta(L_Add, L_Table, 4);
|
||||
//! cc.embedLabelDelta(L_Sub, L_Table, 4);
|
||||
//! cc.embedLabelDelta(L_Mul, L_Table, 4);
|
||||
//! cc.embedLabelDelta(L_Div, L_Table, 4);
|
||||
//! }
|
||||
//! ```
|
||||
class ASMJIT_VIRTAPI Compiler
|
||||
: public BaseCompiler,
|
||||
public EmitterExplicitT<Compiler> {
|
||||
public:
|
||||
ASMJIT_NONCOPYABLE(Compiler)
|
||||
typedef BaseCompiler Base;
|
||||
|
||||
//! \name Construction & Destruction
|
||||
//! \{
|
||||
|
||||
ASMJIT_API explicit Compiler(CodeHolder* code = nullptr) noexcept;
|
||||
ASMJIT_API virtual ~Compiler() noexcept;
|
||||
|
||||
//! \}
|
||||
|
||||
//! \name Virtual Registers
|
||||
//! \{
|
||||
|
||||
#ifndef ASMJIT_NO_LOGGING
|
||||
# define ASMJIT_NEW_REG_FMT(OUT, PARAM, FORMAT, ARGS) \
|
||||
_newRegFmt(&OUT, PARAM, FORMAT, ARGS)
|
||||
#else
|
||||
# define ASMJIT_NEW_REG_FMT(OUT, PARAM, FORMAT, ARGS) \
|
||||
DebugUtils::unused(FORMAT); \
|
||||
DebugUtils::unused(std::forward<Args>(args)...); \
|
||||
_newReg(&OUT, PARAM)
|
||||
#endif
|
||||
|
||||
// Defines two `FUNC` member overloads that create a new register object of type `REG` from a
// caller-supplied `TypeId`:
//   - FUNC(typeId)           - calls `_newReg()` with the address of the new register.
//   - FUNC(typeId, fmt, ...) - same, but routes `fmt` and its arguments through `ASMJIT_NEW_REG_FMT`.
// The register is constructed with `Globals::NoInit` and passed by address (`&reg`), the same way the
// other `_new*()` helpers in this class receive their output operand.
#define ASMJIT_NEW_REG_CUSTOM(FUNC, REG) \
  inline REG FUNC(TypeId typeId) { \
    REG reg(Globals::NoInit); \
    _newReg(&reg, typeId); \
    return reg; \
  } \
  \
  template<typename... Args> \
  inline REG FUNC(TypeId typeId, const char* fmt, Args&&... args) { \
    REG reg(Globals::NoInit); \
    ASMJIT_NEW_REG_FMT(reg, typeId, fmt, std::forward<Args>(args)...); \
    return reg; \
  }
|
||||
|
||||
// Defines two `FUNC` member overloads that create a new register object of type `REG` whose type is
// fixed to `TYPE_ID` at the macro expansion site:
//   - FUNC()         - calls `_newReg()` with the address of the new register.
//   - FUNC(fmt, ...) - same, but routes `fmt` and its arguments through `ASMJIT_NEW_REG_FMT`.
// The register is constructed with `Globals::NoInit` and passed by address (`&reg`), the same way the
// other `_new*()` helpers in this class receive their output operand.
#define ASMJIT_NEW_REG_TYPED(FUNC, REG, TYPE_ID) \
  inline REG FUNC() { \
    REG reg(Globals::NoInit); \
    _newReg(&reg, TYPE_ID); \
    return reg; \
  } \
  \
  template<typename... Args> \
  inline REG FUNC(const char* fmt, Args&&... args) { \
    REG reg(Globals::NoInit); \
    ASMJIT_NEW_REG_FMT(reg, TYPE_ID, fmt, std::forward<Args>(args)...); \
    return reg; \
  }
|
||||
|
||||
template<typename RegT>
|
||||
inline RegT newSimilarReg(const RegT& ref) {
|
||||
RegT reg(Globals::NoInit);
|
||||
_newReg(reg, ref);
|
||||
return reg;
|
||||
}
|
||||
|
||||
template<typename RegT, typename... Args>
|
||||
inline RegT newSimilarReg(const RegT& ref, const char* fmt, Args&&... args) {
|
||||
RegT reg(Globals::NoInit);
|
||||
ASMJIT_NEW_REG_FMT(reg, ref, fmt, std::forward<Args>(args)...);
|
||||
return reg;
|
||||
}
|
||||
|
||||
ASMJIT_NEW_REG_CUSTOM(newReg , Reg )
|
||||
ASMJIT_NEW_REG_CUSTOM(newGp , Gp )
|
||||
ASMJIT_NEW_REG_CUSTOM(newVec , Vec )
|
||||
ASMJIT_NEW_REG_CUSTOM(newK , KReg)
|
||||
|
||||
ASMJIT_NEW_REG_TYPED(newInt8 , Gp , TypeId::kInt8)
|
||||
ASMJIT_NEW_REG_TYPED(newUInt8 , Gp , TypeId::kUInt8)
|
||||
ASMJIT_NEW_REG_TYPED(newInt16 , Gp , TypeId::kInt16)
|
||||
ASMJIT_NEW_REG_TYPED(newUInt16 , Gp , TypeId::kUInt16)
|
||||
ASMJIT_NEW_REG_TYPED(newInt32 , Gp , TypeId::kInt32)
|
||||
ASMJIT_NEW_REG_TYPED(newUInt32 , Gp , TypeId::kUInt32)
|
||||
ASMJIT_NEW_REG_TYPED(newInt64 , Gp , TypeId::kInt64)
|
||||
ASMJIT_NEW_REG_TYPED(newUInt64 , Gp , TypeId::kUInt64)
|
||||
ASMJIT_NEW_REG_TYPED(newIntPtr , Gp , TypeId::kIntPtr)
|
||||
ASMJIT_NEW_REG_TYPED(newUIntPtr, Gp , TypeId::kUIntPtr)
|
||||
|
||||
ASMJIT_NEW_REG_TYPED(newGpb , Gp , TypeId::kUInt8)
|
||||
ASMJIT_NEW_REG_TYPED(newGpw , Gp , TypeId::kUInt16)
|
||||
ASMJIT_NEW_REG_TYPED(newGpd , Gp , TypeId::kUInt32)
|
||||
ASMJIT_NEW_REG_TYPED(newGpq , Gp , TypeId::kUInt64)
|
||||
ASMJIT_NEW_REG_TYPED(newGpz , Gp , TypeId::kUIntPtr)
|
||||
ASMJIT_NEW_REG_TYPED(newXmm , Xmm , TypeId::kInt32x4)
|
||||
ASMJIT_NEW_REG_TYPED(newXmmSs , Xmm , TypeId::kFloat32x1)
|
||||
ASMJIT_NEW_REG_TYPED(newXmmSd , Xmm , TypeId::kFloat64x1)
|
||||
ASMJIT_NEW_REG_TYPED(newXmmPs , Xmm , TypeId::kFloat32x4)
|
||||
ASMJIT_NEW_REG_TYPED(newXmmPd , Xmm , TypeId::kFloat64x2)
|
||||
ASMJIT_NEW_REG_TYPED(newYmm , Ymm , TypeId::kInt32x8)
|
||||
ASMJIT_NEW_REG_TYPED(newYmmPs , Ymm , TypeId::kFloat32x8)
|
||||
ASMJIT_NEW_REG_TYPED(newYmmPd , Ymm , TypeId::kFloat64x4)
|
||||
ASMJIT_NEW_REG_TYPED(newZmm , Zmm , TypeId::kInt32x16)
|
||||
ASMJIT_NEW_REG_TYPED(newZmmPs , Zmm , TypeId::kFloat32x16)
|
||||
ASMJIT_NEW_REG_TYPED(newZmmPd , Zmm , TypeId::kFloat64x8)
|
||||
ASMJIT_NEW_REG_TYPED(newMm , Mm , TypeId::kMmx64)
|
||||
ASMJIT_NEW_REG_TYPED(newKb , KReg, TypeId::kMask8)
|
||||
ASMJIT_NEW_REG_TYPED(newKw , KReg, TypeId::kMask16)
|
||||
ASMJIT_NEW_REG_TYPED(newKd , KReg, TypeId::kMask32)
|
||||
ASMJIT_NEW_REG_TYPED(newKq , KReg, TypeId::kMask64)
|
||||
|
||||
#undef ASMJIT_NEW_REG_TYPED
|
||||
#undef ASMJIT_NEW_REG_CUSTOM
|
||||
#undef ASMJIT_NEW_REG_FMT
|
||||
|
||||
//! \}
|
||||
|
||||
//! \name Stack
|
||||
//! \{
|
||||
|
||||
//! Creates a new memory chunk allocated on the current function's stack.
|
||||
inline Mem newStack(uint32_t size, uint32_t alignment, const char* name = nullptr) {
|
||||
Mem m(Globals::NoInit);
|
||||
_newStack(&m, size, alignment, name);
|
||||
return m;
|
||||
}
|
||||
|
||||
//! \}
|
||||
|
||||
//! \name Constants
|
||||
//! \{
|
||||
|
||||
//! Put data to a constant-pool and get a memory reference to it.
|
||||
inline Mem newConst(ConstPoolScope scope, const void* data, size_t size) {
|
||||
Mem m(Globals::NoInit);
|
||||
_newConst(&m, scope, data, size);
|
||||
return m;
|
||||
}
|
||||
|
||||
//! Put a BYTE `val` to a constant-pool.
|
||||
inline Mem newByteConst(ConstPoolScope scope, uint8_t val) noexcept { return newConst(scope, &val, 1); }
|
||||
//! Put a WORD `val` to a constant-pool.
|
||||
inline Mem newWordConst(ConstPoolScope scope, uint16_t val) noexcept { return newConst(scope, &val, 2); }
|
||||
//! Put a DWORD `val` to a constant-pool.
|
||||
inline Mem newDWordConst(ConstPoolScope scope, uint32_t val) noexcept { return newConst(scope, &val, 4); }
|
||||
//! Put a QWORD `val` to a constant-pool.
|
||||
inline Mem newQWordConst(ConstPoolScope scope, uint64_t val) noexcept { return newConst(scope, &val, 8); }
|
||||
|
||||
//! Put a WORD `val` to a constant-pool.
|
||||
inline Mem newInt16Const(ConstPoolScope scope, int16_t val) noexcept { return newConst(scope, &val, 2); }
|
||||
//! Put a WORD `val` to a constant-pool.
|
||||
inline Mem newUInt16Const(ConstPoolScope scope, uint16_t val) noexcept { return newConst(scope, &val, 2); }
|
||||
//! Put a DWORD `val` to a constant-pool.
|
||||
inline Mem newInt32Const(ConstPoolScope scope, int32_t val) noexcept { return newConst(scope, &val, 4); }
|
||||
//! Put a DWORD `val` to a constant-pool.
|
||||
inline Mem newUInt32Const(ConstPoolScope scope, uint32_t val) noexcept { return newConst(scope, &val, 4); }
|
||||
//! Put a QWORD `val` to a constant-pool.
|
||||
inline Mem newInt64Const(ConstPoolScope scope, int64_t val) noexcept { return newConst(scope, &val, 8); }
|
||||
//! Put a QWORD `val` to a constant-pool.
|
||||
inline Mem newUInt64Const(ConstPoolScope scope, uint64_t val) noexcept { return newConst(scope, &val, 8); }
|
||||
|
||||
//! Put a SP-FP `val` to a constant-pool.
|
||||
inline Mem newFloatConst(ConstPoolScope scope, float val) noexcept { return newConst(scope, &val, 4); }
|
||||
//! Put a DP-FP `val` to a constant-pool.
|
||||
inline Mem newDoubleConst(ConstPoolScope scope, double val) noexcept { return newConst(scope, &val, 8); }
|
||||
|
||||
//! \}
|
||||
|
||||
//! \name Instruction Options
|
||||
//! \{
|
||||
|
||||
//! Force the compiler to not follow the conditional or unconditional jump.
|
||||
inline Compiler& unfollow() noexcept { addInstOptions(InstOptions::kUnfollow); return *this; }
|
||||
//! Tell the compiler that the destination variable will be overwritten.
|
||||
inline Compiler& overwrite() noexcept { addInstOptions(InstOptions::kOverwrite); return *this; }
|
||||
|
||||
//! \}
|
||||
|
||||
//! \name Function Call & Ret Intrinsics
|
||||
//! \{
|
||||
|
||||
//! Invoke a function call without `target` type enforcement.
|
||||
inline Error invoke_(InvokeNode** out, const Operand_& target, const FuncSignature& signature) {
|
||||
return addInvokeNode(out, Inst::kIdCall, target, signature);
|
||||
}
|
||||
|
||||
//! Invoke a function call of the given `target` and `signature` and store the added node to `out`.
|
||||
//!
|
||||
//! Creates a new \ref InvokeNode, initializes all the necessary members to match the given function `signature`,
|
||||
  //! adds the node to the compiler, and stores its pointer to `out`. The operation is atomic: if anything fails,
|
||||
  //! `nullptr` is stored in `out` and an error code is returned.
|
||||
inline Error invoke(InvokeNode** out, const Gp& target, const FuncSignature& signature) { return invoke_(out, target, signature); }
|
||||
//! \overload
|
||||
inline Error invoke(InvokeNode** out, const Mem& target, const FuncSignature& signature) { return invoke_(out, target, signature); }
|
||||
//! \overload
|
||||
inline Error invoke(InvokeNode** out, const Label& target, const FuncSignature& signature) { return invoke_(out, target, signature); }
|
||||
//! \overload
|
||||
inline Error invoke(InvokeNode** out, const Imm& target, const FuncSignature& signature) { return invoke_(out, target, signature); }
|
||||
//! \overload
|
||||
inline Error invoke(InvokeNode** out, uint64_t target, const FuncSignature& signature) { return invoke_(out, Imm(int64_t(target)), signature); }
|
||||
|
||||
//! Return from function.
|
||||
inline Error ret() { return addRet(Operand(), Operand()); }
|
||||
//! \overload
|
||||
inline Error ret(const BaseReg& o0) { return addRet(o0, Operand()); }
|
||||
//! \overload
|
||||
inline Error ret(const BaseReg& o0, const BaseReg& o1) { return addRet(o0, o1); }
|
||||
|
||||
//! \}
|
||||
|
||||
//! \name Jump Tables Support
|
||||
//! \{
|
||||
|
||||
using EmitterExplicitT<Compiler>::jmp;
|
||||
|
||||
//! Adds a jump to the given `target` with the provided jump `annotation`.
|
||||
inline Error jmp(const BaseReg& target, JumpAnnotation* annotation) { return emitAnnotatedJump(Inst::kIdJmp, target, annotation); }
|
||||
//! \overload
|
||||
inline Error jmp(const BaseMem& target, JumpAnnotation* annotation) { return emitAnnotatedJump(Inst::kIdJmp, target, annotation); }
|
||||
|
||||
//! \}
|
||||
|
||||
//! \name Events
|
||||
//! \{
|
||||
|
||||
ASMJIT_API Error onAttach(CodeHolder* code) noexcept override;
|
||||
ASMJIT_API Error onDetach(CodeHolder* code) noexcept override;
|
||||
|
||||
//! \}
|
||||
|
||||
//! \name Finalize
|
||||
//! \{
|
||||
|
||||
ASMJIT_API Error finalize() override;
|
||||
|
||||
//! \}
|
||||
};
|
||||
|
||||
//! \}
|
||||
|
||||
ASMJIT_END_SUB_NAMESPACE
|
||||
|
||||
#endif // !ASMJIT_NO_COMPILER
|
||||
#endif // ASMJIT_X86_X86COMPILER_H_INCLUDED
|
||||
619
lib/lepton/asmjit/x86/x86emithelper.cpp
Normal file
619
lib/lepton/asmjit/x86/x86emithelper.cpp
Normal file
@ -0,0 +1,619 @@
|
||||
// This file is part of AsmJit project <https://asmjit.com>
|
||||
//
|
||||
// See asmjit.h or LICENSE.md for license and copyright information
|
||||
// SPDX-License-Identifier: Zlib
|
||||
|
||||
#include "../core/api-build_p.h"
|
||||
#if !defined(ASMJIT_NO_X86)
|
||||
|
||||
#include "../core/formatter.h"
|
||||
#include "../core/funcargscontext_p.h"
|
||||
#include "../core/string.h"
|
||||
#include "../core/support.h"
|
||||
#include "../core/type.h"
|
||||
#include "../core/radefs_p.h"
|
||||
#include "../x86/x86emithelper_p.h"
|
||||
#include "../x86/x86emitter.h"
|
||||
#include "../x86/x86formatter_p.h"
|
||||
#include "../x86/x86instapi_p.h"
|
||||
|
||||
ASMJIT_BEGIN_SUB_NAMESPACE(x86)
|
||||
|
||||
// x86::EmitHelper - Utilities
|
||||
// ===========================
|
||||
|
||||
static inline uint32_t getXmmMovInst(const FuncFrame& frame) {
|
||||
bool avx = frame.isAvxEnabled();
|
||||
bool aligned = frame.hasAlignedVecSR();
|
||||
|
||||
return aligned ? (avx ? Inst::kIdVmovaps : Inst::kIdMovaps)
|
||||
: (avx ? Inst::kIdVmovups : Inst::kIdMovups);
|
||||
}
|
||||
|
||||
//! Converts `size` to a 'kmov?' instruction.
|
||||
static inline uint32_t kmovInstFromSize(uint32_t size) noexcept {
|
||||
switch (size) {
|
||||
case 1: return Inst::kIdKmovb;
|
||||
case 2: return Inst::kIdKmovw;
|
||||
case 4: return Inst::kIdKmovd;
|
||||
case 8: return Inst::kIdKmovq;
|
||||
default: return Inst::kIdNone;
|
||||
}
|
||||
}
|
||||
|
||||
//! Packs a `dst` / `src` TypeId pair into a single integer so a cast can be compared with one `==`.
//!
//! `dst` is shifted left by 8 bits and OR-ed with `src`.
static inline uint32_t makeCastOp(TypeId dst, TypeId src) noexcept {
  const uint32_t dstBits = uint32_t(dst) << 8;
  const uint32_t srcBits = uint32_t(src);

  return dstBits | srcBits;
}
|
||||
|
||||
// x86::EmitHelper - Emit Reg Move
|
||||
// ===============================
|
||||
|
||||
ASMJIT_FAVOR_SIZE Error EmitHelper::emitRegMove(
|
||||
const Operand_& dst_,
|
||||
const Operand_& src_, TypeId typeId, const char* comment) {
|
||||
|
||||
// Invalid or abstract TypeIds are not allowed.
|
||||
ASMJIT_ASSERT(TypeUtils::isValid(typeId) && !TypeUtils::isAbstract(typeId));
|
||||
|
||||
Operand dst(dst_);
|
||||
Operand src(src_);
|
||||
|
||||
InstId instId = Inst::kIdNone;
|
||||
uint32_t memFlags = 0;
|
||||
uint32_t overrideMemSize = 0;
|
||||
|
||||
enum MemFlags : uint32_t {
|
||||
kDstMem = 0x1,
|
||||
kSrcMem = 0x2
|
||||
};
|
||||
|
||||
// Detect memory operands and patch them to have the same size as the register. BaseCompiler always sets memory size
|
||||
// of allocs and spills, so it shouldn't be really necessary, however, after this function was separated from Compiler
|
||||
// it's better to make sure that the size is always specified, as we can use 'movzx' and 'movsx' that rely on it.
|
||||
if (dst.isMem()) { memFlags |= kDstMem; dst.as<Mem>().setSize(src.size()); }
|
||||
if (src.isMem()) { memFlags |= kSrcMem; src.as<Mem>().setSize(dst.size()); }
|
||||
|
||||
switch (typeId) {
|
||||
case TypeId::kInt8:
|
||||
case TypeId::kUInt8:
|
||||
case TypeId::kInt16:
|
||||
case TypeId::kUInt16:
|
||||
// Special case - 'movzx' load.
|
||||
if (memFlags & kSrcMem) {
|
||||
instId = Inst::kIdMovzx;
|
||||
dst.setSignature(Reg::signatureOfT<RegType::kX86_Gpd>());
|
||||
break;
|
||||
}
|
||||
|
||||
if (!memFlags) {
|
||||
// Change both destination and source registers to GPD (safer, no dependencies).
|
||||
dst.setSignature(Reg::signatureOfT<RegType::kX86_Gpd>());
|
||||
src.setSignature(Reg::signatureOfT<RegType::kX86_Gpd>());
|
||||
}
|
||||
ASMJIT_FALLTHROUGH;
|
||||
|
||||
case TypeId::kInt32:
|
||||
case TypeId::kUInt32:
|
||||
case TypeId::kInt64:
|
||||
case TypeId::kUInt64:
|
||||
instId = Inst::kIdMov;
|
||||
break;
|
||||
|
||||
case TypeId::kMmx32:
|
||||
instId = Inst::kIdMovd;
|
||||
if (memFlags) break;
|
||||
ASMJIT_FALLTHROUGH;
|
||||
|
||||
case TypeId::kMmx64 : instId = Inst::kIdMovq ; break;
|
||||
case TypeId::kMask8 : instId = Inst::kIdKmovb; break;
|
||||
case TypeId::kMask16: instId = Inst::kIdKmovw; break;
|
||||
case TypeId::kMask32: instId = Inst::kIdKmovd; break;
|
||||
case TypeId::kMask64: instId = Inst::kIdKmovq; break;
|
||||
|
||||
default: {
|
||||
TypeId scalarTypeId = TypeUtils::scalarOf(typeId);
|
||||
if (TypeUtils::isVec32(typeId) && memFlags) {
|
||||
overrideMemSize = 4;
|
||||
if (scalarTypeId == TypeId::kFloat32)
|
||||
instId = _avxEnabled ? Inst::kIdVmovss : Inst::kIdMovss;
|
||||
else
|
||||
instId = _avxEnabled ? Inst::kIdVmovd : Inst::kIdMovd;
|
||||
break;
|
||||
}
|
||||
|
||||
if (TypeUtils::isVec64(typeId) && memFlags) {
|
||||
overrideMemSize = 8;
|
||||
if (scalarTypeId == TypeId::kFloat64)
|
||||
instId = _avxEnabled ? Inst::kIdVmovsd : Inst::kIdMovsd;
|
||||
else
|
||||
instId = _avxEnabled ? Inst::kIdVmovq : Inst::kIdMovq;
|
||||
break;
|
||||
}
|
||||
|
||||
if (scalarTypeId == TypeId::kFloat32)
|
||||
instId = _avxEnabled ? Inst::kIdVmovaps : Inst::kIdMovaps;
|
||||
else if (scalarTypeId == TypeId::kFloat64)
|
||||
instId = _avxEnabled ? Inst::kIdVmovapd : Inst::kIdMovapd;
|
||||
else if (!_avx512Enabled)
|
||||
instId = _avxEnabled ? Inst::kIdVmovdqa : Inst::kIdMovdqa;
|
||||
else
|
||||
instId = Inst::kIdVmovdqa32;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!instId)
|
||||
return DebugUtils::errored(kErrorInvalidState);
|
||||
|
||||
if (overrideMemSize) {
|
||||
if (dst.isMem()) dst.as<Mem>().setSize(overrideMemSize);
|
||||
if (src.isMem()) src.as<Mem>().setSize(overrideMemSize);
|
||||
}
|
||||
|
||||
_emitter->setInlineComment(comment);
|
||||
return _emitter->emit(instId, dst, src);
|
||||
}
|
||||
|
||||
// x86::EmitHelper - Emit Arg Move
|
||||
// ===============================
|
||||
|
||||
// Emits a single instruction that moves (and converts, when the types differ) a function argument from `src_`
// into the register `dst_`.
//
//   - `dst_`      - Destination register.
//   - `dstTypeId` - Type of the destination; when `TypeId::kVoid` it's deduced from the type of `dst_`.
//   - `src_`      - Source operand (register or memory).
//   - `srcTypeId` - Type of the source.
//   - `comment`   - Inline comment attached to the emitted instruction (passed to `setInlineComment()`).
//
// Returns the result of `emit()`, or `kErrorInvalidState` if no instruction matches the `dst`/`src` combination.
ASMJIT_FAVOR_SIZE Error EmitHelper::emitArgMove(
  const BaseReg& dst_, TypeId dstTypeId,
  const Operand_& src_, TypeId srcTypeId, const char* comment) {

  // Deduce optional `dstTypeId`, which may be `TypeId::kVoid` in some cases.
  if (dstTypeId == TypeId::kVoid) {
    const ArchTraits& archTraits = ArchTraits::byArch(_emitter->arch());
    dstTypeId = archTraits.regTypeToTypeId(dst_.type());
  }

  // Invalid or abstract TypeIds are not allowed.
  ASMJIT_ASSERT(TypeUtils::isValid(dstTypeId) && !TypeUtils::isAbstract(dstTypeId));
  ASMJIT_ASSERT(TypeUtils::isValid(srcTypeId) && !TypeUtils::isAbstract(srcTypeId));

  // Local copies - their signatures are patched below to match the selected instruction.
  Reg dst(dst_.as<Reg>());
  Operand src(src_);

  uint32_t dstSize = TypeUtils::sizeOf(dstTypeId);
  uint32_t srcSize = TypeUtils::sizeOf(srcTypeId);

  InstId instId = Inst::kIdNone;

  // Not a real loop, just 'break' is nicer than 'goto'.
  for (;;) {
    if (TypeUtils::isInt(dstTypeId)) {
      if (TypeUtils::isInt(srcTypeId)) {
        instId = Inst::kIdMovsx;
        uint32_t castOp = makeCastOp(dstTypeId, srcTypeId);

        // Sign extend by using 'movsx'.
        if (castOp == makeCastOp(TypeId::kInt16, TypeId::kInt8 ) ||
            castOp == makeCastOp(TypeId::kInt32, TypeId::kInt8 ) ||
            castOp == makeCastOp(TypeId::kInt32, TypeId::kInt16) ||
            castOp == makeCastOp(TypeId::kInt64, TypeId::kInt8 ) ||
            castOp == makeCastOp(TypeId::kInt64, TypeId::kInt16))
          break;

        // Sign extend by using 'movsxd'.
        instId = Inst::kIdMovsxd;
        if (castOp == makeCastOp(TypeId::kInt64, TypeId::kInt32))
          break;
      }

      if (TypeUtils::isInt(srcTypeId) || src_.isMem()) {
        // Zero extend by using 'movzx' or 'mov'.
        if (dstSize <= 4 && srcSize < 4) {
          instId = Inst::kIdMovzx;
          dst.setSignature(Reg::signatureOfT<RegType::kX86_Gpd>());
        }
        else {
          // We should have caught all possibilities where `srcSize` is less than 4, so we don't have to worry
          // about 'movzx' anymore. Minimum size is enough to determine if we want 32-bit or 64-bit move.
          instId = Inst::kIdMov;
          srcSize = Support::min(srcSize, dstSize);

          dst.setSignature(srcSize == 4 ? Reg::signatureOfT<RegType::kX86_Gpd>()
                                        : Reg::signatureOfT<RegType::kX86_Gpq>());
          if (src.isReg())
            src.setSignature(dst.signature());
        }
        break;
      }

      // NOTE: The previous branch caught all memory sources, from here it's always register to register conversion,
      // so catch the remaining cases.
      srcSize = Support::min(srcSize, dstSize);

      if (TypeUtils::isMmx(srcTypeId)) {
        // 64-bit move.
        instId = Inst::kIdMovq;
        if (srcSize == 8)
          break;

        // 32-bit move.
        instId = Inst::kIdMovd;
        dst.setSignature(Reg::signatureOfT<RegType::kX86_Gpd>());
        break;
      }

      if (TypeUtils::isMask(srcTypeId)) {
        instId = kmovInstFromSize(srcSize);
        dst.setSignature(srcSize <= 4 ? Reg::signatureOfT<RegType::kX86_Gpd>()
                                      : Reg::signatureOfT<RegType::kX86_Gpq>());
        break;
      }

      if (TypeUtils::isVec(srcTypeId)) {
        // 64-bit move.
        instId = _avxEnabled ? Inst::kIdVmovq : Inst::kIdMovq;
        if (srcSize == 8)
          break;

        // 32-bit move.
        instId = _avxEnabled ? Inst::kIdVmovd : Inst::kIdMovd;
        dst.setSignature(Reg::signatureOfT<RegType::kX86_Gpd>());
        break;
      }
    }

    if (TypeUtils::isMmx(dstTypeId)) {
      instId = Inst::kIdMovq;
      srcSize = Support::min(srcSize, dstSize);

      if (TypeUtils::isInt(srcTypeId) || src.isMem()) {
        // 64-bit move.
        if (srcSize == 8)
          break;

        // 32-bit move.
        instId = Inst::kIdMovd;
        if (src.isReg())
          src.setSignature(Reg::signatureOfT<RegType::kX86_Gpd>());
        break;
      }

      if (TypeUtils::isMmx(srcTypeId))
        break;

      // This will hurt if AVX is enabled.
      instId = Inst::kIdMovdq2q;
      if (TypeUtils::isVec(srcTypeId))
        break;
    }

    if (TypeUtils::isMask(dstTypeId)) {
      srcSize = Support::min(srcSize, dstSize);

      if (TypeUtils::isInt(srcTypeId) || TypeUtils::isMask(srcTypeId) || src.isMem()) {
        instId = kmovInstFromSize(srcSize);
        if (Reg::isGp(src) && srcSize <= 4)
          src.setSignature(Reg::signatureOfT<RegType::kX86_Gpd>());
        break;
      }
    }

    if (TypeUtils::isVec(dstTypeId)) {
      // By default set destination to XMM, will be set to YMM|ZMM if needed.
      dst.setSignature(Reg::signatureOfT<RegType::kX86_Xmm>());

      // This will hurt if AVX is enabled.
      if (Reg::isMm(src)) {
        // 64-bit move.
        instId = Inst::kIdMovq2dq;
        break;
      }

      // Argument conversion.
      TypeId dstScalarId = TypeUtils::scalarOf(dstTypeId);
      TypeId srcScalarId = TypeUtils::scalarOf(srcTypeId);

      // NOTE(review): The AVX scalar conversions (vcvtsd2ss/vcvtss2sd) are three-operand instructions, but only
      // `dst, src` is emitted at the end of this function - confirm the AVX scalar path.

      if (dstScalarId == TypeId::kFloat32 && srcScalarId == TypeId::kFloat64) {
        // Narrowing conversion (f64 -> f32): the destination occupies half the bytes of the source, so this
        // must use 'cvtsd2ss' (scalar) or 'cvtpd2ps' (packed).
        srcSize = Support::min(dstSize * 2, srcSize);
        dstSize = srcSize / 2;

        if (srcSize <= 8)
          instId = _avxEnabled ? Inst::kIdVcvtsd2ss : Inst::kIdCvtsd2ss;
        else
          instId = _avxEnabled ? Inst::kIdVcvtpd2ps : Inst::kIdCvtpd2ps;

        if (dstSize == 32)
          dst.setSignature(Reg::signatureOfT<RegType::kX86_Ymm>());
        if (src.isReg())
          src.setSignature(Reg::signatureOfVecBySize(srcSize));
        break;
      }

      if (dstScalarId == TypeId::kFloat64 && srcScalarId == TypeId::kFloat32) {
        // Widening conversion (f32 -> f64): the destination occupies twice the bytes of the source, so this
        // must use 'cvtss2sd' (scalar) or 'cvtps2pd' (packed).
        srcSize = Support::min(dstSize, srcSize * 2) / 2;
        dstSize = srcSize * 2;

        if (srcSize <= 4)
          instId = _avxEnabled ? Inst::kIdVcvtss2sd : Inst::kIdCvtss2sd;
        else
          instId = _avxEnabled ? Inst::kIdVcvtps2pd : Inst::kIdCvtps2pd;

        dst.setSignature(Reg::signatureOfVecBySize(dstSize));
        if (src.isReg() && srcSize >= 32)
          src.setSignature(Reg::signatureOfT<RegType::kX86_Ymm>());
        break;
      }

      srcSize = Support::min(srcSize, dstSize);
      if (Reg::isGp(src) || src.isMem()) {
        // 32-bit move.
        if (srcSize <= 4) {
          instId = _avxEnabled ? Inst::kIdVmovd : Inst::kIdMovd;
          if (src.isReg())
            src.setSignature(Reg::signatureOfT<RegType::kX86_Gpd>());
          break;
        }

        // 64-bit move.
        if (srcSize == 8) {
          instId = _avxEnabled ? Inst::kIdVmovq : Inst::kIdMovq;
          break;
        }
      }

      if (Reg::isVec(src) || src.isMem()) {
        instId = _avxEnabled ? Inst::kIdVmovaps : Inst::kIdMovaps;

        // Use an unaligned move for memory sources smaller than the stack alignment of the environment.
        if (src.isMem() && srcSize < _emitter->environment().stackAlignment())
          instId = _avxEnabled ? Inst::kIdVmovups : Inst::kIdMovups;

        OperandSignature signature = Reg::signatureOfVecBySize(srcSize);
        dst.setSignature(signature);
        if (src.isReg())
          src.setSignature(signature);
        break;
      }
    }

    // No instruction matched the requested `dst`/`src` combination.
    return DebugUtils::errored(kErrorInvalidState);
  }

  // Memory sources carry the (possibly adjusted) size of the data that's read.
  if (src.isMem())
    src.as<Mem>().setSize(srcSize);

  _emitter->setInlineComment(comment);
  return _emitter->emit(instId, dst, src);
}
|
||||
|
||||
// Swaps the content of registers `a` and `b` by emitting 'xchg'. Only a pair of general purpose registers is
// accepted, any other combination fails with `kErrorInvalidState`.
Error EmitHelper::emitRegSwap(
  const BaseReg& a,
  const BaseReg& b, const char* comment) {

  const bool bothGp = a.isGp() && b.isGp();
  if (!bothGp)
    return DebugUtils::errored(kErrorInvalidState);

  _emitter->setInlineComment(comment);
  return _emitter->emit(Inst::kIdXchg, a, b);
}
|
||||
|
||||
// x86::EmitHelper - Emit Prolog & Epilog
|
||||
// ======================================
|
||||
|
||||
// Selects the register template (`xReg`), the move instruction (`xInst`), and the per-register slot size (`xSize`)
// used by prolog/epilog to save and restore registers of the given `group`. Outputs are left untouched when
// `group` is not one of the handled groups.
static inline void X86Internal_setupSaveRestoreInfo(RegGroup group, const FuncFrame& frame, Reg& xReg, uint32_t& xInst, uint32_t& xSize) noexcept {
  switch (group) {
    case RegGroup::kVec:
      xReg = xmm(0);
      xInst = getXmmMovInst(frame);
      break;

    case RegGroup::kX86_K:
      xReg = k(0);
      xInst = Inst::kIdKmovq;
      break;

    case RegGroup::kX86_MM:
      xReg = mm(0);
      xInst = Inst::kIdMovq;
      break;

    default:
      // Unhandled group - nothing to set up.
      return;
  }

  // The slot size always equals the size of the selected register.
  xSize = xReg.size();
}
|
||||
|
||||
// Emits the function prolog described by `frame`, in this order:
//
//   1. 'push zbp' + 'mov zbp, zsp' (only if the frame preserves the frame pointer).
//   2. 'push' of every saved GP register.
//   3. Setup of the stack-arguments base register (only if the frame assigns one other than zsp).
//   4. Dynamic stack alignment and stack adjustment.
//   5. Store of the stack-arguments base register to its slot (dynamic alignment with a DA offset only).
//   6. Stores of all saved non-GP registers.
//
// Returns `kErrorOk` on success, otherwise the first error reported by the emitter.
ASMJIT_FAVOR_SIZE Error EmitHelper::emitProlog(const FuncFrame& frame) {
  Emitter* x86Emitter = _emitter->as<Emitter>();

  const bool preservedFP = frame.hasPreservedFP();
  const bool dynamicAlignment = frame.hasDynamicAlignment();

  Gp zsp = x86Emitter->zsp();   // ESP|RSP register.
  Gp zbp = x86Emitter->zbp();   // EBP|RBP register.
  Gp tmpGp = zsp;               // General purpose register (temporary).
  Gp argBase = zsp;             // Stack-arguments base pointer.

  uint32_t pushMask = frame.savedRegs(RegGroup::kGp);

  // Emit: 'push zbp'
  //       'mov  zbp, zsp'.
  if (preservedFP) {
    // The frame pointer is pushed right here, so it must not be part of the generic push sequence.
    pushMask &= ~Support::bitMask(Gp::kIdBp);
    ASMJIT_PROPAGATE(x86Emitter->push(zbp));
    ASMJIT_PROPAGATE(x86Emitter->mov(zbp, zsp));
  }

  // Emit: 'push gp' sequence.
  for (Support::BitWordIterator<RegMask> pushIt(pushMask); pushIt.hasNext();) {
    tmpGp.setId(pushIt.next());
    ASMJIT_PROPAGATE(x86Emitter->push(tmpGp));
  }

  // Emit: 'mov saReg, zsp' (or 'mov saReg, zbp' when the frame pointer is preserved).
  const uint32_t saRegId = frame.saRegId();
  if (saRegId != BaseReg::kIdBad && saRegId != Gp::kIdSp) {
    argBase.setId(saRegId);

    if (!preservedFP) {
      ASMJIT_PROPAGATE(x86Emitter->mov(argBase, zsp));
    }
    else if (saRegId != Gp::kIdBp) {
      // No move is needed when zbp itself is the stack-arguments base.
      ASMJIT_PROPAGATE(x86Emitter->mov(argBase, zbp));
    }
  }

  // Emit: 'and zsp, StackAlignment'.
  if (dynamicAlignment) {
    ASMJIT_PROPAGATE(x86Emitter->and_(zsp, -int32_t(frame.finalStackAlignment())));
  }

  // Emit: 'sub zsp, StackAdjustment'.
  if (frame.hasStackAdjustment()) {
    ASMJIT_PROPAGATE(x86Emitter->sub(zsp, frame.stackAdjustment()));
  }

  // Emit: 'mov [zsp + DAOffset], saReg'.
  if (dynamicAlignment && frame.hasDAOffset()) {
    Mem saSlot = ptr(zsp, int32_t(frame.daOffset()));
    ASMJIT_PROPAGATE(x86Emitter->mov(saSlot, argBase));
  }

  // Emit 'movxxx [zsp + X], {[x|y|z]mm, k}'.
  Reg saveReg;
  Mem saveSlot = ptr(zsp, int32_t(frame.extraRegSaveOffset()));

  uint32_t saveInst;
  uint32_t saveSize;

  for (RegGroup group : Support::EnumValues<RegGroup, RegGroup(1), RegGroup::kMaxVirt>{}) {
    Support::BitWordIterator<RegMask> saveIt(frame.savedRegs(group));
    if (!saveIt.hasNext())
      continue;

    X86Internal_setupSaveRestoreInfo(group, frame, saveReg, saveInst, saveSize);
    while (saveIt.hasNext()) {
      saveReg.setId(saveIt.next());
      ASMJIT_PROPAGATE(x86Emitter->emit(saveInst, saveSlot, saveReg));
      saveSlot.addOffsetLo32(int32_t(saveSize));
    }
  }

  return kErrorOk;
}
|
||||
|
||||
// Emits the function epilog described by `frame`, in this order:
//
//   1. Loads of all saved non-GP registers.
//   2. 'emms' and/or 'vzeroupper' if the frame requests such cleanup.
//   3. Restore of the stack pointer.
//   4. 'pop' of every saved GP register, from the highest register id to the lowest.
//   5. 'pop zbp' (only if the frame preserves the frame pointer).
//   6. 'ret' or 'ret x'.
//
// Returns `kErrorOk` on success, otherwise the first error reported by the emitter.
ASMJIT_FAVOR_SIZE Error EmitHelper::emitEpilog(const FuncFrame& frame) {
  Emitter* x86Emitter = _emitter->as<Emitter>();

  const bool preservedFP = frame.hasPreservedFP();
  const uint32_t registerSize = x86Emitter->registerSize();

  Gp zsp = x86Emitter->zsp();   // ESP|RSP register.
  Gp zbp = x86Emitter->zbp();   // EBP|RBP register.
  Gp tmpGp = x86Emitter->zsp(); // General purpose register (temporary).

  // Don't emit 'pop zbp' in the pop sequence, this case is handled separately.
  uint32_t popMask = frame.savedRegs(RegGroup::kGp);
  if (preservedFP)
    popMask &= ~Support::bitMask(Gp::kIdBp);

  // Emit 'movxxx {[x|y|z]mm, k}, [zsp + X]'.
  {
    Reg restoreReg;
    Mem restoreSlot = ptr(zsp, int32_t(frame.extraRegSaveOffset()));

    uint32_t restoreInst;
    uint32_t restoreSize;

    for (RegGroup group : Support::EnumValues<RegGroup, RegGroup(1), RegGroup::kMaxVirt>{}) {
      Support::BitWordIterator<RegMask> restoreIt(frame.savedRegs(group));
      if (!restoreIt.hasNext())
        continue;

      X86Internal_setupSaveRestoreInfo(group, frame, restoreReg, restoreInst, restoreSize);
      while (restoreIt.hasNext()) {
        restoreReg.setId(restoreIt.next());
        ASMJIT_PROPAGATE(x86Emitter->emit(restoreInst, restoreReg, restoreSlot));
        restoreSlot.addOffsetLo32(int32_t(restoreSize));
      }
    }
  }

  // Emit 'emms' and/or 'vzeroupper'.
  if (frame.hasMmxCleanup()) {
    ASMJIT_PROPAGATE(x86Emitter->emms());
  }

  if (frame.hasAvxCleanup()) {
    ASMJIT_PROPAGATE(x86Emitter->vzeroupper());
  }

  // Restore the stack pointer.
  if (preservedFP) {
    // Emit 'mov zsp, zbp' or 'lea zsp, [zbp - x]'
    const int32_t count = int32_t(frame.pushPopSaveSize() - registerSize);
    if (count != 0) {
      ASMJIT_PROPAGATE(x86Emitter->lea(zsp, ptr(zbp, -count)));
    }
    else {
      ASMJIT_PROPAGATE(x86Emitter->mov(zsp, zbp));
    }
  }
  else if (frame.hasDynamicAlignment() && frame.hasDAOffset()) {
    // Emit 'mov zsp, [zsp + DsaSlot]'.
    Mem saSlot = ptr(zsp, int32_t(frame.daOffset()));
    ASMJIT_PROPAGATE(x86Emitter->mov(zsp, saSlot));
  }
  else if (frame.hasStackAdjustment()) {
    // Emit 'add zsp, StackAdjustment'.
    ASMJIT_PROPAGATE(x86Emitter->add(zsp, int32_t(frame.stackAdjustment())));
  }

  // Emit 'pop gp' sequence - walks register ids 15..0, which is the reverse of the order used by the prolog.
  for (uint32_t regId = 16; regId-- > 0;) {
    if ((popMask >> regId) & 0x1u) {
      tmpGp.setId(regId);
      ASMJIT_PROPAGATE(x86Emitter->pop(tmpGp));
    }
  }

  // Emit 'pop zbp'.
  if (preservedFP) {
    ASMJIT_PROPAGATE(x86Emitter->pop(zbp));
  }

  // Emit 'ret' or 'ret x'.
  if (frame.hasCalleeStackCleanup()) {
    ASMJIT_PROPAGATE(x86Emitter->emit(Inst::kIdRet, int(frame.calleeStackCleanup())));
  }
  else {
    ASMJIT_PROPAGATE(x86Emitter->emit(Inst::kIdRet));
  }

  return kErrorOk;
}
|
||||
|
||||
// Emitter callback - emits the prolog of `frame` through a temporary `EmitHelper` configured from the AVX and
// AVX-512 settings of the frame.
static Error ASMJIT_CDECL Emitter_emitProlog(BaseEmitter* emitter, const FuncFrame& frame) {
  return EmitHelper(emitter, frame.isAvxEnabled(), frame.isAvx512Enabled()).emitProlog(frame);
}
|
||||
|
||||
// Emitter callback - emits the epilog of `frame` through a temporary `EmitHelper` configured from the AVX and
// AVX-512 settings of the frame.
static Error ASMJIT_CDECL Emitter_emitEpilog(BaseEmitter* emitter, const FuncFrame& frame) {
  return EmitHelper(emitter, frame.isAvxEnabled(), frame.isAvx512Enabled()).emitEpilog(frame);
}
|
||||
|
||||
// Emitter callback - emits the arguments assignment `args` of `frame` through a temporary `EmitHelper` configured
// from the AVX and AVX-512 settings of the frame.
static Error ASMJIT_CDECL Emitter_emitArgsAssignment(BaseEmitter* emitter, const FuncFrame& frame, const FuncArgsAssignment& args) {
  return EmitHelper(emitter, frame.isAvxEnabled(), frame.isAvx512Enabled()).emitArgsAssignment(frame, args);
}
|
||||
|
||||
// Installs the X86 specific callbacks into the function table (`_funcs`) of `emitter`. The instruction formatter
// and the validator are only installed when logging / validation are compiled in.
void assignEmitterFuncs(BaseEmitter* emitter) {
  auto& funcs = emitter->_funcs;

  funcs.emitProlog = Emitter_emitProlog;
  funcs.emitEpilog = Emitter_emitEpilog;
  funcs.emitArgsAssignment = Emitter_emitArgsAssignment;

#ifndef ASMJIT_NO_LOGGING
  funcs.formatInstruction = FormatterInternal::formatInstruction;
#endif

#ifndef ASMJIT_NO_VALIDATION
  funcs.validate = InstInternal::validate;
#endif
}
|
||||
|
||||
ASMJIT_END_SUB_NAMESPACE
|
||||
|
||||
#endif // !ASMJIT_NO_X86
|
||||
60
lib/lepton/asmjit/x86/x86emithelper_p.h
Normal file
60
lib/lepton/asmjit/x86/x86emithelper_p.h
Normal file
@ -0,0 +1,60 @@
|
||||
// This file is part of AsmJit project <https://asmjit.com>
|
||||
//
|
||||
// See asmjit.h or LICENSE.md for license and copyright information
|
||||
// SPDX-License-Identifier: Zlib
|
||||
|
||||
#ifndef ASMJIT_X86_X86EMITHELPER_P_H_INCLUDED
|
||||
#define ASMJIT_X86_X86EMITHELPER_P_H_INCLUDED
|
||||
|
||||
#include "../core/api-config.h"
|
||||
|
||||
#include "../core/emithelper_p.h"
|
||||
#include "../core/func.h"
|
||||
#include "../x86/x86emitter.h"
|
||||
#include "../x86/x86operand.h"
|
||||
|
||||
ASMJIT_BEGIN_SUB_NAMESPACE(x86)
|
||||
|
||||
//! \cond INTERNAL
|
||||
//! \addtogroup asmjit_x86
|
||||
//! \{
|
||||
|
||||
//! Maps a vector `typeId` to the register type that holds it - XMM for ids up to `TypeId::_kVec128End`, YMM for
//! ids up to `TypeId::_kVec256End`, and ZMM for everything above.
static inline RegType vecTypeIdToRegType(TypeId typeId) noexcept {
  const uint32_t id = uint32_t(typeId);

  if (id <= uint32_t(TypeId::_kVec128End))
    return RegType::kX86_Xmm;

  if (id <= uint32_t(TypeId::_kVec256End))
    return RegType::kX86_Ymm;

  return RegType::kX86_Zmm;
}
|
||||
|
||||
class EmitHelper : public BaseEmitHelper {
|
||||
public:
|
||||
bool _avxEnabled;
|
||||
bool _avx512Enabled;
|
||||
|
||||
inline explicit EmitHelper(BaseEmitter* emitter = nullptr, bool avxEnabled = false, bool avx512Enabled = false) noexcept
|
||||
: BaseEmitHelper(emitter),
|
||||
_avxEnabled(avxEnabled || avx512Enabled),
|
||||
_avx512Enabled(avx512Enabled) {}
|
||||
|
||||
Error emitRegMove(
|
||||
const Operand_& dst_,
|
||||
const Operand_& src_, TypeId typeId, const char* comment = nullptr) override;
|
||||
|
||||
Error emitArgMove(
|
||||
const BaseReg& dst_, TypeId dstTypeId,
|
||||
const Operand_& src_, TypeId srcTypeId, const char* comment = nullptr) override;
|
||||
|
||||
Error emitRegSwap(
|
||||
const BaseReg& a,
|
||||
const BaseReg& b, const char* comment = nullptr) override;
|
||||
|
||||
Error emitProlog(const FuncFrame& frame);
|
||||
Error emitEpilog(const FuncFrame& frame);
|
||||
};
|
||||
|
||||
void assignEmitterFuncs(BaseEmitter* emitter);
|
||||
|
||||
//! \}
|
||||
//! \endcond
|
||||
|
||||
ASMJIT_END_SUB_NAMESPACE
|
||||
|
||||
#endif // ASMJIT_X86_X86EMITHELPER_P_H_INCLUDED
|
||||
4315
lib/lepton/asmjit/x86/x86emitter.h
Normal file
4315
lib/lepton/asmjit/x86/x86emitter.h
Normal file
File diff suppressed because it is too large
Load Diff
944
lib/lepton/asmjit/x86/x86formatter.cpp
Normal file
944
lib/lepton/asmjit/x86/x86formatter.cpp
Normal file
@ -0,0 +1,944 @@
|
||||
// This file is part of AsmJit project <https://asmjit.com>
|
||||
//
|
||||
// See asmjit.h or LICENSE.md for license and copyright information
|
||||
// SPDX-License-Identifier: Zlib
|
||||
|
||||
#include "../core/api-build_p.h"
|
||||
#ifndef ASMJIT_NO_LOGGING
|
||||
|
||||
#include "../core/cpuinfo.h"
|
||||
#include "../core/misc_p.h"
|
||||
#include "../core/support.h"
|
||||
#include "../x86/x86formatter_p.h"
|
||||
#include "../x86/x86instapi_p.h"
|
||||
#include "../x86/x86instdb_p.h"
|
||||
#include "../x86/x86operand.h"
|
||||
|
||||
#ifndef ASMJIT_NO_COMPILER
|
||||
#include "../core/compiler.h"
|
||||
#endif
|
||||
|
||||
ASMJIT_BEGIN_SUB_NAMESPACE(x86)
|
||||
|
||||
// x86::FormatterInternal - Constants
|
||||
// ==================================
|
||||
|
||||
struct RegFormatInfo {
|
||||
struct TypeEntry {
|
||||
uint8_t index;
|
||||
};
|
||||
|
||||
struct NameEntry {
|
||||
uint8_t count;
|
||||
uint8_t formatIndex;
|
||||
uint8_t specialIndex;
|
||||
uint8_t specialCount;
|
||||
};
|
||||
|
||||
TypeEntry typeEntries[uint32_t(RegType::kMaxValue) + 1];
|
||||
char typeStrings[128 - 32];
|
||||
|
||||
NameEntry nameEntries[uint32_t(RegType::kMaxValue) + 1];
|
||||
char nameStrings[280];
|
||||
};
|
||||
|
||||
template<uint32_t X>
|
||||
struct RegFormatInfo_T {
|
||||
enum {
|
||||
kTypeIndex = X == uint32_t(RegType::kX86_GpbLo) ? 1 :
|
||||
X == uint32_t(RegType::kX86_GpbHi) ? 8 :
|
||||
X == uint32_t(RegType::kX86_Gpw ) ? 15 :
|
||||
X == uint32_t(RegType::kX86_Gpd ) ? 19 :
|
||||
X == uint32_t(RegType::kX86_Gpq ) ? 23 :
|
||||
X == uint32_t(RegType::kX86_Xmm ) ? 27 :
|
||||
X == uint32_t(RegType::kX86_Ymm ) ? 31 :
|
||||
X == uint32_t(RegType::kX86_Zmm ) ? 35 :
|
||||
X == uint32_t(RegType::kX86_Mm ) ? 50 :
|
||||
X == uint32_t(RegType::kX86_KReg ) ? 53 :
|
||||
X == uint32_t(RegType::kX86_SReg ) ? 43 :
|
||||
X == uint32_t(RegType::kX86_CReg ) ? 59 :
|
||||
X == uint32_t(RegType::kX86_DReg ) ? 62 :
|
||||
X == uint32_t(RegType::kX86_St ) ? 47 :
|
||||
X == uint32_t(RegType::kX86_Bnd ) ? 55 :
|
||||
X == uint32_t(RegType::kX86_Tmm ) ? 65 :
|
||||
X == uint32_t(RegType::kX86_Rip ) ? 39 : 0,
|
||||
|
||||
kFormatIndex = X == uint32_t(RegType::kX86_GpbLo) ? 1 :
|
||||
X == uint32_t(RegType::kX86_GpbHi) ? 6 :
|
||||
X == uint32_t(RegType::kX86_Gpw ) ? 11 :
|
||||
X == uint32_t(RegType::kX86_Gpd ) ? 16 :
|
||||
X == uint32_t(RegType::kX86_Gpq ) ? 21 :
|
||||
X == uint32_t(RegType::kX86_Xmm ) ? 25 :
|
||||
X == uint32_t(RegType::kX86_Ymm ) ? 31 :
|
||||
X == uint32_t(RegType::kX86_Zmm ) ? 37 :
|
||||
X == uint32_t(RegType::kX86_Mm ) ? 60 :
|
||||
X == uint32_t(RegType::kX86_KReg ) ? 65 :
|
||||
X == uint32_t(RegType::kX86_SReg ) ? 49 :
|
||||
X == uint32_t(RegType::kX86_CReg ) ? 75 :
|
||||
X == uint32_t(RegType::kX86_DReg ) ? 80 :
|
||||
X == uint32_t(RegType::kX86_St ) ? 55 :
|
||||
X == uint32_t(RegType::kX86_Bnd ) ? 69 :
|
||||
X == uint32_t(RegType::kX86_Tmm ) ? 89 :
|
||||
X == uint32_t(RegType::kX86_Rip ) ? 43 : 0,
|
||||
|
||||
kSpecialIndex = X == uint32_t(RegType::kX86_GpbLo) ? 96 :
|
||||
X == uint32_t(RegType::kX86_GpbHi) ? 128 :
|
||||
X == uint32_t(RegType::kX86_Gpw ) ? 161 :
|
||||
X == uint32_t(RegType::kX86_Gpd ) ? 160 :
|
||||
X == uint32_t(RegType::kX86_Gpq ) ? 192 :
|
||||
X == uint32_t(RegType::kX86_SReg ) ? 224 :
|
||||
X == uint32_t(RegType::kX86_Rip ) ? 85 : 0,
|
||||
|
||||
kSpecialCount = X == uint32_t(RegType::kX86_GpbLo) ? 8 :
|
||||
X == uint32_t(RegType::kX86_GpbHi) ? 4 :
|
||||
X == uint32_t(RegType::kX86_Gpw ) ? 8 :
|
||||
X == uint32_t(RegType::kX86_Gpd ) ? 8 :
|
||||
X == uint32_t(RegType::kX86_Gpq ) ? 8 :
|
||||
X == uint32_t(RegType::kX86_SReg ) ? 7 :
|
||||
X == uint32_t(RegType::kX86_Rip ) ? 1 : 0
|
||||
};
|
||||
};
|
||||
|
||||
#define ASMJIT_REG_TYPE_ENTRY(TYPE) { \
|
||||
RegFormatInfo_T<TYPE>::kTypeIndex \
|
||||
}
|
||||
|
||||
#define ASMJIT_REG_NAME_ENTRY(TYPE) { \
|
||||
RegTraits<RegType(TYPE)>::kCount, \
|
||||
RegFormatInfo_T<TYPE>::kFormatIndex, \
|
||||
RegFormatInfo_T<TYPE>::kSpecialIndex, \
|
||||
RegFormatInfo_T<TYPE>::kSpecialCount \
|
||||
}
|
||||
|
||||
static const RegFormatInfo x86RegFormatInfo = {
|
||||
// Register type entries and strings.
|
||||
{ ASMJIT_LOOKUP_TABLE_32(ASMJIT_REG_TYPE_ENTRY, 0) },
|
||||
|
||||
"\0" // #0
|
||||
"gpb\0\0\0\0" // #1
|
||||
"gpb.hi\0" // #8
|
||||
"gpw\0" // #15
|
||||
"gpd\0" // #19
|
||||
"gpq\0" // #23
|
||||
"xmm\0" // #27
|
||||
"ymm\0" // #31
|
||||
"zmm\0" // #35
|
||||
"rip\0" // #39
|
||||
"seg\0" // #43
|
||||
"st\0" // #47
|
||||
"mm\0" // #50
|
||||
"k\0" // #53
|
||||
"bnd\0" // #55
|
||||
"cr\0" // #59
|
||||
"dr\0" // #62
|
||||
"tmm\0" // #65
|
||||
,
|
||||
|
||||
// Register name entries and strings.
|
||||
{ ASMJIT_LOOKUP_TABLE_32(ASMJIT_REG_NAME_ENTRY, 0) },
|
||||
|
||||
"\0"
|
||||
"r%ub\0" // #1
|
||||
"r%uh\0" // #6
|
||||
"r%uw\0" // #11
|
||||
"r%ud\0" // #16
|
||||
"r%u\0" // #21
|
||||
"xmm%u\0" // #25
|
||||
"ymm%u\0" // #31
|
||||
"zmm%u\0" // #37
|
||||
"rip%u\0" // #43
|
||||
"seg%u\0" // #49
|
||||
"st%u\0" // #55
|
||||
"mm%u\0" // #60
|
||||
"k%u\0" // #65
|
||||
"bnd%u\0" // #69
|
||||
"cr%u\0" // #75
|
||||
"dr%u\0" // #80
|
||||
|
||||
"rip\0" // #85
|
||||
"tmm%u\0" // #89
|
||||
"\0" // #95
|
||||
|
||||
"al\0\0" "cl\0\0" "dl\0\0" "bl\0\0" "spl\0" "bpl\0" "sil\0" "dil\0" // #96
|
||||
"ah\0\0" "ch\0\0" "dh\0\0" "bh\0\0" "n/a\0" "n/a\0" "n/a\0" "n/a\0" // #128
|
||||
"eax\0" "ecx\0" "edx\0" "ebx\0" "esp\0" "ebp\0" "esi\0" "edi\0" // #160
|
||||
"rax\0" "rcx\0" "rdx\0" "rbx\0" "rsp\0" "rbp\0" "rsi\0" "rdi\0" // #192
|
||||
"n/a\0" "es\0\0" "cs\0\0" "ss\0\0" "ds\0\0" "fs\0\0" "gs\0\0" "n/a\0" // #224
|
||||
};
|
||||
#undef ASMJIT_REG_NAME_ENTRY
|
||||
#undef ASMJIT_REG_TYPE_ENTRY
|
||||
|
||||
// Returns the size prefix (including a trailing space) that describes a memory operand of `size` bytes, or an
// empty string if there is no prefix for such size.
static const char* x86GetAddressSizeString(uint32_t size) noexcept {
  struct SizePrefix {
    uint32_t size;
    const char* text;
  };

  static const SizePrefix kSizePrefixes[] = {
    { 1 , "byte ptr "    },
    { 2 , "word ptr "    },
    { 4 , "dword ptr "   },
    { 6 , "fword ptr "   },
    { 8 , "qword ptr "   },
    { 10, "tbyte ptr "   },
    { 16, "xmmword ptr " },
    { 32, "ymmword ptr " },
    { 64, "zmmword ptr " }
  };

  for (const SizePrefix& prefix : kSizePrefixes) {
    if (prefix.size == size)
      return prefix.text;
  }

  return "";
}
|
||||
|
||||
// x86::FormatterInternal - Format FeatureId
|
||||
// =========================================
|
||||
|
||||
// Appends the name of the X86 CPU feature `featureId` to `sb`. Feature ids greater than
// `CpuFeatures::X86::kMaxValue` are all mapped to the entry that follows the last known feature.
Error FormatterInternal::formatFeature(String& sb, uint32_t featureId) noexcept {
  // @EnumStringBegin{"enum": "CpuFeatures::X86", "output": "sFeature", "strip": "k"}@
  static const char sFeatureString[] =
    "None\0"
    "MT\0"
    "NX\0"
    "3DNOW\0"
    "3DNOW2\0"
    "ADX\0"
    "AESNI\0"
    "ALTMOVCR8\0"
    "AMX_BF16\0"
    "AMX_INT8\0"
    "AMX_TILE\0"
    "AVX\0"
    "AVX2\0"
    "AVX512_4FMAPS\0"
    "AVX512_4VNNIW\0"
    "AVX512_BF16\0"
    "AVX512_BITALG\0"
    "AVX512_BW\0"
    "AVX512_CDI\0"
    "AVX512_DQ\0"
    "AVX512_ERI\0"
    "AVX512_F\0"
    "AVX512_FP16\0"
    "AVX512_IFMA\0"
    "AVX512_PFI\0"
    "AVX512_VBMI\0"
    "AVX512_VBMI2\0"
    "AVX512_VL\0"
    "AVX512_VNNI\0"
    "AVX512_VP2INTERSECT\0"
    "AVX512_VPOPCNTDQ\0"
    "AVX_VNNI\0"
    "BMI\0"
    "BMI2\0"
    "CET_IBT\0"
    "CET_SS\0"
    "CLDEMOTE\0"
    "CLFLUSH\0"
    "CLFLUSHOPT\0"
    "CLWB\0"
    "CLZERO\0"
    "CMOV\0"
    "CMPXCHG16B\0"
    "CMPXCHG8B\0"
    "ENCLV\0"
    "ENQCMD\0"
    "ERMS\0"
    "F16C\0"
    "FMA\0"
    "FMA4\0"
    "FPU\0"
    "FSGSBASE\0"
    "FXSR\0"
    "FXSROPT\0"
    "GEODE\0"
    "GFNI\0"
    "HLE\0"
    "HRESET\0"
    "I486\0"
    "LAHFSAHF\0"
    "LWP\0"
    "LZCNT\0"
    "MCOMMIT\0"
    "MMX\0"
    "MMX2\0"
    "MONITOR\0"
    "MONITORX\0"
    "MOVBE\0"
    "MOVDIR64B\0"
    "MOVDIRI\0"
    "MPX\0"
    "MSR\0"
    "MSSE\0"
    "OSXSAVE\0"
    "OSPKE\0"
    "PCLMULQDQ\0"
    "PCONFIG\0"
    "POPCNT\0"
    "PREFETCHW\0"
    "PREFETCHWT1\0"
    "PTWRITE\0"
    "RDPID\0"
    "RDPRU\0"
    "RDRAND\0"
    "RDSEED\0"
    "RDTSC\0"
    "RDTSCP\0"
    "RTM\0"
    "SERIALIZE\0"
    "SHA\0"
    "SKINIT\0"
    "SMAP\0"
    "SMEP\0"
    "SMX\0"
    "SNP\0"
    "SSE\0"
    "SSE2\0"
    "SSE3\0"
    "SSE4_1\0"
    "SSE4_2\0"
    "SSE4A\0"
    "SSSE3\0"
    "SVM\0"
    "TBM\0"
    "TSX\0"
    "TSXLDTRK\0"
    "UINTR\0"
    "VAES\0"
    "VMX\0"
    "VPCLMULQDQ\0"
    "WAITPKG\0"
    "WBNOINVD\0"
    "XOP\0"
    "XSAVE\0"
    "XSAVEC\0"
    "XSAVEOPT\0"
    "XSAVES\0"
    "<Unknown>\0";

  static const uint16_t sFeatureIndex[] = {
    0, 5, 8, 11, 17, 24, 28, 34, 44, 53, 62, 71, 75, 80, 94, 108, 120, 134, 144,
    155, 165, 176, 185, 197, 209, 220, 232, 245, 255, 267, 287, 304, 313, 317,
    322, 330, 337, 346, 354, 365, 370, 377, 382, 393, 403, 409, 416, 421, 426,
    430, 435, 439, 448, 453, 461, 467, 472, 476, 483, 488, 497, 501, 507, 515,
    519, 524, 532, 541, 547, 557, 565, 569, 573, 578, 586, 592, 602, 610, 617,
    627, 639, 647, 653, 659, 666, 673, 679, 686, 690, 700, 704, 711, 716, 721,
    725, 729, 733, 738, 743, 750, 757, 763, 769, 773, 777, 781, 790, 796, 801,
    805, 816, 824, 833, 837, 843, 850, 859, 866
  };
  // @EnumStringEnd@

  // Clamp the id so everything past the last known feature selects the entry right after it.
  const uint32_t fallbackIndex = uint32_t(CpuFeatures::X86::kMaxValue) + 1;
  const uint32_t tableIndex = featureId < fallbackIndex ? featureId : fallbackIndex;

  const char* featureName = sFeatureString + sFeatureIndex[tableIndex];
  return sb.append(featureName);
}
|
||||
|
||||
// x86::FormatterInternal - Format Register
|
||||
// ========================================
|
||||
|
||||
// Appends the textual form of the register of the given `type` and `id` to `sb`.
//
// If `id` is a valid virtual register id of a compiler `emitter`, the name of the virtual register is used (or
// '%index' if it has no name), optionally followed by '@type' when the register is accessed as a different type
// and `FormatFlags::kRegCasts` is set. Otherwise the id is formatted as a physical register: by its special name,
// by the format string of its type, or as 'type@id'. Everything else is formatted as '<Reg-type>?id'.
ASMJIT_FAVOR_SIZE Error FormatterInternal::formatRegister(String& sb, FormatFlags formatFlags, const BaseEmitter* emitter, Arch arch, RegType type, uint32_t id) noexcept {
  DebugUtils::unused(arch);

  const RegFormatInfo& info = x86RegFormatInfo;
  const bool typeInRange = uint32_t(type) <= uint32_t(RegType::kMaxValue);

#ifndef ASMJIT_NO_COMPILER
  if (Operand::isVirtId(id) && emitter && emitter->emitterType() == EmitterType::kCompiler) {
    const BaseCompiler* cc = static_cast<const BaseCompiler*>(emitter);
    if (cc->isVirtIdValid(id)) {
      VirtReg* vReg = cc->virtRegById(id);
      ASMJIT_ASSERT(vReg != nullptr);

      // Prefer the name of the virtual register, fall back to its index.
      const char* vRegName = vReg->name();
      const bool hasName = vRegName && vRegName[0] != '\0';

      if (hasName) {
        ASMJIT_PROPAGATE(sb.append(vRegName));
      }
      else {
        ASMJIT_PROPAGATE(sb.appendFormat("%%%u", unsigned(Operand::virtIdToIndex(id))));
      }

      // Append '@type' if the register is used as a different type than it was created with.
      if (vReg->type() != type && typeInRange && Support::test(formatFlags, FormatFlags::kRegCasts)) {
        const RegFormatInfo::TypeEntry& castEntry = info.typeEntries[size_t(type)];
        if (castEntry.index) {
          ASMJIT_PROPAGATE(sb.appendFormat("@%s", info.typeStrings + castEntry.index));
        }
      }

      return kErrorOk;
    }
  }
#else
  DebugUtils::unused(emitter, formatFlags);
#endif

  if (typeInRange) {
    const RegFormatInfo::NameEntry& nameEntry = info.nameEntries[size_t(type)];

    // Special names occupy 4 bytes each.
    if (id < nameEntry.specialCount)
      return sb.append(info.nameStrings + nameEntry.specialIndex + id * 4);

    if (id < nameEntry.count)
      return sb.appendFormat(info.nameStrings + nameEntry.formatIndex, unsigned(id));

    // The id is out of range of the register type - format it as 'type@id' if the type has a name.
    const RegFormatInfo::TypeEntry& typeEntry = info.typeEntries[size_t(type)];
    if (typeEntry.index)
      return sb.appendFormat("%s@%u", info.typeStrings + typeEntry.index, id);
  }

  return sb.appendFormat("<Reg-%u>?%u", uint32_t(type), id);
}
|
||||
|
||||
// x86::FormatterInternal - Format Operand
|
||||
// =======================================
|
||||
|
||||
// Appends the textual form of the operand `op` to `sb`:
//
//   - Register  - formatted by `formatRegister()`.
//   - Memory    - '<size> ptr <seg>:[<abs|rel> base+index*scale+-offset]', where each part is optional.
//   - Immediate - signed decimal, or '0x' prefixed hexadecimal if `FormatFlags::kHexImms` is set and the value
//                 (as unsigned) is greater than 9.
//   - Label     - formatted by `Formatter::formatLabel()`.
//
// Any other operand is formatted as '<None>'.
ASMJIT_FAVOR_SIZE Error FormatterInternal::formatOperand(
  String& sb,
  FormatFlags formatFlags,
  const BaseEmitter* emitter,
  Arch arch,
  const Operand_& op) noexcept {

  if (op.isReg()) {
    const BaseReg& reg = op.as<BaseReg>();
    return formatRegister(sb, formatFlags, emitter, arch, reg.type(), reg.id());
  }

  if (op.isMem()) {
    const Mem& m = op.as<Mem>();
    ASMJIT_PROPAGATE(sb.append(x86GetAddressSizeString(m.size())));

    // Segment override prefix (segment names start at offset 224 of the name strings, 4 bytes each).
    const uint32_t seg = m.segmentId();
    if (seg != SReg::kIdNone && seg < SReg::kIdCount) {
      ASMJIT_PROPAGATE(sb.appendFormat("%s:", x86RegFormatInfo.nameStrings + 224 + size_t(seg) * 4));
    }

    ASMJIT_PROPAGATE(sb.append('['));

    // Address type - nothing is printed for the default one.
    const Mem::AddrType addrType = m.addrType();
    if (addrType == Mem::AddrType::kAbs) {
      ASMJIT_PROPAGATE(sb.append("abs "));
    }
    else if (addrType == Mem::AddrType::kRel) {
      ASMJIT_PROPAGATE(sb.append("rel "));
    }

    // Sign to print before the next address component, '\0' means that nothing has been printed yet.
    char opSign = '\0';

    if (m.hasBase()) {
      opSign = '+';

      if (m.hasBaseLabel()) {
        ASMJIT_PROPAGATE(Formatter::formatLabel(sb, formatFlags, emitter, m.baseId()));
      }
      else {
        FormatFlags baseFlags = formatFlags;
        if (m.isRegHome()) {
          // A register home is printed as '&reg' without register casts.
          ASMJIT_PROPAGATE(sb.append("&"));
          baseFlags &= ~FormatFlags::kRegCasts;
        }
        ASMJIT_PROPAGATE(formatRegister(sb, baseFlags, emitter, arch, m.baseType(), m.baseId()));
      }
    }

    if (m.hasIndex()) {
      if (opSign) {
        ASMJIT_PROPAGATE(sb.append(opSign));
      }
      opSign = '+';

      ASMJIT_PROPAGATE(formatRegister(sb, formatFlags, emitter, arch, m.indexType(), m.indexId()));
      if (m.hasShift()) {
        ASMJIT_PROPAGATE(sb.appendFormat("*%u", 1 << m.shift()));
      }
    }

    // The offset is always printed if there is neither base nor index.
    uint64_t off = uint64_t(m.offset());
    if (off || !m.hasBaseOrIndex()) {
      const bool isNegative = int64_t(off) < 0;
      if (isNegative) {
        opSign = '-';
        off = ~off + 1;
      }

      if (opSign) {
        ASMJIT_PROPAGATE(sb.append(opSign));
      }

      const bool useHex = Support::test(formatFlags, FormatFlags::kHexOffsets) && off > 9;
      if (useHex) {
        ASMJIT_PROPAGATE(sb.append("0x", 2));
      }

      ASMJIT_PROPAGATE(sb.appendUInt(off, useHex ? 16u : 10u));
    }

    return sb.append(']');
  }

  if (op.isImm()) {
    const int64_t immValue = op.as<Imm>().value();

    if (!Support::test(formatFlags, FormatFlags::kHexImms) || uint64_t(immValue) <= 9)
      return sb.appendInt(immValue, 10);

    ASMJIT_PROPAGATE(sb.append("0x", 2));
    return sb.appendUInt(uint64_t(immValue), 16);
  }

  if (op.isLabel())
    return Formatter::formatLabel(sb, formatFlags, emitter, op.id());

  return sb.append("<None>");
}
|
||||
|
||||
// x86::FormatterInternal - Format Immediate (Extension)
|
||||
// =====================================================
|
||||
|
||||
// Character appended before the first item of an explained immediate.
static constexpr char kImmCharStart = '{';
|
||||
// Character appended after the last item of an explained immediate.
static constexpr char kImmCharEnd = '}';
|
||||
// Character appended between two items of an explained immediate.
static constexpr char kImmCharOr = '|';
|
||||
|
||||
// Describes a bit-field of an 8-bit immediate and the way its value is turned into text.
struct ImmBits {
  // Interpretation of `text`.
  enum Mode : uint32_t {
    // `text` is a packed string table, the value of the bit-field selects the string.
    kModeLookup = 0,
    // `text` is a printf-like format string that receives the value of the bit-field.
    kModeFormat = 1
  };

  // Mask that selects the bit-field (applied before `shift`).
  uint8_t mask;
  // Right shift applied to the masked value.
  uint8_t shift;
  // One of `Mode` values.
  uint8_t mode;
  // Text used according to `mode`; sized so that the members above plus this array take 48 bytes.
  char text[48 - 3];
};
|
||||
|
||||
// Explains a shuffle immediate `u8` that consists of `count` fields, each `bits` wide, starting at the least
// significant bit. Appends '{a|b|...}' to `sb`, where each item is the decimal value of one field.
ASMJIT_FAVOR_SIZE static Error FormatterInternal_formatImmShuf(String& sb, uint32_t u8, uint32_t bits, uint32_t count) noexcept {
  const uint32_t fieldMask = uint32_t((1 << bits) - 1);

  // The first item is preceded by the opening character, all others by the separator.
  char separator = kImmCharStart;

  for (uint32_t remaining = count; remaining != 0; remaining--) {
    ASMJIT_PROPAGATE(sb.append(separator));
    ASMJIT_PROPAGATE(sb.appendUInt(u8 & fieldMask));

    separator = kImmCharOr;
    u8 >>= bits;
  }

  if (kImmCharEnd) {
    ASMJIT_PROPAGATE(sb.append(kImmCharEnd));
  }

  return kErrorOk;
}
|
||||
|
||||
// Explains the immediate `u8` by using `count` bit-field descriptions in `bits`. Each bit-field is extracted from
// `u8` and converted to text as specified by its mode; bit-fields that yield an empty string are skipped. The
// items are appended to `sb` as '{a|b|...}', nothing is appended if all items were skipped.
//
// Returns `kErrorInvalidState` if a description has an unknown mode.
ASMJIT_FAVOR_SIZE static Error FormatterInternal_formatImmBits(String& sb, uint32_t u8, const ImmBits* bits, uint32_t count) noexcept {
  char formatted[64];
  uint32_t itemCount = 0;

  const ImmBits* const end = bits + count;
  for (const ImmBits* spec = bits; spec != end; ++spec) {
    const uint32_t value = (u8 & uint32_t(spec->mask)) >> spec->shift;
    const char* str = nullptr;

    if (spec->mode == ImmBits::kModeLookup) {
      str = Support::findPackedString(spec->text, value);
    }
    else if (spec->mode == ImmBits::kModeFormat) {
      snprintf(formatted, sizeof(formatted), spec->text, unsigned(value));
      str = formatted;
    }
    else {
      return DebugUtils::errored(kErrorInvalidState);
    }

    // Empty strings represent values that are not worth printing.
    if (str[0] == '\0')
      continue;

    itemCount++;
    ASMJIT_PROPAGATE(sb.append(itemCount == 1 ? kImmCharStart : kImmCharOr));
    ASMJIT_PROPAGATE(sb.append(str));
  }

  if (itemCount != 0 && kImmCharEnd) {
    ASMJIT_PROPAGATE(sb.append(kImmCharEnd));
  }

  return kErrorOk;
}
|
||||
|
||||
// Explains an immediate by translating its fields into names taken from a packed string table.
//
// The immediate `u8` is consumed from its least significant bit as `count` fields of `bits` bits. The name of the
// i-th field is the packed string in `text` at index `field + i * advance`, so `advance` selects a different part
// of the table for each successive field. Names are separated by `kImmCharOr` and enclosed in `kImmCharStart` /
// `kImmCharEnd`.
//
// Returns `kErrorOk` on success, or the first error reported by `sb`.
ASMJIT_FAVOR_SIZE static Error FormatterInternal_formatImmText(String& sb, uint32_t u8, uint32_t bits, uint32_t advance, const char* text, uint32_t count = 1) noexcept {
  const uint32_t fieldMask = (1u << bits) - 1;

  uint32_t tableBase = 0;
  uint32_t fieldIndex = 0;

  while (fieldIndex < count) {
    const uint32_t stringIndex = (u8 & fieldMask) + tableBase;

    if (fieldIndex == 0)
      ASMJIT_PROPAGATE(sb.append(kImmCharStart));
    else
      ASMJIT_PROPAGATE(sb.append(kImmCharOr));

    ASMJIT_PROPAGATE(sb.append(Support::findPackedString(text, stringIndex)));

    // Advance to the next field of the immediate and to the next section of the table.
    u8 >>= bits;
    tableBase += advance;
    fieldIndex++;
  }

  if (kImmCharEnd)
    ASMJIT_PROPAGATE(sb.append(kImmCharEnd));

  return kErrorOk;
}
|
||||
|
||||
// Appends a human readable explanation of the 8-bit immediate `imm` of instruction `instId` to `sb`.
//
// `vecSize` is the size (in bytes) of the widest register operand and determines how many fields are reported for
// instructions whose immediate is interpreted per element. Instructions without a known immediate encoding append
// nothing and succeed. `formatFlags` is currently unused.
ASMJIT_FAVOR_SIZE static Error FormatterInternal_explainConst(
  String& sb,
  FormatFlags formatFlags,
  InstId instId,
  uint32_t vecSize,
  const Imm& imm) noexcept {

  DebugUtils::unused(formatFlags);

  // Comparison predicates of [V]CMP{PD|PS|SD|SS} - the legacy SSE forms only use the first eight entries.
  static const char vcmpx[] =
    "EQ_OQ\0" "LT_OS\0" "LE_OS\0" "UNORD_Q\0" "NEQ_UQ\0" "NLT_US\0" "NLE_US\0" "ORD_Q\0"
    "EQ_UQ\0" "NGE_US\0" "NGT_US\0" "FALSE_OQ\0" "NEQ_OQ\0" "GE_OS\0" "GT_OS\0" "TRUE_UQ\0"
    "EQ_OS\0" "LT_OQ\0" "LE_OQ\0" "UNORD_S\0" "NEQ_US\0" "NLT_UQ\0" "NLE_UQ\0" "ORD_S\0"
    "EQ_US\0" "NGE_UQ\0" "NGT_UQ\0" "FALSE_OS\0" "NEQ_OS\0" "GE_OQ\0" "GT_OQ\0" "TRUE_US\0";

  // VPCMPx and VPCOMx use different predicate orderings, hence two tables.
  static const char vpcmpx[] = "EQ\0" "LT\0" "LE\0" "FALSE\0" "NEQ\0" "GE\0" "GT\0" "TRUE\0";
  static const char vpcomx[] = "LT\0" "LE\0" "GT\0" "GE\0" "EQ\0" "NEQ\0" "FALSE\0" "TRUE\0";

  // Element selectors of [V]SHUFPD / [V]SHUFPS, indexed by `field + position * advance`.
  static const char vshufpd[] = "A0\0A1\0B0\0B1\0A2\0A3\0B2\0B3\0A4\0A5\0B4\0B5\0A6\0A7\0B6\0B7\0";
  static const char vshufps[] = "A0\0A1\0A2\0A3\0A0\0A1\0A2\0A3\0B0\0B1\0B2\0B3\0B0\0B1\0B2\0B3\0";

  static const ImmBits vfpclassxx[] = {
    { 0x07u, 0, ImmBits::kModeLookup, "QNAN\0" "+0\0" "-0\0" "+INF\0" "-INF\0" "DENORMAL\0" "-FINITE\0" "SNAN\0" }
  };

  static const ImmBits vfixupimmxx[] = {
    { 0x01u, 0, ImmBits::kModeLookup, "\0" "+INF_IE\0" },
    { 0x02u, 1, ImmBits::kModeLookup, "\0" "-VE_IE\0" },
    { 0x04u, 2, ImmBits::kModeLookup, "\0" "-INF_IE\0" },
    { 0x08u, 3, ImmBits::kModeLookup, "\0" "SNAN_IE\0" },
    { 0x10u, 4, ImmBits::kModeLookup, "\0" "ONE_IE\0" },
    { 0x20u, 5, ImmBits::kModeLookup, "\0" "ONE_ZE\0" },
    { 0x40u, 6, ImmBits::kModeLookup, "\0" "ZERO_IE\0" },
    { 0x80u, 7, ImmBits::kModeLookup, "\0" "ZERO_ZE\0" }
  };

  static const ImmBits vgetmantxx[] = {
    { 0x03u, 0, ImmBits::kModeLookup, "[1, 2)\0" "[.5, 2)\0" "[.5, 1)\0" "[.75, 1.5)\0" },
    { 0x04u, 2, ImmBits::kModeLookup, "\0" "NO_SIGN\0" },
    { 0x08u, 3, ImmBits::kModeLookup, "\0" "QNAN_IF_SIGN\0" }
  };

  static const ImmBits vmpsadbw[] = {
    { 0x04u, 2, ImmBits::kModeLookup, "BLK1[0]\0" "BLK1[1]\0" },
    { 0x03u, 0, ImmBits::kModeLookup, "BLK2[0]\0" "BLK2[1]\0" "BLK2[2]\0" "BLK2[3]\0" },
    { 0x40u, 6, ImmBits::kModeLookup, "BLK1[4]\0" "BLK1[5]\0" },
    { 0x30u, 4, ImmBits::kModeLookup, "BLK2[4]\0" "BLK2[5]\0" "BLK2[6]\0" "BLK2[7]\0" }
  };

  static const ImmBits vpclmulqdq[] = {
    { 0x01u, 0, ImmBits::kModeLookup, "LQ\0" "HQ\0" },
    { 0x10u, 4, ImmBits::kModeLookup, "LQ\0" "HQ\0" }
  };

  static const ImmBits vperm2x128[] = {
    { 0x0Bu, 0, ImmBits::kModeLookup, "A0\0" "A1\0" "B0\0" "B1\0" "\0" "\0" "\0" "\0" "0\0" "0\0" "0\0" "0\0" },
    { 0xB0u, 4, ImmBits::kModeLookup, "A0\0" "A1\0" "B0\0" "B1\0" "\0" "\0" "\0" "\0" "0\0" "0\0" "0\0" "0\0" }
  };

  static const ImmBits vrangexx[] = {
    { 0x03u, 0, ImmBits::kModeLookup, "MIN\0" "MAX\0" "MIN_ABS\0" "MAX_ABS\0" },
    { 0x0Cu, 2, ImmBits::kModeLookup, "SIGN_A\0" "SIGN_B\0" "SIGN_0\0" "SIGN_1\0" }
  };

  static const ImmBits vreducexx_vrndscalexx[] = {
    { 0x07u, 0, ImmBits::kModeLookup, "\0" "\0" "\0" "\0" "ROUND\0" "FLOOR\0" "CEIL\0" "TRUNC\0" },
    { 0x08u, 3, ImmBits::kModeLookup, "\0" "SAE\0" },
    { 0xF0u, 4, ImmBits::kModeFormat, "LEN=%d" }
  };

  static const ImmBits vroundxx[] = {
    { 0x07u, 0, ImmBits::kModeLookup, "ROUND\0" "FLOOR\0" "CEIL\0" "TRUNC\0" "\0" "\0" "\0" "\0" },
    { 0x08u, 3, ImmBits::kModeLookup, "\0" "INEXACT\0" }
  };

  const uint32_t immByte = imm.valueAs<uint8_t>();

  // Number of 64-bit / 32-bit elements that fit into the widest register operand.
  const uint32_t elems64 = vecSize / 8;
  const uint32_t elems32 = vecSize / 4;

  switch (instId) {
    // One selector bit per 64-bit element.
    case Inst::kIdVblendpd: case Inst::kIdBlendpd:
    case Inst::kIdVpermilpd:
      return FormatterInternal_formatImmShuf(sb, immByte, 1, elems64);

    // One selector bit per 32-bit element.
    case Inst::kIdVblendps: case Inst::kIdBlendps:
      return FormatterInternal_formatImmShuf(sb, immByte, 1, elems32);

    // AVX compares use a 5-bit predicate.
    case Inst::kIdVcmppd: case Inst::kIdVcmpps:
    case Inst::kIdVcmpsd: case Inst::kIdVcmpss:
      return FormatterInternal_formatImmText(sb, immByte, 5, 0, vcmpx);

    // SSE compares use a 3-bit predicate.
    case Inst::kIdCmppd: case Inst::kIdCmpps:
    case Inst::kIdCmpsd: case Inst::kIdCmpss:
      return FormatterInternal_formatImmText(sb, immByte, 3, 0, vcmpx);

    // Four 2-bit selectors.
    case Inst::kIdVdbpsadbw:
    case Inst::kIdVpermilps:
    case Inst::kIdVpshufd:  case Inst::kIdPshufd:
    case Inst::kIdVpshufhw: case Inst::kIdVpshuflw:
    case Inst::kIdPshufhw:  case Inst::kIdPshuflw:
    case Inst::kIdPshufw:
    case Inst::kIdVpermq:   case Inst::kIdVpermpd:
      return FormatterInternal_formatImmShuf(sb, immByte, 2, 4);

    // Eight individual bits.
    case Inst::kIdVdppd: case Inst::kIdVdpps:
    case Inst::kIdDppd:  case Inst::kIdDpps:
    case Inst::kIdVpblendw: case Inst::kIdPblendw:
    case Inst::kIdVpternlogd: case Inst::kIdVpternlogq:
      return FormatterInternal_formatImmShuf(sb, immByte, 1, 8);

    case Inst::kIdVmpsadbw: case Inst::kIdMpsadbw:
      return FormatterInternal_formatImmBits(sb, immByte, vmpsadbw, Support::min<uint32_t>(elems64, 4));

    case Inst::kIdVpblendd:
      return FormatterInternal_formatImmShuf(sb, immByte, 1, Support::min<uint32_t>(elems32, 8));

    case Inst::kIdVpclmulqdq: case Inst::kIdPclmulqdq:
      return FormatterInternal_formatImmBits(sb, immByte, vpclmulqdq, ASMJIT_ARRAY_SIZE(vpclmulqdq));

    case Inst::kIdVroundpd: case Inst::kIdVroundps:
    case Inst::kIdVroundsd: case Inst::kIdVroundss:
    case Inst::kIdRoundpd:  case Inst::kIdRoundps:
    case Inst::kIdRoundsd:  case Inst::kIdRoundss:
      return FormatterInternal_formatImmBits(sb, immByte, vroundxx, ASMJIT_ARRAY_SIZE(vroundxx));

    case Inst::kIdVshufpd: case Inst::kIdShufpd:
      return FormatterInternal_formatImmText(sb, immByte, 1, 2, vshufpd, Support::min<uint32_t>(elems64, 8));

    case Inst::kIdVshufps: case Inst::kIdShufps:
      return FormatterInternal_formatImmText(sb, immByte, 2, 4, vshufps, 4);

    // Only the first record (rounding mode) of `vroundxx` applies here.
    case Inst::kIdVcvtps2ph:
      return FormatterInternal_formatImmBits(sb, immByte, vroundxx, 1);

    case Inst::kIdVperm2f128: case Inst::kIdVperm2i128:
      return FormatterInternal_formatImmBits(sb, immByte, vperm2x128, ASMJIT_ARRAY_SIZE(vperm2x128));

    case Inst::kIdVfixupimmpd: case Inst::kIdVfixupimmps:
    case Inst::kIdVfixupimmsd: case Inst::kIdVfixupimmss:
      return FormatterInternal_formatImmBits(sb, immByte, vfixupimmxx, ASMJIT_ARRAY_SIZE(vfixupimmxx));

    case Inst::kIdVfpclasspd: case Inst::kIdVfpclassps:
    case Inst::kIdVfpclasssd: case Inst::kIdVfpclassss:
      return FormatterInternal_formatImmBits(sb, immByte, vfpclassxx, ASMJIT_ARRAY_SIZE(vfpclassxx));

    case Inst::kIdVgetmantpd: case Inst::kIdVgetmantps:
    case Inst::kIdVgetmantsd: case Inst::kIdVgetmantss:
      return FormatterInternal_formatImmBits(sb, immByte, vgetmantxx, ASMJIT_ARRAY_SIZE(vgetmantxx));

    case Inst::kIdVpcmpb:  case Inst::kIdVpcmpd:
    case Inst::kIdVpcmpq:  case Inst::kIdVpcmpw:
    case Inst::kIdVpcmpub: case Inst::kIdVpcmpud:
    case Inst::kIdVpcmpuq: case Inst::kIdVpcmpuw:
      return FormatterInternal_formatImmText(sb, immByte, 3, 0, vpcmpx);

    case Inst::kIdVpcomb:  case Inst::kIdVpcomd:
    case Inst::kIdVpcomq:  case Inst::kIdVpcomw:
    case Inst::kIdVpcomub: case Inst::kIdVpcomud:
    case Inst::kIdVpcomuq: case Inst::kIdVpcomuw:
      return FormatterInternal_formatImmText(sb, immByte, 3, 0, vpcomx);

    case Inst::kIdVrangepd: case Inst::kIdVrangeps:
    case Inst::kIdVrangesd: case Inst::kIdVrangess:
      return FormatterInternal_formatImmBits(sb, immByte, vrangexx, ASMJIT_ARRAY_SIZE(vrangexx));

    case Inst::kIdVreducepd:   case Inst::kIdVreduceps:
    case Inst::kIdVreducesd:   case Inst::kIdVreducess:
    case Inst::kIdVrndscalepd: case Inst::kIdVrndscaleps:
    case Inst::kIdVrndscalesd: case Inst::kIdVrndscaless:
      return FormatterInternal_formatImmBits(sb, immByte, vreducexx_vrndscalexx, ASMJIT_ARRAY_SIZE(vreducexx_vrndscalexx));

    case Inst::kIdVshuff32x4: case Inst::kIdVshuff64x2:
    case Inst::kIdVshufi32x4: case Inst::kIdVshufi64x2: {
      // One selector per 128-bit lane (at least two); two lanes need 1-bit selectors, more lanes need 2 bits.
      const uint32_t laneCount = Support::max<uint32_t>(vecSize / 16, 2u);
      const uint32_t selectorBits = laneCount <= 2 ? 1u : 2u;
      return FormatterInternal_formatImmShuf(sb, immByte, selectorBits, laneCount);
    }

    // Nothing to explain for other instructions.
    default:
      return kErrorOk;
  }
}
|
||||
|
||||
// x86::FormatterInternal - Format Instruction
|
||||
// ===========================================
|
||||
|
||||
// Formats a single X86 instruction (prefixes/options, mnemonic, and operands) and appends it to `sb`.
//
//   - `formatFlags` - controls operand formatting and whether immediates are explained (`kExplainImms`).
//   - `emitter`     - forwarded to operand/register formatters.
//   - `arch`        - architecture used to translate the instruction id to its name.
//   - `inst`        - instruction id, options, and extra register ({k} mask or REP counter).
//   - `operands`    - array of `opCount` operands; formatting stops at the first `None` operand.
//
// An instruction id that is out of range is formatted as `[InstId=#N]`. Returns `kErrorOk` on success or the
// first error reported by `sb` or by a nested formatter - every append is checked so a failure is never dropped.
ASMJIT_FAVOR_SIZE Error FormatterInternal::formatInstruction(
  String& sb,
  FormatFlags formatFlags,
  const BaseEmitter* emitter,
  Arch arch,
  const BaseInst& inst, const Operand_* operands, size_t opCount) noexcept {

  InstId instId = inst.id();
  InstOptions options = inst.options();

  // Format instruction options and instruction mnemonic.
  if (instId < Inst::_kIdCount) {
    // VEX|EVEX options.
    if (Support::test(options, InstOptions::kX86_Vex))
      ASMJIT_PROPAGATE(sb.append("{vex} "));

    if (Support::test(options, InstOptions::kX86_Vex3))
      ASMJIT_PROPAGATE(sb.append("{vex3} "));

    if (Support::test(options, InstOptions::kX86_Evex))
      ASMJIT_PROPAGATE(sb.append("{evex} "));

    // MOD/RM and MOD/MR options (mutually exclusive in the output, MOD/RM wins).
    if (Support::test(options, InstOptions::kX86_ModRM))
      ASMJIT_PROPAGATE(sb.append("{modrm} "));
    else if (Support::test(options, InstOptions::kX86_ModMR))
      ASMJIT_PROPAGATE(sb.append("{modmr} "));

    // SHORT|LONG options.
    if (Support::test(options, InstOptions::kShortForm))
      ASMJIT_PROPAGATE(sb.append("short "));

    if (Support::test(options, InstOptions::kLongForm))
      ASMJIT_PROPAGATE(sb.append("long "));

    // LOCK|XACQUIRE|XRELEASE options.
    if (Support::test(options, InstOptions::kX86_XAcquire))
      ASMJIT_PROPAGATE(sb.append("xacquire "));

    if (Support::test(options, InstOptions::kX86_XRelease))
      ASMJIT_PROPAGATE(sb.append("xrelease "));

    if (Support::test(options, InstOptions::kX86_Lock))
      ASMJIT_PROPAGATE(sb.append("lock "));

    // REP|REPNE options.
    if (Support::test(options, InstOptions::kX86_Rep | InstOptions::kX86_Repne)) {
      ASMJIT_PROPAGATE(sb.append(Support::test(options, InstOptions::kX86_Rep) ? "rep " : "repnz "));

      // The extra register, if present, is shown in braces right after the prefix.
      if (inst.hasExtraReg()) {
        ASMJIT_PROPAGATE(sb.append("{"));
        ASMJIT_PROPAGATE(formatOperand(sb, formatFlags, emitter, arch, inst.extraReg().toReg<BaseReg>()));
        ASMJIT_PROPAGATE(sb.append("} "));
      }
    }

    // REX options.
    if (Support::test(options, InstOptions::kX86_Rex)) {
      const InstOptions kRXBWMask = InstOptions::kX86_OpCodeR |
                                    InstOptions::kX86_OpCodeX |
                                    InstOptions::kX86_OpCodeB |
                                    InstOptions::kX86_OpCodeW ;

      if (Support::test(options, kRXBWMask)) {
        // Explicit REX bits are listed as `rex.rxbw` (only those that are set).
        ASMJIT_PROPAGATE(sb.append("rex."));
        if (Support::test(options, InstOptions::kX86_OpCodeR)) ASMJIT_PROPAGATE(sb.append('r'));
        if (Support::test(options, InstOptions::kX86_OpCodeX)) ASMJIT_PROPAGATE(sb.append('x'));
        if (Support::test(options, InstOptions::kX86_OpCodeB)) ASMJIT_PROPAGATE(sb.append('b'));
        if (Support::test(options, InstOptions::kX86_OpCodeW)) ASMJIT_PROPAGATE(sb.append('w'));
        ASMJIT_PROPAGATE(sb.append(' '));
      }
      else {
        ASMJIT_PROPAGATE(sb.append("rex "));
      }
    }

    ASMJIT_PROPAGATE(InstInternal::instIdToString(arch, instId, sb));
  }
  else {
    ASMJIT_PROPAGATE(sb.appendFormat("[InstId=#%u]", unsigned(instId)));
  }

  for (size_t i = 0; i < opCount; i++) {
    const Operand_& op = operands[i];
    if (op.isNone()) break;

    ASMJIT_PROPAGATE(sb.append(i == 0 ? " " : ", "));
    ASMJIT_PROPAGATE(formatOperand(sb, formatFlags, emitter, arch, op));

    if (op.isImm() && Support::test(formatFlags, FormatFlags::kExplainImms)) {
      // The explanation depends on the widest register operand; 16 bytes is the minimum assumed.
      uint32_t vecSize = 16;
      for (size_t j = 0; j < opCount; j++)
        if (operands[j].isReg())
          vecSize = Support::max<uint32_t>(vecSize, operands[j].size());
      ASMJIT_PROPAGATE(FormatterInternal_explainConst(sb, formatFlags, instId, vecSize, op.as<Imm>()));
    }

    // Support AVX-512 masking - {k}{z}. The mask follows the first operand.
    if (i == 0) {
      if (inst.extraReg().group() == RegGroup::kX86_K) {
        ASMJIT_PROPAGATE(sb.append(" {"));
        ASMJIT_PROPAGATE(formatRegister(sb, formatFlags, emitter, arch, inst.extraReg().type(), inst.extraReg().id()));
        ASMJIT_PROPAGATE(sb.append('}'));

        if (Support::test(options, InstOptions::kX86_ZMask))
          ASMJIT_PROPAGATE(sb.append("{z}"));
      }
      else if (Support::test(options, InstOptions::kX86_ZMask)) {
        ASMJIT_PROPAGATE(sb.append(" {z}"));
      }
    }

    // Support AVX-512 broadcast - {1tox}.
    if (op.isMem() && op.as<Mem>().hasBroadcast()) {
      ASMJIT_PROPAGATE(sb.appendFormat(" {1to%u}", Support::bitMask(uint32_t(op.as<Mem>().getBroadcast()))));
    }
  }

  // Support AVX-512 embedded rounding and suppress-all-exceptions {sae}.
  if (inst.hasOption(InstOptions::kX86_ER | InstOptions::kX86_SAE)) {
    if (inst.hasOption(InstOptions::kX86_ER)) {
      uint32_t bits = uint32_t(inst.options() & InstOptions::kX86_ERMask) >> Support::ConstCTZ<uint32_t(InstOptions::kX86_ERMask)>::value;

      // Each rounding mode name occupies 3 bytes (two characters and a terminator).
      static const char roundingModes[] = "rn\0rd\0ru\0rz";
      ASMJIT_PROPAGATE(sb.appendFormat(", {%s-sae}", roundingModes + bits * 3));
    }
    else {
      ASMJIT_PROPAGATE(sb.append(", {sae}"));
    }
  }

  return kErrorOk;
}
|
||||
|
||||
ASMJIT_END_SUB_NAMESPACE
|
||||
|
||||
#endif // !ASMJIT_NO_LOGGING
|
||||
58
lib/lepton/asmjit/x86/x86formatter_p.h
Normal file
58
lib/lepton/asmjit/x86/x86formatter_p.h
Normal file
@ -0,0 +1,58 @@
|
||||
// This file is part of AsmJit project <https://asmjit.com>
|
||||
//
|
||||
// See asmjit.h or LICENSE.md for license and copyright information
|
||||
// SPDX-License-Identifier: Zlib
|
||||
|
||||
#ifndef ASMJIT_X86_X86FORMATTER_P_H_INCLUDED
|
||||
#define ASMJIT_X86_X86FORMATTER_P_H_INCLUDED
|
||||
|
||||
#include "../core/api-config.h"
|
||||
#ifndef ASMJIT_NO_LOGGING
|
||||
|
||||
#include "../core/formatter.h"
|
||||
#include "../core/string.h"
|
||||
#include "../x86/x86globals.h"
|
||||
|
||||
ASMJIT_BEGIN_SUB_NAMESPACE(x86)
|
||||
|
||||
//! \cond INTERNAL
|
||||
//! \addtogroup asmjit_x86
|
||||
//! \{
|
||||
|
||||
namespace FormatterInternal {
|
||||
|
||||
Error ASMJIT_CDECL formatFeature(
|
||||
String& sb,
|
||||
uint32_t featureId) noexcept;
|
||||
|
||||
Error ASMJIT_CDECL formatRegister(
|
||||
String& sb,
|
||||
FormatFlags flags,
|
||||
const BaseEmitter* emitter,
|
||||
Arch arch,
|
||||
RegType regType,
|
||||
uint32_t regId) noexcept;
|
||||
|
||||
Error ASMJIT_CDECL formatOperand(
|
||||
String& sb,
|
||||
FormatFlags flags,
|
||||
const BaseEmitter* emitter,
|
||||
Arch arch,
|
||||
const Operand_& op) noexcept;
|
||||
|
||||
Error ASMJIT_CDECL formatInstruction(
|
||||
String& sb,
|
||||
FormatFlags flags,
|
||||
const BaseEmitter* emitter,
|
||||
Arch arch,
|
||||
const BaseInst& inst, const Operand_* operands, size_t opCount) noexcept;
|
||||
|
||||
} // {FormatterInternal}
|
||||
|
||||
//! \}
|
||||
//! \endcond
|
||||
|
||||
ASMJIT_END_SUB_NAMESPACE
|
||||
|
||||
#endif // !ASMJIT_NO_LOGGING
|
||||
#endif // ASMJIT_X86_X86FORMATTER_P_H_INCLUDED
|
||||
503
lib/lepton/asmjit/x86/x86func.cpp
Normal file
503
lib/lepton/asmjit/x86/x86func.cpp
Normal file
@ -0,0 +1,503 @@
|
||||
// This file is part of AsmJit project <https://asmjit.com>
|
||||
//
|
||||
// See asmjit.h or LICENSE.md for license and copyright information
|
||||
// SPDX-License-Identifier: Zlib
|
||||
|
||||
#include "../core/api-build_p.h"
|
||||
#if !defined(ASMJIT_NO_X86)
|
||||
|
||||
#include "../x86/x86func_p.h"
|
||||
#include "../x86/x86emithelper_p.h"
|
||||
#include "../x86/x86operand.h"
|
||||
|
||||
ASMJIT_BEGIN_SUB_NAMESPACE(x86)
|
||||
|
||||
namespace FuncInternal {
|
||||
|
||||
// Returns true if `ccId` is one of the calling conventions that `initCallConv()` replaces by the platform's
// default 64-bit calling convention when the target is not 32-bit.
static inline bool shouldThreatAsCDeclIn64BitMode(CallConvId ccId) noexcept {
  switch (ccId) {
    case CallConvId::kCDecl:
    case CallConvId::kStdCall:
    case CallConvId::kThisCall:
    case CallConvId::kFastCall:
    case CallConvId::kRegParm1:
    case CallConvId::kRegParm2:
    case CallConvId::kRegParm3:
      return true;

    default:
      return false;
  }
}
|
||||
|
||||
// Initializes `cc` to describe the X86/X64 calling convention `ccId` for the given `environment`.
//
// In 32-bit mode each supported convention is configured individually. In 64-bit mode the legacy conventions
// accepted by `shouldThreatAsCDeclIn64BitMode()` are first mapped to either `kX64Windows` or `kX64SystemV`
// depending on the target ABI. The id stored in `cc` is the convention that was actually used, which may differ
// from the id requested.
//
// Returns `kErrorOk` on success or `kErrorInvalidArgument` if `ccId` is not supported by the target.
ASMJIT_FAVOR_SIZE Error initCallConv(CallConv& cc, CallConvId ccId, const Environment& environment) noexcept {
  // Ids of the eight legacy general purpose registers.
  constexpr uint32_t kZax = Gp::kIdAx;
  constexpr uint32_t kZbx = Gp::kIdBx;
  constexpr uint32_t kZcx = Gp::kIdCx;
  constexpr uint32_t kZdx = Gp::kIdDx;
  constexpr uint32_t kZsp = Gp::kIdSp;
  constexpr uint32_t kZbp = Gp::kIdBp;
  constexpr uint32_t kZsi = Gp::kIdSi;
  constexpr uint32_t kZdi = Gp::kIdDi;

  const bool isWinAbi = environment.isPlatformWindows() || environment.isMSVC();
  const bool is32Bit = environment.is32Bit();

  // GP registers are saved/restored with their native size: 4 bytes in 32-bit mode, 8 bytes otherwise.
  const uint32_t gpRegSize = is32Bit ? 4u : 8u;

  cc.setArch(environment.arch());

  // Save/restore properties of each register group.
  cc.setSaveRestoreRegSize(RegGroup::kGp, gpRegSize);
  cc.setSaveRestoreRegSize(RegGroup::kVec, 16);
  cc.setSaveRestoreRegSize(RegGroup::kX86_MM, 8);
  cc.setSaveRestoreRegSize(RegGroup::kX86_K, 8);

  cc.setSaveRestoreAlignment(RegGroup::kGp, gpRegSize);
  cc.setSaveRestoreAlignment(RegGroup::kVec, 16);
  cc.setSaveRestoreAlignment(RegGroup::kX86_MM, 8);
  cc.setSaveRestoreAlignment(RegGroup::kX86_K, 8);

  if (is32Bit) {
    // Cleared by conventions that fully describe MM/vector argument passing themselves.
    bool appliesCommonVecRules = true;

    cc.setPreservedRegs(RegGroup::kGp, Support::bitMask(Gp::kIdBx, Gp::kIdSp, Gp::kIdBp, Gp::kIdSi, Gp::kIdDi));
    cc.setNaturalStackAlignment(4);

    switch (ccId) {
      case CallConvId::kCDecl: {
        break;
      }

      case CallConvId::kStdCall: {
        cc.setFlags(CallConvFlags::kCalleePopsStack);
        break;
      }

      case CallConvId::kFastCall: {
        cc.setFlags(CallConvFlags::kCalleePopsStack);
        cc.setPassedOrder(RegGroup::kGp, kZcx, kZdx);
        break;
      }

      case CallConvId::kVectorCall: {
        cc.setFlags(CallConvFlags::kCalleePopsStack);
        cc.setPassedOrder(RegGroup::kGp, kZcx, kZdx);
        cc.setPassedOrder(RegGroup::kVec, 0, 1, 2, 3, 4, 5);
        break;
      }

      case CallConvId::kThisCall: {
        // MinGW (since GCC 4.7.0) uses __thiscall on MS Windows as well, so a Windows ABI always honors it;
        // other targets fall back to __cdecl.
        if (!isWinAbi) {
          ccId = CallConvId::kCDecl;
          break;
        }

        cc.setFlags(CallConvFlags::kCalleePopsStack);
        cc.setPassedOrder(RegGroup::kGp, kZcx);
        break;
      }

      case CallConvId::kRegParm1: {
        cc.setPassedOrder(RegGroup::kGp, kZax);
        break;
      }

      case CallConvId::kRegParm2: {
        cc.setPassedOrder(RegGroup::kGp, kZax, kZdx);
        break;
      }

      case CallConvId::kRegParm3: {
        cc.setPassedOrder(RegGroup::kGp, kZax, kZdx, kZcx);
        break;
      }

      case CallConvId::kLightCall2:
      case CallConvId::kLightCall3:
      case CallConvId::kLightCall4: {
        // LightCallN does not preserve the first N vector registers.
        const uint32_t scratchVecCount = uint32_t(ccId) - uint32_t(CallConvId::kLightCall2) + 2;
        const uint32_t first8Regs = Support::lsbMask<uint32_t>(8);

        cc.setFlags(CallConvFlags::kPassFloatsByVec);
        cc.setPassedOrder(RegGroup::kGp, kZax, kZdx, kZcx, kZsi, kZdi);
        cc.setPassedOrder(RegGroup::kVec, 0, 1, 2, 3, 4, 5, 6, 7);
        cc.setPassedOrder(RegGroup::kX86_K, 0, 1, 2, 3, 4, 5, 6, 7);
        cc.setPassedOrder(RegGroup::kX86_MM, 0, 1, 2, 3, 4, 5, 6, 7);
        cc.setPreservedRegs(RegGroup::kGp, first8Regs);
        cc.setPreservedRegs(RegGroup::kVec, first8Regs & ~Support::lsbMask<uint32_t>(scratchVecCount));

        cc.setNaturalStackAlignment(16);
        appliesCommonVecRules = false;
        break;
      }

      default:
        return DebugUtils::errored(kErrorInvalidArgument);
    }

    if (appliesCommonVecRules) {
      // Compiler vendors disagree on MMX arguments: GCC and MSVC pass the first three in registers and the rest
      // on the stack, whereas Clang passes everything on the stack. Returning MMX values differs too (GCC uses
      // MM0, Clang the EAX:EDX pair). MMX is deprecated, so the GCC/MSVC behavior is simply used here.
      cc.setPassedOrder(RegGroup::kX86_MM, 0, 1, 2);

      // Vector arguments (XMM|YMM|ZMM) are passed in registers unless the function is variadic.
      cc.setPassedOrder(RegGroup::kVec, 0, 1, 2);

      // Variadic functions always receive MM and vector arguments on the stack.
      cc.addFlags(CallConvFlags::kPassVecByStackIfVA);
    }

    if (ccId == CallConvId::kCDecl)
      cc.addFlags(CallConvFlags::kVarArgCompatible);
  }
  else {
    // Many conventions are ignored by C/C++ compilers in 64-bit mode and handled as `__cdecl`, so normalize
    // them to the native convention of the target ABI first.
    if (shouldThreatAsCDeclIn64BitMode(ccId))
      ccId = isWinAbi ? CallConvId::kX64Windows : CallConvId::kX64SystemV;

    switch (ccId) {
      case CallConvId::kX64SystemV: {
        cc.setFlags(CallConvFlags::kPassFloatsByVec |
                    CallConvFlags::kPassMmxByXmm    |
                    CallConvFlags::kVarArgCompatible);
        cc.setNaturalStackAlignment(16);
        cc.setRedZoneSize(128);
        cc.setPassedOrder(RegGroup::kGp, kZdi, kZsi, kZdx, kZcx, 8, 9);
        cc.setPassedOrder(RegGroup::kVec, 0, 1, 2, 3, 4, 5, 6, 7);
        cc.setPreservedRegs(RegGroup::kGp, Support::bitMask(kZbx, kZsp, kZbp, 12, 13, 14, 15));
        break;
      }

      case CallConvId::kX64Windows: {
        cc.setStrategy(CallConvStrategy::kX64Windows);
        cc.setFlags(CallConvFlags::kPassFloatsByVec |
                    CallConvFlags::kIndirectVecArgs |
                    CallConvFlags::kPassMmxByGp     |
                    CallConvFlags::kVarArgCompatible);
        cc.setNaturalStackAlignment(16);
        // At most 4 arguments are passed in registers and each reserves 8 bytes of the spill zone.
        cc.setSpillZoneSize(4 * 8);
        cc.setPassedOrder(RegGroup::kGp, kZcx, kZdx, 8, 9);
        cc.setPassedOrder(RegGroup::kVec, 0, 1, 2, 3);
        cc.setPreservedRegs(RegGroup::kGp, Support::bitMask(kZbx, kZsp, kZbp, kZsi, kZdi, 12, 13, 14, 15));
        cc.setPreservedRegs(RegGroup::kVec, Support::bitMask(6, 7, 8, 9, 10, 11, 12, 13, 14, 15));
        break;
      }

      case CallConvId::kVectorCall: {
        cc.setStrategy(CallConvStrategy::kX64VectorCall);
        cc.setFlags(CallConvFlags::kPassFloatsByVec |
                    CallConvFlags::kPassMmxByGp     );
        cc.setNaturalStackAlignment(16);
        // At most 6 arguments are passed in registers and each reserves 8 bytes of the spill zone.
        cc.setSpillZoneSize(6 * 8);
        cc.setPassedOrder(RegGroup::kGp, kZcx, kZdx, 8, 9);
        cc.setPassedOrder(RegGroup::kVec, 0, 1, 2, 3, 4, 5);
        cc.setPreservedRegs(RegGroup::kGp, Support::bitMask(kZbx, kZsp, kZbp, kZsi, kZdi, 12, 13, 14, 15));
        cc.setPreservedRegs(RegGroup::kVec, Support::bitMask(6, 7, 8, 9, 10, 11, 12, 13, 14, 15));
        break;
      }

      case CallConvId::kLightCall2:
      case CallConvId::kLightCall3:
      case CallConvId::kLightCall4: {
        // LightCallN does not preserve the first N vector registers.
        const uint32_t scratchVecCount = uint32_t(ccId) - uint32_t(CallConvId::kLightCall2) + 2;

        cc.setFlags(CallConvFlags::kPassFloatsByVec);
        cc.setNaturalStackAlignment(16);
        cc.setPassedOrder(RegGroup::kGp, kZax, kZdx, kZcx, kZsi, kZdi);
        cc.setPassedOrder(RegGroup::kVec, 0, 1, 2, 3, 4, 5, 6, 7);
        cc.setPassedOrder(RegGroup::kX86_K, 0, 1, 2, 3, 4, 5, 6, 7);
        cc.setPassedOrder(RegGroup::kX86_MM, 0, 1, 2, 3, 4, 5, 6, 7);

        cc.setPreservedRegs(RegGroup::kGp, Support::lsbMask<uint32_t>(16));
        cc.setPreservedRegs(RegGroup::kVec, ~Support::lsbMask<uint32_t>(scratchVecCount));
        break;
      }

      default:
        return DebugUtils::errored(kErrorInvalidArgument);
    }
  }

  // Store the effective calling convention, which may differ from the one requested.
  cc.setId(ccId);
  return kErrorOk;
}
|
||||
|
||||
// Splits a 64-bit integer value pack into two 32-bit values when targeting a 32-bit architecture.
//
// Only the type of the first value in `pack` is inspected. If it is `kInt64` or `kUInt64` and the calling
// convention of `func` targets a 32-bit architecture, the first value becomes `kUInt32` and the second value
// gets the 32-bit type with the signedness of the original (`typeId - 2`). Other packs are left untouched.
ASMJIT_FAVOR_SIZE void unpackValues(FuncDetail& func, FuncValuePack& pack) noexcept {
  const TypeId firstTypeId = pack[0].typeId();
  const bool is64BitInt = firstTypeId == TypeId::kInt64 || firstTypeId == TypeId::kUInt64;

  // Nothing to do unless a 64-bit integer has to be represented on a 32-bit target.
  if (!is64BitInt || !Environment::is32Bit(func.callConv().arch()))
    return;

  pack[0].initTypeId(TypeId::kUInt32);
  pack[1].initTypeId(TypeId(uint32_t(firstTypeId) - 2));
}
|
||||
|
||||
ASMJIT_FAVOR_SIZE Error initFuncDetail(FuncDetail& func, const FuncSignature& signature, uint32_t registerSize) noexcept {
|
||||
const CallConv& cc = func.callConv();
|
||||
Arch arch = cc.arch();
|
||||
uint32_t stackOffset = cc._spillZoneSize;
|
||||
uint32_t argCount = func.argCount();
|
||||
|
||||
// Up to two return values can be returned in GP registers.
|
||||
static const uint8_t gpReturnIndexes[4] = {
|
||||
uint8_t(Gp::kIdAx),
|
||||
uint8_t(Gp::kIdDx),
|
||||
uint8_t(BaseReg::kIdBad),
|
||||
uint8_t(BaseReg::kIdBad)
|
||||
};
|
||||
|
||||
if (func.hasRet()) {
|
||||
unpackValues(func, func._rets);
|
||||
for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
|
||||
TypeId typeId = func._rets[valueIndex].typeId();
|
||||
|
||||
// Terminate at the first void type (end of the pack).
|
||||
if (typeId == TypeId::kVoid)
|
||||
break;
|
||||
|
||||
switch (typeId) {
|
||||
case TypeId::kInt64:
|
||||
case TypeId::kUInt64: {
|
||||
if (gpReturnIndexes[valueIndex] != BaseReg::kIdBad)
|
||||
func._rets[valueIndex].initReg(RegType::kX86_Gpq, gpReturnIndexes[valueIndex], typeId);
|
||||
else
|
||||
return DebugUtils::errored(kErrorInvalidState);
|
||||
break;
|
||||
}
|
||||
|
||||
case TypeId::kInt8:
|
||||
case TypeId::kInt16:
|
||||
case TypeId::kInt32: {
|
||||
if (gpReturnIndexes[valueIndex] != BaseReg::kIdBad)
|
||||
func._rets[valueIndex].initReg(RegType::kX86_Gpd, gpReturnIndexes[valueIndex], TypeId::kInt32);
|
||||
else
|
||||
return DebugUtils::errored(kErrorInvalidState);
|
||||
break;
|
||||
}
|
||||
|
||||
case TypeId::kUInt8:
|
||||
case TypeId::kUInt16:
|
||||
case TypeId::kUInt32: {
|
||||
if (gpReturnIndexes[valueIndex] != BaseReg::kIdBad)
|
||||
func._rets[valueIndex].initReg(RegType::kX86_Gpd, gpReturnIndexes[valueIndex], TypeId::kUInt32);
|
||||
else
|
||||
return DebugUtils::errored(kErrorInvalidState);
|
||||
break;
|
||||
}
|
||||
|
||||
case TypeId::kFloat32:
|
||||
case TypeId::kFloat64: {
|
||||
RegType regType = Environment::is32Bit(arch) ? RegType::kX86_St : RegType::kX86_Xmm;
|
||||
func._rets[valueIndex].initReg(regType, valueIndex, typeId);
|
||||
break;
|
||||
}
|
||||
|
||||
case TypeId::kFloat80: {
|
||||
// 80-bit floats are always returned by FP0.
|
||||
func._rets[valueIndex].initReg(RegType::kX86_St, valueIndex, typeId);
|
||||
break;
|
||||
}
|
||||
|
||||
case TypeId::kMmx32:
|
||||
case TypeId::kMmx64: {
|
||||
// MM registers are returned through XMM (SystemV) or GPQ (Win64).
|
||||
RegType regType = RegType::kX86_Mm;
|
||||
uint32_t regIndex = valueIndex;
|
||||
if (Environment::is64Bit(arch)) {
|
||||
regType = cc.strategy() == CallConvStrategy::kDefault ? RegType::kX86_Xmm : RegType::kX86_Gpq;
|
||||
regIndex = cc.strategy() == CallConvStrategy::kDefault ? valueIndex : gpReturnIndexes[valueIndex];
|
||||
|
||||
if (regIndex == BaseReg::kIdBad)
|
||||
return DebugUtils::errored(kErrorInvalidState);
|
||||
}
|
||||
|
||||
func._rets[valueIndex].initReg(regType, regIndex, typeId);
|
||||
break;
|
||||
}
|
||||
|
||||
default: {
|
||||
func._rets[valueIndex].initReg(vecTypeIdToRegType(typeId), valueIndex, typeId);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
switch (cc.strategy()) {
|
||||
case CallConvStrategy::kDefault: {
|
||||
uint32_t gpzPos = 0;
|
||||
uint32_t vecPos = 0;
|
||||
|
||||
for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) {
|
||||
unpackValues(func, func._args[argIndex]);
|
||||
|
||||
for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
|
||||
FuncValue& arg = func._args[argIndex][valueIndex];
|
||||
|
||||
// Terminate if there are no more arguments in the pack.
|
||||
if (!arg)
|
||||
break;
|
||||
|
||||
TypeId typeId = arg.typeId();
|
||||
|
||||
if (TypeUtils::isInt(typeId)) {
|
||||
uint32_t regId = BaseReg::kIdBad;
|
||||
|
||||
if (gpzPos < CallConv::kMaxRegArgsPerGroup)
|
||||
regId = cc._passedOrder[RegGroup::kGp].id[gpzPos];
|
||||
|
||||
if (regId != BaseReg::kIdBad) {
|
||||
RegType regType = typeId <= TypeId::kUInt32 ? RegType::kX86_Gpd : RegType::kX86_Gpq;
|
||||
arg.assignRegData(regType, regId);
|
||||
func.addUsedRegs(RegGroup::kGp, Support::bitMask(regId));
|
||||
gpzPos++;
|
||||
}
|
||||
else {
|
||||
uint32_t size = Support::max<uint32_t>(TypeUtils::sizeOf(typeId), registerSize);
|
||||
arg.assignStackOffset(int32_t(stackOffset));
|
||||
stackOffset += size;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (TypeUtils::isFloat(typeId) || TypeUtils::isVec(typeId)) {
|
||||
uint32_t regId = BaseReg::kIdBad;
|
||||
|
||||
if (vecPos < CallConv::kMaxRegArgsPerGroup)
|
||||
regId = cc._passedOrder[RegGroup::kVec].id[vecPos];
|
||||
|
||||
if (TypeUtils::isFloat(typeId)) {
|
||||
// If this is a float, but `kFlagPassFloatsByVec` is false, we have to use stack instead. This should
|
||||
// be only used by 32-bit calling conventions.
|
||||
if (!cc.hasFlag(CallConvFlags::kPassFloatsByVec))
|
||||
regId = BaseReg::kIdBad;
|
||||
}
|
||||
else {
|
||||
// Pass vector registers via stack if this is a variable arguments function. This should be only used
|
||||
// by 32-bit calling conventions.
|
||||
if (signature.hasVarArgs() && cc.hasFlag(CallConvFlags::kPassVecByStackIfVA))
|
||||
regId = BaseReg::kIdBad;
|
||||
}
|
||||
|
||||
if (regId != BaseReg::kIdBad) {
|
||||
arg.initTypeId(typeId);
|
||||
arg.assignRegData(vecTypeIdToRegType(typeId), regId);
|
||||
func.addUsedRegs(RegGroup::kVec, Support::bitMask(regId));
|
||||
vecPos++;
|
||||
}
|
||||
else {
|
||||
uint32_t size = TypeUtils::sizeOf(typeId);
|
||||
arg.assignStackOffset(int32_t(stackOffset));
|
||||
stackOffset += size;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case CallConvStrategy::kX64Windows:
|
||||
case CallConvStrategy::kX64VectorCall: {
|
||||
// Both X64 and VectorCall behave similarly - arguments are indexed from left to right. The position of the
|
||||
// argument determines in which register the argument is allocated, so it's either GP or one of XMM/YMM/ZMM
|
||||
// registers.
|
||||
//
|
||||
// [ X64 ] [VecCall]
|
||||
// Index: #0 #1 #2 #3 #4 #5
|
||||
//
|
||||
// GP : RCX RDX R8 R9
|
||||
// VEC : XMM0 XMM1 XMM2 XMM3 XMM4 XMM5
|
||||
//
|
||||
// For example function `f(int a, double b, int c, double d)` will be:
|
||||
//
|
||||
// (a) (b) (c) (d)
|
||||
// RCX XMM1 R8 XMM3
|
||||
//
|
||||
// Unused vector registers are used by HVA.
|
||||
bool isVectorCall = (cc.strategy() == CallConvStrategy::kX64VectorCall);
|
||||
|
||||
for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) {
|
||||
unpackValues(func, func._args[argIndex]);
|
||||
|
||||
for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
|
||||
FuncValue& arg = func._args[argIndex][valueIndex];
|
||||
|
||||
// Terminate if there are no more arguments in the pack.
|
||||
if (!arg)
|
||||
break;
|
||||
|
||||
TypeId typeId = arg.typeId();
|
||||
uint32_t size = TypeUtils::sizeOf(typeId);
|
||||
|
||||
if (TypeUtils::isInt(typeId) || TypeUtils::isMmx(typeId)) {
|
||||
uint32_t regId = BaseReg::kIdBad;
|
||||
|
||||
if (argIndex < CallConv::kMaxRegArgsPerGroup)
|
||||
regId = cc._passedOrder[RegGroup::kGp].id[argIndex];
|
||||
|
||||
if (regId != BaseReg::kIdBad) {
|
||||
RegType regType = size <= 4 && !TypeUtils::isMmx(typeId) ? RegType::kX86_Gpd : RegType::kX86_Gpq;
|
||||
arg.assignRegData(regType, regId);
|
||||
func.addUsedRegs(RegGroup::kGp, Support::bitMask(regId));
|
||||
}
|
||||
else {
|
||||
arg.assignStackOffset(int32_t(stackOffset));
|
||||
stackOffset += 8;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (TypeUtils::isFloat(typeId) || TypeUtils::isVec(typeId)) {
|
||||
uint32_t regId = BaseReg::kIdBad;
|
||||
|
||||
if (argIndex < CallConv::kMaxRegArgsPerGroup)
|
||||
regId = cc._passedOrder[RegGroup::kVec].id[argIndex];
|
||||
|
||||
if (regId != BaseReg::kIdBad) {
|
||||
// X64-ABI doesn't allow vector types (XMM|YMM|ZMM) to be passed via registers, however, VectorCall
|
||||
// was designed for that purpose.
|
||||
if (TypeUtils::isFloat(typeId) || isVectorCall) {
|
||||
RegType regType = vecTypeIdToRegType(typeId);
|
||||
arg.assignRegData(regType, regId);
|
||||
func.addUsedRegs(RegGroup::kVec, Support::bitMask(regId));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Passed via stack if the argument is float/double or indirectly. The trap is - if the argument is
|
||||
// passed indirectly, the address can be passed via register, if the argument's index has GP one.
|
||||
if (TypeUtils::isFloat(typeId)) {
|
||||
arg.assignStackOffset(int32_t(stackOffset));
|
||||
}
|
||||
else {
|
||||
uint32_t gpRegId = cc._passedOrder[RegGroup::kGp].id[argIndex];
|
||||
if (gpRegId != BaseReg::kIdBad)
|
||||
arg.assignRegData(RegType::kX86_Gpq, gpRegId);
|
||||
else
|
||||
arg.assignStackOffset(int32_t(stackOffset));
|
||||
arg.addFlags(FuncValue::kFlagIsIndirect);
|
||||
}
|
||||
|
||||
// Always 8 bytes (float/double/pointer).
|
||||
stackOffset += 8;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
func._argStackSize = stackOffset;
|
||||
return kErrorOk;
|
||||
}
|
||||
|
||||
} // {FuncInternal}
|
||||
|
||||
ASMJIT_END_SUB_NAMESPACE
|
||||
|
||||
#endif // !ASMJIT_NO_X86
|
||||
33
lib/lepton/asmjit/x86/x86func_p.h
Normal file
33
lib/lepton/asmjit/x86/x86func_p.h
Normal file
@ -0,0 +1,33 @@
|
||||
// This file is part of AsmJit project <https://asmjit.com>
|
||||
//
|
||||
// See asmjit.h or LICENSE.md for license and copyright information
|
||||
// SPDX-License-Identifier: Zlib
|
||||
|
||||
#ifndef ASMJIT_X86_X86FUNC_P_H_INCLUDED
|
||||
#define ASMJIT_X86_X86FUNC_P_H_INCLUDED
|
||||
|
||||
#include "../core/func.h"
|
||||
|
||||
ASMJIT_BEGIN_SUB_NAMESPACE(x86)
|
||||
|
||||
//! \cond INTERNAL
|
||||
//! \addtogroup asmjit_x86
|
||||
//! \{
|
||||
|
||||
//! X86-specific function API (calling conventions and other utilities).
|
||||
namespace FuncInternal {
|
||||
|
||||
//! Initialize `CallConv` structure (X86 specific).
|
||||
Error initCallConv(CallConv& cc, CallConvId ccId, const Environment& environment) noexcept;
|
||||
|
||||
//! Initialize `FuncDetail` (X86 specific).
|
||||
Error initFuncDetail(FuncDetail& func, const FuncSignature& signature, uint32_t registerSize) noexcept;
|
||||
|
||||
} // {FuncInternal}
|
||||
|
||||
//! \}
|
||||
//! \endcond
|
||||
|
||||
ASMJIT_END_SUB_NAMESPACE
|
||||
|
||||
#endif // ASMJIT_X86_X86FUNC_P_H_INCLUDED
|
||||
2169
lib/lepton/asmjit/x86/x86globals.h
Normal file
2169
lib/lepton/asmjit/x86/x86globals.h
Normal file
File diff suppressed because it is too large
Load Diff
1732
lib/lepton/asmjit/x86/x86instapi.cpp
Normal file
1732
lib/lepton/asmjit/x86/x86instapi.cpp
Normal file
File diff suppressed because it is too large
Load Diff
41
lib/lepton/asmjit/x86/x86instapi_p.h
Normal file
41
lib/lepton/asmjit/x86/x86instapi_p.h
Normal file
@ -0,0 +1,41 @@
|
||||
// This file is part of AsmJit project <https://asmjit.com>
|
||||
//
|
||||
// See asmjit.h or LICENSE.md for license and copyright information
|
||||
// SPDX-License-Identifier: Zlib
|
||||
|
||||
#ifndef ASMJIT_X86_X86INSTAPI_P_H_INCLUDED
|
||||
#define ASMJIT_X86_X86INSTAPI_P_H_INCLUDED
|
||||
|
||||
#include "../core/inst.h"
|
||||
#include "../core/operand.h"
|
||||
|
||||
ASMJIT_BEGIN_SUB_NAMESPACE(x86)
|
||||
|
||||
//! \cond INTERNAL
|
||||
//! \addtogroup asmjit_x86
|
||||
//! \{
|
||||
|
||||
namespace InstInternal {
|
||||
|
||||
#ifndef ASMJIT_NO_TEXT
|
||||
Error ASMJIT_CDECL instIdToString(Arch arch, InstId instId, String& output) noexcept;
|
||||
InstId ASMJIT_CDECL stringToInstId(Arch arch, const char* s, size_t len) noexcept;
|
||||
#endif // !ASMJIT_NO_TEXT
|
||||
|
||||
#ifndef ASMJIT_NO_VALIDATION
|
||||
Error ASMJIT_CDECL validate(Arch arch, const BaseInst& inst, const Operand_* operands, size_t opCount, ValidationFlags validationFlags) noexcept;
|
||||
#endif // !ASMJIT_NO_VALIDATION
|
||||
|
||||
#ifndef ASMJIT_NO_INTROSPECTION
|
||||
Error ASMJIT_CDECL queryRWInfo(Arch arch, const BaseInst& inst, const Operand_* operands, size_t opCount, InstRWInfo* out) noexcept;
|
||||
Error ASMJIT_CDECL queryFeatures(Arch arch, const BaseInst& inst, const Operand_* operands, size_t opCount, CpuFeatures* out) noexcept;
|
||||
#endif // !ASMJIT_NO_INTROSPECTION
|
||||
|
||||
} // {InstInternal}
|
||||
|
||||
//! \}
|
||||
//! \endcond
|
||||
|
||||
ASMJIT_END_SUB_NAMESPACE
|
||||
|
||||
#endif // ASMJIT_X86_X86INSTAPI_P_H_INCLUDED
|
||||
4427
lib/lepton/asmjit/x86/x86instdb.cpp
Normal file
4427
lib/lepton/asmjit/x86/x86instdb.cpp
Normal file
File diff suppressed because it is too large
Load Diff
563
lib/lepton/asmjit/x86/x86instdb.h
Normal file
563
lib/lepton/asmjit/x86/x86instdb.h
Normal file
@ -0,0 +1,563 @@
|
||||
// This file is part of AsmJit project <https://asmjit.com>
|
||||
//
|
||||
// See asmjit.h or LICENSE.md for license and copyright information
|
||||
// SPDX-License-Identifier: Zlib
|
||||
|
||||
#ifndef ASMJIT_X86_X86INSTDB_H_INCLUDED
|
||||
#define ASMJIT_X86_X86INSTDB_H_INCLUDED
|
||||
|
||||
#include "../x86/x86globals.h"
|
||||
|
||||
ASMJIT_BEGIN_SUB_NAMESPACE(x86)
|
||||
|
||||
//! \addtogroup asmjit_x86
|
||||
//! \{
|
||||
|
||||
//! Instruction database (X86).
|
||||
namespace InstDB {
|
||||
|
||||
//! Operation modes an instruction can be supported in (a two-bit mask: one bit per mode).
enum class Mode : uint8_t {
  kNone = 0u,          //!< Invalid mode (no mode supported).
  kX86  = 1u << 0,     //!< X86 mode supported.
  kX64  = 1u << 1,     //!< X64 mode supported.
  kAny  = kX86 | kX64  //!< Both X86 and X64 modes supported.
};
|
||||
ASMJIT_DEFINE_ENUM_FLAGS(Mode)
|
||||
|
||||
//! Converts architecture to operation mode, see \ref Mode.
|
||||
static constexpr Mode modeFromArch(Arch arch) noexcept {
|
||||
return arch == Arch::kX86 ? Mode::kX86 :
|
||||
arch == Arch::kX64 ? Mode::kX64 : Mode::kNone;
|
||||
}
|
||||
|
||||
//! Flags describing which operand kinds an \ref OpSignature accepts.
//!
//! Single-bit flags are spelled as shifts; the group masks are spelled as hexadecimal literals.
enum class OpFlags : uint64_t {
  //! No operand flags.
  kNone = 0u,

  // Register operands (bits 0..15).
  kRegGpbLo       = uint64_t(1) << 0,      //!< Operand can be low 8-bit GPB register.
  kRegGpbHi       = uint64_t(1) << 1,      //!< Operand can be high 8-bit GPB register.
  kRegGpw         = uint64_t(1) << 2,      //!< Operand can be 16-bit GPW register.
  kRegGpd         = uint64_t(1) << 3,      //!< Operand can be 32-bit GPD register.
  kRegGpq         = uint64_t(1) << 4,      //!< Operand can be 64-bit GPQ register.
  kRegXmm         = uint64_t(1) << 5,      //!< Operand can be 128-bit XMM register.
  kRegYmm         = uint64_t(1) << 6,      //!< Operand can be 256-bit YMM register.
  kRegZmm         = uint64_t(1) << 7,      //!< Operand can be 512-bit ZMM register.
  kRegMm          = uint64_t(1) << 8,      //!< Operand can be 64-bit MM register.
  kRegKReg        = uint64_t(1) << 9,      //!< Operand can be 64-bit K register.
  kRegSReg        = uint64_t(1) << 10,     //!< Operand can be SReg (segment register).
  kRegCReg        = uint64_t(1) << 11,     //!< Operand can be CReg (control register).
  kRegDReg        = uint64_t(1) << 12,     //!< Operand can be DReg (debug register).
  kRegSt          = uint64_t(1) << 13,     //!< Operand can be 80-bit ST register (X87).
  kRegBnd         = uint64_t(1) << 14,     //!< Operand can be 128-bit BND register.
  kRegTmm         = uint64_t(1) << 15,     //!< Operand can be 0..8192-bit TMM register.
  kRegMask        = 0x000000000000FFFFu,   //!< Mask of all possible register types.

  // Scalar memory operands (bits 18..28).
  kMemUnspecified = uint64_t(1) << 18,     //!< Operand can be a scalar memory pointer without size.
  kMem8           = uint64_t(1) << 19,     //!< Operand can be an 8-bit memory pointer.
  kMem16          = uint64_t(1) << 20,     //!< Operand can be a 16-bit memory pointer.
  kMem32          = uint64_t(1) << 21,     //!< Operand can be a 32-bit memory pointer.
  kMem48          = uint64_t(1) << 22,     //!< Operand can be a 48-bit memory pointer (FAR pointers only).
  kMem64          = uint64_t(1) << 23,     //!< Operand can be a 64-bit memory pointer.
  kMem80          = uint64_t(1) << 24,     //!< Operand can be an 80-bit memory pointer.
  kMem128         = uint64_t(1) << 25,     //!< Operand can be a 128-bit memory pointer.
  kMem256         = uint64_t(1) << 26,     //!< Operand can be a 256-bit memory pointer.
  kMem512         = uint64_t(1) << 27,     //!< Operand can be a 512-bit memory pointer.
  kMem1024        = uint64_t(1) << 28,     //!< Operand can be a 1024-bit memory pointer.
  kMemMask        = 0x000000001FFC0000u,   //!< Mask of all possible scalar memory types.

  // Vector memory operands (bits 30..35).
  kVm32x          = uint64_t(1) << 30,     //!< Operand can be a vm32x (vector) pointer.
  kVm32y          = uint64_t(1) << 31,     //!< Operand can be a vm32y (vector) pointer.
  kVm32z          = uint64_t(1) << 32,     //!< Operand can be a vm32z (vector) pointer.
  kVm64x          = uint64_t(1) << 33,     //!< Operand can be a vm64x (vector) pointer.
  kVm64y          = uint64_t(1) << 34,     //!< Operand can be a vm64y (vector) pointer.
  kVm64z          = uint64_t(1) << 35,     //!< Operand can be a vm64z (vector) pointer.
  kVmMask         = 0x0000000FC0000000u,   //!< Mask of all possible vector memory types.

  // Immediate operands (bits 36..45).
  kImmI4          = uint64_t(1) << 36,     //!< Operand can be signed 4-bit immediate.
  kImmU4          = uint64_t(1) << 37,     //!< Operand can be unsigned 4-bit immediate.
  kImmI8          = uint64_t(1) << 38,     //!< Operand can be signed 8-bit immediate.
  kImmU8          = uint64_t(1) << 39,     //!< Operand can be unsigned 8-bit immediate.
  kImmI16         = uint64_t(1) << 40,     //!< Operand can be signed 16-bit immediate.
  kImmU16         = uint64_t(1) << 41,     //!< Operand can be unsigned 16-bit immediate.
  kImmI32         = uint64_t(1) << 42,     //!< Operand can be signed 32-bit immediate.
  kImmU32         = uint64_t(1) << 43,     //!< Operand can be unsigned 32-bit immediate.
  kImmI64         = uint64_t(1) << 44,     //!< Operand can be signed 64-bit immediate.
  kImmU64         = uint64_t(1) << 45,     //!< Operand can be unsigned 64-bit immediate.
  kImmMask        = 0x00003FF000000000u,   //!< Mask of all immediate types.

  // Relative displacement operands (bits 46..47).
  kRel8           = uint64_t(1) << 46,     //!< Operand can be relative 8-bit displacement.
  kRel32          = uint64_t(1) << 47,     //!< Operand can be relative 32-bit displacement.
  kRelMask        = 0x0000C00000000000u,   //!< Mask of all relative displacement types.

  // Modifier flags (bits 48..55).
  kFlagMemBase    = uint64_t(1) << 48,     //!< Flag: Only memory base is allowed (no index, no offset).
  kFlagMemDs      = uint64_t(1) << 49,     //!< Flag: Implicit memory operand's DS segment.
  kFlagMemEs      = uint64_t(1) << 50,     //!< Flag: Implicit memory operand's ES segment.

  kFlagMib        = uint64_t(1) << 51,     //!< Flag: Operand is MIB (base+index) pointer.
  kFlagTMem       = uint64_t(1) << 52,     //!< Flag: Operand is TMEM (sib_mem), AMX memory pointer.

  kFlagImplicit   = uint64_t(1) << 55,     //!< Flag: Operand is implicit.
  kFlagMask       = 0x009F000000000000u,   //!< Mask of all flags.

  //! Contains mask of all registers, memory operands, immediate operands, and displacement operands.
  kOpMask = kRegMask | kMemMask | kVmMask | kImmMask | kRelMask
};
|
||||
ASMJIT_DEFINE_ENUM_FLAGS(OpFlags)
|
||||
|
||||
//! Operand signature.
|
||||
//!
|
||||
//! Contains all possible operand combinations, memory size information, and a fixed register id (or `BaseReg::kIdBad`
|
||||
//! if fixed id isn't required).
|
||||
struct OpSignature {
|
||||
//! \name Members
|
||||
//! \{
|
||||
|
||||
uint64_t _flags : 56;
|
||||
uint64_t _regMask : 8;
|
||||
|
||||
//! \}
|
||||
|
||||
//! \name Accessors
|
||||
//! \{
|
||||
|
||||
//! Returns operand signature flags.
|
||||
inline OpFlags flags() const noexcept { return (OpFlags)_flags; }
|
||||
|
||||
//! Tests whether the given `flag` is set.
|
||||
inline bool hasFlag(OpFlags flag) const noexcept { return (_flags & uint64_t(flag)) != 0; }
|
||||
|
||||
//! Tests whether this signature contains at least one register operand of any type.
|
||||
inline bool hasReg() const noexcept { return hasFlag(OpFlags::kRegMask); }
|
||||
//! Tests whether this signature contains at least one scalar memory operand of any type.
|
||||
inline bool hasMem() const noexcept { return hasFlag(OpFlags::kMemMask); }
|
||||
//! Tests whether this signature contains at least one vector memory operand of any type.
|
||||
inline bool hasVm() const noexcept { return hasFlag(OpFlags::kVmMask); }
|
||||
//! Tests whether this signature contains at least one immediate operand of any type.
|
||||
inline bool hasImm() const noexcept { return hasFlag(OpFlags::kImmMask); }
|
||||
//! Tests whether this signature contains at least one relative displacement operand of any type.
|
||||
inline bool hasRel() const noexcept { return hasFlag(OpFlags::kRelMask); }
|
||||
|
||||
//! Tests whether the operand is implicit.
|
||||
inline bool isImplicit() const noexcept { return hasFlag(OpFlags::kFlagImplicit); }
|
||||
|
||||
//! Returns a physical register mask.
|
||||
inline RegMask regMask() const noexcept { return _regMask; }
|
||||
|
||||
//! \}
|
||||
};
|
||||
|
||||
ASMJIT_VARAPI const OpSignature _opSignatureTable[];
|
||||
|
||||
//! Instruction signature.
|
||||
//!
|
||||
//! Contains a sequence of operands' combinations and other metadata that defines a single instruction. This data is
|
||||
//! used by instruction validator.
|
||||
struct InstSignature {
|
||||
//! \name Members
|
||||
//! \{
|
||||
|
||||
//! Count of operands in `opIndex` (0..6).
|
||||
uint8_t _opCount : 3;
|
||||
//! Architecture modes supported (X86 / X64).
|
||||
uint8_t _mode : 2;
|
||||
//! Number of implicit operands.
|
||||
uint8_t _implicitOpCount : 3;
|
||||
//! Reserved for future use.
|
||||
uint8_t _reserved;
|
||||
//! Indexes to `OpSignature` table.
|
||||
uint8_t _opSignatureIndexes[Globals::kMaxOpCount];
|
||||
|
||||
//! \}
|
||||
|
||||
//! \name Accessors
|
||||
//! \{
|
||||
|
||||
//! Returns instruction operation mode.
|
||||
inline Mode mode() const noexcept { return (Mode)_mode; }
|
||||
//! Tests whether the instruction supports the given operating mode.
|
||||
inline bool supportsMode(Mode mode) const noexcept { return (uint8_t(_mode) & uint8_t(mode)) != 0; }
|
||||
|
||||
//! Returns the number of operands of this signature.
|
||||
inline uint32_t opCount() const noexcept { return _opCount; }
|
||||
//! Returns the number of implicit operands this signature has.
|
||||
inline uint32_t implicitOpCount() const noexcept { return _implicitOpCount; }
|
||||
//! Tests whether this instruction signature has at least one implicit operand.
|
||||
inline bool hasImplicitOperands() const noexcept { return _implicitOpCount != 0; }
|
||||
|
||||
//! Returns indexes to \ref _opSignatureTable for each operand of the instruction.
|
||||
//!
|
||||
//! \note The returned array always provides indexes for all operands (see \ref Globals::kMaxOpCount) even if the
|
||||
//! instruction provides less operands. Undefined operands have always index of zero.
|
||||
inline const uint8_t* opSignatureIndexes() const noexcept { return _opSignatureIndexes; }
|
||||
|
||||
//! Returns index to \ref _opSignatureTable, corresponding to the requested operand `index` of the instruction.
|
||||
inline uint8_t opSignatureIndex(size_t index) const noexcept {
|
||||
ASMJIT_ASSERT(index < Globals::kMaxOpCount);
|
||||
return _opSignatureIndexes[index];
|
||||
}
|
||||
|
||||
//! Returns \ref OpSignature corresponding to the requested operand `index` of the instruction.
|
||||
inline const OpSignature& opSignature(size_t index) const noexcept {
|
||||
ASMJIT_ASSERT(index < Globals::kMaxOpCount);
|
||||
return _opSignatureTable[_opSignatureIndexes[index]];
|
||||
}
|
||||
|
||||
//! \}
|
||||
};
|
||||
|
||||
ASMJIT_VARAPI const InstSignature _instSignatureTable[];
|
||||
|
||||
//! Instruction flags.
//!
//! Details about instruction encoding, operation, features, and some limitations.
enum class InstFlags : uint32_t {
  //! No flags.
  kNone = 0x00000000u,

  // Instruction Family
  // ------------------
  //
  // Instruction family information.

  //! Instruction that accesses FPU registers.
  kFpu = 0x00000100u,
  //! Instruction that accesses MMX registers (including 3DNOW and GEODE) and EMMS.
  kMmx = 0x00000200u,
  //! Instruction that accesses XMM registers (SSE, AVX, AVX512).
  kVec = 0x00000400u,

  // FPU Flags
  // ---------
  //
  // Used to tell the encoder which memory operand sizes are encodable.

  //! FPU instruction can address `word_ptr` (shared with M80).
  kFpuM16 = 0x00000800u,
  //! FPU instruction can address `dword_ptr`.
  kFpuM32 = 0x00001000u,
  //! FPU instruction can address `qword_ptr`.
  kFpuM64 = 0x00002000u,
  //! FPU instruction can address `tword_ptr` (shared with M16).
  //!
  //! Spelled as an alias of `kFpuM16` so the two values cannot drift apart.
  kFpuM80 = kFpuM16,

  // Prefixes and Encoding Flags
  // ---------------------------
  //
  // These describe optional X86 prefixes that can be used to change the instruction's operation.

  //! Instruction can be prefixed with using the REP(REPE) or REPNE prefix.
  kRep = 0x00004000u,
  //! Rep prefix is accepted, but it has no effect other than being emitted with the instruction (as an extra byte).
  kRepIgnored = 0x00008000u,
  //! Instruction can be prefixed with using the LOCK prefix.
  kLock = 0x00010000u,
  //! Instruction can be prefixed with using the XACQUIRE prefix.
  kXAcquire = 0x00020000u,
  //! Instruction can be prefixed with using the XRELEASE prefix.
  kXRelease = 0x00040000u,
  //! Instruction uses MIB (BNDLDX|BNDSTX) to encode two registers.
  kMib = 0x00080000u,
  //! Instruction uses VSIB instead of legacy SIB.
  kVsib = 0x00100000u,
  //! Instruction uses TSIB (or SIB_MEM) encoding (MODRM followed by SIB).
  kTsib = 0x00200000u,

  // If both `kVex` and `kEvex` flags are specified it means that the instruction can be encoded by either VEX or
  // EVEX prefix. In that case AsmJit checks global options and also instruction options to decide whether to emit
  // VEX or EVEX prefix.

  //! Instruction can be encoded by VEX|XOP (AVX|AVX2|BMI|XOP|...).
  kVex = 0x00400000u,
  //! Instruction can be encoded by EVEX (AVX512).
  kEvex = 0x00800000u,
  //! EVEX encoding is preferred over VEX encoding (AVX512_VNNI vs AVX_VNNI).
  kPreferEvex = 0x01000000u,
  //! EVEX and VEX signatures are compatible.
  kEvexCompat = 0x02000000u,
  //! EVEX instruction requires K register in the first operand (compare instructions).
  kEvexKReg = 0x04000000u,
  //! EVEX instruction requires two operands and K register as a selector (gather instructions).
  kEvexTwoOp = 0x08000000u,
  //! VEX instruction that can be transformed to a compatible EVEX instruction.
  kEvexTransformable = 0x10000000u,

  // Other Flags
  // -----------

  //! Instruction uses consecutive registers.
  //!
  //! Used by V4FMADDPS, V4FMADDSS, V4FNMADDPS, V4FNMADDSS, VP4DPWSSD, VP4DPWSSDS, VP2INTERSECTD, and VP2INTERSECTQ
  //! instructions.
  kConsecutiveRegs = 0x20000000u
};
|
||||
ASMJIT_DEFINE_ENUM_FLAGS(InstFlags)
|
||||
|
||||
//! AVX-512 flags.
//!
//! Describes which AVX-512 decorations (masking, zeroing, rounding control, broadcast, ...) an instruction supports.
enum class Avx512Flags : uint32_t {
  //! No AVX-512 flags.
  kNone = 0,

  //! Internally used in tables, has no meaning (same value as `kNone`).
  k_ = 0x00000000u,
  //! Supports masking {k1..k7}.
  kK = 0x00000001u,
  //! Supports zeroing {z}, must be used together with `kK`.
  kZ = 0x00000002u,
  //! Supports 'embedded-rounding' {er} with implicit {sae}.
  kER = 0x00000004u,
  //! Supports 'suppress-all-exceptions' {sae}.
  kSAE = 0x00000008u,
  //! Supports 16-bit broadcast 'b16'.
  kB16 = 0x00000010u,
  //! Supports 32-bit broadcast 'b32'.
  kB32 = 0x00000020u,
  //! Supports 64-bit broadcast 'b64'.
  kB64 = 0x00000040u,
  //! Operates on a vector of consecutive registers (AVX512_4FMAPS and AVX512_4VNNIW).
  kT4X = 0x00000080u,

  //! Implicit zeroing if {k} masking is used. Using {z} is not valid in this case as it's implicit.
  kImplicitZ = 0x00000100u
};
|
||||
ASMJIT_DEFINE_ENUM_FLAGS(Avx512Flags)
|
||||
|
||||
//! Instruction common information.
|
||||
//!
|
||||
//! Aggregated information shared across one or more instruction.
|
||||
struct CommonInfo {
|
||||
//! Instruction flags.
|
||||
uint32_t _flags;
|
||||
//! Reserved for future use.
|
||||
uint32_t _avx512Flags : 11;
|
||||
//! First `InstSignature` entry in the database.
|
||||
uint32_t _iSignatureIndex : 11;
|
||||
//! Number of relevant `ISignature` entries.
|
||||
uint32_t _iSignatureCount : 5;
|
||||
//! Instruction control flow category, see \ref InstControlFlow.
|
||||
uint32_t _controlFlow : 3;
|
||||
//! Specifies what happens if all source operands share the same register.
|
||||
uint32_t _sameRegHint : 2;
|
||||
|
||||
//! \name Accessors
|
||||
//! \{
|
||||
|
||||
//! Returns instruction flags.
|
||||
inline InstFlags flags() const noexcept { return (InstFlags)_flags; }
|
||||
//! Tests whether the instruction has a `flag`.
|
||||
inline bool hasFlag(InstFlags flag) const noexcept { return Support::test(_flags, flag); }
|
||||
|
||||
//! Returns instruction AVX-512 flags.
|
||||
inline Avx512Flags avx512Flags() const noexcept { return (Avx512Flags)_avx512Flags; }
|
||||
//! Tests whether the instruction has an AVX-512 `flag`.
|
||||
inline bool hasAvx512Flag(Avx512Flags flag) const noexcept { return Support::test(_avx512Flags, flag); }
|
||||
|
||||
//! Tests whether the instruction is FPU instruction.
|
||||
inline bool isFpu() const noexcept { return hasFlag(InstFlags::kFpu); }
|
||||
//! Tests whether the instruction is MMX/3DNOW instruction that accesses MMX registers (includes EMMS and FEMMS).
|
||||
inline bool isMmx() const noexcept { return hasFlag(InstFlags::kMmx); }
|
||||
//! Tests whether the instruction is SSE|AVX|AVX512 instruction that accesses XMM|YMM|ZMM registers.
|
||||
inline bool isVec() const noexcept { return hasFlag(InstFlags::kVec); }
|
||||
//! Tests whether the instruction is SSE+ (SSE4.2, AES, SHA included) instruction that accesses XMM registers.
|
||||
inline bool isSse() const noexcept { return (flags() & (InstFlags::kVec | InstFlags::kVex | InstFlags::kEvex)) == InstFlags::kVec; }
|
||||
//! Tests whether the instruction is AVX+ (FMA included) instruction that accesses XMM|YMM|ZMM registers.
|
||||
inline bool isAvx() const noexcept { return isVec() && isVexOrEvex(); }
|
||||
|
||||
//! Tests whether the instruction can be prefixed with LOCK prefix.
|
||||
inline bool hasLockPrefix() const noexcept { return hasFlag(InstFlags::kLock); }
|
||||
//! Tests whether the instruction can be prefixed with REP (REPE|REPZ) prefix.
|
||||
inline bool hasRepPrefix() const noexcept { return hasFlag(InstFlags::kRep); }
|
||||
//! Tests whether the instruction can be prefixed with XACQUIRE prefix.
|
||||
inline bool hasXAcquirePrefix() const noexcept { return hasFlag(InstFlags::kXAcquire); }
|
||||
//! Tests whether the instruction can be prefixed with XRELEASE prefix.
|
||||
inline bool hasXReleasePrefix() const noexcept { return hasFlag(InstFlags::kXRelease); }
|
||||
|
||||
//! Tests whether the rep prefix is supported by the instruction, but ignored (has no effect).
|
||||
inline bool isRepIgnored() const noexcept { return hasFlag(InstFlags::kRepIgnored); }
|
||||
//! Tests whether the instruction uses MIB.
|
||||
inline bool isMibOp() const noexcept { return hasFlag(InstFlags::kMib); }
|
||||
//! Tests whether the instruction uses VSIB.
|
||||
inline bool isVsibOp() const noexcept { return hasFlag(InstFlags::kVsib); }
|
||||
//! Tests whether the instruction uses TSIB (AMX, instruction requires MOD+SIB).
|
||||
inline bool isTsibOp() const noexcept { return hasFlag(InstFlags::kTsib); }
|
||||
//! Tests whether the instruction uses VEX (can be set together with EVEX if both are encodable).
|
||||
inline bool isVex() const noexcept { return hasFlag(InstFlags::kVex); }
|
||||
//! Tests whether the instruction uses EVEX (can be set together with VEX if both are encodable).
|
||||
inline bool isEvex() const noexcept { return hasFlag(InstFlags::kEvex); }
|
||||
//! Tests whether the instruction uses EVEX (can be set together with VEX if both are encodable).
|
||||
inline bool isVexOrEvex() const noexcept { return hasFlag(InstFlags::kVex | InstFlags::kEvex); }
|
||||
|
||||
//! Tests whether the instruction should prefer EVEX prefix instead of VEX prefix.
|
||||
inline bool preferEvex() const noexcept { return hasFlag(InstFlags::kPreferEvex); }
|
||||
|
||||
inline bool isEvexCompatible() const noexcept { return hasFlag(InstFlags::kEvexCompat); }
|
||||
inline bool isEvexKRegOnly() const noexcept { return hasFlag(InstFlags::kEvexKReg); }
|
||||
inline bool isEvexTwoOpOnly() const noexcept { return hasFlag(InstFlags::kEvexTwoOp); }
|
||||
inline bool isEvexTransformable() const noexcept { return hasFlag(InstFlags::kEvexTransformable); }
|
||||
|
||||
//! Tests whether the instruction supports AVX512 masking {k}.
|
||||
inline bool hasAvx512K() const noexcept { return hasAvx512Flag(Avx512Flags::kK); }
|
||||
//! Tests whether the instruction supports AVX512 zeroing {k}{z}.
|
||||
inline bool hasAvx512Z() const noexcept { return hasAvx512Flag(Avx512Flags::kZ); }
|
||||
//! Tests whether the instruction supports AVX512 embedded-rounding {er}.
|
||||
inline bool hasAvx512ER() const noexcept { return hasAvx512Flag(Avx512Flags::kER); }
|
||||
//! Tests whether the instruction supports AVX512 suppress-all-exceptions {sae}.
|
||||
inline bool hasAvx512SAE() const noexcept { return hasAvx512Flag(Avx512Flags::kSAE); }
|
||||
//! Tests whether the instruction supports AVX512 broadcast (either 32-bit or 64-bit).
|
||||
inline bool hasAvx512B() const noexcept { return hasAvx512Flag(Avx512Flags::kB16 | Avx512Flags::kB32 | Avx512Flags::kB64); }
|
||||
//! Tests whether the instruction supports AVX512 broadcast (16-bit).
|
||||
inline bool hasAvx512B16() const noexcept { return hasAvx512Flag(Avx512Flags::kB16); }
|
||||
//! Tests whether the instruction supports AVX512 broadcast (32-bit).
|
||||
inline bool hasAvx512B32() const noexcept { return hasAvx512Flag(Avx512Flags::kB32); }
|
||||
//! Tests whether the instruction supports AVX512 broadcast (64-bit).
|
||||
inline bool hasAvx512B64() const noexcept { return hasAvx512Flag(Avx512Flags::kB64); }
|
||||
|
||||
// Returns the size of the broadcast - either 2, 4, or 8, or 0 if broadcast is not supported.
|
||||
inline uint32_t broadcastSize() const noexcept {
|
||||
constexpr uint32_t kShift = Support::ConstCTZ<uint32_t(Avx512Flags::kB16)>::value;
|
||||
return (uint32_t(_avx512Flags) & uint32_t(Avx512Flags::kB16 | Avx512Flags::kB32 | Avx512Flags::kB64)) >> (kShift - 1);
|
||||
}
|
||||
|
||||
inline uint32_t signatureIndex() const noexcept { return _iSignatureIndex; }
|
||||
inline uint32_t signatureCount() const noexcept { return _iSignatureCount; }
|
||||
|
||||
inline const InstSignature* signatureData() const noexcept { return _instSignatureTable + _iSignatureIndex; }
|
||||
inline const InstSignature* signatureEnd() const noexcept { return _instSignatureTable + _iSignatureIndex + _iSignatureCount; }
|
||||
|
||||
//! Returns a control flow category of the instruction.
|
||||
inline InstControlFlow controlFlow() const noexcept { return (InstControlFlow)_controlFlow; }
|
||||
|
||||
//! Returns a hint that can be used when both inputs are the same register.
|
||||
inline InstSameRegHint sameRegHint() const noexcept { return (InstSameRegHint)_sameRegHint; }
|
||||
|
||||
//! \}
|
||||
};
|
||||
|
||||
ASMJIT_VARAPI const CommonInfo _commonInfoTable[];
|
||||
|
||||
//! Instruction information.
|
||||
struct InstInfo {
|
||||
//! Index to \ref _nameData.
|
||||
uint32_t _nameDataIndex : 14;
|
||||
//! Index to \ref _commonInfoTable.
|
||||
uint32_t _commonInfoIndex : 10;
|
||||
//! Index to \ref _additionalInfoTable.
|
||||
uint32_t _additionalInfoIndex : 8;
|
||||
|
||||
//! Instruction encoding (internal encoding identifier used by \ref Assembler).
|
||||
uint8_t _encoding;
|
||||
//! Main opcode value (0..255).
|
||||
uint8_t _mainOpcodeValue;
|
||||
//! Index to \ref _mainOpcodeTable` that is combined with \ref _mainOpcodeValue to form the final opcode.
|
||||
uint8_t _mainOpcodeIndex;
|
||||
//! Index to \ref _altOpcodeTable that contains a full alternative opcode.
|
||||
uint8_t _altOpcodeIndex;
|
||||
|
||||
//! \name Accessors
|
||||
//! \{
|
||||
|
||||
//! Returns common information, see \ref CommonInfo.
|
||||
inline const CommonInfo& commonInfo() const noexcept { return _commonInfoTable[_commonInfoIndex]; }
|
||||
|
||||
//! Returns instruction flags, see \ref Flags.
|
||||
inline InstFlags flags() const noexcept { return commonInfo().flags(); }
|
||||
//! Tests whether the instruction has flag `flag`, see \ref Flags.
|
||||
inline bool hasFlag(InstFlags flag) const noexcept { return commonInfo().hasFlag(flag); }
|
||||
|
||||
//! Returns instruction AVX-512 flags, see \ref Avx512Flags.
|
||||
inline Avx512Flags avx512Flags() const noexcept { return commonInfo().avx512Flags(); }
|
||||
//! Tests whether the instruction has an AVX-512 `flag`, see \ref Avx512Flags.
|
||||
inline bool hasAvx512Flag(Avx512Flags flag) const noexcept { return commonInfo().hasAvx512Flag(flag); }
|
||||
|
||||
//! Tests whether the instruction is FPU instruction.
|
||||
inline bool isFpu() const noexcept { return commonInfo().isFpu(); }
|
||||
//! Tests whether the instruction is MMX/3DNOW instruction that accesses MMX registers (includes EMMS and FEMMS).
|
||||
inline bool isMmx() const noexcept { return commonInfo().isMmx(); }
|
||||
//! Tests whether the instruction is SSE|AVX|AVX512 instruction that accesses XMM|YMM|ZMM registers.
|
||||
inline bool isVec() const noexcept { return commonInfo().isVec(); }
|
||||
//! Tests whether the instruction is SSE+ (SSE4.2, AES, SHA included) instruction that accesses XMM registers.
|
||||
inline bool isSse() const noexcept { return commonInfo().isSse(); }
|
||||
//! Tests whether the instruction is AVX+ (FMA included) instruction that accesses XMM|YMM|ZMM registers.
|
||||
inline bool isAvx() const noexcept { return commonInfo().isAvx(); }
|
||||
|
||||
//! Tests whether the instruction can be prefixed with LOCK prefix.
|
||||
inline bool hasLockPrefix() const noexcept { return commonInfo().hasLockPrefix(); }
|
||||
//! Tests whether the instruction can be prefixed with REP (REPE|REPZ) prefix.
|
||||
inline bool hasRepPrefix() const noexcept { return commonInfo().hasRepPrefix(); }
|
||||
//! Tests whether the instruction can be prefixed with XACQUIRE prefix.
|
||||
inline bool hasXAcquirePrefix() const noexcept { return commonInfo().hasXAcquirePrefix(); }
|
||||
//! Tests whether the instruction can be prefixed with XRELEASE prefix.
|
||||
inline bool hasXReleasePrefix() const noexcept { return commonInfo().hasXReleasePrefix(); }
|
||||
|
||||
//! Tests whether the rep prefix is supported by the instruction, but ignored (has no effect).
|
||||
inline bool isRepIgnored() const noexcept { return commonInfo().isRepIgnored(); }
|
||||
//! Tests whether the instruction uses MIB.
|
||||
inline bool isMibOp() const noexcept { return hasFlag(InstFlags::kMib); }
|
||||
//! Tests whether the instruction uses VSIB.
|
||||
inline bool isVsibOp() const noexcept { return hasFlag(InstFlags::kVsib); }
|
||||
//! Tests whether the instruction uses VEX (can be set together with EVEX if both are encodable).
|
||||
inline bool isVex() const noexcept { return hasFlag(InstFlags::kVex); }
|
||||
//! Tests whether the instruction uses EVEX (can be set together with VEX if both are encodable).
|
||||
inline bool isEvex() const noexcept { return hasFlag(InstFlags::kEvex); }
|
||||
//! Tests whether the instruction uses VEX or EVEX.
|
||||
inline bool isVexOrEvex() const noexcept { return hasFlag(InstFlags::kVex | InstFlags::kEvex); }
|
||||
|
||||
inline bool isEvexCompatible() const noexcept { return hasFlag(InstFlags::kEvexCompat); }
|
||||
inline bool isEvexKRegOnly() const noexcept { return hasFlag(InstFlags::kEvexKReg); }
|
||||
inline bool isEvexTwoOpOnly() const noexcept { return hasFlag(InstFlags::kEvexTwoOp); }
|
||||
inline bool isEvexTransformable() const noexcept { return hasFlag(InstFlags::kEvexTransformable); }
|
||||
|
||||
//! Tests whether the instruction supports AVX512 masking {k}.
|
||||
inline bool hasAvx512K() const noexcept { return hasAvx512Flag(Avx512Flags::kK); }
|
||||
//! Tests whether the instruction supports AVX512 zeroing {k}{z}.
|
||||
inline bool hasAvx512Z() const noexcept { return hasAvx512Flag(Avx512Flags::kZ); }
|
||||
//! Tests whether the instruction supports AVX512 embedded-rounding {er}.
|
||||
inline bool hasAvx512ER() const noexcept { return hasAvx512Flag(Avx512Flags::kER); }
|
||||
//! Tests whether the instruction supports AVX512 suppress-all-exceptions {sae}.
|
||||
inline bool hasAvx512SAE() const noexcept { return hasAvx512Flag(Avx512Flags::kSAE); }
|
||||
//! Tests whether the instruction supports AVX512 broadcast (16-bit, 32-bit, or 64-bit).
|
||||
inline bool hasAvx512B() const noexcept { return hasAvx512Flag(Avx512Flags::kB16 | Avx512Flags::kB32 | Avx512Flags::kB64); }
|
||||
//! Tests whether the instruction supports AVX512 broadcast (16-bit).
|
||||
inline bool hasAvx512B16() const noexcept { return hasAvx512Flag(Avx512Flags::kB16); }
|
||||
//! Tests whether the instruction supports AVX512 broadcast (32-bit).
|
||||
inline bool hasAvx512B32() const noexcept { return hasAvx512Flag(Avx512Flags::kB32); }
|
||||
//! Tests whether the instruction supports AVX512 broadcast (64-bit).
|
||||
inline bool hasAvx512B64() const noexcept { return hasAvx512Flag(Avx512Flags::kB64); }
|
||||
|
||||
//! Returns a control flow category of the instruction.
|
||||
inline InstControlFlow controlFlow() const noexcept { return commonInfo().controlFlow(); }
|
||||
//! Returns a hint that can be used when both inputs are the same register.
|
||||
inline InstSameRegHint sameRegHint() const noexcept { return commonInfo().sameRegHint(); }
|
||||
|
||||
inline uint32_t signatureIndex() const noexcept { return commonInfo().signatureIndex(); }
|
||||
inline uint32_t signatureCount() const noexcept { return commonInfo().signatureCount(); }
|
||||
|
||||
inline const InstSignature* signatureData() const noexcept { return commonInfo().signatureData(); }
|
||||
inline const InstSignature* signatureEnd() const noexcept { return commonInfo().signatureEnd(); }
|
||||
|
||||
//! \}
|
||||
};
|
||||
|
||||
ASMJIT_VARAPI const InstInfo _instInfoTable[];
|
||||
|
||||
//! Returns the \ref InstInfo record stored at index `instId` of `_instInfoTable`.
//!
//! `instId` must be an id accepted by `Inst::isDefinedId()`. This is only validated through `ASMJIT_ASSERT`; the
//! table is then indexed directly with `instId`.
static inline const InstInfo& infoById(InstId instId) noexcept {
|
||||
ASMJIT_ASSERT(Inst::isDefinedId(instId));
|
||||
return _instInfoTable[instId];
|
||||
}
|
||||
|
||||
//! \cond INTERNAL
|
||||
static_assert(sizeof(OpSignature) == 8, "InstDB::OpSignature must be 8 bytes long");
|
||||
//! \endcond
|
||||
|
||||
} // {InstDB}
|
||||
|
||||
//! \}
|
||||
|
||||
ASMJIT_END_SUB_NAMESPACE
|
||||
|
||||
#endif // ASMJIT_X86_X86INSTDB_H_INCLUDED
|
||||
311
lib/lepton/asmjit/x86/x86instdb_p.h
Normal file
311
lib/lepton/asmjit/x86/x86instdb_p.h
Normal file
@ -0,0 +1,311 @@
|
||||
// This file is part of AsmJit project <https://asmjit.com>
|
||||
//
|
||||
// See asmjit.h or LICENSE.md for license and copyright information
|
||||
// SPDX-License-Identifier: Zlib
|
||||
|
||||
#ifndef ASMJIT_X86_X86INSTDB_P_H_INCLUDED
|
||||
#define ASMJIT_X86_X86INSTDB_P_H_INCLUDED
|
||||
|
||||
#include "../x86/x86instdb.h"
|
||||
|
||||
ASMJIT_BEGIN_SUB_NAMESPACE(x86)
|
||||
|
||||
//! \cond INTERNAL
|
||||
//! \addtogroup asmjit_x86
|
||||
//! \{
|
||||
|
||||
namespace InstDB {
|
||||
|
||||
//! Instruction encoding (X86).
|
||||
//!
|
||||
//! This is a specific identifier that is used by AsmJit to describe the way each instruction is encoded. Some
|
||||
//! encodings are special only for a single instruction as X86 instruction set contains a lot of legacy encodings,
|
||||
//! and some encodings describe a group of instructions that share some common traits, like MMX, SSE, AVX, AVX512
|
||||
//! instructions, etc...
|
||||
enum EncodingId : uint32_t {
|
||||
kEncodingNone = 0, //!< Never used.
|
||||
kEncodingX86Op, //!< X86 [OP].
|
||||
kEncodingX86Op_Mod11RM, //!< X86 [OP] (opcode with ModRM byte where MOD must be 11b).
|
||||
kEncodingX86Op_Mod11RM_I8, //!< X86 [OP] (opcode with ModRM byte + 8-bit immediate).
|
||||
kEncodingX86Op_xAddr, //!< X86 [OP] (implicit address in the first register operand).
|
||||
kEncodingX86Op_xAX, //!< X86 [OP] (implicit or explicit '?AX' form).
|
||||
kEncodingX86Op_xDX_xAX, //!< X86 [OP] (implicit or explicit '?DX, ?AX' form).
|
||||
kEncodingX86Op_MemZAX, //!< X86 [OP] (implicit or explicit '[EAX|RAX]' form).
|
||||
kEncodingX86I_xAX, //!< X86 [I] (implicit or explicit '?AX' form).
|
||||
kEncodingX86M, //!< X86 [M] (handles 2|4|8-bytes size).
|
||||
kEncodingX86M_NoMemSize, //!< X86 [M] (handles 2|4|8-bytes size, but doesn't consider memory size).
|
||||
kEncodingX86M_NoSize, //!< X86 [M] (doesn't handle any size).
|
||||
kEncodingX86M_GPB, //!< X86 [M] (handles single-byte size).
|
||||
kEncodingX86M_GPB_MulDiv, //!< X86 [M] (like GPB, handles implicit|explicit MUL|DIV|IDIV).
|
||||
kEncodingX86M_Only, //!< X86 [M] (restricted to memory operand of any size).
|
||||
kEncodingX86M_Only_EDX_EAX, //!< X86 [M] (memory operand only, followed by implicit <edx> and <eax>).
|
||||
kEncodingX86M_Nop, //!< X86 [M] (special case of NOP instruction).
|
||||
kEncodingX86R_Native, //!< X86 [R] (register must be either 32-bit or 64-bit depending on arch).
|
||||
kEncodingX86R_FromM, //!< X86 [R] - which specifies memory address.
|
||||
kEncodingX86R32_EDX_EAX, //!< X86 [R32] followed by implicit EDX and EAX.
|
||||
kEncodingX86Rm, //!< X86 [RM] (doesn't handle single-byte size).
|
||||
kEncodingX86Rm_Raw66H, //!< X86 [RM] (used by LZCNT, POPCNT, and TZCNT).
|
||||
kEncodingX86Rm_NoSize, //!< X86 [RM] (doesn't add REX.W prefix if 64-bit reg is used).
|
||||
kEncodingX86Mr, //!< X86 [MR] (doesn't handle single-byte size).
|
||||
kEncodingX86Mr_NoSize, //!< X86 [MR] (doesn't handle any size).
|
||||
kEncodingX86Arith, //!< X86 adc, add, and, cmp, or, sbb, sub, xor.
|
||||
kEncodingX86Bswap, //!< X86 bswap.
|
||||
kEncodingX86Bt, //!< X86 bt, btc, btr, bts.
|
||||
kEncodingX86Call, //!< X86 call.
|
||||
kEncodingX86Cmpxchg, //!< X86 [MR] cmpxchg.
|
||||
kEncodingX86Cmpxchg8b_16b, //!< X86 [MR] cmpxchg8b, cmpxchg16b.
|
||||
kEncodingX86Crc, //!< X86 crc32.
|
||||
kEncodingX86Enter, //!< X86 enter.
|
||||
kEncodingX86Imul, //!< X86 imul.
|
||||
kEncodingX86In, //!< X86 in.
|
||||
kEncodingX86Ins, //!< X86 ins[b|w|d].
|
||||
kEncodingX86IncDec, //!< X86 inc, dec.
|
||||
kEncodingX86Int, //!< X86 int (interrupt).
|
||||
kEncodingX86Jcc, //!< X86 jcc.
|
||||
kEncodingX86JecxzLoop, //!< X86 jcxz, jecxz, jrcxz, loop, loope, loopne.
|
||||
kEncodingX86Jmp, //!< X86 jmp.
|
||||
kEncodingX86JmpRel, //!< X86 xbegin.
|
||||
kEncodingX86LcallLjmp, //!< X86 lcall/ljmp.
|
||||
kEncodingX86Lea, //!< X86 lea.
|
||||
kEncodingX86Mov, //!< X86 mov (all possible cases).
|
||||
kEncodingX86Movabs, //!< X86 movabs.
|
||||
kEncodingX86MovsxMovzx, //!< X86 movsx, movzx.
|
||||
kEncodingX86MovntiMovdiri, //!< X86 movnti/movdiri.
|
||||
kEncodingX86EnqcmdMovdir64b, //!< X86 enqcmd/enqcmds/movdir64b.
|
||||
kEncodingX86Out, //!< X86 out.
|
||||
kEncodingX86Outs, //!< X86 outs[b|w|d].
|
||||
kEncodingX86Push, //!< X86 push.
|
||||
kEncodingX86Pop, //!< X86 pop.
|
||||
kEncodingX86Ret, //!< X86 ret.
|
||||
kEncodingX86Rot, //!< X86 rcl, rcr, rol, ror, sal, sar, shl, shr.
|
||||
kEncodingX86Set, //!< X86 setcc.
|
||||
kEncodingX86ShldShrd, //!< X86 shld, shrd.
|
||||
kEncodingX86StrRm, //!< X86 lods.
|
||||
kEncodingX86StrMr, //!< X86 scas, stos.
|
||||
kEncodingX86StrMm, //!< X86 cmps, movs.
|
||||
kEncodingX86Test, //!< X86 test.
|
||||
kEncodingX86Xadd, //!< X86 xadd.
|
||||
kEncodingX86Xchg, //!< X86 xchg.
|
||||
kEncodingX86Fence, //!< X86 lfence, mfence, sfence.
|
||||
kEncodingX86Bndmov, //!< X86 [RM|MR] (used by BNDMOV).
|
||||
kEncodingFpuOp, //!< FPU [OP].
|
||||
kEncodingFpuArith, //!< FPU fadd, fdiv, fdivr, fmul, fsub, fsubr.
|
||||
kEncodingFpuCom, //!< FPU fcom, fcomp.
|
||||
kEncodingFpuFldFst, //!< FPU fld, fst, fstp.
|
||||
kEncodingFpuM, //!< FPU fiadd, ficom, ficomp, fidiv, fidivr, fild, fimul, fist, fistp, fisttp, fisub, fisubr.
|
||||
kEncodingFpuR, //!< FPU fcmov, fcomi, fcomip, ffree, fucom, fucomi, fucomip, fucomp, fxch.
|
||||
kEncodingFpuRDef, //!< FPU faddp, fdivp, fdivrp, fmulp, fsubp, fsubrp.
|
||||
kEncodingFpuStsw, //!< FPU fnstsw, fstsw.
|
||||
kEncodingExtRm, //!< EXT [RM].
|
||||
kEncodingExtRm_XMM0, //!< EXT [RM<XMM0>].
|
||||
kEncodingExtRm_ZDI, //!< EXT [RM<ZDI>].
|
||||
kEncodingExtRm_P, //!< EXT [RM] (propagates 66H if the instruction uses XMM register).
|
||||
kEncodingExtRm_Wx, //!< EXT [RM] (propagates REX.W if GPQ is used or the second operand is GPQ/QWORD_PTR).
|
||||
kEncodingExtRm_Wx_GpqOnly, //!< EXT [RM] (propagates REX.W if the first operand is GPQ register).
|
||||
kEncodingExtRmRi, //!< EXT [RM|RI].
|
||||
kEncodingExtRmRi_P, //!< EXT [RM|RI] (propagates 66H if the instruction uses XMM register).
|
||||
kEncodingExtRmi, //!< EXT [RMI].
|
||||
kEncodingExtRmi_P, //!< EXT [RMI] (propagates 66H if the instruction uses XMM register).
|
||||
kEncodingExtPextrw, //!< EXT pextrw.
|
||||
kEncodingExtExtract, //!< EXT pextrb, pextrd, pextrq, extractps.
|
||||
kEncodingExtMov, //!< EXT mov?? - #1:[MM|XMM, MM|XMM|Mem] #2:[MM|XMM|Mem, MM|XMM].
|
||||
kEncodingExtMovbe, //!< EXT movbe.
|
||||
kEncodingExtMovd, //!< EXT movd.
|
||||
kEncodingExtMovq, //!< EXT movq.
|
||||
kEncodingExtExtrq, //!< EXT extrq (SSE4A).
|
||||
kEncodingExtInsertq, //!< EXT insertq (SSE4A).
|
||||
kEncodingExt3dNow, //!< EXT [RMI] (3DNOW specific).
|
||||
kEncodingVexOp, //!< VEX [OP].
|
||||
kEncodingVexOpMod, //!< VEX [OP] with MODR/M.
|
||||
kEncodingVexKmov, //!< VEX [RM|MR] (used by kmov[b|w|d|q]).
|
||||
kEncodingVexR_Wx, //!< VEX|EVEX [R] (propagates VEX.W if GPQ used).
|
||||
kEncodingVexM, //!< VEX|EVEX [M].
|
||||
kEncodingVexM_VM, //!< VEX|EVEX [M] (propagates VEX|EVEX.L, VSIB support).
|
||||
kEncodingVexMr_Lx, //!< VEX|EVEX [MR] (propagates VEX|EVEX.L if YMM used).
|
||||
kEncodingVexMr_VM, //!< VEX|EVEX [MR] (VSIB support).
|
||||
kEncodingVexMri, //!< VEX|EVEX [MRI].
|
||||
kEncodingVexMri_Lx, //!< VEX|EVEX [MRI] (propagates VEX|EVEX.L if YMM used).
|
||||
kEncodingVexMri_Vpextrw, //!< VEX|EVEX [MRI] (special case required by VPEXTRW instruction).
|
||||
kEncodingVexRm, //!< VEX|EVEX [RM].
|
||||
kEncodingVexRm_ZDI, //!< VEX|EVEX [RM<ZDI>].
|
||||
kEncodingVexRm_Wx, //!< VEX|EVEX [RM] (propagates VEX|EVEX.W if GPQ used).
|
||||
kEncodingVexRm_Lx, //!< VEX|EVEX [RM] (propagates VEX|EVEX.L if YMM used).
|
||||
kEncodingVexRm_Lx_Narrow, //!< VEX|EVEX [RM] (the destination vector size is narrowed).
|
||||
kEncodingVexRm_Lx_Bcst, //!< VEX|EVEX [RM] (can handle broadcast r32/r64).
|
||||
kEncodingVexRm_VM, //!< VEX|EVEX [RM] (propagates VEX|EVEX.L, VSIB support).
|
||||
kEncodingVexRm_T1_4X, //!< EVEX [RM] (used by NN instructions that use RM-T1_4X encoding).
|
||||
kEncodingVexRmi, //!< VEX|EVEX [RMI].
|
||||
kEncodingVexRmi_Wx, //!< VEX|EVEX [RMI] (propagates VEX|EVEX.W if GPQ used).
|
||||
kEncodingVexRmi_Lx, //!< VEX|EVEX [RMI] (propagates VEX|EVEX.L if YMM used).
|
||||
kEncodingVexRvm, //!< VEX|EVEX [RVM].
|
||||
kEncodingVexRvm_Wx, //!< VEX|EVEX [RVM] (propagates VEX|EVEX.W if GPQ used).
|
||||
kEncodingVexRvm_ZDX_Wx, //!< VEX|EVEX [RVM<ZDX>] (propagates VEX|EVEX.W if GPQ used).
|
||||
kEncodingVexRvm_Lx, //!< VEX|EVEX [RVM] (propagates VEX|EVEX.L if YMM used).
|
||||
kEncodingVexRvm_Lx_KEvex, //!< VEX|EVEX [RVM] (forces EVEX prefix if K register is used on destination).
|
||||
kEncodingVexRvm_Lx_2xK, //!< VEX|EVEX [RVM] (vp2intersectd/vp2intersectq).
|
||||
kEncodingVexRvmr, //!< VEX|EVEX [RVMR].
|
||||
kEncodingVexRvmr_Lx, //!< VEX|EVEX [RVMR] (propagates VEX|EVEX.L if YMM used).
|
||||
kEncodingVexRvmi, //!< VEX|EVEX [RVMI].
|
||||
kEncodingVexRvmi_KEvex, //!< VEX|EVEX [RVMI] (forces EVEX prefix if K register is used on destination).
|
||||
kEncodingVexRvmi_Lx, //!< VEX|EVEX [RVMI] (propagates VEX|EVEX.L if YMM used).
|
||||
kEncodingVexRvmi_Lx_KEvex, //!< VEX|EVEX [RVMI] (forces EVEX prefix if K register is used on destination).
|
||||
kEncodingVexRmv, //!< VEX|EVEX [RMV].
|
||||
kEncodingVexRmv_Wx, //!< VEX|EVEX [RMV] (propagates VEX|EVEX.W if GPQ used).
|
||||
kEncodingVexRmv_VM, //!< VEX|EVEX [RMV] (propagates VEX|EVEX.L, VSIB support).
|
||||
kEncodingVexRmvRm_VM, //!< VEX|EVEX [RMV|RM] (propagates VEX|EVEX.L, VSIB support).
|
||||
kEncodingVexRmvi, //!< VEX|EVEX [RMVI].
|
||||
kEncodingVexRmMr, //!< VEX|EVEX [RM|MR].
|
||||
kEncodingVexRmMr_Lx, //!< VEX|EVEX [RM|MR] (propagates VEX|EVEX.L if YMM used).
|
||||
kEncodingVexRvmRmv, //!< VEX|EVEX [RVM|RMV].
|
||||
kEncodingVexRvmRmi, //!< VEX|EVEX [RVM|RMI].
|
||||
kEncodingVexRvmRmi_Lx, //!< VEX|EVEX [RVM|RMI] (propagates VEX|EVEX.L if YMM used).
|
||||
kEncodingVexRvmRmvRmi, //!< VEX|EVEX [RVM|RMV|RMI].
|
||||
kEncodingVexRvmMr, //!< VEX|EVEX [RVM|MR].
|
||||
kEncodingVexRvmMvr, //!< VEX|EVEX [RVM|MVR].
|
||||
kEncodingVexRvmMvr_Lx, //!< VEX|EVEX [RVM|MVR] (propagates VEX|EVEX.L if YMM used).
|
||||
kEncodingVexRvmVmi, //!< VEX|EVEX [RVM|VMI].
|
||||
kEncodingVexRvmVmi_Lx, //!< VEX|EVEX [RVM|VMI] (propagates VEX|EVEX.L if YMM used).
|
||||
kEncodingVexRvmVmi_Lx_MEvex, //!< VEX|EVEX [RVM|VMI] (propagates EVEX if the second operand is memory).
|
||||
kEncodingVexVm, //!< VEX|EVEX [VM].
|
||||
kEncodingVexVm_Wx, //!< VEX|EVEX [VM] (propagates VEX|EVEX.W if GPQ used).
|
||||
kEncodingVexVmi, //!< VEX|EVEX [VMI].
|
||||
kEncodingVexVmi_Lx, //!< VEX|EVEX [VMI] (propagates VEX|EVEX.L if YMM used).
|
||||
kEncodingVexVmi4_Wx, //!< VEX|EVEX [VMI] (propagates VEX|EVEX.W if GPQ used, DWORD Immediate).
|
||||
kEncodingVexVmi_Lx_MEvex, //!< VEX|EVEX [VMI] (forces EVEX prefix when the second operand is memory).
|
||||
kEncodingVexRvrmRvmr, //!< VEX|EVEX [RVRM|RVMR].
|
||||
kEncodingVexRvrmRvmr_Lx, //!< VEX|EVEX [RVRM|RVMR] (propagates VEX|EVEX.L if YMM used).
|
||||
kEncodingVexRvrmiRvmri_Lx, //!< VEX|EVEX [RVRMI|RVMRI] (propagates VEX|EVEX.L if YMM used).
|
||||
kEncodingVexMovdMovq, //!< VEX|EVEX vmovd, vmovq.
|
||||
kEncodingVexMovssMovsd, //!< VEX|EVEX vmovss, vmovsd.
|
||||
kEncodingFma4, //!< FMA4 [R, R, R/M, R/M].
|
||||
kEncodingFma4_Lx, //!< FMA4 [R, R, R/M, R/M] (propagates AVX.L if YMM used).
|
||||
kEncodingAmxCfg, //!< AMX ldtilecfg/sttilecfg.
|
||||
kEncodingAmxR, //!< AMX [R] - tilezero.
|
||||
kEncodingAmxRm, //!< AMX tileloadd/tileloaddt1.
|
||||
kEncodingAmxMr, //!< AMX tilestored.
|
||||
kEncodingAmxRmv, //!< AMX instructions that use TMM registers.
|
||||
kEncodingCount //!< Count of instruction encodings.
|
||||
};
|
||||
|
||||
//! Additional information table, provides CPU extensions required to execute an instruction and RW flags.
|
||||
struct AdditionalInfo {
|
||||
//! Index to `_instFlagsTable`.
|
||||
uint8_t _instFlagsIndex;
|
||||
//! Index to the RW flags table (presumably `_rwFlagsInfoTable`, the only such table declared in this header).
|
||||
uint8_t _rwFlagsIndex;
|
||||
//! Features vector (fixed storage for 6 entries).
|
||||
uint8_t _features[6];
|
||||
|
||||
//! Returns a pointer to the first entry of the features vector.
inline const uint8_t* featuresBegin() const noexcept { return _features; }
|
||||
//! Returns a pointer one past the last entry of the features vector (`featuresBegin()` plus the array size).
inline const uint8_t* featuresEnd() const noexcept { return _features + ASMJIT_ARRAY_SIZE(_features); }
|
||||
};
|
||||
|
||||
// ${NameLimits:Begin}
|
||||
// ------------------- Automatically generated, do not edit -------------------
|
||||
enum : uint32_t { kMaxNameSize = 17 };
|
||||
// ----------------------------------------------------------------------------
|
||||
// ${NameLimits:End}
|
||||
|
||||
//! Pair of 16-bit bounds; element type of the `instNameIndex` table.
//!
//! NOTE(review): presumably a range of instruction ids sharing the same first letter (the table has 26 entries);
//! whether `end` is inclusive or exclusive cannot be told from this header - confirm against the lookup code.
struct InstNameIndex {
|
||||
//! Start of the range.
uint16_t start;
|
||||
//! End of the range.
uint16_t end;
|
||||
};
|
||||
|
||||
//! Read/write information record; element type of the `rwInfoA` and `rwInfoB` tables.
struct RWInfo {
|
||||
//! Category of the record; values are sequential starting at 0 (`kCategoryGeneric`).
enum Category : uint8_t {
|
||||
kCategoryGeneric,
|
||||
kCategoryMov,
|
||||
kCategoryMovabs,
|
||||
kCategoryImul,
|
||||
kCategoryMovh64,
|
||||
kCategoryPunpcklxx,
|
||||
kCategoryVmaskmov,
|
||||
kCategoryVmovddup,
|
||||
kCategoryVmovmskpd,
|
||||
kCategoryVmovmskps,
|
||||
kCategoryVmov1_2,
|
||||
kCategoryVmov1_4,
|
||||
kCategoryVmov1_8,
|
||||
kCategoryVmov2_1,
|
||||
kCategoryVmov4_1,
|
||||
kCategoryVmov8_1
|
||||
};
|
||||
|
||||
//! Category of this record (presumably one of the `Category` values, stored as a raw byte).
uint8_t category;
|
||||
//! NOTE(review): presumably an index into `rwInfoRm` - confirm against the code that consumes this table.
uint8_t rmInfo;
|
||||
//! NOTE(review): presumably indexes into `rwInfoOp`, one per operand (storage for 6) - confirm.
uint8_t opInfoIndex[6];
|
||||
};
|
||||
|
||||
//! Per-entry read/write description; element type of the `rwInfoOp` table.
struct RWInfoOp {
|
||||
//! Read byte-mask (presumably one bit per byte read - TODO confirm).
uint64_t rByteMask;
|
||||
//! Write byte-mask (presumably one bit per byte written - TODO confirm).
uint64_t wByteMask;
|
||||
//! Physical register id (NOTE(review): the encoding of "no fixed register" is not visible here).
uint8_t physId;
|
||||
//! Consecutive lead count (NOTE(review): semantics not visible in this header - confirm against users).
uint8_t consecutiveLeadCount;
|
||||
//! Reserved bytes.
uint8_t reserved[2];
|
||||
//! Read/write flags, see `OpRWFlags`.
OpRWFlags flags;
|
||||
};
|
||||
|
||||
//! R/M information.
|
||||
//!
|
||||
//! This data is used to replace register operand by a memory operand reliably.
|
||||
struct RWInfoRm {
|
||||
//! Category of the R/M record.
//!
//! NOTE(review): `kCategoryHalf`, `kCategoryQuarter`, and `kCategoryEighth` presumably describe the memory operand
//! size as a fraction of the register size - confirm against the code that consumes `rwInfoRm`.
enum Category : uint8_t {
|
||||
kCategoryNone = 0,
|
||||
kCategoryFixed,
|
||||
kCategoryConsistent,
|
||||
kCategoryHalf,
|
||||
kCategoryQuarter,
|
||||
kCategoryEighth
|
||||
};
|
||||
|
||||
//! Bit flags stored in `flags`; each enumerator occupies a distinct bit.
enum Flags : uint8_t {
|
||||
//! NOTE(review): meaning not documented in this header - confirm against users of `kFlagAmbiguous`.
kFlagAmbiguous = 0x01,
|
||||
//! Special semantics for PEXTRW - memory operand can only be used with SSE4.1 instruction and it's forbidden in MMX.
|
||||
kFlagPextrw = 0x02,
|
||||
//! Special semantics for MOVSS and MOVSD - doesn't zero extend the destination if the operation is a reg to reg move.
|
||||
kFlagMovssMovsd = 0x04,
|
||||
//! Special semantics for AVX shift instructions that do not provide reg/mem in AVX/AVX2 mode (AVX-512 is required).
|
||||
kFlagFeatureIfRMI = 0x08
|
||||
};
|
||||
|
||||
//! Category of this record (presumably one of the `Category` values, stored as a raw byte).
uint8_t category;
|
||||
//! NOTE(review): presumably a bit-mask of operand positions that may be replaced by memory - confirm.
uint8_t rmOpsMask;
|
||||
//! NOTE(review): presumably the memory operand size used with `kCategoryFixed` - confirm.
uint8_t fixedSize;
|
||||
//! Combination of `Flags` bits (stored as a raw byte).
uint8_t flags;
|
||||
//! NOTE(review): presumably a CPU feature id related to `kFlagFeatureIfRMI` - confirm.
uint8_t rmFeature;
|
||||
};
|
||||
|
||||
//! Pair of CPU/FPU flag masks (flags read, and flags written or undefined); element type of `_rwFlagsInfoTable`.
struct RWFlagsInfoTable {
|
||||
//! CPU/FPU flags read.
|
||||
uint32_t readFlags;
|
||||
//! CPU/FPU flags written or undefined.
|
||||
uint32_t writeFlags;
|
||||
};
|
||||
|
||||
extern const uint8_t rwInfoIndexA[Inst::_kIdCount];
|
||||
extern const uint8_t rwInfoIndexB[Inst::_kIdCount];
|
||||
extern const RWInfo rwInfoA[];
|
||||
extern const RWInfo rwInfoB[];
|
||||
extern const RWInfoOp rwInfoOp[];
|
||||
extern const RWInfoRm rwInfoRm[];
|
||||
extern const RWFlagsInfoTable _rwFlagsInfoTable[];
|
||||
extern const InstRWFlags _instFlagsTable[];
|
||||
|
||||
extern const uint32_t _mainOpcodeTable[];
|
||||
extern const uint32_t _altOpcodeTable[];
|
||||
|
||||
#ifndef ASMJIT_NO_TEXT
|
||||
extern const char _nameData[];
|
||||
extern const InstNameIndex instNameIndex[26];
|
||||
#endif // !ASMJIT_NO_TEXT
|
||||
|
||||
extern const AdditionalInfo _additionalInfoTable[];
|
||||
|
||||
} // {InstDB}
|
||||
|
||||
//! \}
|
||||
//! \endcond
|
||||
|
||||
ASMJIT_END_SUB_NAMESPACE
|
||||
|
||||
#endif // ASMJIT_X86_X86INSTDB_P_H_INCLUDED
|
||||
436
lib/lepton/asmjit/x86/x86opcode_p.h
Normal file
436
lib/lepton/asmjit/x86/x86opcode_p.h
Normal file
@ -0,0 +1,436 @@
|
||||
// This file is part of AsmJit project <https://asmjit.com>
|
||||
//
|
||||
// See asmjit.h or LICENSE.md for license and copyright information
|
||||
// SPDX-License-Identifier: Zlib
|
||||
|
||||
#ifndef ASMJIT_X86_X86OPCODE_P_H_INCLUDED
|
||||
#define ASMJIT_X86_X86OPCODE_P_H_INCLUDED
|
||||
|
||||
#include "../x86/x86globals.h"
|
||||
|
||||
ASMJIT_BEGIN_SUB_NAMESPACE(x86)
|
||||
|
||||
//! \cond INTERNAL
|
||||
//! \addtogroup asmjit_x86
|
||||
//! \{
|
||||
|
||||
//! Helper class to store and manipulate X86 opcodes.
|
||||
//!
|
||||
//! The first 8 least significant bits describe the opcode byte as defined in ISA manuals, all other bits
|
||||
//! describe other properties like prefixes, see `Opcode::Bits` for more information.
|
||||
struct Opcode {
|
||||
uint32_t v;
|
||||
|
||||
//! Describes a meaning of all bits of AsmJit's 32-bit opcode value.
|
||||
//!
|
||||
//! This schema is AsmJit specific and has been designed to allow encoding of all X86 instructions available. X86,
|
||||
//! MMX, and SSE+ instructions always use `MM` and `PP` fields, which are encoded to corresponding prefixes needed
|
||||
//! by X86 or SIMD instructions. AVX+ instructions embed `MMMMM` and `PP` fields in a VEX prefix, and AVX-512
|
||||
//! instructions embed `MM` and `PP` in EVEX prefix.
|
||||
//!
|
||||
//! The instruction opcode definition uses 1 or 2 bytes as an opcode value. 1 byte is needed by most of the
|
||||
//! instructions, 2 bytes are only used by legacy X87-FPU instructions. This means that a second byte is free to
|
||||
//! be used by instructions encoded by using VEX and/or EVEX prefix.
|
||||
//!
|
||||
//! The fields description:
|
||||
//!
|
||||
//! - `MM` field is used to encode prefixes needed by the instruction or as a part of VEX/EVEX prefix. Described as
|
||||
//! `mm` and `mmmmm` in instruction manuals.
|
||||
//!
|
||||
//! NOTE: Since `MM` field is defined as `mmmmm` (5 bits), but only 2 least significant bits are used by VEX and
|
||||
//! EVEX prefixes, and additional 4th bit is used by XOP prefix, AsmJit uses the 3rd and 5th bit for its own
|
||||
//! purposes. These bits will probably never be used in future encodings as AVX512 uses only `000mm` from `mmmmm`.
|
||||
//!
|
||||
//! - `PP` field is used to encode prefixes needed by the instruction or as a part of VEX/EVEX prefix. Described as
|
||||
//! `pp` in instruction manuals.
|
||||
//!
|
||||
//! - `LL` field is used exclusively by AVX+ and AVX512+ instruction sets. It describes vector size, which is `L.128`
|
||||
//! for XMM register, `L.256` for YMM register, and `L.512` for ZMM register. The `LL` field is omitted in case
|
||||
//! that instruction supports multiple vector lengths, however, if the instruction requires specific `L` value it
|
||||
//! must be specified as a part of the opcode.
|
||||
//!
|
||||
//! NOTE: `LL` having value `11` is not defined yet.
|
||||
//!
|
||||
//! - `W` field is the most complicated. It was added by 64-bit architecture to promote default operation width
|
||||
//! (instructions that perform 32-bit operation by default require to override the width to 64-bit explicitly).
|
||||
//! There is nothing wrong with this, however, some instructions introduced implicit `W` override, for example a
|
||||
//! `cdqe` instruction is basically a `cwde` instruction with overridden `W` (set to 1). There are some others
|
||||
//! in the base X86 instruction set. More recent instruction sets started using `W` field more often:
|
||||
//!
|
||||
//! - AVX instructions started using `W` field as an extended opcode for FMA, GATHER, PERM, and other instructions.
|
||||
//! It also uses `W` field to override the default operation width in instructions like `vmovq`.
|
||||
//!
|
||||
//! - AVX-512 instructions started using `W` field as an extended opcode for all new instructions. This wouldn't
|
||||
//! have been an issue if the `W` field of AVX-512 had matched AVX, but this is not always the case.
|
||||
//!
|
||||
//! - `O` field is an extended opcode field (3 bits) embedded in ModR/M BYTE.
|
||||
//!
|
||||
//! - `CDSHL` and `CDTT` fields describe 'compressed-displacement'. `CDSHL` is defined for each instruction that is
|
||||
//! AVX-512 encodable (EVEX) and contains a base N shift (base shift to perform the calculation). The `CDTT` field
|
||||
//! is derived from instruction specification and describes additional shift to calculate the final `CDSHL` that
|
||||
//! will be used in SIB byte.
|
||||
//!
|
||||
//! \note Don't reorder any fields here, the shifts and masks were defined carefully to make encoding of X86
|
||||
//! instructions fast, especially to construct REX, VEX, and EVEX prefixes in the most efficient way. Changing
|
||||
//! values defined by these enums may cause AsmJit to emit invalid binary representations of instructions passed to
|
||||
//! `x86::Assembler::_emit`.
|
||||
enum Bits : uint32_t {
|
||||
// MM & VEX & EVEX & XOP
|
||||
// ---------------------
|
||||
//
|
||||
// Two meanings:
|
||||
// * Part of a legacy opcode (prefixes emitted before the main opcode byte).
|
||||
// * `MMMMM` field in VEX|EVEX|XOP instruction.
|
||||
//
|
||||
// AVX reserves 5 bits for `MMMMM` field, however AVX instructions only use 2 bits and XOP 3 bits. AVX-512 shrinks
|
||||
// `MMMMM` field into `MMM` so it's safe to use [4:3] bits of `MMMMM` field for internal payload.
|
||||
//
|
||||
// AsmJit divides MMMMM field into this layout:
|
||||
//
|
||||
// [2:0] - Used to describe 0F, 0F38 and 0F3A legacy prefix bytes and 3 bits of MMMMM field for XOP/AVX/AVX512.
|
||||
// [3] - Required by XOP instructions, so we use this bit also to indicate that this is a XOP opcode.
|
||||
// [4] - Used to force EVEX prefix - this bit is not used by any X86 instruction yet, so AsmJit uses it to
|
||||
// describe EVEX only instructions or sets its bit when user uses InstOptions::kX86_Evex to force EVEX.
|
||||
kMM_Shift = 8,
|
||||
kMM_Mask = 0x1Fu << kMM_Shift,
|
||||
kMM_00 = 0x00u << kMM_Shift,
|
||||
kMM_0F = 0x01u << kMM_Shift,
|
||||
kMM_0F38 = 0x02u << kMM_Shift,
|
||||
kMM_0F3A = 0x03u << kMM_Shift, // Described also as XOP.M3 in AMD manuals.
|
||||
kMM_0F01 = 0x04u << kMM_Shift, // AsmJit way to describe 0F01 (never VEX/EVEX).
|
||||
|
||||
kMM_MAP5 = 0x05u << kMM_Shift, // EVEX.MAP5.
|
||||
kMM_MAP6 = 0x06u << kMM_Shift, // EVEX.MAP6.
|
||||
|
||||
// `XOP` field is only used to force XOP prefix instead of VEX3 prefix. We know XOP encodings always use 0b1000
|
||||
// bit of MM field and that no VEX and EVEX instruction use such bit yet, so we can use this bit to force XOP
|
||||
// prefix to be emitted instead of VEX3 prefix. See `x86VEXPrefix` defined in `x86assembler.cpp`.
|
||||
kMM_XOP08 = 0x08u << kMM_Shift, // XOP.M8.
|
||||
kMM_XOP09 = 0x09u << kMM_Shift, // XOP.M9.
|
||||
kMM_XOP0A = 0x0Au << kMM_Shift, // XOP.MA.
|
||||
|
||||
kMM_IsXOP_Shift= kMM_Shift + 3,
|
||||
kMM_IsXOP = kMM_XOP08,
|
||||
|
||||
// NOTE: Force VEX3 allows to force to emit VEX3 instead of VEX2 in some cases (similar to forcing REX prefix).
|
||||
// Force EVEX will force emitting EVEX prefix instead of VEX2|VEX3. EVEX-only instructions will have ForceEvex
|
||||
// always set, however, instructions that can be encoded by either VEX or EVEX prefix should not have ForceEvex
|
||||
// set.
|
||||
kMM_ForceEvex = 0x10u << kMM_Shift, // Force 4-BYTE EVEX prefix.
|
||||
|
||||
// FPU_2B - Second-Byte of the Opcode used by FPU
|
||||
// ----------------------------------------------
|
||||
//
|
||||
// Second byte opcode. This BYTE is ONLY used by FPU instructions and collides with 3 bits from `MM` and 5 bits
|
||||
// from 'CDSHL' and 'CDTT'. It's fine as FPU and AVX512 flags are never used at the same time.
|
||||
kFPU_2B_Shift = 10,
|
||||
kFPU_2B_Mask = 0xFF << kFPU_2B_Shift,
|
||||
|
||||
// CDSHL & CDTT
|
||||
// ------------
|
||||
//
|
||||
// Compressed displacement bits.
|
||||
//
|
||||
// Each opcode defines the base size (N) shift:
|
||||
// [0]: BYTE (1 byte).
|
||||
// [1]: WORD (2 bytes).
|
||||
// [2]: DWORD (4 bytes - float/int32).
|
||||
// [3]: QWORD (8 bytes - double/int64).
|
||||
// [4]: OWORD (16 bytes - used by FV|FVM|M128).
|
||||
//
|
||||
// Which is then scaled by the instruction's TT (TupleType) into possible:
|
||||
// [5]: YWORD (32 bytes)
|
||||
// [6]: ZWORD (64 bytes)
|
||||
//
|
||||
// These bits are then adjusted before calling EmitModSib or EmitModVSib.
|
||||
kCDSHL_Shift = 13,
|
||||
kCDSHL_Mask = 0x7u << kCDSHL_Shift,
|
||||
|
||||
kCDSHL__ = 0x0u << kCDSHL_Shift, // Base element size not used.
|
||||
kCDSHL_0 = 0x0u << kCDSHL_Shift, // N << 0.
|
||||
kCDSHL_1 = 0x1u << kCDSHL_Shift, // N << 1.
|
||||
kCDSHL_2 = 0x2u << kCDSHL_Shift, // N << 2.
|
||||
kCDSHL_3 = 0x3u << kCDSHL_Shift, // N << 3.
|
||||
kCDSHL_4 = 0x4u << kCDSHL_Shift, // N << 4.
|
||||
kCDSHL_5 = 0x5u << kCDSHL_Shift, // N << 5.
|
||||
|
||||
// Compressed displacement tuple-type (specific to AsmJit).
|
||||
//
|
||||
// Since we store the base offset independently of CDTT we can simplify the number of 'TUPLE_TYPE' groups
|
||||
// significantly and just handle special cases.
|
||||
kCDTT_Shift = 16,
|
||||
kCDTT_Mask = 0x3u << kCDTT_Shift,
|
||||
kCDTT_None = 0x0u << kCDTT_Shift, // Does nothing.
|
||||
kCDTT_ByLL = 0x1u << kCDTT_Shift, // Scales by LL (1x 2x 4x).
|
||||
kCDTT_T1W = 0x2u << kCDTT_Shift, // Used to add 'W' to the shift.
|
||||
kCDTT_DUP = 0x3u << kCDTT_Shift, // Special 'VMOVDDUP' case.
|
||||
|
||||
// Aliases that match names used in instruction manuals.
|
||||
kCDTT__ = kCDTT_None,
|
||||
kCDTT_FV = kCDTT_ByLL,
|
||||
kCDTT_HV = kCDTT_ByLL,
|
||||
kCDTT_QV = kCDTT_ByLL,
|
||||
kCDTT_FVM = kCDTT_ByLL,
|
||||
kCDTT_T1S = kCDTT_None,
|
||||
kCDTT_T1F = kCDTT_None,
|
||||
kCDTT_T1_4X = kCDTT_None,
|
||||
kCDTT_T4X = kCDTT_None, // Alias to have only 3 letters.
|
||||
kCDTT_T2 = kCDTT_None,
|
||||
kCDTT_T4 = kCDTT_None,
|
||||
kCDTT_T8 = kCDTT_None,
|
||||
kCDTT_HVM = kCDTT_ByLL,
|
||||
kCDTT_QVM = kCDTT_ByLL,
|
||||
kCDTT_OVM = kCDTT_ByLL,
|
||||
kCDTT_128 = kCDTT_None,
|
||||
|
||||
// `O` Field in ModR/M (??:xxx:???)
|
||||
// --------------------------------
|
||||
|
||||
kModO_Shift = 18,
|
||||
kModO_Mask = 0x7u << kModO_Shift,
|
||||
|
||||
kModO__ = 0x0u,
|
||||
kModO_0 = 0x0u << kModO_Shift,
|
||||
kModO_1 = 0x1u << kModO_Shift,
|
||||
kModO_2 = 0x2u << kModO_Shift,
|
||||
kModO_3 = 0x3u << kModO_Shift,
|
||||
kModO_4 = 0x4u << kModO_Shift,
|
||||
kModO_5 = 0x5u << kModO_Shift,
|
||||
kModO_6 = 0x6u << kModO_Shift,
|
||||
kModO_7 = 0x7u << kModO_Shift,
|
||||
|
||||
// `RM` Field in ModR/M (??:???:xxx)
|
||||
// ---------------------------------
|
||||
//
|
||||
// Second data field used by ModR/M byte. This is only used by few instructions that use OPCODE+MOD/RM where both
|
||||
// values in Mod/RM are part of the opcode.
|
||||
|
||||
kModRM_Shift = 13,
|
||||
kModRM_Mask = 0x7u << kModRM_Shift,
|
||||
|
||||
kModRM__ = 0x0u,
|
||||
kModRM_0 = 0x0u << kModRM_Shift,
|
||||
kModRM_1 = 0x1u << kModRM_Shift,
|
||||
kModRM_2 = 0x2u << kModRM_Shift,
|
||||
kModRM_3 = 0x3u << kModRM_Shift,
|
||||
kModRM_4 = 0x4u << kModRM_Shift,
|
||||
kModRM_5 = 0x5u << kModRM_Shift,
|
||||
kModRM_6 = 0x6u << kModRM_Shift,
|
||||
kModRM_7 = 0x7u << kModRM_Shift,
|
||||
|
||||
// `PP` Field
|
||||
// ----------
|
||||
//
|
||||
// These fields are stored deliberately right after each other as it makes it easier to construct VEX prefix from
|
||||
// the opcode value stored in the instruction database.
|
||||
//
|
||||
// Two meanings:
|
||||
// * "PP" field in AVX/XOP/AVX-512 instruction.
|
||||
// * Mandatory Prefix in legacy encoding.
|
||||
//
|
||||
// AVX reserves 2 bits for `PP` field, but AsmJit extends the storage by 1 more bit that is used to emit 9B prefix
|
||||
// for some X87-FPU instructions.
|
||||
|
||||
kPP_Shift = 21,
|
||||
kPP_VEXMask = 0x03u << kPP_Shift, // PP field mask used by VEX/EVEX.
|
||||
kPP_FPUMask = 0x07u << kPP_Shift, // Mask used by EMIT_PP, also includes '0x9B'.
|
||||
kPP_00 = 0x00u << kPP_Shift,
|
||||
kPP_66 = 0x01u << kPP_Shift,
|
||||
kPP_F3 = 0x02u << kPP_Shift,
|
||||
kPP_F2 = 0x03u << kPP_Shift,
|
||||
|
||||
kPP_9B = 0x07u << kPP_Shift, // AsmJit specific to emit FPU's '9B' byte.
|
||||
|
||||
// REX|VEX|EVEX B|X|R|W Bits
|
||||
// -------------------------
|
||||
//
|
||||
// NOTE: REX.[B|X|R] are never stored within the opcode itself, they are reserved by AsmJit and are added
|
||||
// dynamically to the opcode to represent [REX|VEX|EVEX].[B|X|R] bits. REX.W can be stored in DB as it's sometimes
|
||||
// part of the opcode itself.
|
||||
|
||||
// These must be binary compatible with instruction options.
|
||||
kREX_Shift = 24,
|
||||
kREX_Mask = 0x0Fu << kREX_Shift,
|
||||
kB = 0x01u << kREX_Shift, // Never stored in DB, used by encoder.
|
||||
kX = 0x02u << kREX_Shift, // Never stored in DB, used by encoder.
|
||||
kR = 0x04u << kREX_Shift, // Never stored in DB, used by encoder.
|
||||
kW = 0x08u << kREX_Shift,
|
||||
kW_Shift = kREX_Shift + 3,
|
||||
|
||||
kW__ = 0u << kW_Shift, // REX.W/VEX.W is unspecified.
|
||||
kW_x = 0u << kW_Shift, // REX.W/VEX.W is based on instruction operands.
|
||||
kW_I = 0u << kW_Shift, // REX.W/VEX.W is ignored (WIG).
|
||||
kW_0 = 0u << kW_Shift, // REX.W/VEX.W is 0 (W0).
|
||||
kW_1 = 1u << kW_Shift, // REX.W/VEX.W is 1 (W1).
|
||||
|
||||
// EVEX.W Field
|
||||
// ------------
|
||||
//
|
||||
// `W` field used by EVEX instruction encoding.
|
||||
|
||||
kEvex_W_Shift = 28,
|
||||
kEvex_W_Mask = 1u << kEvex_W_Shift,
|
||||
|
||||
kEvex_W__ = 0u << kEvex_W_Shift, // EVEX.W is unspecified (not EVEX instruction).
|
||||
kEvex_W_x = 0u << kEvex_W_Shift, // EVEX.W is based on instruction operands.
|
||||
kEvex_W_I = 0u << kEvex_W_Shift, // EVEX.W is ignored (WIG).
|
||||
kEvex_W_0 = 0u << kEvex_W_Shift, // EVEX.W is 0 (W0).
|
||||
kEvex_W_1 = 1u << kEvex_W_Shift, // EVEX.W is 1 (W1).
|
||||
|
||||
// `L` or `LL` field in AVX/XOP/AVX-512
|
||||
// ------------------------------------
|
||||
//
|
||||
// VEX/XOP prefix can only use the first bit `L.128` or `L.256`. EVEX prefix makes it possible to use also
|
||||
// `L.512`. If the instruction set manual describes an instruction by `LIG` it means that the `L` field is ignored
|
||||
// and AsmJit defaults to `0` in such case.
|
||||
kLL_Shift = 29,
|
||||
kLL_Mask = 0x3u << kLL_Shift,
|
||||
|
||||
kLL__ = 0x0u << kLL_Shift, // LL is unspecified.
|
||||
kLL_x = 0x0u << kLL_Shift, // LL is based on instruction operands.
|
||||
kLL_I = 0x0u << kLL_Shift, // LL is ignored (LIG).
|
||||
kLL_0 = 0x0u << kLL_Shift, // LL is 0 (L.128).
|
||||
kLL_1 = 0x1u << kLL_Shift, // LL is 1 (L.256).
|
||||
kLL_2 = 0x2u << kLL_Shift, // LL is 2 (L.512).
|
||||
|
||||
// Opcode Combinations
|
||||
// -------------------
|
||||
|
||||
k0 = 0, // '__' (no prefix, used internally).
|
||||
k000000 = kPP_00 | kMM_00, // '__' (no prefix, to be the same width as others).
|
||||
k000F00 = kPP_00 | kMM_0F, // '0F'
|
||||
k000F01 = kPP_00 | kMM_0F01, // '0F01'
|
||||
k000F0F = kPP_00 | kMM_0F, // '0F0F' - 3DNOW, equal to 0x0F, must have special encoding to take effect.
|
||||
k000F38 = kPP_00 | kMM_0F38, // 'NP.0F38'
|
||||
k000F3A = kPP_00 | kMM_0F3A, // 'NP.0F3A'
|
||||
k00MAP5 = kPP_00 | kMM_MAP5, // 'NP.MAP5'
|
||||
k00MAP6 = kPP_00 | kMM_MAP6, // 'NP.MAP6'
|
||||
k660000 = kPP_66 | kMM_00, // '66'
|
||||
k660F00 = kPP_66 | kMM_0F, // '66.0F'
|
||||
k660F01 = kPP_66 | kMM_0F01, // '66.0F01'
|
||||
k660F38 = kPP_66 | kMM_0F38, // '66.0F38'
|
||||
k660F3A = kPP_66 | kMM_0F3A, // '66.0F3A'
|
||||
k66MAP5 = kPP_66 | kMM_MAP5, // '66.MAP5'
|
||||
k66MAP6 = kPP_66 | kMM_MAP6, // '66.MAP6'
|
||||
kF20000 = kPP_F2 | kMM_00, // 'F2'
|
||||
kF20F00 = kPP_F2 | kMM_0F, // 'F2.0F'
|
||||
kF20F01 = kPP_F2 | kMM_0F01, // 'F2.0F01'
|
||||
kF20F38 = kPP_F2 | kMM_0F38, // 'F2.0F38'
|
||||
kF20F3A = kPP_F2 | kMM_0F3A, // 'F2.0F3A'
|
||||
kF2MAP5 = kPP_F2 | kMM_MAP5, // 'F2.MAP5'
|
||||
kF2MAP6 = kPP_F2 | kMM_MAP6, // 'F2.MAP6'
|
||||
kF30000 = kPP_F3 | kMM_00, // 'F3'
|
||||
kF30F00 = kPP_F3 | kMM_0F, // 'F3.0F'
|
||||
kF30F01 = kPP_F3 | kMM_0F01, // 'F3.0F01'
|
||||
kF30F38 = kPP_F3 | kMM_0F38, // 'F3.0F38'
|
||||
kF30F3A = kPP_F3 | kMM_0F3A, // 'F3.0F3A'
|
||||
kF3MAP5 = kPP_F3 | kMM_MAP5, // 'F3.MAP5'
|
||||
kF3MAP6 = kPP_F3 | kMM_MAP6, // 'F3.MAP6'
|
||||
kFPU_00 = kPP_00 | kMM_00, // '__' (FPU)
|
||||
kFPU_9B = kPP_9B | kMM_00, // '9B' (FPU)
|
||||
kXOP_M8 = kPP_00 | kMM_XOP08, // 'M8' (XOP)
|
||||
kXOP_M9 = kPP_00 | kMM_XOP09, // 'M9' (XOP)
|
||||
kXOP_MA = kPP_00 | kMM_XOP0A // 'MA' (XOP)
|
||||
};
|
||||
|
||||
// Opcode Builder
|
||||
// --------------
|
||||
|
||||
//! Returns the stored opcode value `v` as `uint32_t`.
inline uint32_t get() const noexcept {
  return v;
}
|
||||
|
||||
//! Tests whether the `kW` bit is set in the opcode value.
inline bool hasW() const noexcept {
  return (v & kW) != 0;
}
|
||||
//! Tests whether the `kPP_66` bit is set in the opcode value.
//!
//! \note Only the single bit `kPP_66` (`0x01u << kPP_Shift`) is tested. `kPP_F2` (`0x03u << kPP_Shift`) and
//! `kPP_9B` (`0x07u << kPP_Shift`) contain that bit too, so this also returns true for opcodes carrying those
//! PP values.
inline bool has66h() const noexcept {
  return (v & kPP_66) != 0;
}
|
||||
|
||||
//! Adds `x` to the opcode value (forwards to `operator+=`) and returns this opcode.
inline Opcode& add(uint32_t x) noexcept {
  return this->operator+=(x);
}
|
||||
|
||||
//! ORs `kPP_66` into the opcode value and returns this opcode.
inline Opcode& add66h() noexcept {
  return this->operator|=(kPP_66);
}
|
||||
template<typename T>
|
||||
inline Opcode& add66hIf(T exp) noexcept { return operator|=(uint32_t(exp) << kPP_Shift); }
|
||||
template<typename T>
|
||||
inline Opcode& add66hBySize(T size) noexcept { return add66hIf(size == 2); }
|
||||
|
||||
//! ORs `kW` into the opcode value and returns this opcode.
inline Opcode& addW() noexcept {
  return this->operator|=(kW);
}
|
||||
template<typename T>
|
||||
inline Opcode& addWIf(T exp) noexcept { return operator|=(uint32_t(exp) << kW_Shift); }
|
||||
template<typename T>
|
||||
inline Opcode& addWBySize(T size) noexcept { return addWIf(size == 8); }
|
||||
|
||||
template<typename T>
|
||||
inline Opcode& addPrefixBySize(T size) noexcept {
|
||||
static const uint32_t mask[16] = {
|
||||
0, // #0
|
||||
0, // #1 -> nothing (already handled or not possible)
|
||||
kPP_66, // #2 -> 66H
|
||||
0, // #3
|
||||
0, // #4 -> nothing
|
||||
0, // #5
|
||||
0, // #6
|
||||
0, // #7
|
||||
kW // #8 -> REX.W
|
||||
};
|
||||
return operator|=(mask[size & 0xF]);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline Opcode& addArithBySize(T size) noexcept {
|
||||
static const uint32_t mask[16] = {
|
||||
0, // #0
|
||||
0, // #1 -> nothing
|
||||
1 | kPP_66, // #2 -> NOT_BYTE_OP(1) and 66H
|
||||
0, // #3
|
||||
1, // #4 -> NOT_BYTE_OP(1)
|
||||
0, // #5
|
||||
0, // #6
|
||||
0, // #7
|
||||
1 | kW // #8 -> NOT_BYTE_OP(1) and REX.W
|
||||
};
|
||||
return operator|=(mask[size & 0xF]);
|
||||
}
|
||||
|
||||
//! ORs `kMM_ForceEvex` into the opcode value and returns this opcode.
inline Opcode& forceEvex() noexcept {
  return this->operator|=(kMM_ForceEvex);
}
|
||||
template<typename T>
|
||||
inline Opcode& forceEvexIf(T exp) noexcept { return operator|=(uint32_t(exp) << Support::ConstCTZ<uint32_t(kMM_ForceEvex)>::value); }
|
||||
|
||||
//! Extract `O` field (R) from the opcode (specified as /0..7 in instruction manuals).
|
||||
inline uint32_t extractModO() const noexcept {
|
||||
return (v >> kModO_Shift) & 0x07;
|
||||
}
|
||||
|
||||
//! Extract `RM` field (RM) from the opcode (usually specified as another opcode value).
|
||||
inline uint32_t extractModRM() const noexcept {
|
||||
return (v >> kModRM_Shift) & 0x07;
|
||||
}
|
||||
|
||||
//! Extract `REX` prefix from opcode combined with `options`.
|
||||
inline uint32_t extractRex(InstOptions options) const noexcept {
|
||||
// kREX was designed in a way that when shifted there will be no bytes set except REX.[B|X|R|W].
|
||||
// The returned value forms a real REX prefix byte. This case should be unit-tested as well.
|
||||
return (v | uint32_t(options)) >> kREX_Shift;
|
||||
}
|
||||
|
||||
//! Returns the `LL` and `MM` fields of the opcode (selected by `kLL_Mask | kMM_Mask`), ORed with the
//! `InstOptions::kX86_Evex` bit taken from `options`, all shifted right by `kMM_Shift`.
inline uint32_t extractLLMMMMM(InstOptions options) const noexcept {
  return (uint32_t(v & (kLL_Mask | kMM_Mask)) | uint32_t(options & InstOptions::kX86_Evex)) >> kMM_Shift;
}
|
||||
|
||||
//! Replaces the opcode value with `x`.
inline Opcode& operator=(uint32_t x) noexcept {
  v = x;
  return *this;
}

//! Adds `x` to the opcode value.
inline Opcode& operator+=(uint32_t x) noexcept {
  v += x;
  return *this;
}

//! Subtracts `x` from the opcode value.
inline Opcode& operator-=(uint32_t x) noexcept {
  v -= x;
  return *this;
}

//! Bitwise-ANDs the opcode value with `x`.
inline Opcode& operator&=(uint32_t x) noexcept {
  v &= x;
  return *this;
}

//! Bitwise-ORs `x` into the opcode value.
inline Opcode& operator|=(uint32_t x) noexcept {
  v |= x;
  return *this;
}

//! Bitwise-XORs the opcode value with `x`.
inline Opcode& operator^=(uint32_t x) noexcept {
  v ^= x;
  return *this;
}
|
||||
|
||||
inline uint32_t operator&(uint32_t x) const noexcept { return v & x; }
|
||||
inline uint32_t operator|(uint32_t x) const noexcept { return v | x; }
|
||||
inline uint32_t operator^(uint32_t x) const noexcept { return v ^ x; }
|
||||
inline uint32_t operator<<(uint32_t x) const noexcept { return v << x; }
|
||||
inline uint32_t operator>>(uint32_t x) const noexcept { return v >> x; }
|
||||
};
|
||||
|
||||
//! \}
|
||||
//! \endcond
|
||||
|
||||
ASMJIT_END_SUB_NAMESPACE
|
||||
|
||||
#endif // ASMJIT_X86_X86OPCODE_P_H_INCLUDED
|
||||
231
lib/lepton/asmjit/x86/x86operand.cpp
Normal file
231
lib/lepton/asmjit/x86/x86operand.cpp
Normal file
@ -0,0 +1,231 @@
|
||||
// This file is part of AsmJit project <https://asmjit.com>
|
||||
//
|
||||
// See asmjit.h or LICENSE.md for license and copyright information
|
||||
// SPDX-License-Identifier: Zlib
|
||||
|
||||
#include "../core/api-build_p.h"
|
||||
#if !defined(ASMJIT_NO_X86)
|
||||
|
||||
#include "../core/misc_p.h"
|
||||
#include "../x86/x86operand.h"
|
||||
|
||||
ASMJIT_BEGIN_SUB_NAMESPACE(x86)
|
||||
|
||||
// x86::Operand - Tests
|
||||
// ====================
|
||||
|
||||
#if defined(ASMJIT_TEST)
|
||||
UNIT(x86_operand) {
  // --- Register factories vs. built-in register constants ---

  INFO("Checking basic properties of built-in X86 registers");

  // 8-bit registers (default accessor yields the low byte).
  EXPECT(gpb(Gp::kIdAx) == al);
  EXPECT(gpb(Gp::kIdBx) == bl);
  EXPECT(gpb(Gp::kIdCx) == cl);
  EXPECT(gpb(Gp::kIdDx) == dl);

  // 8-bit low registers.
  EXPECT(gpb_lo(Gp::kIdAx) == al);
  EXPECT(gpb_lo(Gp::kIdBx) == bl);
  EXPECT(gpb_lo(Gp::kIdCx) == cl);
  EXPECT(gpb_lo(Gp::kIdDx) == dl);

  // 8-bit high registers.
  EXPECT(gpb_hi(Gp::kIdAx) == ah);
  EXPECT(gpb_hi(Gp::kIdBx) == bh);
  EXPECT(gpb_hi(Gp::kIdCx) == ch);
  EXPECT(gpb_hi(Gp::kIdDx) == dh);

  // 16-bit registers.
  EXPECT(gpw(Gp::kIdAx) == ax);
  EXPECT(gpw(Gp::kIdBx) == bx);
  EXPECT(gpw(Gp::kIdCx) == cx);
  EXPECT(gpw(Gp::kIdDx) == dx);

  // 32-bit registers.
  EXPECT(gpd(Gp::kIdAx) == eax);
  EXPECT(gpd(Gp::kIdBx) == ebx);
  EXPECT(gpd(Gp::kIdCx) == ecx);
  EXPECT(gpd(Gp::kIdDx) == edx);

  // 64-bit registers.
  EXPECT(gpq(Gp::kIdAx) == rax);
  EXPECT(gpq(Gp::kIdBx) == rbx);
  EXPECT(gpq(Gp::kIdCx) == rcx);
  EXPECT(gpq(Gp::kIdDx) == rdx);

  // Registers with different IDs must compare unequal.
  EXPECT(gpb(Gp::kIdAx) != dl);
  EXPECT(gpw(Gp::kIdBx) != cx);
  EXPECT(gpd(Gp::kIdCx) != ebx);
  EXPECT(gpq(Gp::kIdDx) != rax);

  INFO("Checking if x86::reg(...) matches built-in IDs");
  EXPECT(gpb(5) == bpl);
  EXPECT(gpw(5) == bp);
  EXPECT(gpd(5) == ebp);
  EXPECT(gpq(5) == rbp);
  EXPECT(st(5) == st5);
  EXPECT(mm(5) == mm5);
  EXPECT(k(5) == k5);
  EXPECT(cr(5) == cr5);
  EXPECT(dr(5) == dr5);
  EXPECT(xmm(5) == xmm5);
  EXPECT(ymm(5) == ymm5);
  EXPECT(zmm(5) == zmm5);

  // --- Per-register-class properties ---

  INFO("Checking x86::Gp register properties");
  EXPECT(Gp().isReg() == true);
  EXPECT(eax.isReg() == true);
  EXPECT(eax.id() == 0);
  EXPECT(eax.size() == 4);
  EXPECT(eax.type() == RegType::kX86_Gpd);
  EXPECT(eax.group() == RegGroup::kGp);

  INFO("Checking x86::Xmm register properties");
  EXPECT(Xmm().isReg() == true);
  EXPECT(xmm4.isReg() == true);
  EXPECT(xmm4.id() == 4);
  EXPECT(xmm4.size() == 16);
  EXPECT(xmm4.type() == RegType::kX86_Xmm);
  EXPECT(xmm4.group() == RegGroup::kVec);
  EXPECT(xmm4.isVec());

  INFO("Checking x86::Ymm register properties");
  EXPECT(Ymm().isReg() == true);
  EXPECT(ymm5.isReg() == true);
  EXPECT(ymm5.id() == 5);
  EXPECT(ymm5.size() == 32);
  EXPECT(ymm5.type() == RegType::kX86_Ymm);
  EXPECT(ymm5.group() == RegGroup::kVec);
  EXPECT(ymm5.isVec());

  INFO("Checking x86::Zmm register properties");
  EXPECT(Zmm().isReg() == true);
  EXPECT(zmm6.isReg() == true);
  EXPECT(zmm6.id() == 6);
  EXPECT(zmm6.size() == 64);
  EXPECT(zmm6.type() == RegType::kX86_Zmm);
  EXPECT(zmm6.group() == RegGroup::kVec);
  EXPECT(zmm6.isVec());

  INFO("Checking x86::Vec register properties");
  EXPECT(Vec().isReg() == true);

  // cloneAs() takes the register type from its argument but keeps the ID of the source register.
  EXPECT(xmm4.cloneAs(ymm10) == ymm4);
  EXPECT(xmm4.cloneAs(zmm11) == zmm4);
  EXPECT(ymm5.cloneAs(xmm12) == xmm5);
  EXPECT(ymm5.cloneAs(zmm13) == zmm5);
  EXPECT(zmm6.cloneAs(xmm14) == xmm6);
  EXPECT(zmm6.cloneAs(ymm15) == ymm6);

  // xmm()/ymm()/zmm() conversions starting from an XMM register.
  EXPECT(xmm7.xmm() == xmm7);
  EXPECT(xmm7.ymm() == ymm7);
  EXPECT(xmm7.zmm() == zmm7);

  // ... starting from a YMM register.
  EXPECT(ymm7.xmm() == xmm7);
  EXPECT(ymm7.ymm() == ymm7);
  EXPECT(ymm7.zmm() == zmm7);

  // ... starting from a ZMM register.
  EXPECT(zmm7.xmm() == xmm7);
  EXPECT(zmm7.ymm() == ymm7);
  EXPECT(zmm7.zmm() == zmm7);

  INFO("Checking x86::Mm register properties");
  EXPECT(Mm().isReg() == true);
  EXPECT(mm2.isReg() == true);
  EXPECT(mm2.id() == 2);
  EXPECT(mm2.size() == 8);
  EXPECT(mm2.type() == RegType::kX86_Mm);
  EXPECT(mm2.group() == RegGroup::kX86_MM);

  INFO("Checking x86::KReg register properties");
  EXPECT(KReg().isReg() == true);
  EXPECT(k3.isReg() == true);
  EXPECT(k3.id() == 3);
  EXPECT(k3.size() == 0);
  EXPECT(k3.type() == RegType::kX86_KReg);
  EXPECT(k3.group() == RegGroup::kX86_K);

  INFO("Checking x86::St register properties");
  EXPECT(St().isReg() == true);
  EXPECT(st1.isReg() == true);
  EXPECT(st1.id() == 1);
  EXPECT(st1.size() == 10);
  EXPECT(st1.type() == RegType::kX86_St);
  EXPECT(st1.group() == RegGroup::kX86_St);

  INFO("Checking if default constructed regs behave as expected");
  EXPECT(Reg().isValid() == false);
  EXPECT(Gp().isValid() == false);
  EXPECT(Xmm().isValid() == false);
  EXPECT(Ymm().isValid() == false);
  EXPECT(Zmm().isValid() == false);
  EXPECT(Mm().isValid() == false);
  EXPECT(KReg().isValid() == false);
  EXPECT(SReg().isValid() == false);
  EXPECT(CReg().isValid() == false);
  EXPECT(DReg().isValid() == false);
  EXPECT(St().isValid() == false);
  EXPECT(Bnd().isValid() == false);

  // --- Memory operand ---

  INFO("Checking x86::Mem operand");
  Mem m;
  EXPECT(m == Mem(), "Two default constructed x86::Mem operands must be equal");

  // Case 1: label used as a base, no offset.
  Label label(1000); // Label with some ID.
  m = ptr(label);
  EXPECT(m.hasBase() == true);
  EXPECT(m.hasBaseReg() == false);
  EXPECT(m.hasBaseLabel() == true);
  EXPECT(m.hasOffset() == false);
  EXPECT(m.isOffset64Bit() == false);
  EXPECT(m.offset() == 0);
  EXPECT(m.offsetLo32() == 0);

  // Case 2: absolute 64-bit address, no base and no index.
  m = ptr(0x0123456789ABCDEFu);
  EXPECT(m.hasBase() == false);
  EXPECT(m.hasBaseReg() == false);
  EXPECT(m.hasIndex() == false);
  EXPECT(m.hasIndexReg() == false);
  EXPECT(m.hasOffset() == true);
  EXPECT(m.isOffset64Bit() == true);
  EXPECT(m.offset() == int64_t(0x0123456789ABCDEFu));
  EXPECT(m.offsetLo32() == int32_t(0x89ABCDEFu));
  m.addOffset(1);
  EXPECT(m.offset() == int64_t(0x0123456789ABCDF0u));

  // Case 3: absolute 64-bit address combined with an index register and a shift.
  m = ptr(0x0123456789ABCDEFu, rdi, 3);
  EXPECT(m.hasSegment() == false);
  EXPECT(m.hasBase() == false);
  EXPECT(m.hasBaseReg() == false);
  EXPECT(m.hasIndex() == true);
  EXPECT(m.hasIndexReg() == true);
  EXPECT(m.indexType() == rdi.type());
  EXPECT(m.indexId() == rdi.id());
  EXPECT(m.shift() == 3);
  EXPECT(m.hasOffset() == true);
  EXPECT(m.isOffset64Bit() == true);
  EXPECT(m.offset() == int64_t(0x0123456789ABCDEFu));
  EXPECT(m.offsetLo32() == int32_t(0x89ABCDEFu));
  m.resetIndex();
  EXPECT(m.hasIndex() == false);
  EXPECT(m.hasIndexReg() == false);

  // Case 4: base register only, then an index register added afterwards.
  m = ptr(rax);
  EXPECT(m.hasBase() == true);
  EXPECT(m.hasBaseReg() == true);
  EXPECT(m.baseType() == rax.type());
  EXPECT(m.baseId() == rax.id());
  EXPECT(m.hasIndex() == false);
  EXPECT(m.hasIndexReg() == false);
  EXPECT(m.indexType() == RegType::kNone);
  EXPECT(m.indexId() == 0);
  EXPECT(m.hasOffset() == false);
  EXPECT(m.isOffset64Bit() == false);
  EXPECT(m.offset() == 0);
  EXPECT(m.offsetLo32() == 0);
  m.setIndex(rsi);
  EXPECT(m.hasIndex() == true);
  EXPECT(m.hasIndexReg() == true);
  EXPECT(m.indexType() == rsi.type());
  EXPECT(m.indexId() == rsi.id());
}
|
||||
#endif
|
||||
|
||||
ASMJIT_END_SUB_NAMESPACE
|
||||
|
||||
#endif // !ASMJIT_NO_X86
|
||||
1085
lib/lepton/asmjit/x86/x86operand.h
Normal file
1085
lib/lepton/asmjit/x86/x86operand.h
Normal file
File diff suppressed because it is too large
Load Diff
1509
lib/lepton/asmjit/x86/x86rapass.cpp
Normal file
1509
lib/lepton/asmjit/x86/x86rapass.cpp
Normal file
File diff suppressed because it is too large
Load Diff
94
lib/lepton/asmjit/x86/x86rapass_p.h
Normal file
94
lib/lepton/asmjit/x86/x86rapass_p.h
Normal file
@ -0,0 +1,94 @@
|
||||
// This file is part of AsmJit project <https://asmjit.com>
|
||||
//
|
||||
// See asmjit.h or LICENSE.md for license and copyright information
|
||||
// SPDX-License-Identifier: Zlib
|
||||
|
||||
#ifndef ASMJIT_X86_X86RAPASS_P_H_INCLUDED
|
||||
#define ASMJIT_X86_X86RAPASS_P_H_INCLUDED
|
||||
|
||||
#include "../core/api-config.h"
|
||||
#ifndef ASMJIT_NO_COMPILER
|
||||
|
||||
#include "../core/compiler.h"
|
||||
#include "../core/rabuilders_p.h"
|
||||
#include "../core/rapass_p.h"
|
||||
#include "../x86/x86assembler.h"
|
||||
#include "../x86/x86compiler.h"
|
||||
#include "../x86/x86emithelper_p.h"
|
||||
|
||||
ASMJIT_BEGIN_SUB_NAMESPACE(x86)
|
||||
|
||||
//! \cond INTERNAL
|
||||
//! \addtogroup asmjit_x86
|
||||
//! \{
|
||||
|
||||
//! X86 register allocation pass.
|
||||
//!
|
||||
//! Takes care of generating function prologs and epilogs, and also performs register allocation.
|
||||
class X86RAPass : public BaseRAPass {
|
||||
public:
|
||||
ASMJIT_NONCOPYABLE(X86RAPass)
|
||||
typedef BaseRAPass Base;
|
||||
|
||||
EmitHelper _emitHelper;
|
||||
|
||||
//! \name Construction & Destruction
|
||||
//! \{
|
||||
|
||||
X86RAPass() noexcept;
|
||||
virtual ~X86RAPass() noexcept;
|
||||
|
||||
//! \}
|
||||
|
||||
//! \name Accessors
|
||||
//! \{
|
||||
|
||||
//! Returns the compiler casted to `x86::Compiler`.
|
||||
inline Compiler* cc() const noexcept { return static_cast<Compiler*>(_cb); }
|
||||
|
||||
//! Returns emit helper.
|
||||
inline EmitHelper* emitHelper() noexcept { return &_emitHelper; }
|
||||
|
||||
inline bool avxEnabled() const noexcept { return _emitHelper._avxEnabled; }
|
||||
inline bool avx512Enabled() const noexcept { return _emitHelper._avx512Enabled; }
|
||||
|
||||
//! \}
|
||||
|
||||
//! \name Utilities
|
||||
//! \{
|
||||
|
||||
inline uint32_t choose(uint32_t sseInstId, uint32_t avxInstId) noexcept {
|
||||
return avxEnabled() ? avxInstId : sseInstId;
|
||||
}
|
||||
|
||||
//! \}
|
||||
|
||||
//! \name Interface
|
||||
//! \{
|
||||
|
||||
void onInit() noexcept override;
|
||||
void onDone() noexcept override;
|
||||
|
||||
Error buildCFG() noexcept override;
|
||||
|
||||
Error _rewrite(BaseNode* first, BaseNode* stop) noexcept override;
|
||||
|
||||
Error emitMove(uint32_t workId, uint32_t dstPhysId, uint32_t srcPhysId) noexcept override;
|
||||
Error emitSwap(uint32_t aWorkId, uint32_t aPhysId, uint32_t bWorkId, uint32_t bPhysId) noexcept override;
|
||||
|
||||
Error emitLoad(uint32_t workId, uint32_t dstPhysId) noexcept override;
|
||||
Error emitSave(uint32_t workId, uint32_t srcPhysId) noexcept override;
|
||||
|
||||
Error emitJump(const Label& label) noexcept override;
|
||||
Error emitPreCall(InvokeNode* invokeNode) noexcept override;
|
||||
|
||||
//! \}
|
||||
};
|
||||
|
||||
//! \}
|
||||
//! \endcond
|
||||
|
||||
ASMJIT_END_SUB_NAMESPACE
|
||||
|
||||
#endif // !ASMJIT_NO_COMPILER
|
||||
#endif // ASMJIT_X86_X86RAPASS_P_H_INCLUDED
|
||||
Reference in New Issue
Block a user