Send patches - preferably formatted by git format-patch - to patches at archlinux32 dot org.
summary | refs | log | tree | commit | diff
path: root/community/luajit
diff options
context:
space:
mode:
author Erich Eckner <git@eckner.net> 2023-05-22 20:18:50 +0200
committer Erich Eckner <git@eckner.net> 2023-05-22 20:18:50 +0200
commit e74cde76c104df82b120a7b13964cb786da8f565 (patch)
tree 39967e24037929c04aaf79922aa260b437a60a5e /community/luajit
parent 73e1d3b448cc583ab38cae4d61a26f313fad946b (diff)
community -> extra
Diffstat (limited to 'community/luajit')
-rw-r--r-- community/luajit/PKGBUILD 69
-rw-r--r-- community/luajit/c7815e1a1b49871e645252bb12e722fb4879df11.patch 1668
-rw-r--r-- community/luajit/luajit-2.0-505e2c0-i486.patch 2366
3 files changed, 0 insertions, 4103 deletions
diff --git a/community/luajit/PKGBUILD b/community/luajit/PKGBUILD
deleted file mode 100644
index a0d9c9af..00000000
--- a/community/luajit/PKGBUILD
+++ /dev/null
@@ -1,69 +0,0 @@
-# Maintainer: Daurnimator <daurnimator@archlinux.org>
-# Maintainer: Lukas Fleischer <lfleischer@archlinux.org>
-# Contributor: Bartłomiej Piotrowski <bpiotrowski@archlinux.org>
-# Contributor: Chris Brannon <chris@the-brannons.com>
-# Contributor: Paulo Matias <matiasΘarchlinux-br·org>
-# Contributor: Anders Bergh <anders1@gmail.com>
-
-pkgname=luajit
-# LuaJIT has abandoned versioned releases and now advises using git HEAD
-# https://github.com/LuaJIT/LuaJIT/issues/665#issuecomment-784452583
-_commit=505e2c03de35e2718eef0d2d3660712e06dadf1f
-pkgver="2.1.0.beta3.r471.g${_commit::8}"
-pkgrel=1
-pkgdesc='Just-in-time compiler and drop-in replacement for Lua 5.1'
-arch=(i486 i686 pentium4 'x86_64')
-url='https://luajit.org/'
-license=('MIT')
-depends=('gcc-libs')
-source=("LuaJIT-${_commit}.tar.gz::https://repo.or.cz/luajit-2.0.git/snapshot/${_commit}.tar.gz")
-md5sums=('0847dc535736846a9a1436e18d8c509d')
-sha256sums=('b89d081aac4189a06b736c667f47cc60e0cc4591933b7ed50db38cf58496386e')
-b2sums=('89bed923ff34d2de813dee17f130496ffeaa6bc5caf9252be1df7d35e87fa7398930f1fe35f95650694d344bc99d5b2c0c4abc4568f1dac318822a832d44c3a4')
-
-build() {
- cd "luajit-2.0-${_commit::7}"
- # Avoid early stripping
- make amalg PREFIX=/usr BUILDMODE=dynamic TARGET_STRIP=" @:"
-}
-
-package() {
- cd "luajit-2.0-${_commit::7}"
-
- make install DESTDIR="$pkgdir" PREFIX=/usr
- install -Dm644 COPYRIGHT "$pkgdir/usr/share/licenses/$pkgname/COPYRIGHT"
-
- ln -sf luajit-2.1.0-beta3 "$pkgdir/usr/bin/luajit"
-}
-# Re-enable x87 support for i686 CPUs (fix from KitsuWhooa)
-if [ "$CARCH" = 'i486' ]; then
- source+=('luajit-2.0-505e2c0-i486.patch')
- md5sums+=('44317c2d006d45b0970cee8b55a4c05e')
- sha256sums+=('6a758da52d9ddd0162ba342276c4aa4454662b2fe8b89c8a7aa987677679fd30')
- b2sums+=('4a467db526fa550942dee7da7dd599f5976f519573773afab74c372bbb2aa243d60384699c50695dadf0be086fc5b54253692d0836c22da4b079a73b0eb7a822')
- eval "$(
- {
- declare -f prepare \
- || printf 'prepare ()\n{\ncd "luajit-2.0-${_commit::7}"\n}\n'
- } \
- | sed '
- $ i patch -p1 -i "$srcdir/luajit-2.0-505e2c0-i486.patch"
- '
- )"
-
-fi
-if [ "$CARCH" = 'i686' ]; then
- source+=('c7815e1a1b49871e645252bb12e722fb4879df11.patch')
- md5sums+=('25a3483026a359e06ec828bc666dc853')
- sha256sums+=('a711e1d7ad7a16d0e6ba044fedc284cc0c4bee710c2d910fd9f0f0af8765c1a7')
- b2sums+=('2d79b2dad25ba3a771348cfd38883334f511de703d2ccfdd00b808867ecf53201d680388c730aaf8941cb5159f6b819020c2da04b75346bc42428973c7f27420')
- eval "$(
- {
- declare -f prepare \
- || printf 'prepare ()\n{\ncd "luajit-2.0-${_commit::7}"\n}\n'
- } \
- | sed '
- $ i patch -p1 -i "$srcdir/c7815e1a1b49871e645252bb12e722fb4879df11.patch"
- '
- )"
-fi
diff --git a/community/luajit/c7815e1a1b49871e645252bb12e722fb4879df11.patch b/community/luajit/c7815e1a1b49871e645252bb12e722fb4879df11.patch
deleted file mode 100644
index 13048730..00000000
--- a/community/luajit/c7815e1a1b49871e645252bb12e722fb4879df11.patch
+++ /dev/null
@@ -1,1668 +0,0 @@
-From c7815e1a1b49871e645252bb12e722fb4879df11 Mon Sep 17 00:00:00 2001
-From: Tasos Sahanidis <tasos@tasossah.com>
-Date: Mon, 30 Jan 2023 22:57:23 +0200
-Subject: [PATCH] Revert "x86: Remove x87 support from interpreter."
-
-This reverts commit 57768cd5882eb8d39c673d9dd8598946ef7c1843.
-JIT is disabled by default and untested
----
- src/Makefile | 13 +-
- src/lib_jit.c | 44 ++-
- src/lj_asm.c | 16 +
- src/lj_jit.h | 18 +-
- src/lj_vm.h | 3 +-
- src/msvcbuild.bat | 1 -
- src/vm_x86.dasc | 798 +++++++++++++++++++++++++++++++++++++++++-----
- 7 files changed, 793 insertions(+), 100 deletions(-)
-
-diff --git a/src/Makefile b/src/Makefile
-index 30d64be2ab..f226cc2dba 100644
---- a/src/Makefile
-+++ b/src/Makefile
-@@ -44,10 +44,13 @@ CCOPT= -O2 -fomit-frame-pointer
- #
- # Target-specific compiler options:
- #
-+# x86 only: it's recommended to compile at least for i686. Better yet,
-+# compile for an architecture that has SSE2, too (-msse -msse2).
-+#
- # x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute
- # the binaries to a different machine you could also use: -march=native
- #
--CCOPT_x86= -march=i686 -msse -msse2 -mfpmath=sse
-+CCOPT_x86= -march=i686 -msse -mfpmath=sse
- CCOPT_x64=
- CCOPT_arm=
- CCOPT_arm64=
-@@ -102,7 +105,7 @@ XCFLAGS=
- #XCFLAGS+= -DLUAJIT_ENABLE_LUA52COMPAT
- #
- # Disable the JIT compiler, i.e. turn LuaJIT into a pure interpreter.
--#XCFLAGS+= -DLUAJIT_DISABLE_JIT
-+XCFLAGS+= -DLUAJIT_DISABLE_JIT
- #
- # Some architectures (e.g. PPC) can use either single-number (1) or
- # dual-number (2) mode. Uncomment one of these lines to override the
-@@ -437,6 +440,11 @@ DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subs
- ifeq (Windows,$(TARGET_SYS))
- DASM_AFLAGS+= -D WIN
- endif
-+ifeq (x86,$(TARGET_LJARCH))
-+ ifneq (,$(findstring __SSE2__ 1,$(TARGET_TESTARCH)))
-+ DASM_AFLAGS+= -D SSE
-+ endif
-+else
- ifeq (x64,$(TARGET_LJARCH))
- ifeq (,$(findstring LJ_FR2 1,$(TARGET_TESTARCH)))
- DASM_ARCH= x86
-@@ -466,6 +474,7 @@ ifeq (ppc,$(TARGET_LJARCH))
- endif
- endif
- endif
-+endif
-
- DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS)
- DASM_DASC= vm_$(DASM_ARCH).dasc
-diff --git a/src/lib_jit.c b/src/lib_jit.c
-index 2867d4206a..2edecfcc25 100644
---- a/src/lib_jit.c
-+++ b/src/lib_jit.c
-@@ -649,7 +649,7 @@ JIT_PARAMDEF(JIT_PARAMINIT)
- #endif
-
- /* Arch-dependent CPU feature detection. */
--static uint32_t jit_cpudetect(void)
-+static uint32_t jit_cpudetect(lua_State *L)
- {
- uint32_t flags = 0;
- #if LJ_TARGET_X86ORX64
-@@ -657,16 +657,45 @@ static uint32_t jit_cpudetect(void)
- uint32_t vendor[4];
- uint32_t features[4];
- if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) {
-+#if !LJ_HASJIT
-+#define JIT_F_CMOV 1
-+#define JIT_F_SSE2 2
-+#endif
-+ flags |= ((features[3] >> 15)&1) * JIT_F_CMOV;
-+ flags |= ((features[3] >> 26)&1) * JIT_F_SSE2;
-+#if LJ_HASJIT
- flags |= ((features[2] >> 0)&1) * JIT_F_SSE3;
- flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1;
-+ if (vendor[2] == 0x6c65746e) { /* Intel. */
-+ if ((features[0] & 0x0ff00f00) == 0x00000f00) /* P4. */
-+ flags |= JIT_F_P4; /* Currently unused. */
-+ else if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */
-+ flags |= JIT_F_LEA_AGU;
-+ } else if (vendor[2] == 0x444d4163) { /* AMD. */
-+ uint32_t fam = (features[0] & 0x0ff00f00);
-+ if (fam == 0x00000f00) /* K8. */
-+ flags |= JIT_F_SPLIT_XMM;
-+ if (fam >= 0x00000f00) /* K8, K10. */
-+ flags |= JIT_F_PREFER_IMUL;
-+ }
- if (vendor[0] >= 7) {
- uint32_t xfeatures[4];
- lj_vm_cpuid(7, xfeatures);
- flags |= ((xfeatures[1] >> 8)&1) * JIT_F_BMI2;
- }
-+#endif
- }
-- /* Don't bother checking for SSE2 -- the VM will crash before getting here. */
--
-+ /* Check for required instruction set support on x86 (unnecessary on x64). */
-+#if LJ_TARGET_X86
-+#if !defined(LUAJIT_CPU_NOCMOV)
-+ if (!(flags & JIT_F_CMOV))
-+ luaL_error(L, "CPU not supported");
-+#endif
-+#if defined(LUAJIT_CPU_SSE2)
-+ if (!(flags & JIT_F_SSE2))
-+ luaL_error(L, "CPU does not support SSE2 (recompile without -DLUAJIT_CPU_SSE2)");
-+#endif
-+#endif
- #elif LJ_TARGET_ARM
-
- int ver = LJ_ARCH_VERSION; /* Compile-time ARM CPU detection. */
-@@ -729,7 +758,12 @@ static uint32_t jit_cpudetect(void)
- static void jit_init(lua_State *L)
- {
- jit_State *J = L2J(L);
-- J->flags = jit_cpudetect() | JIT_F_ON | JIT_F_OPT_DEFAULT;
-+ uint32_t flags = jit_cpudetect(L);
-+#if LJ_TARGET_X86
-+ /* Silently turn off the JIT compiler on CPUs without SSE2. */
-+ if ((flags & JIT_F_SSE2))
-+#endif
-+ J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT;
- memcpy(J->param, jit_param_default, sizeof(J->param));
- lj_dispatch_update(G(L));
- }
-@@ -738,7 +772,7 @@ static void jit_init(lua_State *L)
- LUALIB_API int luaopen_jit(lua_State *L)
- {
- #if LJ_HASJIT
-- jit_init(L);
-+ jit_init(L); // FIXME should this be moved back to the bottom?
- #endif
- lua_pushliteral(L, LJ_OS_NAME);
- lua_pushliteral(L, LJ_ARCH_NAME);
-diff --git a/src/lj_asm.c b/src/lj_asm.c
-index 6f5e0c45b1..eda81f1e51 100644
---- a/src/lj_asm.c
-+++ b/src/lj_asm.c
-@@ -2340,6 +2340,22 @@ static void asm_setup_regsp(ASMState *as)
- }
- break;
- #endif
-+/*
-+ case IR_FPMATH:
-+#if LJ_TARGET_X86ORX64
-+ if (ir->op2 == IRFPM_EXP2) { // May be joined to lj_vm_pow_sse.
-+ ir->prev = REGSP_HINT(RID_XMM0);
-+#if !LJ_64
-+ if (as->evenspill < 4) // Leave room for 16 byte scratch area.
-+ as->evenspill = 4;
-+#endif
-+ if (inloop)
-+ as->modset |= RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
-+ continue;
-+ } else if (ir->op2 <= IRFPM_TRUNC && !(as->flags & JIT_F_SSE4_1)) {
-+ ir->prev = REGSP_HINT(RID_XMM0);
-+>>>>>>> parent of 57768cd5... x86: Remove x87 support from interpreter.
-+ */
- case IR_FPMATH:
- #if LJ_TARGET_X86ORX64
- if (ir->op2 <= IRFPM_TRUNC) {
-diff --git a/src/lj_jit.h b/src/lj_jit.h
-index 7f081730e4..85916b8342 100644
---- a/src/lj_jit.h
-+++ b/src/lj_jit.h
-@@ -20,12 +20,18 @@
-
- #if LJ_TARGET_X86ORX64
-
--#define JIT_F_SSE3 (JIT_F_CPU << 0)
--#define JIT_F_SSE4_1 (JIT_F_CPU << 1)
--#define JIT_F_BMI2 (JIT_F_CPU << 2)
--
--
--#define JIT_F_CPUSTRING "\4SSE3\6SSE4.1\4BMI2"
-+#define JIT_F_CMOV (JIT_F_CPU << 0)
-+#define JIT_F_SSE2 (JIT_F_CPU << 1)
-+#define JIT_F_SSE3 (JIT_F_CPU << 2)
-+#define JIT_F_SSE4_1 (JIT_F_CPU << 3)
-+#define JIT_F_P4 (JIT_F_CPU << 4)
-+#define JIT_F_PREFER_IMUL (JIT_F_CPU << 5)
-+#define JIT_F_SPLIT_XMM (JIT_F_CPU << 6)
-+#define JIT_F_LEA_AGU (JIT_F_CPU << 7)
-+#define JIT_F_BMI2 (JIT_F_CPU << 8)
-+
-+
-+#define JIT_F_CPUSTRING "\4CMOV\4SSE2\4SSE3\6SSE4.1\2P4\3AMD\2K8\4ATOM\4BMI2"
-
- #elif LJ_TARGET_ARM
-
-diff --git a/src/lj_vm.h b/src/lj_vm.h
-index c66db0049f..9bc6d62fab 100644
---- a/src/lj_vm.h
-+++ b/src/lj_vm.h
-@@ -58,7 +58,8 @@ LJ_ASMF void lj_vm_exit_handler(void);
- LJ_ASMF void lj_vm_exit_interp(void);
-
- /* Internal math helper functions. */
--#if LJ_TARGET_PPC || LJ_TARGET_ARM64 || (LJ_TARGET_MIPS && LJ_ABI_SOFTFP)
-+// FIXME: is this correct?
-+#if LJ_TARGET_X86ORX64 || LJ_TARGET_PPC || LJ_TARGET_ARM64 || (LJ_TARGET_MIPS && LJ_ABI_SOFTFP)
- #define lj_vm_floor floor
- #define lj_vm_ceil ceil
- #else
-diff --git a/src/msvcbuild.bat b/src/msvcbuild.bat
-index d323d8d44d..67e53574de 100644
---- a/src/msvcbuild.bat
-+++ b/src/msvcbuild.bat
-@@ -41,7 +41,6 @@ if exist minilua.exe.manifest^
- @set DASC=vm_x86.dasc
- @set DASMFLAGS=-D WIN -D JIT -D FFI
- @set LJARCH=x86
--@set LJCOMPILE=%LJCOMPILE% /arch:SSE2
- :X64
- @if "%1" neq "nogc64" goto :GC64
- @shift
-diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
-index 18ca87b545..3efbba6cdd 100644
---- a/src/vm_x86.dasc
-+++ b/src/vm_x86.dasc
-@@ -18,6 +18,7 @@
- |
- |.if P64
- |.define X64, 1
-+|.define SSE, 1
- |.if WIN
- |.define X64WIN, 1
- |.endif
-@@ -439,6 +440,7 @@
- | fpop
- |.endmacro
- |
-+|.macro fdup; fld st0; .endmacro
- |.macro fpop1; fstp st1; .endmacro
- |
- |// Synthesize SSE FP constants.
-@@ -464,6 +466,9 @@
- |.macro sseconst_1, reg, tmp // Synthesize 1.0.
- | sseconst_hi reg, tmp, 3ff00000
- |.endmacro
-+|.macro sseconst_m1, reg, tmp // Synthesize -1.0.
-+| sseconst_hi reg, tmp, bff00000
-+|.endmacro
- |.macro sseconst_2p52, reg, tmp // Synthesize 2^52.
- | sseconst_hi reg, tmp, 43300000
- |.endmacro
-@@ -943,9 +948,13 @@ static void build_subroutines(BuildCtx *ctx)
- |.if DUALNUM
- | mov TMP2, LJ_TISNUM
- | mov TMP1, RC
-- |.else
-+ |.elif SSE
- | cvtsi2sd xmm0, RC
- | movsd TMPQ, xmm0
-+ |.else
-+ | mov ARG4, RC
-+ | fild ARG4
-+ | fstp TMPQ
- |.endif
- | lea RCa, TMPQ // Store temp. TValue in TMPQ.
- | jmp >1
-@@ -1031,9 +1040,13 @@ static void build_subroutines(BuildCtx *ctx)
- |.if DUALNUM
- | mov TMP2, LJ_TISNUM
- | mov TMP1, RC
-- |.else
-+ |.elif SSE
- | cvtsi2sd xmm0, RC
- | movsd TMPQ, xmm0
-+ |.else
-+ | mov ARG4, RC
-+ | fild ARG4
-+ | fstp TMPQ
- |.endif
- | lea RCa, TMPQ // Store temp. TValue in TMPQ.
- | jmp >1
-@@ -1416,6 +1429,19 @@ static void build_subroutines(BuildCtx *ctx)
- | cmp NARGS:RD, 2+1; jb ->fff_fallback
- |.endmacro
- |
-+ |.macro .ffunc_n, name
-+ | .ffunc_1 name
-+ | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
-+ | fld qword [BASE]
-+ |.endmacro
-+ |
-+ |.macro .ffunc_n, name, op
-+ | .ffunc_1 name
-+ | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
-+ | op
-+ | fld qword [BASE]
-+ |.endmacro
-+ |
- |.macro .ffunc_nsse, name, op
- | .ffunc_1 name
- | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
-@@ -1426,6 +1452,14 @@ static void build_subroutines(BuildCtx *ctx)
- | .ffunc_nsse name, movsd
- |.endmacro
- |
-+ |.macro .ffunc_nn, name
-+ | .ffunc_2 name
-+ | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
-+ | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback
-+ | fld qword [BASE]
-+ | fld qword [BASE+8]
-+ |.endmacro
-+ |
- |.macro .ffunc_nnsse, name
- | .ffunc_2 name
- | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
-@@ -1631,7 +1665,11 @@ static void build_subroutines(BuildCtx *ctx)
- |.else
- | jae ->fff_fallback
- |.endif
-+ |.if SSE
- | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0
-+ |.else
-+ | fld qword [BASE]; jmp ->fff_resn
-+ |.endif
- |
- |.ffunc_1 tostring
- | // Only handles the string or number case inline.
-@@ -1729,12 +1767,19 @@ static void build_subroutines(BuildCtx *ctx)
- | add RD, 1
- | mov dword [BASE-4], LJ_TISNUM
- | mov dword [BASE-8], RD
-- |.else
-+ |.elif SSE
- | movsd xmm0, qword [BASE+8]
- | sseconst_1 xmm1, RBa
- | addsd xmm0, xmm1
- | cvttsd2si RD, xmm0
- | movsd qword [BASE-8], xmm0
-+ |.else
-+ | fld qword [BASE+8]
-+ | fld1
-+ | faddp st1
-+ | fist ARG1
-+ | fstp qword [BASE-8]
-+ | mov RD, ARG1
- |.endif
- | mov TAB:RB, [BASE]
- | cmp RD, TAB:RB->asize; jae >2 // Not in array part?
-@@ -1783,9 +1828,12 @@ static void build_subroutines(BuildCtx *ctx)
- |.if DUALNUM
- | mov dword [BASE+12], LJ_TISNUM
- | mov dword [BASE+8], 0
-- |.else
-+ |.elif SSE
- | xorps xmm0, xmm0
- | movsd qword [BASE+8], xmm0
-+ |.else
-+ | fldz
-+ | fstp qword [BASE+8]
- |.endif
- | mov RD, 1+3
- | jmp ->fff_res
-@@ -2017,11 +2065,6 @@ static void build_subroutines(BuildCtx *ctx)
- |->fff_resi: // Dummy.
- |.endif
- |
-- |->fff_resn:
-- | mov PC, [BASE-4]
-- | fstp qword [BASE-8]
-- | jmp ->fff_res1
-- |
- | .ffunc_1 math_abs
- |.if DUALNUM
- | cmp dword [BASE+4], LJ_TISNUM; jne >2
-@@ -2044,6 +2087,8 @@ static void build_subroutines(BuildCtx *ctx)
- |.else
- | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
- |.endif
-+ |
-+ |.if SSE
- | movsd xmm0, qword [BASE]
- | sseconst_abs xmm1, RDa
- | andps xmm0, xmm1
-@@ -2051,6 +2096,15 @@ static void build_subroutines(BuildCtx *ctx)
- | mov PC, [BASE-4]
- | movsd qword [BASE-8], xmm0
- | // fallthrough
-+ |.else
-+ | fld qword [BASE]
-+ | fabs
-+ | // fallthrough
-+ |->fff_resxmm0: // Dummy.
-+ |->fff_resn:
-+ | mov PC, [BASE-4]
-+ | fstp qword [BASE-8]
-+ |.endif
- |
- |->fff_res1:
- | mov RD, 1+1
-@@ -2093,8 +2147,9 @@ static void build_subroutines(BuildCtx *ctx)
- |.else
- | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
- |.endif
-+ |.if SSE
- | movsd xmm0, qword [BASE]
-- | call ->vm_ .. func .. _sse
-+ | call ->vm_ .. func
- |.if DUALNUM
- | cvttsd2si RB, xmm0
- | cmp RB, 0x80000000
-@@ -2105,29 +2160,61 @@ static void build_subroutines(BuildCtx *ctx)
- | je ->fff_resi
- |.endif
- | jmp ->fff_resxmm0
-+ |.else
-+ | fld qword [BASE]
-+ | call ->vm_ .. func
-+ | .if DUALNUM
-+ | fist ARG1
-+ | mov RB, ARG1
-+ | cmp RB, 0x80000000; jne >2
-+ | fdup
-+ | fild ARG1
-+ | fcomparepp
-+ | jp ->fff_resn
-+ | jne ->fff_resn
-+ |2:
-+ | fpop
-+ | jmp ->fff_resi
-+ | .else
-+ | jmp ->fff_resn
-+ | .endif
-+ |.endif
- |.endmacro
- |
- | math_round floor
- | math_round ceil
- |
-+ |.if SSE
- |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0
-+ |.else
-+ |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn
-+ |.endif
- |
- |.ffunc math_log
- | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
- | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
-+ |.if SSE
- | movsd xmm0, qword [BASE]
-- |.if not X64
-- | movsd FPARG1, xmm0
-- |.endif
-+ | .if not X64
-+ | movsd FPARG1, xmm0
-+ | .endif
- | mov RB, BASE
- | call extern log
- | mov BASE, RB
- | jmp ->fff_resfp
-+ |.else
-+ | fldln2; fld qword [BASE]; fyl2x; jmp ->fff_resn
-+ |.endif
- |
- |.macro math_extern, func
-+ |.if SSE
- | .ffunc_nsse math_ .. func
-- |.if not X64
-- | movsd FPARG1, xmm0
-+ | .if not X64
-+ | movsd FPARG1, xmm0
-+ | .endif
-+ |.else
-+ | .ffunc_n math_ .. func
-+ | fstp FPARG1
- |.endif
- | mov RB, BASE
- | call extern func
-@@ -2136,10 +2223,16 @@ static void build_subroutines(BuildCtx *ctx)
- |.endmacro
- |
- |.macro math_extern2, func
-- | .ffunc_nnsse math_ .. func
- |.if not X64
-- | movsd FPARG1, xmm0
-- | movsd FPARG3, xmm1
-+ | .if SSE
-+ | .ffunc_nnsse math_ .. func
-+ | movsd FPARG1, xmm0
-+ | movsd FPARG3, xmm1
-+ | .else
-+ | .ffunc_nn math_ .. func
-+ | fstp FPARG3
-+ | fstp FPARG1
-+ | .endif
- |.endif
- | mov RB, BASE
- | call extern func
-@@ -2176,34 +2269,65 @@ static void build_subroutines(BuildCtx *ctx)
- | cmp RB, 0x00200000; jb >4
- |1:
- | shr RB, 21; sub RB, RC // Extract and unbias exponent.
-+ |.if SSE
- | cvtsi2sd xmm0, RB
-+ |.else
-+ | mov TMP1, RB; fild TMP1
-+ |.endif
- | mov RB, [BASE-4]
- | and RB, 0x800fffff // Mask off exponent.
- | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0.
- | mov [BASE-4], RB
- |2:
-+ |.if SSE
- | movsd qword [BASE], xmm0
-+ |.else
-+ | fstp qword [BASE]
-+ |.endif
- | mov RD, 1+2
- | jmp ->fff_res
- |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0.
-+ |.if SSE
- | xorps xmm0, xmm0; jmp <2
-+ |.else
-+ | fldz; jmp <2
-+ |.endif
- |4: // Handle denormals by multiplying with 2^54 and adjusting the bias.
-+ |.if SSE
- | movsd xmm0, qword [BASE]
- | sseconst_hi xmm1, RBa, 43500000 // 2^54.
- | mulsd xmm0, xmm1
- | movsd qword [BASE-8], xmm0
-+ |.else
-+ | fld qword [BASE]
-+ | mov TMP1, 0x5a800000; fmul TMP1 // x = x*2^54
-+ | fstp qword [BASE-8]
-+ |.endif
- | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1
- |
-+ |.if SSE
- |.ffunc_nsse math_modf
-+ |.else
-+ |.ffunc_n math_modf
-+ |.endif
- | mov RB, [BASE+4]
- | mov PC, [BASE-4]
- | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf?
-+ |.if SSE
- | movaps xmm4, xmm0
-- | call ->vm_trunc_sse
-+ | call ->vm_trunc
- | subsd xmm4, xmm0
- |1:
- | movsd qword [BASE-8], xmm0
- | movsd qword [BASE], xmm4
-+ |.else
-+ | fdup
-+ | call ->vm_trunc
-+ | fsub st1, st0
-+ |1:
-+ | fstp qword [BASE-8]
-+ | fstp qword [BASE]
-+ |.endif
- | mov RC, [BASE-4]; mov RB, [BASE+4]
- | xor RC, RB; js >3 // Need to adjust sign?
- |2:
-@@ -2213,9 +2337,24 @@ static void build_subroutines(BuildCtx *ctx)
- | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction.
- | jmp <2
- |4:
-+ |.if SSE
- | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0.
-+ |.else
-+ | fldz; fxch; jmp <1 // Return +-Inf and +-0.
-+ |.endif
-+ |
-+ |.ffunc_nnr math_fmod
-+ |1: ; fprem; fnstsw ax; sahf; jp <1
-+ | fpop1
-+ | jmp ->fff_resn
-+ |
-+ |.if SSE
-+ |.ffunc_nnsse math_pow; call ->vm_pow; jmp ->fff_resxmm0
-+ |.else
-+ |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn
-+ |.endif
- |
-- |.macro math_minmax, name, cmovop, sseop
-+ |.macro math_minmax, name, cmovop, fcmovop, sseop
- | .ffunc_1 name
- | mov RA, 2
- | cmp dword [BASE+4], LJ_TISNUM
-@@ -2232,7 +2371,12 @@ static void build_subroutines(BuildCtx *ctx)
- |3:
- | ja ->fff_fallback
- | // Convert intermediate result to number and continue below.
-+ |.if SSE
- | cvtsi2sd xmm0, RB
-+ |.else
-+ | mov TMP1, RB
-+ | fild TMP1
-+ |.endif
- | jmp >6
- |4:
- | ja ->fff_fallback
-@@ -2240,6 +2384,7 @@ static void build_subroutines(BuildCtx *ctx)
- | jae ->fff_fallback
- |.endif
- |
-+ |.if SSE
- | movsd xmm0, qword [BASE]
- |5: // Handle numbers or integers.
- | cmp RA, RD; jae ->fff_resxmm0
-@@ -2258,10 +2403,34 @@ static void build_subroutines(BuildCtx *ctx)
- | sseop xmm0, xmm1
- | add RA, 1
- | jmp <5
-+ |.else
-+ | fld qword [BASE]
-+ |5: // Handle numbers or integers.
-+ | cmp RA, RD; jae ->fff_resn
-+ | cmp dword [BASE+RA*8-4], LJ_TISNUM
-+ |.if DUALNUM
-+ | jb >6
-+ | ja >9
-+ | fild dword [BASE+RA*8-8]
-+ | jmp >7
-+ |.else
-+ | jae >9
-+ |.endif
-+ |6:
-+ | fld qword [BASE+RA*8-8]
-+ |7:
-+ | fucomi st1; fcmovop st1; fpop1
-+ | add RA, 1
-+ | jmp <5
-+ |.endif
- |.endmacro
- |
-- | math_minmax math_min, cmovg, minsd
-- | math_minmax math_max, cmovl, maxsd
-+ | math_minmax math_min, cmovg, fcmovnbe, minsd
-+ | math_minmax math_max, cmovl, fcmovbe, maxsd
-+ |.if not SSE
-+ |9:
-+ | fpop; jmp ->fff_fallback
-+ |.endif
- |
- |//-- String library -----------------------------------------------------
- |
-@@ -2275,8 +2444,10 @@ static void build_subroutines(BuildCtx *ctx)
- | movzx RB, byte STR:RB[1]
- |.if DUALNUM
- | jmp ->fff_resi
-- |.else
-+ |.elif SSE
- | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
-+ |.else
-+ | mov TMP1, RB; fild TMP1; jmp ->fff_resn
- |.endif
- |
- |.ffunc string_char // Only handle the 1-arg case here.
-@@ -2288,11 +2459,16 @@ static void build_subroutines(BuildCtx *ctx)
- | mov RB, dword [BASE]
- | cmp RB, 255; ja ->fff_fallback
- | mov TMP2, RB
-- |.else
-+ |.elif SSE
- | jae ->fff_fallback
- | cvttsd2si RB, qword [BASE]
- | cmp RB, 255; ja ->fff_fallback
- | mov TMP2, RB
-+ |.else
-+ | jae ->fff_fallback
-+ | fld qword [BASE]
-+ | fistp TMP2
-+ | cmp TMP2, 255; ja ->fff_fallback
- |.endif
- |.if X64
- | mov TMP3, 1
-@@ -2331,10 +2507,14 @@ static void build_subroutines(BuildCtx *ctx)
- | jne ->fff_fallback
- | mov RB, dword [BASE+16]
- | mov TMP2, RB
-- |.else
-+ |.elif SSE
- | jae ->fff_fallback
- | cvttsd2si RB, qword [BASE+16]
- | mov TMP2, RB
-+ |.else
-+ | jae ->fff_fallback
-+ | fld qword [BASE+16]
-+ | fistp TMP2
- |.endif
- |1:
- | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
-@@ -2349,8 +2529,12 @@ static void build_subroutines(BuildCtx *ctx)
- | mov RB, STR:RB->len
- |.if DUALNUM
- | mov RA, dword [BASE+8]
-- |.else
-+ |.elif SSE
- | cvttsd2si RA, qword [BASE+8]
-+ |.else
-+ | fld qword [BASE+8]
-+ | fistp ARG3
-+ | mov RA, ARG3
- |.endif
- | mov RC, TMP2
- | cmp RB, RC // len < end? (unsigned compare)
-@@ -2418,10 +2602,16 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |//-- Bit library --------------------------------------------------------
- |
-+ |.define TOBIT_BIAS, 0x59c00000 // 2^52 + 2^51 (float, not double!).
-+ |
- |.macro .ffunc_bit, name, kind, fdef
- | fdef name
- |.if kind == 2
-+ |.if SSE
- | sseconst_tobit xmm1, RBa
-+ |.else
-+ | mov TMP1, TOBIT_BIAS
-+ |.endif
- |.endif
- | cmp dword [BASE+4], LJ_TISNUM
- |.if DUALNUM
-@@ -2437,12 +2627,24 @@ static void build_subroutines(BuildCtx *ctx)
- |.else
- | jae ->fff_fallback
- |.endif
-+ |.if SSE
- | movsd xmm0, qword [BASE]
- |.if kind < 2
- | sseconst_tobit xmm1, RBa
- |.endif
- | addsd xmm0, xmm1
- | movd RB, xmm0
-+ |.else
-+ | fld qword [BASE]
-+ |.if kind < 2
-+ | mov TMP1, TOBIT_BIAS
-+ |.endif
-+ | fadd TMP1
-+ | fstp FPARG1
-+ |.if kind > 0
-+ | mov RB, ARG1
-+ |.endif
-+ |.endif
- |2:
- |.endmacro
- |
-@@ -2451,7 +2653,15 @@ static void build_subroutines(BuildCtx *ctx)
- |.endmacro
- |
- |.ffunc_bit bit_tobit, 0
-+ |.if DUALNUM or SSE
-+ |.if not SSE
-+ | mov RB, ARG1
-+ |.endif
- | jmp ->fff_resbit
-+ |.else
-+ | fild ARG1
-+ | jmp ->fff_resn
-+ |.endif
- |
- |.macro .ffunc_bit_op, name, ins
- | .ffunc_bit name, 2
-@@ -2471,10 +2681,17 @@ static void build_subroutines(BuildCtx *ctx)
- |.else
- | jae ->fff_fallback_bit_op
- |.endif
-+ |.if SSE
- | movsd xmm0, qword [RD]
- | addsd xmm0, xmm1
- | movd RA, xmm0
- | ins RB, RA
-+ |.else
-+ | fld qword [RD]
-+ | fadd TMP1
-+ | fstp FPARG1
-+ | ins RB, ARG1
-+ |.endif
- | sub RD, 8
- | jmp <1
- |.endmacro
-@@ -2491,10 +2708,15 @@ static void build_subroutines(BuildCtx *ctx)
- | not RB
- |.if DUALNUM
- | jmp ->fff_resbit
-- |.else
-+ |.elif SSE
- |->fff_resbit:
- | cvtsi2sd xmm0, RB
- | jmp ->fff_resxmm0
-+ |.else
-+ |->fff_resbit:
-+ | mov ARG1, RB
-+ | fild ARG1
-+ | jmp ->fff_resn
- |.endif
- |
- |->fff_fallback_bit_op:
-@@ -2507,13 +2729,22 @@ static void build_subroutines(BuildCtx *ctx)
- | // Note: no inline conversion from number for 2nd argument!
- | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback
- | mov RA, dword [BASE+8]
-- |.else
-+ |.elif SSE
- | .ffunc_nnsse name
- | sseconst_tobit xmm2, RBa
- | addsd xmm0, xmm2
- | addsd xmm1, xmm2
- | movd RB, xmm0
- | movd RA, xmm1
-+ |.else
-+ | .ffunc_nn name
-+ | mov TMP1, TOBIT_BIAS
-+ | fadd TMP1
-+ | fstp FPARG3
-+ | fadd TMP1
-+ | fstp FPARG1
-+ | mov RA, ARG3
-+ | mov RB, ARG1
- |.endif
- | ins RB, cl // Assumes RA is ecx.
- | jmp ->fff_resbit
-@@ -2954,18 +3185,27 @@ static void build_subroutines(BuildCtx *ctx)
- |//-----------------------------------------------------------------------
- |
- |// FP value rounding. Called by math.floor/math.ceil fast functions
-- |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
-- |.macro vm_round, name, mode, cond
-- |->name:
-- |.if not X64 and cond
-- | movsd xmm0, qword [esp+4]
-- | call ->name .. _sse
-- | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg.
-- | fld qword [esp+4]
-+ |// and from JIT code.
-+ |
-+ |// x87 variant: Arg/ret on x87 stack. No int/xmm registers modified.
-+ |.macro vm_round_x87, mode1, mode2
-+ | fnstcw word [esp+4] // Caveat: overwrites ARG1 and ARG2.
-+ | mov [esp+8], eax
-+ | mov ax, mode1
-+ | or ax, [esp+4]
-+ |.if mode2 ~= 0xffff
-+ | and ax, mode2
-+ |.endif
-+ | mov [esp+6], ax
-+ | fldcw word [esp+6]
-+ | frndint
-+ | fldcw word [esp+4]
-+ | mov eax, [esp+8]
- | ret
-- |.endif
-+ |.endmacro
- |
-- |->name .. _sse:
-+ |// SSE variant: arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
-+ |.macro vm_round_sse, mode
- | sseconst_abs xmm2, RDa
- | sseconst_2p52 xmm3, RDa
- | movaps xmm1, xmm0
-@@ -2986,29 +3226,37 @@ static void build_subroutines(BuildCtx *ctx)
- | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
- | subsd xmm1, xmm3
- | orpd xmm1, xmm2 // Merge sign bit back in.
-- | sseconst_1 xmm3, RDa
- | .if mode == 1 // ceil(x)?
-+ | sseconst_m1 xmm2, RDa // Must subtract -1 to preserve -0.
- | cmpsd xmm0, xmm1, 6 // x > result?
-- | andpd xmm0, xmm3
-- | addsd xmm1, xmm0 // If yes, add 1.
-- | orpd xmm1, xmm2 // Merge sign bit back in (again).
- | .else // floor(x)?
-+ | sseconst_1 xmm2, RDa
- | cmpsd xmm0, xmm1, 1 // x < result?
-- | andpd xmm0, xmm3
-- | subsd xmm1, xmm0 // If yes, subtract 1.
- | .endif
-+ | andpd xmm0, xmm2
-+ | subsd xmm1, xmm0 // If yes, subtract +-1.
- |.endif
- | movaps xmm0, xmm1
- |1:
- | ret
- |.endmacro
- |
-- | vm_round vm_floor, 0, 1
-- | vm_round vm_ceil, 1, JIT
-- | vm_round vm_trunc, 2, JIT
-+ |.macro vm_round, name, ssemode, mode1, mode2, extra // FIXME: EXTRA NOT USED
-+ |->name:
-+ |.if not SSE
-+ | vm_round_x87 mode1, mode2
-+ |.endif
-+ |->name .. _sse:
-+ | vm_round_sse ssemode
-+ |.endmacro
-+ |
-+ | vm_round vm_floor, 0, 0x0400, 0xf7ff, 1
-+ | vm_round vm_ceil, 1, 0x0800, 0xfbff, JIT
-+ | vm_round vm_trunc, 2, 0x0c00, 0xffff, JIT
- |
- |// FP modulo x%y. Called by BC_MOD* and vm_arith.
- |->vm_mod:
-+ |.if SSE
- |// Args in xmm0/xmm1, return value in xmm0.
- |// Caveat: xmm0-xmm5 and RC (eax) modified!
- | movaps xmm5, xmm0
-@@ -3036,6 +3284,243 @@ static void build_subroutines(BuildCtx *ctx)
- | movaps xmm0, xmm5
- | subsd xmm0, xmm1
- | ret
-+ |.else
-+ |// Args/ret on x87 stack (y on top). No xmm registers modified.
-+ |// Caveat: needs 3 slots on x87 stack! RC (eax) modified!
-+ | fld st1
-+ | fdiv st1
-+ | fnstcw word [esp+4]
-+ | mov ax, 0x0400
-+ | or ax, [esp+4]
-+ | and ax, 0xf7ff
-+ | mov [esp+6], ax
-+ | fldcw word [esp+6]
-+ | frndint
-+ | fldcw word [esp+4]
-+ | fmulp st1
-+ | fsubp st1
-+ | ret
-+ |.endif
-+ |
-+ |->vm_exp2raw: // Entry point for vm_pow. Without +-Inf check.
-+ | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part.
-+ | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int
-+ |1:
-+ | ret
-+ |2:
-+ | fpop; fldz; ret
-+ |
-+ |// Generic power function x^y. Called by BC_POW, math.pow fast function,
-+ |// and vm_arith.
-+ |// Args/ret on x87 stack (y on top). RC (eax) modified.
-+ |// Caveat: needs 3 slots on x87 stack!
-+ |->vm_pow:
-+ |.if not SSE
-+ | fist dword [esp+4] // Store/reload int before comparison.
-+ | fild dword [esp+4] // Integral exponent used in vm_powi.
-+ | fucomip st1
-+ | jnz >8 // Branch for FP exponents.
-+ | jp >9 // Branch for NaN exponent.
-+ | fpop // Pop y and fallthrough to vm_powi.
-+ |
-+ |// FP/int power function x^i. Arg1/ret on x87 stack.
-+ |// Arg2 (int) on C stack. RC (eax) modified.
-+ |// Caveat: needs 2 slots on x87 stack!
-+ | mov eax, [esp+4]
-+ | cmp eax, 1; jle >6 // i<=1?
-+ | // Now 1 < (unsigned)i <= 0x80000000.
-+ |1: // Handle leading zeros.
-+ | test eax, 1; jnz >2
-+ | fmul st0
-+ | shr eax, 1
-+ | jmp <1
-+ |2:
-+ | shr eax, 1; jz >5
-+ | fdup
-+ |3: // Handle trailing bits.
-+ | fmul st0
-+ | shr eax, 1; jz >4
-+ | jnc <3
-+ | fmul st1, st0
-+ | jmp <3
-+ |4:
-+ | fmulp st1
-+ |5:
-+ | ret
-+ |6:
-+ | je <5 // x^1 ==> x
-+ | jb >7
-+ | fld1; fdivrp st1
-+ | neg eax
-+ | cmp eax, 1; je <5 // x^-1 ==> 1/x
-+ | jmp <1 // x^-i ==> (1/x)^i
-+ |7:
-+ | fpop; fld1 // x^0 ==> 1
-+ | ret
-+ |
-+ |8: // FP/FP power function x^y.
-+ | fst dword [esp+4]
-+ | fxch
-+ | fst dword [esp+8]
-+ | mov eax, [esp+4]; shl eax, 1
-+ | cmp eax, 0xff000000; je >2 // x^+-Inf?
-+ | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y?
-+ | cmp eax, 0xff000000; je >4 // +-Inf^y?
-+ | fyl2x
-+ | jmp ->vm_exp2raw
-+ |
-+ |9: // Handle x^NaN.
-+ | fld1
-+ | fucomip st2
-+ | je >1 // 1^NaN ==> 1
-+ | fxch // x^NaN ==> NaN
-+ |1:
-+ | fpop
-+ | ret
-+ |
-+ |2: // Handle x^+-Inf.
-+ | fabs
-+ | fld1
-+ | fucomip st1
-+ | je >3 // +-1^+-Inf ==> 1
-+ | fpop; fabs; fldz; mov eax, 0; setc al
-+ | ror eax, 1; xor eax, [esp+4]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0
-+ | fxch
-+ |3:
-+ | fpop1; fabs
-+ | ret
-+ |
-+ |4: // Handle +-0^y or +-Inf^y.
-+ | cmp dword [esp+4], 0; jge <3 // y >= 0, x^y ==> |x|
-+ | fpop; fpop
-+ | test eax, eax; jz >5 // y < 0, +-0^y ==> +Inf
-+ | fldz // y < 0, +-Inf^y ==> 0
-+ | ret
-+ |5:
-+ | mov dword [esp+4], 0x7f800000 // Return +Inf.
-+ | fld dword [esp+4]
-+ | ret
-+ |.endif
-+ |
-+ |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified.
-+ |// Needs 16 byte scratch area for x86. Also called from JIT code.
-+ |->vm_pow_sse:
-+ | cvtsd2si eax, xmm1
-+ | cvtsi2sd xmm2, eax
-+ | ucomisd xmm1, xmm2
-+ | jnz >8 // Branch for FP exponents.
-+ | jp >9 // Branch for NaN exponent.
-+ | // Fallthrough to vm_powi_sse.
-+ |
-+ |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
-+ |->vm_powi_sse:
-+ | cmp eax, 1; jle >6 // i<=1?
-+ | // Now 1 < (unsigned)i <= 0x80000000.
-+ |1: // Handle leading zeros.
-+ | test eax, 1; jnz >2
-+ | mulsd xmm0, xmm0
-+ | shr eax, 1
-+ | jmp <1
-+ |2:
-+ | shr eax, 1; jz >5
-+ | movaps xmm1, xmm0
-+ |3: // Handle trailing bits.
-+ | mulsd xmm0, xmm0
-+ | shr eax, 1; jz >4
-+ | jnc <3
-+ | mulsd xmm1, xmm0
-+ | jmp <3
-+ |4:
-+ | mulsd xmm0, xmm1
-+ |5:
-+ | ret
-+ |6:
-+ | je <5 // x^1 ==> x
-+ | jb >7 // x^0 ==> 1
-+ | neg eax
-+ | call <1
-+ | sseconst_1 xmm1, RDa
-+ | divsd xmm1, xmm0
-+ | movaps xmm0, xmm1
-+ | ret
-+ |7:
-+ | sseconst_1 xmm0, RDa
-+ | ret
-+ |
-+ |8: // FP/FP power function x^y.
-+ |.if X64
-+ | movd rax, xmm1; shl rax, 1
-+ | rol rax, 12; cmp rax, 0xffe; je >2 // x^+-Inf?
-+ | movd rax, xmm0; shl rax, 1; je >4 // +-0^y?
-+ | rol rax, 12; cmp rax, 0xffe; je >5 // +-Inf^y?
-+ | .if X64WIN
-+ | movsd qword [rsp+16], xmm1 // Use scratch area.
-+ | movsd qword [rsp+8], xmm0
-+ | fld qword [rsp+16]
-+ | fld qword [rsp+8]
-+ | .else
-+ | movsd qword [rsp-16], xmm1 // Use red zone.
-+ | movsd qword [rsp-8], xmm0
-+ | fld qword [rsp-16]
-+ | fld qword [rsp-8]
-+ | .endif
-+ |.else
-+ | movsd qword [esp+12], xmm1 // Needs 16 byte scratch area.
-+ | movsd qword [esp+4], xmm0
-+ | cmp dword [esp+12], 0; jne >1
-+ | mov eax, [esp+16]; shl eax, 1
-+ | cmp eax, 0xffe00000; je >2 // x^+-Inf?
-+ |1:
-+ | cmp dword [esp+4], 0; jne >1
-+ | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y?
-+ | cmp eax, 0xffe00000; je >5 // +-Inf^y?
-+ |1:
-+ | fld qword [esp+12]
-+ | fld qword [esp+4]
-+ |.endif
-+ | fyl2x // y*log2(x)
-+ | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part.
-+ | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int
-+ |.if X64WIN
-+ | fstp qword [rsp+8] // Use scratch area.
-+ | movsd xmm0, qword [rsp+8]
-+ |.elif X64
-+ | fstp qword [rsp-8] // Use red zone.
-+ | movsd xmm0, qword [rsp-8]
-+ |.else
-+ | fstp qword [esp+4] // Needs 8 byte scratch area.
-+ | movsd xmm0, qword [esp+4]
-+ |.endif
-+ | ret
-+ |
-+ |9: // Handle x^NaN.
-+ | sseconst_1 xmm2, RDa
-+ | ucomisd xmm0, xmm2; je >1 // 1^NaN ==> 1
-+ | movaps xmm0, xmm1 // x^NaN ==> NaN
-+ |1:
-+ | ret
-+ |
-+ |2: // Handle x^+-Inf.
-+ | sseconst_abs xmm2, RDa
-+ | andpd xmm0, xmm2 // |x|
-+ | sseconst_1 xmm2, RDa
-+ | ucomisd xmm0, xmm2; je <1 // +-1^+-Inf ==> 1
-+ | movmskpd eax, xmm1
-+ | xorps xmm0, xmm0
-+ | mov ah, al; setc al; xor al, ah; jne <1 // |x|<>1, x^+-Inf ==> +Inf/0
-+ |3:
-+ | sseconst_hi xmm0, RDa, 7ff00000 // +Inf
-+ | ret
-+ |
-+ |4: // Handle +-0^y.
-+ | movmskpd eax, xmm1; test eax, eax; jnz <3 // y < 0, +-0^y ==> +Inf
-+ | xorps xmm0, xmm0 // y >= 0, +-0^y ==> 0
-+ | ret
-+ |
-+ |5: // Handle +-Inf^y.
-+ | movmskpd eax, xmm1; test eax, eax; jz <3 // y >= 0, +-Inf^y ==> +Inf
-+ | xorps xmm0, xmm0 // y < 0, +-Inf^y ==> 0
-+ | ret
- |
- |//-----------------------------------------------------------------------
- |//-- Miscellaneous functions --------------------------------------------
-@@ -3429,12 +3914,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | // RA is a number.
- | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp
- | // RA is a number, RD is an integer.
-+ |.if SSE
- | cvtsi2sd xmm0, dword [BASE+RD*8]
- | jmp >2
-+ |.else
-+ | fld qword [BASE+RA*8]
-+ | fild dword [BASE+RD*8]
-+ | jmp >3
-+ |.endif
- |
- |8: // RA is an integer, RD is not an integer.
- | ja ->vmeta_comp
- | // RA is an integer, RD is a number.
-+ |.if SSE
- | cvtsi2sd xmm1, dword [BASE+RA*8]
- | movsd xmm0, qword [BASE+RD*8]
- | add PC, 4
-@@ -3442,15 +3934,29 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | jmp_comp jbe, ja, jb, jae, <9
- | jmp <6
- |.else
-+ | fild dword [BASE+RA*8]
-+ | jmp >2
-+ |.endif
-+ |.else
- | checknum RA, ->vmeta_comp
- | checknum RD, ->vmeta_comp
- |.endif
-+ |.if SSE
- |1:
- | movsd xmm0, qword [BASE+RD*8]
- |2:
- | add PC, 4
- | ucomisd xmm0, qword [BASE+RA*8]
- |3:
-+ |.else
-+ |1:
-+ | fld qword [BASE+RA*8] // Reverse order, i.e like cmp D, A.
-+ |2:
-+ | fld qword [BASE+RD*8]
-+ |3:
-+ | add PC, 4
-+ | fcomparepp
-+ |.endif
- | // Unordered: all of ZF CF PF set, ordered: PF clear.
- | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
- |.if DUALNUM
-@@ -3490,25 +3996,43 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | // RD is a number.
- | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5
- | // RD is a number, RA is an integer.
-+ |.if SSE
- | cvtsi2sd xmm0, dword [BASE+RA*8]
-+ |.else
-+ | fild dword [BASE+RA*8]
-+ |.endif
- | jmp >2
- |
- |8: // RD is an integer, RA is not an integer.
- | ja >5
- | // RD is an integer, RA is a number.
-+ |.if SSE
- | cvtsi2sd xmm0, dword [BASE+RD*8]
- | ucomisd xmm0, qword [BASE+RA*8]
-+ |.else
-+ | fild dword [BASE+RD*8]
-+ | fld qword [BASE+RA*8]
-+ |.endif
- | jmp >4
- |
- |.else
- | cmp RB, LJ_TISNUM; jae >5
- | checknum RA, >5
- |.endif
-+ |.if SSE
- |1:
- | movsd xmm0, qword [BASE+RA*8]
- |2:
- | ucomisd xmm0, qword [BASE+RD*8]
- |4:
-+ |.else
-+ |1:
-+ | fld qword [BASE+RA*8]
-+ |2:
-+ | fld qword [BASE+RD*8]
-+ |4:
-+ | fcomparepp
-+ |.endif
- iseqne_fp:
- if (vk) {
- | jp >2 // Unordered means not equal.
-@@ -3631,21 +4155,39 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | // RA is a number.
- | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1
- | // RA is a number, RD is an integer.
-+ |.if SSE
- | cvtsi2sd xmm0, dword [KBASE+RD*8]
-+ |.else
-+ | fild dword [KBASE+RD*8]
-+ |.endif
- | jmp >2
- |
- |8: // RA is an integer, RD is a number.
-+ |.if SSE
- | cvtsi2sd xmm0, dword [BASE+RA*8]
- | ucomisd xmm0, qword [KBASE+RD*8]
-+ |.else
-+ | fild dword [BASE+RA*8]
-+ | fld qword [KBASE+RD*8]
-+ |.endif
- | jmp >4
- |.else
- | cmp RB, LJ_TISNUM; jae >3
- |.endif
-+ |.if SSE
- |1:
- | movsd xmm0, qword [KBASE+RD*8]
- |2:
- | ucomisd xmm0, qword [BASE+RA*8]
- |4:
-+ |.else
-+ |1:
-+ | fld qword [KBASE+RD*8]
-+ |2:
-+ | fld qword [BASE+RA*8]
-+ |4:
-+ | fcomparepp
-+ |.endif
- goto iseqne_fp;
- case BC_ISEQP: case BC_ISNEP:
- vk = op == BC_ISEQP;
-@@ -3751,10 +4293,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |.else
- | checknum RD, ->vmeta_unm
- |.endif
-+ |.if SSE
- | movsd xmm0, qword [BASE+RD*8]
- | sseconst_sign xmm1, RDa
- | xorps xmm0, xmm1
- | movsd qword [BASE+RA*8], xmm0
-+ |.else
-+ | fld qword [BASE+RD*8]
-+ | fchs
-+ | fstp qword [BASE+RA*8]
-+ |.endif
- |.if DUALNUM
- | jmp <9
- |.else
-@@ -3770,11 +4318,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |1:
- | mov dword [BASE+RA*8+4], LJ_TISNUM
- | mov dword [BASE+RA*8], RD
-- |.else
-+ |.elif SSE
- | xorps xmm0, xmm0
- | cvtsi2sd xmm0, dword STR:RD->len
- |1:
- | movsd qword [BASE+RA*8], xmm0
-+ |.else
-+ | fild dword STR:RD->len
-+ |1:
-+ | fstp qword [BASE+RA*8]
- |.endif
- | ins_next
- |2:
-@@ -3792,8 +4344,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | // Length of table returned in eax (RD).
- |.if DUALNUM
- | // Nothing to do.
-- |.else
-+ |.elif SSE
- | cvtsi2sd xmm0, RD
-+ |.else
-+ | mov ARG1, RD
-+ | fild ARG1
- |.endif
- | mov BASE, RB // Restore BASE.
- | movzx RA, PC_RA
-@@ -3808,7 +4363,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
-
- /* -- Binary ops -------------------------------------------------------- */
-
-- |.macro ins_arithpre, sseins, ssereg
-+ |.macro ins_arithpre, x87ins, sseins, ssereg
- | ins_ABC
- ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
- ||switch (vk) {
-@@ -3817,22 +4372,37 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | .if DUALNUM
- | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn
- | .endif
-- | movsd xmm0, qword [BASE+RB*8]
-- | sseins ssereg, qword [KBASE+RC*8]
-+ | .if SSE
-+ | movsd xmm0, qword [BASE+RB*8]
-+ | sseins ssereg, qword [KBASE+RC*8]
-+ | .else
-+ | fld qword [BASE+RB*8]
-+ | x87ins qword [KBASE+RC*8]
-+ | .endif
- || break;
- ||case 1:
- | checknum RB, ->vmeta_arith_nv
- | .if DUALNUM
- | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv
- | .endif
-- | movsd xmm0, qword [KBASE+RC*8]
-- | sseins ssereg, qword [BASE+RB*8]
-+ | .if SSE
-+ | movsd xmm0, qword [KBASE+RC*8]
-+ | sseins ssereg, qword [BASE+RB*8]
-+ | .else
-+ | fld qword [KBASE+RC*8]
-+ | x87ins qword [BASE+RB*8]
-+ | .endif
- || break;
- ||default:
- | checknum RB, ->vmeta_arith_vv
- | checknum RC, ->vmeta_arith_vv
-- | movsd xmm0, qword [BASE+RB*8]
-- | sseins ssereg, qword [BASE+RC*8]
-+ | .if SSE
-+ | movsd xmm0, qword [BASE+RB*8]
-+ | sseins ssereg, qword [BASE+RC*8]
-+ | .else
-+ | fld qword [BASE+RB*8]
-+ | x87ins qword [BASE+RC*8]
-+ | .endif
- || break;
- ||}
- |.endmacro
-@@ -3870,62 +4440,55 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |.endmacro
- |
- |.macro ins_arithpost
-+ |.if SSE
- | movsd qword [BASE+RA*8], xmm0
-+ |.else
-+ | fstp qword [BASE+RA*8]
-+ |.endif
- |.endmacro
- |
-- |.macro ins_arith, sseins
-- | ins_arithpre sseins, xmm0
-+ |.macro ins_arith, x87ins, sseins
-+ | ins_arithpre x87ins, sseins, xmm0
- | ins_arithpost
- | ins_next
- |.endmacro
- |
-- |.macro ins_arith, intins, sseins
-+ |.macro ins_arith, intins, x87ins, sseins
- |.if DUALNUM
- | ins_arithdn intins
- |.else
-- | ins_arith, sseins
-+ | ins_arith, x87ins, sseins
- |.endif
- |.endmacro
-
- | // RA = dst, RB = src1 or num const, RC = src2 or num const
- case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
-- | ins_arith add, addsd
-+ | ins_arith add, fadd, addsd
- break;
- case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
-- | ins_arith sub, subsd
-+ | ins_arith sub, fsub, subsd
- break;
- case BC_MULVN: case BC_MULNV: case BC_MULVV:
-- | ins_arith imul, mulsd
-+ | ins_arith imul, fmul, mulsd
- break;
- case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
-- | ins_arith divsd
-+ | ins_arith fdiv, divsd
- break;
- case BC_MODVN:
-- | ins_arithpre movsd, xmm1
-+ | ins_arithpre fld, movsd, xmm1
- |->BC_MODVN_Z:
- | call ->vm_mod
- | ins_arithpost
- | ins_next
- break;
- case BC_MODNV: case BC_MODVV:
-- | ins_arithpre movsd, xmm1
-+ | ins_arithpre fld, movsd, xmm1
- | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
- break;
- case BC_POW:
-- | ins_arithpre movsd, xmm1
-- | mov RB, BASE
-- |.if not X64
-- | movsd FPARG1, xmm0
-- | movsd FPARG3, xmm1
-- |.endif
-- | call extern pow
-- | movzx RA, PC_RA
-- | mov BASE, RB
-- |.if X64
-+ | ins_arithpre fld, movsd, xmm1 // FIXME: THIS SHOULD NOT BE FLD. Whole thing is broken
-+ | call ->vm_pow
- | ins_arithpost
-- |.else
-- | fstp qword [BASE+RA*8]
-- |.endif
- | ins_next
- break;
-
-@@ -3993,17 +4556,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | movsx RD, RDW
- | mov dword [BASE+RA*8+4], LJ_TISNUM
- | mov dword [BASE+RA*8], RD
-- |.else
-+ |.elif SSE
- | movsx RD, RDW // Sign-extend literal.
- | cvtsi2sd xmm0, RD
- | movsd qword [BASE+RA*8], xmm0
-+ |.else
-+ | fild PC_RD // Refetch signed RD from instruction.
-+ | fstp qword [BASE+RA*8]
- |.endif
- | ins_next
- break;
- case BC_KNUM:
- | ins_AD // RA = dst, RD = num const
-+ |.if SSE
- | movsd xmm0, qword [KBASE+RD*8]
- | movsd qword [BASE+RA*8], xmm0
-+ |.else
-+ | fld qword [KBASE+RD*8]
-+ | fstp qword [BASE+RA*8]
-+ |.endif
- | ins_next
- break;
- case BC_KPRI:
-@@ -4110,10 +4681,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_USETN:
- | ins_AD // RA = upvalue #, RD = num const
- | mov LFUNC:RB, [BASE-8]
-+ |.if SSE
- | movsd xmm0, qword [KBASE+RD*8]
-+ |.else
-+ | fld qword [KBASE+RD*8]
-+ |.endif
- | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
- | mov RA, UPVAL:RB->v
-+ |.if SSE
- | movsd qword [RA], xmm0
-+ |.else
-+ | fstp qword [RA]
-+ |.endif
- | ins_next
- break;
- case BC_USETP:
-@@ -4267,10 +4846,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |.else
- | // Convert number to int and back and compare.
- | checknum RC, >5
-+ |.if SSE
- | movsd xmm0, qword [BASE+RC*8]
- | cvttsd2si RC, xmm0
- | cvtsi2sd xmm1, RC
- | ucomisd xmm0, xmm1
-+ |.else
-+ | fld qword [BASE+RC*8]
-+ | fist ARG1
-+ | fild ARG1
-+ | fcomparepp
-+ | mov RC, ARG1
-+ |.endif
- | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
- |.endif
- | cmp RC, TAB:RB->asize // Takes care of unordered, too.
-@@ -4399,8 +4986,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | mov TAB:RB, [BASE+RB*8]
- |.if DUALNUM
- | mov RC, dword [BASE+RC*8]
-- |.else
-+ |.elif SSE
- | cvttsd2si RC, qword [BASE+RC*8]
-+ |.else
-+ | fld qword [BASE+RC*8]
-+ | fistp TMP1
-+ | mov RC, TMP1
- |.endif
- | cmp RC, TAB:RB->asize
- | jae ->vmeta_tgetr // Not in array part? Use fallback.
-@@ -4433,10 +5024,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |.else
- | // Convert number to int and back and compare.
- | checknum RC, >5
-+ |.if SSE
- | movsd xmm0, qword [BASE+RC*8]
- | cvttsd2si RC, xmm0
- | cvtsi2sd xmm1, RC
- | ucomisd xmm0, xmm1
-+ |.else
-+ | fld qword [BASE+RC*8]
-+ | fist ARG1
-+ | fild ARG1
-+ | fcomparepp
-+ | mov RC, ARG1
-+ |.endif
- | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
- |.endif
- | cmp RC, TAB:RB->asize // Takes care of unordered, too.
-@@ -4611,8 +5210,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | mov TAB:RB, [BASE+RB*8]
- |.if DUALNUM
- | mov RC, dword [BASE+RC*8]
-- |.else
-+ |.elif SSE
- | cvttsd2si RC, qword [BASE+RC*8]
-+ |.else
-+ | fld qword [BASE+RC*8]
-+ | fistp TMP1
-+ | mov RC, TMP1
- |.endif
- | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
- | jnz >7
-@@ -4833,8 +5436,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |.if DUALNUM
- | mov dword [BASE+RA*8+4], LJ_TISNUM
- | mov dword [BASE+RA*8], RC
-- |.else
-+ |.elif SSE
- | cvtsi2sd xmm0, RC
-+ |.else
-+ | fild dword [BASE+RA*8-8]
- |.endif
- | // Copy array slot to returned value.
- |.if X64
-@@ -4850,8 +5455,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | // Return array index as a numeric key.
- |.if DUALNUM
- | // See above.
-- |.else
-+ |.elif SSE
- | movsd qword [BASE+RA*8], xmm0
-+ |.else
-+ | fstp qword [BASE+RA*8]
- |.endif
- | mov [BASE+RA*8-8], RC // Update control var.
- |2:
-@@ -4864,6 +5471,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |
- |4: // Skip holes in array part.
- | add RC, 1
-+ |.if not (DUALNUM or SSE)
-+ | mov [BASE+RA*8-8], RC
-+ |.endif
- | jmp <1
- |
- |5: // Traverse hash part.
-@@ -5211,6 +5821,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- if (!vk) {
- | cmp RB, LJ_TISNUM; jae ->vmeta_for
- }
-+ |.if SSE
- | movsd xmm0, qword FOR_IDX
- | movsd xmm1, qword FOR_STOP
- if (vk) {
-@@ -5223,6 +5834,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | ucomisd xmm1, xmm0
- |1:
- | movsd qword FOR_EXT, xmm0
-+ |.else
-+ | fld qword FOR_STOP
-+ | fld qword FOR_IDX
-+ if (vk) {
-+ | fadd qword FOR_STEP // nidx = idx + step
-+ | fst qword FOR_IDX
-+ | fst qword FOR_EXT
-+ | test RB, RB; js >1
-+ } else {
-+ | fst qword FOR_EXT
-+ | jl >1
-+ }
-+ | fxch // Swap lim/(n)idx if step non-negative.
-+ |1:
-+ | fcomparepp
-+ |.endif
- if (op == BC_FORI) {
- |.if DUALNUM
- | jnb <7
-@@ -5250,10 +5877,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |2:
- | ins_next
- |.endif
-- |
-+ |.if SSE
- |3: // Invert comparison if step is negative.
- | ucomisd xmm0, xmm1
- | jmp <1
-+ |.endif
- break;
-
- case BC_ITERL:
diff --git a/community/luajit/luajit-2.0-505e2c0-i486.patch b/community/luajit/luajit-2.0-505e2c0-i486.patch
deleted file mode 100644
index dd6cf5a1..00000000
--- a/community/luajit/luajit-2.0-505e2c0-i486.patch
+++ /dev/null
@@ -1,2366 +0,0 @@
-diff -rauN luajit-2.0-505e2c0/src/lib_jit.c luajit-2.0-505e2c0-i486-patch/src/lib_jit.c
---- luajit-2.0-505e2c0/src/lib_jit.c 2023-02-21 17:07:37.000000000 +0100
-+++ luajit-2.0-505e2c0-i486-patch/src/lib_jit.c 2023-03-26 18:16:32.558477950 +0200
-@@ -649,7 +649,7 @@
- #endif
-
- /* Arch-dependent CPU feature detection. */
--static uint32_t jit_cpudetect(void)
-+static uint32_t jit_cpudetect(lua_State *L)
- {
- uint32_t flags = 0;
- #if LJ_TARGET_X86ORX64
-@@ -657,16 +657,45 @@
- uint32_t vendor[4];
- uint32_t features[4];
- if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) {
-+#if !LJ_HASJIT
-+#define JIT_F_CMOV 1
-+#define JIT_F_SSE2 2
-+#endif
-+ flags |= ((features[3] >> 15)&1) * JIT_F_CMOV;
-+ flags |= ((features[3] >> 26)&1) * JIT_F_SSE2;
-+#if LJ_HASJIT
- flags |= ((features[2] >> 0)&1) * JIT_F_SSE3;
- flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1;
-+ if (vendor[2] == 0x6c65746e) { /* Intel. */
-+ if ((features[0] & 0x0ff00f00) == 0x00000f00) /* P4. */
-+ flags |= JIT_F_P4; /* Currently unused. */
-+ else if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */
-+ flags |= JIT_F_LEA_AGU;
-+ } else if (vendor[2] == 0x444d4163) { /* AMD. */
-+ uint32_t fam = (features[0] & 0x0ff00f00);
-+ if (fam == 0x00000f00) /* K8. */
-+ flags |= JIT_F_SPLIT_XMM;
-+ if (fam >= 0x00000f00) /* K8, K10. */
-+ flags |= JIT_F_PREFER_IMUL;
-+ }
- if (vendor[0] >= 7) {
- uint32_t xfeatures[4];
- lj_vm_cpuid(7, xfeatures);
- flags |= ((xfeatures[1] >> 8)&1) * JIT_F_BMI2;
- }
-+#endif
- }
-- /* Don't bother checking for SSE2 -- the VM will crash before getting here. */
--
-+ /* Check for required instruction set support on x86 (unnecessary on x64). */
-+#if LJ_TARGET_X86
-+#if !defined(LUAJIT_CPU_NOCMOV)
-+ if (!(flags & JIT_F_CMOV))
-+ luaL_error(L, "CPU not supported");
-+#endif
-+#if defined(LUAJIT_CPU_SSE2)
-+ if (!(flags & JIT_F_SSE2))
-+ luaL_error(L, "CPU does not support SSE2 (recompile without -DLUAJIT_CPU_SSE2)");
-+#endif
-+#endif
- #elif LJ_TARGET_ARM
-
- int ver = LJ_ARCH_VERSION; /* Compile-time ARM CPU detection. */
-@@ -729,7 +758,12 @@
- static void jit_init(lua_State *L)
- {
- jit_State *J = L2J(L);
-- J->flags = jit_cpudetect() | JIT_F_ON | JIT_F_OPT_DEFAULT;
-+ uint32_t flags = jit_cpudetect(L);
-+#if LJ_TARGET_X86
-+ /* Silently turn off the JIT compiler on CPUs without SSE2. */
-+ if ((flags & JIT_F_SSE2))
-+#endif
-+ J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT;
- memcpy(J->param, jit_param_default, sizeof(J->param));
- lj_dispatch_update(G(L));
- }
-@@ -738,7 +772,7 @@
- LUALIB_API int luaopen_jit(lua_State *L)
- {
- #if LJ_HASJIT
-- jit_init(L);
-+ jit_init(L); // FIXME should this be moved back to the bottom?
- #endif
- lua_pushliteral(L, LJ_OS_NAME);
- lua_pushliteral(L, LJ_ARCH_NAME);
-diff -rauN luajit-2.0-505e2c0/src/lj_asm.c luajit-2.0-505e2c0-i486-patch/src/lj_asm.c
---- luajit-2.0-505e2c0/src/lj_asm.c 2023-02-21 17:07:37.000000000 +0100
-+++ luajit-2.0-505e2c0-i486-patch/src/lj_asm.c 2023-03-26 18:16:32.558477950 +0200
-@@ -2340,6 +2340,22 @@
- }
- break;
- #endif
-+/*
-+ case IR_FPMATH:
-+#if LJ_TARGET_X86ORX64
-+ if (ir->op2 == IRFPM_EXP2) { // May be joined to lj_vm_pow_sse.
-+ ir->prev = REGSP_HINT(RID_XMM0);
-+#if !LJ_64
-+ if (as->evenspill < 4) // Leave room for 16 byte scratch area.
-+ as->evenspill = 4;
-+#endif
-+ if (inloop)
-+ as->modset |= RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
-+ continue;
-+ } else if (ir->op2 <= IRFPM_TRUNC && !(as->flags & JIT_F_SSE4_1)) {
-+ ir->prev = REGSP_HINT(RID_XMM0);
-+>>>>>>> parent of 57768cd5... x86: Remove x87 support from interpreter.
-+ */
- case IR_FPMATH:
- #if LJ_TARGET_X86ORX64
- if (ir->op2 <= IRFPM_TRUNC) {
-diff -rauN luajit-2.0-505e2c0/src/lj_jit.h luajit-2.0-505e2c0-i486-patch/src/lj_jit.h
---- luajit-2.0-505e2c0/src/lj_jit.h 2023-02-21 17:07:37.000000000 +0100
-+++ luajit-2.0-505e2c0-i486-patch/src/lj_jit.h 2023-03-26 18:16:32.558477950 +0200
-@@ -20,12 +20,18 @@
-
- #if LJ_TARGET_X86ORX64
-
--#define JIT_F_SSE3 (JIT_F_CPU << 0)
--#define JIT_F_SSE4_1 (JIT_F_CPU << 1)
--#define JIT_F_BMI2 (JIT_F_CPU << 2)
-+#define JIT_F_CMOV (JIT_F_CPU << 0)
-+#define JIT_F_SSE2 (JIT_F_CPU << 1)
-+#define JIT_F_SSE3 (JIT_F_CPU << 2)
-+#define JIT_F_SSE4_1 (JIT_F_CPU << 3)
-+#define JIT_F_P4 (JIT_F_CPU << 4)
-+#define JIT_F_PREFER_IMUL (JIT_F_CPU << 5)
-+#define JIT_F_SPLIT_XMM (JIT_F_CPU << 6)
-+#define JIT_F_LEA_AGU (JIT_F_CPU << 7)
-+#define JIT_F_BMI2 (JIT_F_CPU << 8)
-
-
--#define JIT_F_CPUSTRING "\4SSE3\6SSE4.1\4BMI2"
-+#define JIT_F_CPUSTRING "\4CMOV\4SSE2\4SSE3\6SSE4.1\2P4\3AMD\2K8\4ATOM\4BMI2"
-
- #elif LJ_TARGET_ARM
-
-diff -rauN luajit-2.0-505e2c0/src/lj_vm.h luajit-2.0-505e2c0-i486-patch/src/lj_vm.h
---- luajit-2.0-505e2c0/src/lj_vm.h 2023-02-21 17:07:37.000000000 +0100
-+++ luajit-2.0-505e2c0-i486-patch/src/lj_vm.h 2023-03-26 18:16:32.558477950 +0200
-@@ -58,7 +58,8 @@
- LJ_ASMF void lj_vm_exit_interp(void);
-
- /* Internal math helper functions. */
--#if LJ_TARGET_PPC || LJ_TARGET_ARM64 || (LJ_TARGET_MIPS && LJ_ABI_SOFTFP)
-+// FIXME: is this correct?
-+#if LJ_TARGET_X86ORX64 || LJ_TARGET_PPC || LJ_TARGET_ARM64 || (LJ_TARGET_MIPS && LJ_ABI_SOFTFP)
- #define lj_vm_floor floor
- #define lj_vm_ceil ceil
- #else
-diff -rauN luajit-2.0-505e2c0/src/Makefile luajit-2.0-505e2c0-i486-patch/src/Makefile
---- luajit-2.0-505e2c0/src/Makefile 2023-02-21 17:07:37.000000000 +0100
-+++ luajit-2.0-505e2c0-i486-patch/src/Makefile 2023-03-26 18:16:32.558477950 +0200
-@@ -47,7 +47,7 @@
- # x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute
- # the binaries to a different machine you could also use: -march=native
- #
--CCOPT_x86= -march=i686 -msse -msse2 -mfpmath=sse
-+CCOPT_x86= -march=i486 -mfpmath=387
- CCOPT_x64=
- CCOPT_arm=
- CCOPT_arm64=
-@@ -102,7 +102,7 @@
- #XCFLAGS+= -DLUAJIT_ENABLE_LUA52COMPAT
- #
- # Disable the JIT compiler, i.e. turn LuaJIT into a pure interpreter.
--#XCFLAGS+= -DLUAJIT_DISABLE_JIT
-+XCFLAGS+= -DLUAJIT_DISABLE_JIT
- #
- # Some architectures (e.g. PPC) can use either single-number (1) or
- # dual-number (2) mode. Uncomment one of these lines to override the
-@@ -437,6 +437,11 @@
- ifeq (Windows,$(TARGET_SYS))
- DASM_AFLAGS+= -D WIN
- endif
-+ifeq (x86,$(TARGET_LJARCH))
-+ ifneq (,$(findstring __SSE2__ 1,$(TARGET_TESTARCH)))
-+ DASM_AFLAGS+= -D SSE
-+ endif
-+else
- ifeq (x64,$(TARGET_LJARCH))
- ifeq (,$(findstring LJ_FR2 1,$(TARGET_TESTARCH)))
- DASM_ARCH= x86
-@@ -466,6 +471,7 @@
- endif
- endif
- endif
-+endif
-
- DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS)
- DASM_DASC= vm_$(DASM_ARCH).dasc
-diff -rauN luajit-2.0-505e2c0/src/Makefile.orig luajit-2.0-505e2c0-i486-patch/src/Makefile.orig
---- luajit-2.0-505e2c0/src/Makefile.orig 1970-01-01 01:00:00.000000000 +0100
-+++ luajit-2.0-505e2c0-i486-patch/src/Makefile.orig 2023-03-26 18:05:15.245707757 +0200
-@@ -0,0 +1,726 @@
-+##############################################################################
-+# LuaJIT Makefile. Requires GNU Make.
-+#
-+# Please read doc/install.html before changing any variables!
-+#
-+# Suitable for POSIX platforms (Linux, *BSD, OSX etc.).
-+# Also works with MinGW and Cygwin on Windows.
-+# Please check msvcbuild.bat for building with MSVC on Windows.
-+#
-+# Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
-+##############################################################################
-+
-+MAJVER= 2
-+MINVER= 1
-+RELVER= 0
-+ABIVER= 5.1
-+NODOTABIVER= 51
-+
-+##############################################################################
-+############################# COMPILER OPTIONS #############################
-+##############################################################################
-+# These options mainly affect the speed of the JIT compiler itself, not the
-+# speed of the JIT-compiled code. Turn any of the optional settings on by
-+# removing the '#' in front of them. Make sure you force a full recompile
-+# with "make clean", followed by "make" if you change any options.
-+#
-+DEFAULT_CC = gcc
-+#
-+# LuaJIT builds as a native 32 or 64 bit binary by default.
-+CC= $(DEFAULT_CC)
-+#
-+# Use this if you want to force a 32 bit build on a 64 bit multilib OS.
-+#CC= $(DEFAULT_CC) -m32
-+#
-+# Since the assembler part does NOT maintain a frame pointer, it's pointless
-+# to slow down the C part by not omitting it. Debugging, tracebacks and
-+# unwinding are not affected -- the assembler part has frame unwind
-+# information and GCC emits it where needed (x64) or with -g (see CCDEBUG).
-+CCOPT= -O2 -fomit-frame-pointer
-+# Use this if you want to generate a smaller binary (but it's slower):
-+#CCOPT= -Os -fomit-frame-pointer
-+# Note: it's no longer recommended to use -O3 with GCC 4.x.
-+# The I-Cache bloat usually outweighs the benefits from aggressive inlining.
-+#
-+# Target-specific compiler options:
-+#
-+# x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute
-+# the binaries to a different machine you could also use: -march=native
-+#
-+CCOPT_x86= -march=i486 -mfpmath=387
-+CCOPT_x64=
-+CCOPT_arm=
-+CCOPT_arm64=
-+CCOPT_ppc=
-+CCOPT_mips=
-+#
-+CCDEBUG=
-+# Uncomment the next line to generate debug information:
-+#CCDEBUG= -g
-+#
-+CCWARN= -Wall
-+# Uncomment the next line to enable more warnings:
-+#CCWARN+= -Wextra -Wdeclaration-after-statement -Wredundant-decls -Wshadow -Wpointer-arith
-+#
-+##############################################################################
-+
-+##############################################################################
-+################################ BUILD MODE ################################
-+##############################################################################
-+# The default build mode is mixed mode on POSIX. On Windows this is the same
-+# as dynamic mode.
-+#
-+# Mixed mode creates a static + dynamic library and a statically linked luajit.
-+BUILDMODE= mixed
-+#
-+# Static mode creates a static library and a statically linked luajit.
-+#BUILDMODE= static
-+#
-+# Dynamic mode creates a dynamic library and a dynamically linked luajit.
-+# Note: this executable will only run when the library is installed!
-+#BUILDMODE= dynamic
-+#
-+##############################################################################
-+
-+##############################################################################
-+################################# FEATURES #################################
-+##############################################################################
-+# Enable/disable these features as needed, but make sure you force a full
-+# recompile with "make clean", followed by "make".
-+XCFLAGS=
-+#
-+# Permanently disable the FFI extension to reduce the size of the LuaJIT
-+# executable. But please consider that the FFI library is compiled-in,
-+# but NOT loaded by default. It only allocates any memory, if you actually
-+# make use of it.
-+#XCFLAGS+= -DLUAJIT_DISABLE_FFI
-+#
-+# Features from Lua 5.2 that are unlikely to break existing code are
-+# enabled by default. Some other features that *might* break some existing
-+# code (e.g. __pairs or os.execute() return values) can be enabled here.
-+# Note: this does not provide full compatibility with Lua 5.2 at this time.
-+#XCFLAGS+= -DLUAJIT_ENABLE_LUA52COMPAT
-+#
-+# Disable the JIT compiler, i.e. turn LuaJIT into a pure interpreter.
-+#XCFLAGS+= -DLUAJIT_DISABLE_JIT
-+#
-+# Some architectures (e.g. PPC) can use either single-number (1) or
-+# dual-number (2) mode. Uncomment one of these lines to override the
-+# default mode. Please see LJ_ARCH_NUMMODE in lj_arch.h for details.
-+#XCFLAGS+= -DLUAJIT_NUMMODE=1
-+#XCFLAGS+= -DLUAJIT_NUMMODE=2
-+#
-+# Disable LJ_GC64 mode for x64.
-+#XCFLAGS+= -DLUAJIT_DISABLE_GC64
-+#
-+##############################################################################
-+
-+##############################################################################
-+############################ DEBUGGING SUPPORT #############################
-+##############################################################################
-+# Enable these options as needed, but make sure you force a full recompile
-+# with "make clean", followed by "make".
-+# Note that most of these are NOT suitable for benchmarking or release mode!
-+#
-+# Use the system provided memory allocator (realloc) instead of the
-+# bundled memory allocator. This is slower, but sometimes helpful for
-+# debugging. This option cannot be enabled on x64 without GC64, since
-+# realloc usually doesn't return addresses in the right address range.
-+# OTOH this option is mandatory for Valgrind's memcheck tool on x64 and
-+# the only way to get useful results from it for all other architectures.
-+#XCFLAGS+= -DLUAJIT_USE_SYSMALLOC
-+#
-+# This define is required to run LuaJIT under Valgrind. The Valgrind
-+# header files must be installed. You should enable debug information, too.
-+#XCFLAGS+= -DLUAJIT_USE_VALGRIND
-+#
-+# This is the client for the GDB JIT API. GDB 7.0 or higher is required
-+# to make use of it. See lj_gdbjit.c for details. Enabling this causes
-+# a non-negligible overhead, even when not running under GDB.
-+#XCFLAGS+= -DLUAJIT_USE_GDBJIT
-+#
-+# Turn on assertions for the Lua/C API to debug problems with lua_* calls.
-+# This is rather slow -- use only while developing C libraries/embeddings.
-+#XCFLAGS+= -DLUA_USE_APICHECK
-+#
-+# Turn on assertions for the whole LuaJIT VM. This significantly slows down
-+# everything. Use only if you suspect a problem with LuaJIT itself.
-+#XCFLAGS+= -DLUA_USE_ASSERT
-+#
-+##############################################################################
-+# You probably don't need to change anything below this line!
-+##############################################################################
-+
-+##############################################################################
-+# Host system detection.
-+##############################################################################
-+
-+ifeq (Windows,$(findstring Windows,$(OS))$(MSYSTEM)$(TERM))
-+ HOST_SYS= Windows
-+else
-+ HOST_SYS:= $(shell uname -s)
-+ ifneq (,$(findstring MINGW,$(HOST_SYS)))
-+ HOST_SYS= Windows
-+ HOST_MSYS= mingw
-+ endif
-+ ifneq (,$(findstring MSYS,$(HOST_SYS)))
-+ HOST_SYS= Windows
-+ HOST_MSYS= mingw
-+ endif
-+ ifneq (,$(findstring CYGWIN,$(HOST_SYS)))
-+ HOST_SYS= Windows
-+ HOST_MSYS= cygwin
-+ endif
-+endif
-+
-+##############################################################################
-+# Flags and options for host and target.
-+##############################################################################
-+
-+# You can override the following variables at the make command line:
-+# CC HOST_CC STATIC_CC DYNAMIC_CC
-+# CFLAGS HOST_CFLAGS TARGET_CFLAGS
-+# LDFLAGS HOST_LDFLAGS TARGET_LDFLAGS TARGET_SHLDFLAGS
-+# LIBS HOST_LIBS TARGET_LIBS
-+# CROSS HOST_SYS TARGET_SYS TARGET_FLAGS
-+#
-+# Cross-compilation examples:
-+# make HOST_CC="gcc -m32" CROSS=i586-mingw32msvc- TARGET_SYS=Windows
-+# make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu-
-+
-+ASOPTIONS= $(CCOPT) $(CCWARN) $(XCFLAGS) $(CFLAGS)
-+CCOPTIONS= $(CCDEBUG) $(ASOPTIONS)
-+LDOPTIONS= $(CCDEBUG) $(LDFLAGS)
-+
-+HOST_CC= $(CC)
-+HOST_RM?= rm -f
-+# If left blank, minilua is built and used. You can supply an installed
-+# copy of (plain) Lua 5.1 or 5.2, plus Lua BitOp. E.g. with: HOST_LUA=lua
-+HOST_LUA=
-+
-+HOST_XCFLAGS= -I.
-+HOST_XLDFLAGS=
-+HOST_XLIBS=
-+HOST_ACFLAGS= $(CCOPTIONS) $(HOST_XCFLAGS) $(TARGET_ARCH) $(HOST_CFLAGS)
-+HOST_ALDFLAGS= $(LDOPTIONS) $(HOST_XLDFLAGS) $(HOST_LDFLAGS)
-+HOST_ALIBS= $(HOST_XLIBS) $(LIBS) $(HOST_LIBS)
-+
-+STATIC_CC = $(CROSS)$(CC)
-+DYNAMIC_CC = $(CROSS)$(CC) -fPIC
-+TARGET_CC= $(STATIC_CC)
-+TARGET_STCC= $(STATIC_CC)
-+TARGET_DYNCC= $(DYNAMIC_CC)
-+TARGET_LD= $(CROSS)$(CC)
-+TARGET_AR= $(CROSS)ar rcus
-+TARGET_STRIP= $(CROSS)strip
-+
-+TARGET_LIBPATH= $(or $(PREFIX),/usr/local)/$(or $(MULTILIB),lib)
-+TARGET_SONAME= libluajit-$(ABIVER).so.$(MAJVER)
-+TARGET_DYLIBNAME= libluajit-$(ABIVER).$(MAJVER).dylib
-+TARGET_DYLIBPATH= $(TARGET_LIBPATH)/$(TARGET_DYLIBNAME)
-+TARGET_DLLNAME= lua$(NODOTABIVER).dll
-+TARGET_DLLDOTANAME= libluajit-$(ABIVER).dll.a
-+TARGET_XSHLDFLAGS= -shared -fPIC -Wl,-soname,$(TARGET_SONAME)
-+TARGET_DYNXLDOPTS=
-+
-+TARGET_LFSFLAGS= -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE
-+TARGET_XCFLAGS= $(TARGET_LFSFLAGS) -U_FORTIFY_SOURCE
-+TARGET_XLDFLAGS=
-+TARGET_XLIBS= -lm
-+TARGET_TCFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS)
-+TARGET_ACFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS)
-+TARGET_ASFLAGS= $(ASOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS)
-+TARGET_ALDFLAGS= $(LDOPTIONS) $(TARGET_XLDFLAGS) $(TARGET_FLAGS) $(TARGET_LDFLAGS)
-+TARGET_ASHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) $(TARGET_FLAGS) $(TARGET_SHLDFLAGS)
-+TARGET_ALIBS= $(TARGET_XLIBS) $(LIBS) $(TARGET_LIBS)
-+
-+TARGET_TESTARCH=$(shell $(TARGET_CC) $(TARGET_TCFLAGS) -E lj_arch.h -dM)
-+ifneq (,$(findstring LJ_TARGET_X64 ,$(TARGET_TESTARCH)))
-+ TARGET_LJARCH= x64
-+else
-+ifneq (,$(findstring LJ_TARGET_X86 ,$(TARGET_TESTARCH)))
-+ TARGET_LJARCH= x86
-+else
-+ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH)))
-+ TARGET_LJARCH= arm
-+else
-+ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH)))
-+ ifneq (,$(findstring __AARCH64EB__ ,$(TARGET_TESTARCH)))
-+ TARGET_ARCH= -D__AARCH64EB__=1
-+ endif
-+ TARGET_LJARCH= arm64
-+else
-+ifneq (,$(findstring LJ_TARGET_PPC ,$(TARGET_TESTARCH)))
-+ ifneq (,$(findstring LJ_LE 1,$(TARGET_TESTARCH)))
-+ TARGET_ARCH= -DLJ_ARCH_ENDIAN=LUAJIT_LE
-+ else
-+ TARGET_ARCH= -DLJ_ARCH_ENDIAN=LUAJIT_BE
-+ endif
-+ TARGET_LJARCH= ppc
-+else
-+ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH)))
-+ ifneq (,$(findstring MIPSEL ,$(TARGET_TESTARCH)))
-+ TARGET_ARCH= -D__MIPSEL__=1
-+ endif
-+ ifneq (,$(findstring LJ_TARGET_MIPS64 ,$(TARGET_TESTARCH)))
-+ TARGET_LJARCH= mips64
-+ else
-+ TARGET_LJARCH= mips
-+ endif
-+else
-+ $(error Unsupported target architecture)
-+endif
-+endif
-+endif
-+endif
-+endif
-+endif
-+
-+ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH)))
-+ TARGET_SYS= PS3
-+ TARGET_ARCH+= -D__CELLOS_LV2__
-+ TARGET_XCFLAGS+= -DLUAJIT_USE_SYSMALLOC
-+ TARGET_XLIBS+= -lpthread
-+endif
-+
-+TARGET_XCFLAGS+= $(CCOPT_$(TARGET_LJARCH))
-+TARGET_ARCH+= $(patsubst %,-DLUAJIT_TARGET=LUAJIT_ARCH_%,$(TARGET_LJARCH))
-+
-+ifneq (,$(PREFIX))
-+ifneq (/usr/local,$(PREFIX))
-+ TARGET_XCFLAGS+= -DLUA_ROOT=\"$(PREFIX)\"
-+ ifneq (/usr,$(PREFIX))
-+ TARGET_DYNXLDOPTS= -Wl,-rpath,$(TARGET_LIBPATH)
-+ endif
-+endif
-+endif
-+ifneq (,$(MULTILIB))
-+ TARGET_XCFLAGS+= -DLUA_MULTILIB=\"$(MULTILIB)\"
-+endif
-+ifneq (,$(LMULTILIB))
-+ TARGET_XCFLAGS+= -DLUA_LMULTILIB=\"$(LMULTILIB)\"
-+endif
-+
-+##############################################################################
-+# Target system detection.
-+##############################################################################
-+
-+TARGET_SYS?= $(HOST_SYS)
-+ifeq (Windows,$(TARGET_SYS))
-+ TARGET_STRIP+= --strip-unneeded
-+ TARGET_XSHLDFLAGS= -shared -Wl,--out-implib,$(TARGET_DLLDOTANAME)
-+ TARGET_DYNXLDOPTS=
-+else
-+ TARGET_AR+= 2>/dev/null
-+ifeq (,$(shell $(TARGET_CC) -o /dev/null -c -x c /dev/null -fno-stack-protector 2>/dev/null || echo 1))
-+ TARGET_XCFLAGS+= -fno-stack-protector
-+endif
-+ifeq (Darwin,$(TARGET_SYS))
-+ ifeq (,$(MACOSX_DEPLOYMENT_TARGET))
-+ $(error missing: export MACOSX_DEPLOYMENT_TARGET=XX.YY)
-+ endif
-+ TARGET_STRIP+= -x
-+ TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
-+ TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
-+ TARGET_DYNXLDOPTS=
-+ TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER)
-+else
-+ifeq (iOS,$(TARGET_SYS))
-+ TARGET_STRIP+= -x
-+ TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
-+ TARGET_DYNXLDOPTS=
-+ TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER)
-+ ifeq (arm64,$(TARGET_LJARCH))
-+ TARGET_XCFLAGS+= -fno-omit-frame-pointer
-+ endif
-+else
-+ ifeq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH)))
-+ # Find out whether the target toolchain always generates unwind tables.
-+ TARGET_TESTUNWIND=$(shell exec 2>/dev/null; echo 'extern void b(void);int a(void){b();return 0;}' | $(TARGET_CC) -c -x c - -o tmpunwind.o && { grep -qa -e eh_frame -e __unwind_info tmpunwind.o || grep -qU -e eh_frame -e __unwind_info tmpunwind.o; } && echo E; rm -f tmpunwind.o)
-+ ifneq (,$(findstring E,$(TARGET_TESTUNWIND)))
-+ TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
-+ endif
-+ endif
-+ ifneq (SunOS,$(TARGET_SYS))
-+ ifneq (PS3,$(TARGET_SYS))
-+ TARGET_XLDFLAGS+= -Wl,-E
-+ endif
-+ endif
-+ ifeq (Linux,$(TARGET_SYS))
-+ TARGET_XLIBS+= -ldl
-+ endif
-+ ifeq (GNU/kFreeBSD,$(TARGET_SYS))
-+ TARGET_XLIBS+= -ldl
-+ endif
-+endif
-+endif
-+endif
-+
-+ifneq ($(HOST_SYS),$(TARGET_SYS))
-+ ifeq (Windows,$(TARGET_SYS))
-+ HOST_XCFLAGS+= -malign-double -DLUAJIT_OS=LUAJIT_OS_WINDOWS
-+ else
-+ ifeq (Linux,$(TARGET_SYS))
-+ HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_LINUX
-+ else
-+ ifeq (Darwin,$(TARGET_SYS))
-+ HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX
-+ else
-+ ifeq (iOS,$(TARGET_SYS))
-+ HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX -DTARGET_OS_IPHONE=1
-+ else
-+ HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OTHER
-+ endif
-+ endif
-+ endif
-+ endif
-+endif
-+
-+ifneq (,$(CCDEBUG))
-+ TARGET_STRIP= @:
-+endif
-+
-+##############################################################################
-+# Files and pathnames.
-+##############################################################################
-+
-+MINILUA_O= host/minilua.o
-+MINILUA_LIBS= -lm
-+MINILUA_T= host/minilua
-+MINILUA_X= $(MINILUA_T)
-+
-+ifeq (,$(HOST_LUA))
-+ HOST_LUA= $(MINILUA_X)
-+ DASM_DEP= $(MINILUA_T)
-+endif
-+
-+DASM_DIR= ../dynasm
-+DASM= $(HOST_LUA) $(DASM_DIR)/dynasm.lua
-+DASM_XFLAGS=
-+DASM_AFLAGS=
-+DASM_ARCH= $(TARGET_LJARCH)
-+
-+ifneq (,$(findstring LJ_LE 1,$(TARGET_TESTARCH)))
-+ DASM_AFLAGS+= -D ENDIAN_LE
-+else
-+ DASM_AFLAGS+= -D ENDIAN_BE
-+endif
-+ifneq (,$(findstring LJ_ARCH_BITS 64,$(TARGET_TESTARCH)))
-+ DASM_AFLAGS+= -D P64
-+endif
-+ifneq (,$(findstring LJ_HASJIT 1,$(TARGET_TESTARCH)))
-+ DASM_AFLAGS+= -D JIT
-+endif
-+ifneq (,$(findstring LJ_HASFFI 1,$(TARGET_TESTARCH)))
-+ DASM_AFLAGS+= -D FFI
-+endif
-+ifneq (,$(findstring LJ_DUALNUM 1,$(TARGET_TESTARCH)))
-+ DASM_AFLAGS+= -D DUALNUM
-+endif
-+ifneq (,$(findstring LJ_ARCH_HASFPU 1,$(TARGET_TESTARCH)))
-+ DASM_AFLAGS+= -D FPU
-+ TARGET_ARCH+= -DLJ_ARCH_HASFPU=1
-+else
-+ TARGET_ARCH+= -DLJ_ARCH_HASFPU=0
-+endif
-+ifeq (,$(findstring LJ_ABI_SOFTFP 1,$(TARGET_TESTARCH)))
-+ DASM_AFLAGS+= -D HFABI
-+ TARGET_ARCH+= -DLJ_ABI_SOFTFP=0
-+else
-+ TARGET_ARCH+= -DLJ_ABI_SOFTFP=1
-+endif
-+ifneq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH)))
-+ DASM_AFLAGS+= -D NO_UNWIND
-+ TARGET_ARCH+= -DLUAJIT_NO_UNWIND
-+endif
-+DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subst LJ_ARCH_VERSION ,LJ_ARCH_VERSION_,$(TARGET_TESTARCH))))
-+ifeq (Windows,$(TARGET_SYS))
-+ DASM_AFLAGS+= -D WIN
-+endif
-+ifeq (x64,$(TARGET_LJARCH))
-+ ifeq (,$(findstring LJ_FR2 1,$(TARGET_TESTARCH)))
-+ DASM_ARCH= x86
-+ endif
-+else
-+ifeq (arm,$(TARGET_LJARCH))
-+ ifeq (iOS,$(TARGET_SYS))
-+ DASM_AFLAGS+= -D IOS
-+ endif
-+else
-+ifneq (,$(findstring LJ_TARGET_MIPSR6 ,$(TARGET_TESTARCH)))
-+ DASM_AFLAGS+= -D MIPSR6
-+endif
-+ifeq (ppc,$(TARGET_LJARCH))
-+ ifneq (,$(findstring LJ_ARCH_SQRT 1,$(TARGET_TESTARCH)))
-+ DASM_AFLAGS+= -D SQRT
-+ endif
-+ ifneq (,$(findstring LJ_ARCH_ROUND 1,$(TARGET_TESTARCH)))
-+ DASM_AFLAGS+= -D ROUND
-+ endif
-+ ifneq (,$(findstring LJ_ARCH_PPC32ON64 1,$(TARGET_TESTARCH)))
-+ DASM_AFLAGS+= -D GPR64
-+ endif
-+ ifeq (PS3,$(TARGET_SYS))
-+ DASM_AFLAGS+= -D PPE -D TOC
-+ endif
-+endif
-+endif
-+endif
-+
-+DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS)
-+DASM_DASC= vm_$(DASM_ARCH).dasc
-+
-+BUILDVM_O= host/buildvm.o host/buildvm_asm.o host/buildvm_peobj.o \
-+ host/buildvm_lib.o host/buildvm_fold.o
-+BUILDVM_T= host/buildvm
-+BUILDVM_X= $(BUILDVM_T)
-+
-+HOST_O= $(MINILUA_O) $(BUILDVM_O)
-+HOST_T= $(MINILUA_T) $(BUILDVM_T)
-+
-+LJVM_S= lj_vm.S
-+LJVM_O= lj_vm.o
-+LJVM_BOUT= $(LJVM_S)
-+LJVM_MODE= elfasm
-+
-+LJLIB_O= lib_base.o lib_math.o lib_bit.o lib_string.o lib_table.o \
-+ lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o \
-+ lib_buffer.o
-+LJLIB_C= $(LJLIB_O:.o=.c)
-+
-+LJCORE_O= lj_assert.o lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \
-+ lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \
-+ lj_prng.o lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o \
-+ lj_strscan.o lj_strfmt.o lj_strfmt_num.o lj_serialize.o \
-+ lj_api.o lj_profile.o \
-+ lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \
-+ lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \
-+ lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \
-+ lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \
-+ lj_asm.o lj_trace.o lj_gdbjit.o \
-+ lj_ctype.o lj_cdata.o lj_cconv.o lj_ccall.o lj_ccallback.o \
-+ lj_carith.o lj_clib.o lj_cparse.o \
-+ lj_lib.o lj_alloc.o lib_aux.o \
-+ $(LJLIB_O) lib_init.o
-+
-+LJVMCORE_O= $(LJVM_O) $(LJCORE_O)
-+LJVMCORE_DYNO= $(LJVMCORE_O:.o=_dyn.o)
-+
-+LIB_VMDEF= jit/vmdef.lua
-+LIB_VMDEFP= $(LIB_VMDEF)
-+
-+LUAJIT_O= luajit.o
-+LUAJIT_A= libluajit.a
-+LUAJIT_SO= libluajit.so
-+LUAJIT_T= luajit
-+
-+ALL_T= $(LUAJIT_T) $(LUAJIT_A) $(LUAJIT_SO) $(HOST_T)
-+ALL_HDRGEN= lj_bcdef.h lj_ffdef.h lj_libdef.h lj_recdef.h lj_folddef.h \
-+ host/buildvm_arch.h
-+ALL_GEN= $(LJVM_S) $(ALL_HDRGEN) $(LIB_VMDEFP)
-+WIN_RM= *.obj *.lib *.exp *.dll *.exe *.manifest *.pdb *.ilk
-+ALL_RM= $(ALL_T) $(ALL_GEN) *.o host/*.o $(WIN_RM)
-+
-+##############################################################################
-+# Build mode handling.
-+##############################################################################
-+
-+# Mixed mode defaults.
-+TARGET_O= $(LUAJIT_A)
-+TARGET_T= $(LUAJIT_T) $(LUAJIT_SO)
-+TARGET_DEP= $(LIB_VMDEF) $(LUAJIT_SO)
-+
-+ifeq (Windows,$(TARGET_SYS))
-+ TARGET_DYNCC= $(STATIC_CC)
-+ LJVM_MODE= peobj
-+ LJVM_BOUT= $(LJVM_O)
-+ LUAJIT_T= luajit.exe
-+ ifeq (cygwin,$(HOST_MSYS))
-+ LUAJIT_SO= cyg$(TARGET_DLLNAME)
-+ else
-+ LUAJIT_SO= $(TARGET_DLLNAME)
-+ endif
-+ # Mixed mode is not supported on Windows. And static mode doesn't work well.
-+ # C modules cannot be loaded, because they bind to lua51.dll.
-+ ifneq (static,$(BUILDMODE))
-+ BUILDMODE= dynamic
-+ TARGET_XCFLAGS+= -DLUA_BUILD_AS_DLL
-+ endif
-+endif
-+ifeq (Darwin,$(TARGET_SYS))
-+ LJVM_MODE= machasm
-+endif
-+ifeq (iOS,$(TARGET_SYS))
-+ LJVM_MODE= machasm
-+endif
-+ifeq (SunOS,$(TARGET_SYS))
-+ BUILDMODE= static
-+endif
-+ifeq (PS3,$(TARGET_SYS))
-+ BUILDMODE= static
-+endif
-+
-+ifeq (Windows,$(HOST_SYS))
-+ MINILUA_T= host/minilua.exe
-+ BUILDVM_T= host/buildvm.exe
-+ ifeq (,$(HOST_MSYS))
-+ MINILUA_X= host\minilua
-+ BUILDVM_X= host\buildvm
-+ ALL_RM:= $(subst /,\,$(ALL_RM))
-+ HOST_RM= del
-+ endif
-+endif
-+
-+ifeq (static,$(BUILDMODE))
-+ TARGET_DYNCC= @:
-+ TARGET_T= $(LUAJIT_T)
-+ TARGET_DEP= $(LIB_VMDEF)
-+else
-+ifeq (dynamic,$(BUILDMODE))
-+ ifneq (Windows,$(TARGET_SYS))
-+ TARGET_CC= $(DYNAMIC_CC)
-+ endif
-+ TARGET_DYNCC= @:
-+ LJVMCORE_DYNO= $(LJVMCORE_O)
-+ TARGET_O= $(LUAJIT_SO)
-+ TARGET_XLDFLAGS+= $(TARGET_DYNXLDOPTS)
-+else
-+ifeq (Darwin,$(TARGET_SYS))
-+ TARGET_DYNCC= @:
-+ LJVMCORE_DYNO= $(LJVMCORE_O)
-+endif
-+ifeq (iOS,$(TARGET_SYS))
-+ TARGET_DYNCC= @:
-+ LJVMCORE_DYNO= $(LJVMCORE_O)
-+endif
-+endif
-+endif
-+
-+Q= @
-+E= @echo
-+#Q=
-+#E= @:
-+
-+##############################################################################
-+# Make targets.
-+##############################################################################
-+
-+default all: $(TARGET_T)
-+
-+amalg:
-+ $(MAKE) all "LJCORE_O=ljamalg.o"
-+
-+clean:
-+ $(HOST_RM) $(ALL_RM)
-+
-+libbc:
-+ ./$(LUAJIT_T) host/genlibbc.lua -o host/buildvm_libbc.h $(LJLIB_C)
-+ $(MAKE) all
-+
-+depend:
-+ @for file in $(ALL_HDRGEN); do \
-+ test -f $$file || touch $$file; \
-+ done
-+ @$(HOST_CC) $(HOST_ACFLAGS) -MM *.c host/*.c | \
-+ sed -e "s| [^ ]*/dasm_\S*\.h||g" \
-+ -e "s|^\([^l ]\)|host/\1|" \
-+ -e "s| lj_target_\S*\.h| lj_target_*.h|g" \
-+ -e "s| lj_emit_\S*\.h| lj_emit_*.h|g" \
-+ -e "s| lj_asm_\S*\.h| lj_asm_*.h|g" >Makefile.dep
-+ @for file in $(ALL_HDRGEN); do \
-+ test -s $$file || $(HOST_RM) $$file; \
-+ done
-+
-+.PHONY: default all amalg clean libbc depend
-+
-+##############################################################################
-+# Rules for generated files.
-+##############################################################################
-+
-+$(MINILUA_T): $(MINILUA_O)
-+ $(E) "HOSTLINK $@"
-+ $(Q)$(HOST_CC) $(HOST_ALDFLAGS) -o $@ $(MINILUA_O) $(MINILUA_LIBS) $(HOST_ALIBS)
-+
-+host/buildvm_arch.h: $(DASM_DASC) $(DASM_DEP) $(DASM_DIR)/*.lua lj_arch.h lua.h luaconf.h
-+ $(E) "DYNASM $@"
-+ $(Q)$(DASM) $(DASM_FLAGS) -o $@ $(DASM_DASC)
-+
-+host/buildvm.o: $(DASM_DIR)/dasm_*.h
-+
-+$(BUILDVM_T): $(BUILDVM_O)
-+ $(E) "HOSTLINK $@"
-+ $(Q)$(HOST_CC) $(HOST_ALDFLAGS) -o $@ $(BUILDVM_O) $(HOST_ALIBS)
-+
-+$(LJVM_BOUT): $(BUILDVM_T)
-+ $(E) "BUILDVM $@"
-+ $(Q)$(BUILDVM_X) -m $(LJVM_MODE) -o $@
-+
-+lj_bcdef.h: $(BUILDVM_T) $(LJLIB_C)
-+ $(E) "BUILDVM $@"
-+ $(Q)$(BUILDVM_X) -m bcdef -o $@ $(LJLIB_C)
-+
-+lj_ffdef.h: $(BUILDVM_T) $(LJLIB_C)
-+ $(E) "BUILDVM $@"
-+ $(Q)$(BUILDVM_X) -m ffdef -o $@ $(LJLIB_C)
-+
-+lj_libdef.h: $(BUILDVM_T) $(LJLIB_C)
-+ $(E) "BUILDVM $@"
-+ $(Q)$(BUILDVM_X) -m libdef -o $@ $(LJLIB_C)
-+
-+lj_recdef.h: $(BUILDVM_T) $(LJLIB_C)
-+ $(E) "BUILDVM $@"
-+ $(Q)$(BUILDVM_X) -m recdef -o $@ $(LJLIB_C)
-+
-+$(LIB_VMDEF): $(BUILDVM_T) $(LJLIB_C)
-+ $(E) "BUILDVM $@"
-+ $(Q)$(BUILDVM_X) -m vmdef -o $(LIB_VMDEFP) $(LJLIB_C)
-+
-+lj_folddef.h: $(BUILDVM_T) lj_opt_fold.c
-+ $(E) "BUILDVM $@"
-+ $(Q)$(BUILDVM_X) -m folddef -o $@ lj_opt_fold.c
-+
-+##############################################################################
-+# Object file rules.
-+##############################################################################
-+
-+%.o: %.c
-+ $(E) "CC $@"
-+ $(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $<
-+ $(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $<
-+
-+%.o: %.S
-+ $(E) "ASM $@"
-+ $(Q)$(TARGET_DYNCC) $(TARGET_ASFLAGS) -c -o $(@:.o=_dyn.o) $<
-+ $(Q)$(TARGET_CC) $(TARGET_ASFLAGS) -c -o $@ $<
-+
-+$(LUAJIT_O):
-+ $(E) "CC $@"
-+ $(Q)$(TARGET_STCC) $(TARGET_ACFLAGS) -c -o $@ $<
-+
-+$(HOST_O): %.o: %.c
-+ $(E) "HOSTCC $@"
-+ $(Q)$(HOST_CC) $(HOST_ACFLAGS) -c -o $@ $<
-+
-+include Makefile.dep
-+
-+##############################################################################
-+# Target file rules.
-+##############################################################################
-+
-+$(LUAJIT_A): $(LJVMCORE_O)
-+ $(E) "AR $@"
-+ $(Q)$(TARGET_AR) $@ $(LJVMCORE_O)
-+
-+# The dependency on _O, but linking with _DYNO is intentional.
-+$(LUAJIT_SO): $(LJVMCORE_O)
-+ $(E) "DYNLINK $@"
-+ $(Q)$(TARGET_LD) $(TARGET_ASHLDFLAGS) -o $@ $(LJVMCORE_DYNO) $(TARGET_ALIBS)
-+ $(Q)$(TARGET_STRIP) $@
-+
-+$(LUAJIT_T): $(TARGET_O) $(LUAJIT_O) $(TARGET_DEP)
-+ $(E) "LINK $@"
-+ $(Q)$(TARGET_LD) $(TARGET_ALDFLAGS) -o $@ $(LUAJIT_O) $(TARGET_O) $(TARGET_ALIBS)
-+ $(Q)$(TARGET_STRIP) $@
-+ $(E) "OK Successfully built LuaJIT"
-+
-+##############################################################################
-diff -rauN luajit-2.0-505e2c0/src/msvcbuild.bat luajit-2.0-505e2c0-i486-patch/src/msvcbuild.bat
---- luajit-2.0-505e2c0/src/msvcbuild.bat 2023-02-21 17:07:37.000000000 +0100
-+++ luajit-2.0-505e2c0-i486-patch/src/msvcbuild.bat 2023-03-26 18:16:32.558477950 +0200
-@@ -41,7 +41,6 @@
- @set DASC=vm_x86.dasc
- @set DASMFLAGS=-D WIN -D JIT -D FFI
- @set LJARCH=x86
--@set LJCOMPILE=%LJCOMPILE% /arch:SSE2
- :X64
- @if "%1" neq "nogc64" goto :GC64
- @shift
-diff -rauN luajit-2.0-505e2c0/src/vm_x86.dasc luajit-2.0-505e2c0-i486-patch/src/vm_x86.dasc
---- luajit-2.0-505e2c0/src/vm_x86.dasc 2023-02-21 17:07:37.000000000 +0100
-+++ luajit-2.0-505e2c0-i486-patch/src/vm_x86.dasc 2023-03-26 18:16:32.561811273 +0200
-@@ -18,6 +18,7 @@
- |
- |.if P64
- |.define X64, 1
-+|.define SSE, 1
- |.if WIN
- |.define X64WIN, 1
- |.endif
-@@ -439,6 +440,7 @@
- | fpop
- |.endmacro
- |
-+|.macro fdup; fld st0; .endmacro
- |.macro fpop1; fstp st1; .endmacro
- |
- |// Synthesize SSE FP constants.
-@@ -464,6 +466,9 @@
- |.macro sseconst_1, reg, tmp // Synthesize 1.0.
- | sseconst_hi reg, tmp, 3ff00000
- |.endmacro
-+|.macro sseconst_m1, reg, tmp // Synthesize -1.0.
-+| sseconst_hi reg, tmp, bff00000
-+|.endmacro
- |.macro sseconst_2p52, reg, tmp // Synthesize 2^52.
- | sseconst_hi reg, tmp, 43300000
- |.endmacro
-@@ -943,9 +948,13 @@
- |.if DUALNUM
- | mov TMP2, LJ_TISNUM
- | mov TMP1, RC
-- |.else
-+ |.elif SSE
- | cvtsi2sd xmm0, RC
- | movsd TMPQ, xmm0
-+ |.else
-+ | mov ARG4, RC
-+ | fild ARG4
-+ | fstp TMPQ
- |.endif
- | lea RCa, TMPQ // Store temp. TValue in TMPQ.
- | jmp >1
-@@ -1031,9 +1040,13 @@
- |.if DUALNUM
- | mov TMP2, LJ_TISNUM
- | mov TMP1, RC
-- |.else
-+ |.elif SSE
- | cvtsi2sd xmm0, RC
- | movsd TMPQ, xmm0
-+ |.else
-+ | mov ARG4, RC
-+ | fild ARG4
-+ | fstp TMPQ
- |.endif
- | lea RCa, TMPQ // Store temp. TValue in TMPQ.
- | jmp >1
-@@ -1416,6 +1429,19 @@
- | cmp NARGS:RD, 2+1; jb ->fff_fallback
- |.endmacro
- |
-+ |.macro .ffunc_n, name
-+ | .ffunc_1 name
-+ | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
-+ | fld qword [BASE]
-+ |.endmacro
-+ |
-+ |.macro .ffunc_n, name, op
-+ | .ffunc_1 name
-+ | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
-+ | op
-+ | fld qword [BASE]
-+ |.endmacro
-+ |
- |.macro .ffunc_nsse, name, op
- | .ffunc_1 name
- | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
-@@ -1426,6 +1452,14 @@
- | .ffunc_nsse name, movsd
- |.endmacro
- |
-+ |.macro .ffunc_nn, name
-+ | .ffunc_2 name
-+ | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
-+ | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback
-+ | fld qword [BASE]
-+ | fld qword [BASE+8]
-+ |.endmacro
-+ |
- |.macro .ffunc_nnsse, name
- | .ffunc_2 name
- | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
-@@ -1631,7 +1665,11 @@
- |.else
- | jae ->fff_fallback
- |.endif
-+ |.if SSE
- | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0
-+ |.else
-+ | fld qword [BASE]; jmp ->fff_resn
-+ |.endif
- |
- |.ffunc_1 tostring
- | // Only handles the string or number case inline.
-@@ -1729,12 +1767,19 @@
- | add RD, 1
- | mov dword [BASE-4], LJ_TISNUM
- | mov dword [BASE-8], RD
-- |.else
-+ |.elif SSE
- | movsd xmm0, qword [BASE+8]
- | sseconst_1 xmm1, RBa
- | addsd xmm0, xmm1
- | cvttsd2si RD, xmm0
- | movsd qword [BASE-8], xmm0
-+ |.else
-+ | fld qword [BASE+8]
-+ | fld1
-+ | faddp st1
-+ | fist ARG1
-+ | fstp qword [BASE-8]
-+ | mov RD, ARG1
- |.endif
- | mov TAB:RB, [BASE]
- | cmp RD, TAB:RB->asize; jae >2 // Not in array part?
-@@ -1783,9 +1828,12 @@
- |.if DUALNUM
- | mov dword [BASE+12], LJ_TISNUM
- | mov dword [BASE+8], 0
-- |.else
-+ |.elif SSE
- | xorps xmm0, xmm0
- | movsd qword [BASE+8], xmm0
-+ |.else
-+ | fldz
-+ | fstp qword [BASE+8]
- |.endif
- | mov RD, 1+3
- | jmp ->fff_res
-@@ -2017,11 +2065,6 @@
- |->fff_resi: // Dummy.
- |.endif
- |
-- |->fff_resn:
-- | mov PC, [BASE-4]
-- | fstp qword [BASE-8]
-- | jmp ->fff_res1
-- |
- | .ffunc_1 math_abs
- |.if DUALNUM
- | cmp dword [BASE+4], LJ_TISNUM; jne >2
-@@ -2044,6 +2087,8 @@
- |.else
- | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
- |.endif
-+ |
-+ |.if SSE
- | movsd xmm0, qword [BASE]
- | sseconst_abs xmm1, RDa
- | andps xmm0, xmm1
-@@ -2051,6 +2096,15 @@
- | mov PC, [BASE-4]
- | movsd qword [BASE-8], xmm0
- | // fallthrough
-+ |.else
-+ | fld qword [BASE]
-+ | fabs
-+ | // fallthrough
-+ |->fff_resxmm0: // Dummy.
-+ |->fff_resn:
-+ | mov PC, [BASE-4]
-+ | fstp qword [BASE-8]
-+ |.endif
- |
- |->fff_res1:
- | mov RD, 1+1
-@@ -2093,8 +2147,9 @@
- |.else
- | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
- |.endif
-+ |.if SSE
- | movsd xmm0, qword [BASE]
-- | call ->vm_ .. func .. _sse
-+ | call ->vm_ .. func
- |.if DUALNUM
- | cvttsd2si RB, xmm0
- | cmp RB, 0x80000000
-@@ -2105,29 +2160,61 @@
- | je ->fff_resi
- |.endif
- | jmp ->fff_resxmm0
-+ |.else
-+ | fld qword [BASE]
-+ | call ->vm_ .. func
-+ | .if DUALNUM
-+ | fist ARG1
-+ | mov RB, ARG1
-+ | cmp RB, 0x80000000; jne >2
-+ | fdup
-+ | fild ARG1
-+ | fcomparepp
-+ | jp ->fff_resn
-+ | jne ->fff_resn
-+ |2:
-+ | fpop
-+ | jmp ->fff_resi
-+ | .else
-+ | jmp ->fff_resn
-+ | .endif
-+ |.endif
- |.endmacro
- |
- | math_round floor
- | math_round ceil
- |
-+ |.if SSE
- |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0
-+ |.else
-+ |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn
-+ |.endif
- |
- |.ffunc math_log
- | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
- | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
-+ |.if SSE
- | movsd xmm0, qword [BASE]
-- |.if not X64
-- | movsd FPARG1, xmm0
-- |.endif
-+ | .if not X64
-+ | movsd FPARG1, xmm0
-+ | .endif
- | mov RB, BASE
- | call extern log
- | mov BASE, RB
- | jmp ->fff_resfp
-+ |.else
-+ | fldln2; fld qword [BASE]; fyl2x; jmp ->fff_resn
-+ |.endif
- |
- |.macro math_extern, func
-+ |.if SSE
- | .ffunc_nsse math_ .. func
-- |.if not X64
-- | movsd FPARG1, xmm0
-+ | .if not X64
-+ | movsd FPARG1, xmm0
-+ | .endif
-+ |.else
-+ | .ffunc_n math_ .. func
-+ | fstp FPARG1
- |.endif
- | mov RB, BASE
- | call extern func
-@@ -2136,10 +2223,16 @@
- |.endmacro
- |
- |.macro math_extern2, func
-- | .ffunc_nnsse math_ .. func
- |.if not X64
-- | movsd FPARG1, xmm0
-- | movsd FPARG3, xmm1
-+ | .if SSE
-+ | .ffunc_nnsse math_ .. func
-+ | movsd FPARG1, xmm0
-+ | movsd FPARG3, xmm1
-+ | .else
-+ | .ffunc_nn math_ .. func
-+ | fstp FPARG3
-+ | fstp FPARG1
-+ | .endif
- |.endif
- | mov RB, BASE
- | call extern func
-@@ -2176,34 +2269,65 @@
- | cmp RB, 0x00200000; jb >4
- |1:
- | shr RB, 21; sub RB, RC // Extract and unbias exponent.
-+ |.if SSE
- | cvtsi2sd xmm0, RB
-+ |.else
-+ | mov TMP1, RB; fild TMP1
-+ |.endif
- | mov RB, [BASE-4]
- | and RB, 0x800fffff // Mask off exponent.
- | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0.
- | mov [BASE-4], RB
- |2:
-+ |.if SSE
- | movsd qword [BASE], xmm0
-+ |.else
-+ | fstp qword [BASE]
-+ |.endif
- | mov RD, 1+2
- | jmp ->fff_res
- |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0.
-+ |.if SSE
- | xorps xmm0, xmm0; jmp <2
-+ |.else
-+ | fldz; jmp <2
-+ |.endif
- |4: // Handle denormals by multiplying with 2^54 and adjusting the bias.
-+ |.if SSE
- | movsd xmm0, qword [BASE]
- | sseconst_hi xmm1, RBa, 43500000 // 2^54.
- | mulsd xmm0, xmm1
- | movsd qword [BASE-8], xmm0
-+ |.else
-+ | fld qword [BASE]
-+ | mov TMP1, 0x5a800000; fmul TMP1 // x = x*2^54
-+ | fstp qword [BASE-8]
-+ |.endif
- | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1
- |
-+ |.if SSE
- |.ffunc_nsse math_modf
-+ |.else
-+ |.ffunc_n math_modf
-+ |.endif
- | mov RB, [BASE+4]
- | mov PC, [BASE-4]
- | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf?
-+ |.if SSE
- | movaps xmm4, xmm0
-- | call ->vm_trunc_sse
-+ | call ->vm_trunc
- | subsd xmm4, xmm0
- |1:
- | movsd qword [BASE-8], xmm0
- | movsd qword [BASE], xmm4
-+ |.else
-+ | fdup
-+ | call ->vm_trunc
-+ | fsub st1, st0
-+ |1:
-+ | fstp qword [BASE-8]
-+ | fstp qword [BASE]
-+ |.endif
- | mov RC, [BASE-4]; mov RB, [BASE+4]
- | xor RC, RB; js >3 // Need to adjust sign?
- |2:
-@@ -2213,9 +2337,24 @@
- | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction.
- | jmp <2
- |4:
-+ |.if SSE
- | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0.
-+ |.else
-+ | fldz; fxch; jmp <1 // Return +-Inf and +-0.
-+ |.endif
-+ |
-+ |.ffunc_nnr math_fmod
-+ |1: ; fprem; fnstsw ax; sahf; jp <1
-+ | fpop1
-+ | jmp ->fff_resn
-+ |
-+ |.if SSE
-+ |.ffunc_nnsse math_pow; call ->vm_pow; jmp ->fff_resxmm0
-+ |.else
-+ |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn
-+ |.endif
- |
-- |.macro math_minmax, name, cmovop, sseop
-+ |.macro math_minmax, name, cmovop, fcmovop, sseop
- | .ffunc_1 name
- | mov RA, 2
- | cmp dword [BASE+4], LJ_TISNUM
-@@ -2232,7 +2371,12 @@
- |3:
- | ja ->fff_fallback
- | // Convert intermediate result to number and continue below.
-+ |.if SSE
- | cvtsi2sd xmm0, RB
-+ |.else
-+ | mov TMP1, RB
-+ | fild TMP1
-+ |.endif
- | jmp >6
- |4:
- | ja ->fff_fallback
-@@ -2240,6 +2384,7 @@
- | jae ->fff_fallback
- |.endif
- |
-+ |.if SSE
- | movsd xmm0, qword [BASE]
- |5: // Handle numbers or integers.
- | cmp RA, RD; jae ->fff_resxmm0
-@@ -2258,10 +2403,34 @@
- | sseop xmm0, xmm1
- | add RA, 1
- | jmp <5
-+ |.else
-+ | fld qword [BASE]
-+ |5: // Handle numbers or integers.
-+ | cmp RA, RD; jae ->fff_resn
-+ | cmp dword [BASE+RA*8-4], LJ_TISNUM
-+ |.if DUALNUM
-+ | jb >6
-+ | ja >9
-+ | fild dword [BASE+RA*8-8]
-+ | jmp >7
-+ |.else
-+ | jae >9
-+ |.endif
-+ |6:
-+ | fld qword [BASE+RA*8-8]
-+ |7:
-+ | fucomi st1; fcmovop st1; fpop1
-+ | add RA, 1
-+ | jmp <5
-+ |.endif
- |.endmacro
- |
-- | math_minmax math_min, cmovg, minsd
-- | math_minmax math_max, cmovl, maxsd
-+ | math_minmax math_min, cmovg, fcmovnbe, minsd
-+ | math_minmax math_max, cmovl, fcmovbe, maxsd
-+ |.if not SSE
-+ |9:
-+ | fpop; jmp ->fff_fallback
-+ |.endif
- |
- |//-- String library -----------------------------------------------------
- |
-@@ -2275,8 +2444,10 @@
- | movzx RB, byte STR:RB[1]
- |.if DUALNUM
- | jmp ->fff_resi
-- |.else
-+ |.elif SSE
- | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
-+ |.else
-+ | mov TMP1, RB; fild TMP1; jmp ->fff_resn
- |.endif
- |
- |.ffunc string_char // Only handle the 1-arg case here.
-@@ -2288,11 +2459,16 @@
- | mov RB, dword [BASE]
- | cmp RB, 255; ja ->fff_fallback
- | mov TMP2, RB
-- |.else
-+ |.elif SSE
- | jae ->fff_fallback
- | cvttsd2si RB, qword [BASE]
- | cmp RB, 255; ja ->fff_fallback
- | mov TMP2, RB
-+ |.else
-+ | jae ->fff_fallback
-+ | fld qword [BASE]
-+ | fistp TMP2
-+ | cmp TMP2, 255; ja ->fff_fallback
- |.endif
- |.if X64
- | mov TMP3, 1
-@@ -2331,10 +2507,14 @@
- | jne ->fff_fallback
- | mov RB, dword [BASE+16]
- | mov TMP2, RB
-- |.else
-+ |.elif SSE
- | jae ->fff_fallback
- | cvttsd2si RB, qword [BASE+16]
- | mov TMP2, RB
-+ |.else
-+ | jae ->fff_fallback
-+ | fld qword [BASE+16]
-+ | fistp TMP2
- |.endif
- |1:
- | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
-@@ -2349,8 +2529,12 @@
- | mov RB, STR:RB->len
- |.if DUALNUM
- | mov RA, dword [BASE+8]
-- |.else
-+ |.elif SSE
- | cvttsd2si RA, qword [BASE+8]
-+ |.else
-+ | fld qword [BASE+8]
-+ | fistp ARG3
-+ | mov RA, ARG3
- |.endif
- | mov RC, TMP2
- | cmp RB, RC // len < end? (unsigned compare)
-@@ -2418,10 +2602,16 @@
- |
- |//-- Bit library --------------------------------------------------------
- |
-+ |.define TOBIT_BIAS, 0x59c00000 // 2^52 + 2^51 (float, not double!).
-+ |
- |.macro .ffunc_bit, name, kind, fdef
- | fdef name
- |.if kind == 2
-+ |.if SSE
- | sseconst_tobit xmm1, RBa
-+ |.else
-+ | mov TMP1, TOBIT_BIAS
-+ |.endif
- |.endif
- | cmp dword [BASE+4], LJ_TISNUM
- |.if DUALNUM
-@@ -2437,12 +2627,24 @@
- |.else
- | jae ->fff_fallback
- |.endif
-+ |.if SSE
- | movsd xmm0, qword [BASE]
- |.if kind < 2
- | sseconst_tobit xmm1, RBa
- |.endif
- | addsd xmm0, xmm1
- | movd RB, xmm0
-+ |.else
-+ | fld qword [BASE]
-+ |.if kind < 2
-+ | mov TMP1, TOBIT_BIAS
-+ |.endif
-+ | fadd TMP1
-+ | fstp FPARG1
-+ |.if kind > 0
-+ | mov RB, ARG1
-+ |.endif
-+ |.endif
- |2:
- |.endmacro
- |
-@@ -2451,7 +2653,15 @@
- |.endmacro
- |
- |.ffunc_bit bit_tobit, 0
-+ |.if DUALNUM or SSE
-+ |.if not SSE
-+ | mov RB, ARG1
-+ |.endif
- | jmp ->fff_resbit
-+ |.else
-+ | fild ARG1
-+ | jmp ->fff_resn
-+ |.endif
- |
- |.macro .ffunc_bit_op, name, ins
- | .ffunc_bit name, 2
-@@ -2471,10 +2681,17 @@
- |.else
- | jae ->fff_fallback_bit_op
- |.endif
-+ |.if SSE
- | movsd xmm0, qword [RD]
- | addsd xmm0, xmm1
- | movd RA, xmm0
- | ins RB, RA
-+ |.else
-+ | fld qword [RD]
-+ | fadd TMP1
-+ | fstp FPARG1
-+ | ins RB, ARG1
-+ |.endif
- | sub RD, 8
- | jmp <1
- |.endmacro
-@@ -2491,10 +2708,15 @@
- | not RB
- |.if DUALNUM
- | jmp ->fff_resbit
-- |.else
-+ |.elif SSE
- |->fff_resbit:
- | cvtsi2sd xmm0, RB
- | jmp ->fff_resxmm0
-+ |.else
-+ |->fff_resbit:
-+ | mov ARG1, RB
-+ | fild ARG1
-+ | jmp ->fff_resn
- |.endif
- |
- |->fff_fallback_bit_op:
-@@ -2507,13 +2729,22 @@
- | // Note: no inline conversion from number for 2nd argument!
- | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback
- | mov RA, dword [BASE+8]
-- |.else
-+ |.elif SSE
- | .ffunc_nnsse name
- | sseconst_tobit xmm2, RBa
- | addsd xmm0, xmm2
- | addsd xmm1, xmm2
- | movd RB, xmm0
- | movd RA, xmm1
-+ |.else
-+ | .ffunc_nn name
-+ | mov TMP1, TOBIT_BIAS
-+ | fadd TMP1
-+ | fstp FPARG3
-+ | fadd TMP1
-+ | fstp FPARG1
-+ | mov RA, ARG3
-+ | mov RB, ARG1
- |.endif
- | ins RB, cl // Assumes RA is ecx.
- | jmp ->fff_resbit
-@@ -2954,18 +3185,27 @@
- |//-----------------------------------------------------------------------
- |
- |// FP value rounding. Called by math.floor/math.ceil fast functions
-- |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
-- |.macro vm_round, name, mode, cond
-- |->name:
-- |.if not X64 and cond
-- | movsd xmm0, qword [esp+4]
-- | call ->name .. _sse
-- | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg.
-- | fld qword [esp+4]
-+ |// and from JIT code.
-+ |
-+ |// x87 variant: Arg/ret on x87 stack. No int/xmm registers modified.
-+ |.macro vm_round_x87, mode1, mode2
-+ | fnstcw word [esp+4] // Caveat: overwrites ARG1 and ARG2.
-+ | mov [esp+8], eax
-+ | mov ax, mode1
-+ | or ax, [esp+4]
-+ |.if mode2 ~= 0xffff
-+ | and ax, mode2
-+ |.endif
-+ | mov [esp+6], ax
-+ | fldcw word [esp+6]
-+ | frndint
-+ | fldcw word [esp+4]
-+ | mov eax, [esp+8]
- | ret
-- |.endif
-+ |.endmacro
- |
-- |->name .. _sse:
-+ |// SSE variant: arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
-+ |.macro vm_round_sse, mode
- | sseconst_abs xmm2, RDa
- | sseconst_2p52 xmm3, RDa
- | movaps xmm1, xmm0
-@@ -2986,29 +3226,37 @@
- | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
- | subsd xmm1, xmm3
- | orpd xmm1, xmm2 // Merge sign bit back in.
-- | sseconst_1 xmm3, RDa
- | .if mode == 1 // ceil(x)?
-+ | sseconst_m1 xmm2, RDa // Must subtract -1 to preserve -0.
- | cmpsd xmm0, xmm1, 6 // x > result?
-- | andpd xmm0, xmm3
-- | addsd xmm1, xmm0 // If yes, add 1.
-- | orpd xmm1, xmm2 // Merge sign bit back in (again).
- | .else // floor(x)?
-+ | sseconst_1 xmm2, RDa
- | cmpsd xmm0, xmm1, 1 // x < result?
-- | andpd xmm0, xmm3
-- | subsd xmm1, xmm0 // If yes, subtract 1.
- | .endif
-+ | andpd xmm0, xmm2
-+ | subsd xmm1, xmm0 // If yes, subtract +-1.
- |.endif
- | movaps xmm0, xmm1
- |1:
- | ret
- |.endmacro
- |
-- | vm_round vm_floor, 0, 1
-- | vm_round vm_ceil, 1, JIT
-- | vm_round vm_trunc, 2, JIT
-+ |.macro vm_round, name, ssemode, mode1, mode2, extra // FIXME: EXTRA NOT USED
-+ |->name:
-+ |.if not SSE
-+ | vm_round_x87 mode1, mode2
-+ |.endif
-+ |->name .. _sse:
-+ | vm_round_sse ssemode
-+ |.endmacro
-+ |
-+ | vm_round vm_floor, 0, 0x0400, 0xf7ff, 1
-+ | vm_round vm_ceil, 1, 0x0800, 0xfbff, JIT
-+ | vm_round vm_trunc, 2, 0x0c00, 0xffff, JIT
- |
- |// FP modulo x%y. Called by BC_MOD* and vm_arith.
- |->vm_mod:
-+ |.if SSE
- |// Args in xmm0/xmm1, return value in xmm0.
- |// Caveat: xmm0-xmm5 and RC (eax) modified!
- | movaps xmm5, xmm0
-@@ -3036,6 +3284,243 @@
- | movaps xmm0, xmm5
- | subsd xmm0, xmm1
- | ret
-+ |.else
-+ |// Args/ret on x87 stack (y on top). No xmm registers modified.
-+ |// Caveat: needs 3 slots on x87 stack! RC (eax) modified!
-+ | fld st1
-+ | fdiv st1
-+ | fnstcw word [esp+4]
-+ | mov ax, 0x0400
-+ | or ax, [esp+4]
-+ | and ax, 0xf7ff
-+ | mov [esp+6], ax
-+ | fldcw word [esp+6]
-+ | frndint
-+ | fldcw word [esp+4]
-+ | fmulp st1
-+ | fsubp st1
-+ | ret
-+ |.endif
-+ |
-+ |->vm_exp2raw: // Entry point for vm_pow. Without +-Inf check.
-+ | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part.
-+ | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int
-+ |1:
-+ | ret
-+ |2:
-+ | fpop; fldz; ret
-+ |
-+ |// Generic power function x^y. Called by BC_POW, math.pow fast function,
-+ |// and vm_arith.
-+ |// Args/ret on x87 stack (y on top). RC (eax) modified.
-+ |// Caveat: needs 3 slots on x87 stack!
-+ |->vm_pow:
-+ |.if not SSE
-+ | fist dword [esp+4] // Store/reload int before comparison.
-+ | fild dword [esp+4] // Integral exponent used in vm_powi.
-+ | fucomip st1
-+ | jnz >8 // Branch for FP exponents.
-+ | jp >9 // Branch for NaN exponent.
-+ | fpop // Pop y and fallthrough to vm_powi.
-+ |
-+ |// FP/int power function x^i. Arg1/ret on x87 stack.
-+ |// Arg2 (int) on C stack. RC (eax) modified.
-+ |// Caveat: needs 2 slots on x87 stack!
-+ | mov eax, [esp+4]
-+ | cmp eax, 1; jle >6 // i<=1?
-+ | // Now 1 < (unsigned)i <= 0x80000000.
-+ |1: // Handle leading zeros.
-+ | test eax, 1; jnz >2
-+ | fmul st0
-+ | shr eax, 1
-+ | jmp <1
-+ |2:
-+ | shr eax, 1; jz >5
-+ | fdup
-+ |3: // Handle trailing bits.
-+ | fmul st0
-+ | shr eax, 1; jz >4
-+ | jnc <3
-+ | fmul st1, st0
-+ | jmp <3
-+ |4:
-+ | fmulp st1
-+ |5:
-+ | ret
-+ |6:
-+ | je <5 // x^1 ==> x
-+ | jb >7
-+ | fld1; fdivrp st1
-+ | neg eax
-+ | cmp eax, 1; je <5 // x^-1 ==> 1/x
-+ | jmp <1 // x^-i ==> (1/x)^i
-+ |7:
-+ | fpop; fld1 // x^0 ==> 1
-+ | ret
-+ |
-+ |8: // FP/FP power function x^y.
-+ | fst dword [esp+4]
-+ | fxch
-+ | fst dword [esp+8]
-+ | mov eax, [esp+4]; shl eax, 1
-+ | cmp eax, 0xff000000; je >2 // x^+-Inf?
-+ | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y?
-+ | cmp eax, 0xff000000; je >4 // +-Inf^y?
-+ | fyl2x
-+ | jmp ->vm_exp2raw
-+ |
-+ |9: // Handle x^NaN.
-+ | fld1
-+ | fucomip st2
-+ | je >1 // 1^NaN ==> 1
-+ | fxch // x^NaN ==> NaN
-+ |1:
-+ | fpop
-+ | ret
-+ |
-+ |2: // Handle x^+-Inf.
-+ | fabs
-+ | fld1
-+ | fucomip st1
-+ | je >3 // +-1^+-Inf ==> 1
-+ | fpop; fabs; fldz; mov eax, 0; setc al
-+ | ror eax, 1; xor eax, [esp+4]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0
-+ | fxch
-+ |3:
-+ | fpop1; fabs
-+ | ret
-+ |
-+ |4: // Handle +-0^y or +-Inf^y.
-+ | cmp dword [esp+4], 0; jge <3 // y >= 0, x^y ==> |x|
-+ | fpop; fpop
-+ | test eax, eax; jz >5 // y < 0, +-0^y ==> +Inf
-+ | fldz // y < 0, +-Inf^y ==> 0
-+ | ret
-+ |5:
-+ | mov dword [esp+4], 0x7f800000 // Return +Inf.
-+ | fld dword [esp+4]
-+ | ret
-+ |.endif
-+ |
-+ |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified.
-+ |// Needs 16 byte scratch area for x86. Also called from JIT code.
-+ |->vm_pow_sse:
-+ | cvtsd2si eax, xmm1
-+ | cvtsi2sd xmm2, eax
-+ | ucomisd xmm1, xmm2
-+ | jnz >8 // Branch for FP exponents.
-+ | jp >9 // Branch for NaN exponent.
-+ | // Fallthrough to vm_powi_sse.
-+ |
-+ |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
-+ |->vm_powi_sse:
-+ | cmp eax, 1; jle >6 // i<=1?
-+ | // Now 1 < (unsigned)i <= 0x80000000.
-+ |1: // Handle leading zeros.
-+ | test eax, 1; jnz >2
-+ | mulsd xmm0, xmm0
-+ | shr eax, 1
-+ | jmp <1
-+ |2:
-+ | shr eax, 1; jz >5
-+ | movaps xmm1, xmm0
-+ |3: // Handle trailing bits.
-+ | mulsd xmm0, xmm0
-+ | shr eax, 1; jz >4
-+ | jnc <3
-+ | mulsd xmm1, xmm0
-+ | jmp <3
-+ |4:
-+ | mulsd xmm0, xmm1
-+ |5:
-+ | ret
-+ |6:
-+ | je <5 // x^1 ==> x
-+ | jb >7 // x^0 ==> 1
-+ | neg eax
-+ | call <1
-+ | sseconst_1 xmm1, RDa
-+ | divsd xmm1, xmm0
-+ | movaps xmm0, xmm1
-+ | ret
-+ |7:
-+ | sseconst_1 xmm0, RDa
-+ | ret
-+ |
-+ |8: // FP/FP power function x^y.
-+ |.if X64
-+ | movd rax, xmm1; shl rax, 1
-+ | rol rax, 12; cmp rax, 0xffe; je >2 // x^+-Inf?
-+ | movd rax, xmm0; shl rax, 1; je >4 // +-0^y?
-+ | rol rax, 12; cmp rax, 0xffe; je >5 // +-Inf^y?
-+ | .if X64WIN
-+ | movsd qword [rsp+16], xmm1 // Use scratch area.
-+ | movsd qword [rsp+8], xmm0
-+ | fld qword [rsp+16]
-+ | fld qword [rsp+8]
-+ | .else
-+ | movsd qword [rsp-16], xmm1 // Use red zone.
-+ | movsd qword [rsp-8], xmm0
-+ | fld qword [rsp-16]
-+ | fld qword [rsp-8]
-+ | .endif
-+ |.else
-+ | movsd qword [esp+12], xmm1 // Needs 16 byte scratch area.
-+ | movsd qword [esp+4], xmm0
-+ | cmp dword [esp+12], 0; jne >1
-+ | mov eax, [esp+16]; shl eax, 1
-+ | cmp eax, 0xffe00000; je >2 // x^+-Inf?
-+ |1:
-+ | cmp dword [esp+4], 0; jne >1
-+ | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y?
-+ | cmp eax, 0xffe00000; je >5 // +-Inf^y?
-+ |1:
-+ | fld qword [esp+12]
-+ | fld qword [esp+4]
-+ |.endif
-+ | fyl2x // y*log2(x)
-+ | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part.
-+ | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int
-+ |.if X64WIN
-+ | fstp qword [rsp+8] // Use scratch area.
-+ | movsd xmm0, qword [rsp+8]
-+ |.elif X64
-+ | fstp qword [rsp-8] // Use red zone.
-+ | movsd xmm0, qword [rsp-8]
-+ |.else
-+ | fstp qword [esp+4] // Needs 8 byte scratch area.
-+ | movsd xmm0, qword [esp+4]
-+ |.endif
-+ | ret
-+ |
-+ |9: // Handle x^NaN.
-+ | sseconst_1 xmm2, RDa
-+ | ucomisd xmm0, xmm2; je >1 // 1^NaN ==> 1
-+ | movaps xmm0, xmm1 // x^NaN ==> NaN
-+ |1:
-+ | ret
-+ |
-+ |2: // Handle x^+-Inf.
-+ | sseconst_abs xmm2, RDa
-+ | andpd xmm0, xmm2 // |x|
-+ | sseconst_1 xmm2, RDa
-+ | ucomisd xmm0, xmm2; je <1 // +-1^+-Inf ==> 1
-+ | movmskpd eax, xmm1
-+ | xorps xmm0, xmm0
-+ | mov ah, al; setc al; xor al, ah; jne <1 // |x|<>1, x^+-Inf ==> +Inf/0
-+ |3:
-+ | sseconst_hi xmm0, RDa, 7ff00000 // +Inf
-+ | ret
-+ |
-+ |4: // Handle +-0^y.
-+ | movmskpd eax, xmm1; test eax, eax; jnz <3 // y < 0, +-0^y ==> +Inf
-+ | xorps xmm0, xmm0 // y >= 0, +-0^y ==> 0
-+ | ret
-+ |
-+ |5: // Handle +-Inf^y.
-+ | movmskpd eax, xmm1; test eax, eax; jz <3 // y >= 0, +-Inf^y ==> +Inf
-+ | xorps xmm0, xmm0 // y < 0, +-Inf^y ==> 0
-+ | ret
- |
- |//-----------------------------------------------------------------------
- |//-- Miscellaneous functions --------------------------------------------
-@@ -3429,12 +3914,19 @@
- | // RA is a number.
- | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp
- | // RA is a number, RD is an integer.
-+ |.if SSE
- | cvtsi2sd xmm0, dword [BASE+RD*8]
- | jmp >2
-+ |.else
-+ | fld qword [BASE+RA*8]
-+ | fild dword [BASE+RD*8]
-+ | jmp >3
-+ |.endif
- |
- |8: // RA is an integer, RD is not an integer.
- | ja ->vmeta_comp
- | // RA is an integer, RD is a number.
-+ |.if SSE
- | cvtsi2sd xmm1, dword [BASE+RA*8]
- | movsd xmm0, qword [BASE+RD*8]
- | add PC, 4
-@@ -3442,15 +3934,29 @@
- | jmp_comp jbe, ja, jb, jae, <9
- | jmp <6
- |.else
-+ | fild dword [BASE+RA*8]
-+ | jmp >2
-+ |.endif
-+ |.else
- | checknum RA, ->vmeta_comp
- | checknum RD, ->vmeta_comp
- |.endif
-+ |.if SSE
- |1:
- | movsd xmm0, qword [BASE+RD*8]
- |2:
- | add PC, 4
- | ucomisd xmm0, qword [BASE+RA*8]
- |3:
-+ |.else
-+ |1:
-+ | fld qword [BASE+RA*8] // Reverse order, i.e like cmp D, A.
-+ |2:
-+ | fld qword [BASE+RD*8]
-+ |3:
-+ | add PC, 4
-+ | fcomparepp
-+ |.endif
- | // Unordered: all of ZF CF PF set, ordered: PF clear.
- | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
- |.if DUALNUM
-@@ -3490,25 +3996,43 @@
- | // RD is a number.
- | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5
- | // RD is a number, RA is an integer.
-+ |.if SSE
- | cvtsi2sd xmm0, dword [BASE+RA*8]
-+ |.else
-+ | fild dword [BASE+RA*8]
-+ |.endif
- | jmp >2
- |
- |8: // RD is an integer, RA is not an integer.
- | ja >5
- | // RD is an integer, RA is a number.
-+ |.if SSE
- | cvtsi2sd xmm0, dword [BASE+RD*8]
- | ucomisd xmm0, qword [BASE+RA*8]
-+ |.else
-+ | fild dword [BASE+RD*8]
-+ | fld qword [BASE+RA*8]
-+ |.endif
- | jmp >4
- |
- |.else
- | cmp RB, LJ_TISNUM; jae >5
- | checknum RA, >5
- |.endif
-+ |.if SSE
- |1:
- | movsd xmm0, qword [BASE+RA*8]
- |2:
- | ucomisd xmm0, qword [BASE+RD*8]
- |4:
-+ |.else
-+ |1:
-+ | fld qword [BASE+RA*8]
-+ |2:
-+ | fld qword [BASE+RD*8]
-+ |4:
-+ | fcomparepp
-+ |.endif
- iseqne_fp:
- if (vk) {
- | jp >2 // Unordered means not equal.
-@@ -3631,21 +4155,39 @@
- | // RA is a number.
- | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1
- | // RA is a number, RD is an integer.
-+ |.if SSE
- | cvtsi2sd xmm0, dword [KBASE+RD*8]
-+ |.else
-+ | fild dword [KBASE+RD*8]
-+ |.endif
- | jmp >2
- |
- |8: // RA is an integer, RD is a number.
-+ |.if SSE
- | cvtsi2sd xmm0, dword [BASE+RA*8]
- | ucomisd xmm0, qword [KBASE+RD*8]
-+ |.else
-+ | fild dword [BASE+RA*8]
-+ | fld qword [KBASE+RD*8]
-+ |.endif
- | jmp >4
- |.else
- | cmp RB, LJ_TISNUM; jae >3
- |.endif
-+ |.if SSE
- |1:
- | movsd xmm0, qword [KBASE+RD*8]
- |2:
- | ucomisd xmm0, qword [BASE+RA*8]
- |4:
-+ |.else
-+ |1:
-+ | fld qword [KBASE+RD*8]
-+ |2:
-+ | fld qword [BASE+RA*8]
-+ |4:
-+ | fcomparepp
-+ |.endif
- goto iseqne_fp;
- case BC_ISEQP: case BC_ISNEP:
- vk = op == BC_ISEQP;
-@@ -3751,10 +4293,16 @@
- |.else
- | checknum RD, ->vmeta_unm
- |.endif
-+ |.if SSE
- | movsd xmm0, qword [BASE+RD*8]
- | sseconst_sign xmm1, RDa
- | xorps xmm0, xmm1
- | movsd qword [BASE+RA*8], xmm0
-+ |.else
-+ | fld qword [BASE+RD*8]
-+ | fchs
-+ | fstp qword [BASE+RA*8]
-+ |.endif
- |.if DUALNUM
- | jmp <9
- |.else
-@@ -3770,11 +4318,15 @@
- |1:
- | mov dword [BASE+RA*8+4], LJ_TISNUM
- | mov dword [BASE+RA*8], RD
-- |.else
-+ |.elif SSE
- | xorps xmm0, xmm0
- | cvtsi2sd xmm0, dword STR:RD->len
- |1:
- | movsd qword [BASE+RA*8], xmm0
-+ |.else
-+ | fild dword STR:RD->len
-+ |1:
-+ | fstp qword [BASE+RA*8]
- |.endif
- | ins_next
- |2:
-@@ -3792,8 +4344,11 @@
- | // Length of table returned in eax (RD).
- |.if DUALNUM
- | // Nothing to do.
-- |.else
-+ |.elif SSE
- | cvtsi2sd xmm0, RD
-+ |.else
-+ | mov ARG1, RD
-+ | fild ARG1
- |.endif
- | mov BASE, RB // Restore BASE.
- | movzx RA, PC_RA
-@@ -3808,7 +4363,7 @@
-
- /* -- Binary ops -------------------------------------------------------- */
-
-- |.macro ins_arithpre, sseins, ssereg
-+ |.macro ins_arithpre, x87ins, sseins, ssereg
- | ins_ABC
- ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
- ||switch (vk) {
-@@ -3817,22 +4372,37 @@
- | .if DUALNUM
- | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn
- | .endif
-- | movsd xmm0, qword [BASE+RB*8]
-- | sseins ssereg, qword [KBASE+RC*8]
-+ | .if SSE
-+ | movsd xmm0, qword [BASE+RB*8]
-+ | sseins ssereg, qword [KBASE+RC*8]
-+ | .else
-+ | fld qword [BASE+RB*8]
-+ | x87ins qword [KBASE+RC*8]
-+ | .endif
- || break;
- ||case 1:
- | checknum RB, ->vmeta_arith_nv
- | .if DUALNUM
- | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv
- | .endif
-- | movsd xmm0, qword [KBASE+RC*8]
-- | sseins ssereg, qword [BASE+RB*8]
-+ | .if SSE
-+ | movsd xmm0, qword [KBASE+RC*8]
-+ | sseins ssereg, qword [BASE+RB*8]
-+ | .else
-+ | fld qword [KBASE+RC*8]
-+ | x87ins qword [BASE+RB*8]
-+ | .endif
- || break;
- ||default:
- | checknum RB, ->vmeta_arith_vv
- | checknum RC, ->vmeta_arith_vv
-- | movsd xmm0, qword [BASE+RB*8]
-- | sseins ssereg, qword [BASE+RC*8]
-+ | .if SSE
-+ | movsd xmm0, qword [BASE+RB*8]
-+ | sseins ssereg, qword [BASE+RC*8]
-+ | .else
-+ | fld qword [BASE+RB*8]
-+ | x87ins qword [BASE+RC*8]
-+ | .endif
- || break;
- ||}
- |.endmacro
-@@ -3870,62 +4440,55 @@
- |.endmacro
- |
- |.macro ins_arithpost
-+ |.if SSE
- | movsd qword [BASE+RA*8], xmm0
-+ |.else
-+ | fstp qword [BASE+RA*8]
-+ |.endif
- |.endmacro
- |
-- |.macro ins_arith, sseins
-- | ins_arithpre sseins, xmm0
-+ |.macro ins_arith, x87ins, sseins
-+ | ins_arithpre x87ins, sseins, xmm0
- | ins_arithpost
- | ins_next
- |.endmacro
- |
-- |.macro ins_arith, intins, sseins
-+ |.macro ins_arith, intins, x87ins, sseins
- |.if DUALNUM
- | ins_arithdn intins
- |.else
-- | ins_arith, sseins
-+ | ins_arith, x87ins, sseins
- |.endif
- |.endmacro
-
- | // RA = dst, RB = src1 or num const, RC = src2 or num const
- case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
-- | ins_arith add, addsd
-+ | ins_arith add, fadd, addsd
- break;
- case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
-- | ins_arith sub, subsd
-+ | ins_arith sub, fsub, subsd
- break;
- case BC_MULVN: case BC_MULNV: case BC_MULVV:
-- | ins_arith imul, mulsd
-+ | ins_arith imul, fmul, mulsd
- break;
- case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
-- | ins_arith divsd
-+ | ins_arith fdiv, divsd
- break;
- case BC_MODVN:
-- | ins_arithpre movsd, xmm1
-+ | ins_arithpre fld, movsd, xmm1
- |->BC_MODVN_Z:
- | call ->vm_mod
- | ins_arithpost
- | ins_next
- break;
- case BC_MODNV: case BC_MODVV:
-- | ins_arithpre movsd, xmm1
-+ | ins_arithpre fld, movsd, xmm1
- | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
- break;
- case BC_POW:
-- | ins_arithpre movsd, xmm1
-- | mov RB, BASE
-- |.if not X64
-- | movsd FPARG1, xmm0
-- | movsd FPARG3, xmm1
-- |.endif
-- | call extern pow
-- | movzx RA, PC_RA
-- | mov BASE, RB
-- |.if X64
-+ | ins_arithpre fld, movsd, xmm1 // FIXME: THIS SHOULD NOT BE FLD. Whole thing is broken
-+ | call ->vm_pow
- | ins_arithpost
-- |.else
-- | fstp qword [BASE+RA*8]
-- |.endif
- | ins_next
- break;
-
-@@ -3993,17 +4556,25 @@
- | movsx RD, RDW
- | mov dword [BASE+RA*8+4], LJ_TISNUM
- | mov dword [BASE+RA*8], RD
-- |.else
-+ |.elif SSE
- | movsx RD, RDW // Sign-extend literal.
- | cvtsi2sd xmm0, RD
- | movsd qword [BASE+RA*8], xmm0
-+ |.else
-+ | fild PC_RD // Refetch signed RD from instruction.
-+ | fstp qword [BASE+RA*8]
- |.endif
- | ins_next
- break;
- case BC_KNUM:
- | ins_AD // RA = dst, RD = num const
-+ |.if SSE
- | movsd xmm0, qword [KBASE+RD*8]
- | movsd qword [BASE+RA*8], xmm0
-+ |.else
-+ | fld qword [KBASE+RD*8]
-+ | fstp qword [BASE+RA*8]
-+ |.endif
- | ins_next
- break;
- case BC_KPRI:
-@@ -4110,10 +4681,18 @@
- case BC_USETN:
- | ins_AD // RA = upvalue #, RD = num const
- | mov LFUNC:RB, [BASE-8]
-+ |.if SSE
- | movsd xmm0, qword [KBASE+RD*8]
-+ |.else
-+ | fld qword [KBASE+RD*8]
-+ |.endif
- | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
- | mov RA, UPVAL:RB->v
-+ |.if SSE
- | movsd qword [RA], xmm0
-+ |.else
-+ | fstp qword [RA]
-+ |.endif
- | ins_next
- break;
- case BC_USETP:
-@@ -4267,10 +4846,18 @@
- |.else
- | // Convert number to int and back and compare.
- | checknum RC, >5
-+ |.if SSE
- | movsd xmm0, qword [BASE+RC*8]
- | cvttsd2si RC, xmm0
- | cvtsi2sd xmm1, RC
- | ucomisd xmm0, xmm1
-+ |.else
-+ | fld qword [BASE+RC*8]
-+ | fist ARG1
-+ | fild ARG1
-+ | fcomparepp
-+ | mov RC, ARG1
-+ |.endif
- | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
- |.endif
- | cmp RC, TAB:RB->asize // Takes care of unordered, too.
-@@ -4399,8 +4986,12 @@
- | mov TAB:RB, [BASE+RB*8]
- |.if DUALNUM
- | mov RC, dword [BASE+RC*8]
-- |.else
-+ |.elif SSE
- | cvttsd2si RC, qword [BASE+RC*8]
-+ |.else
-+ | fld qword [BASE+RC*8]
-+ | fistp TMP1
-+ | mov RC, TMP1
- |.endif
- | cmp RC, TAB:RB->asize
- | jae ->vmeta_tgetr // Not in array part? Use fallback.
-@@ -4433,10 +5024,18 @@
- |.else
- | // Convert number to int and back and compare.
- | checknum RC, >5
-+ |.if SSE
- | movsd xmm0, qword [BASE+RC*8]
- | cvttsd2si RC, xmm0
- | cvtsi2sd xmm1, RC
- | ucomisd xmm0, xmm1
-+ |.else
-+ | fld qword [BASE+RC*8]
-+ | fist ARG1
-+ | fild ARG1
-+ | fcomparepp
-+ | mov RC, ARG1
-+ |.endif
- | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
- |.endif
- | cmp RC, TAB:RB->asize // Takes care of unordered, too.
-@@ -4611,8 +5210,12 @@
- | mov TAB:RB, [BASE+RB*8]
- |.if DUALNUM
- | mov RC, dword [BASE+RC*8]
-- |.else
-+ |.elif SSE
- | cvttsd2si RC, qword [BASE+RC*8]
-+ |.else
-+ | fld qword [BASE+RC*8]
-+ | fistp TMP1
-+ | mov RC, TMP1
- |.endif
- | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
- | jnz >7
-@@ -4833,8 +5436,10 @@
- |.if DUALNUM
- | mov dword [BASE+RA*8+4], LJ_TISNUM
- | mov dword [BASE+RA*8], RC
-- |.else
-+ |.elif SSE
- | cvtsi2sd xmm0, RC
-+ |.else
-+ | fild dword [BASE+RA*8-8]
- |.endif
- | // Copy array slot to returned value.
- |.if X64
-@@ -4850,8 +5455,10 @@
- | // Return array index as a numeric key.
- |.if DUALNUM
- | // See above.
-- |.else
-+ |.elif SSE
- | movsd qword [BASE+RA*8], xmm0
-+ |.else
-+ | fstp qword [BASE+RA*8]
- |.endif
- | mov [BASE+RA*8-8], RC // Update control var.
- |2:
-@@ -4864,6 +5471,9 @@
- |
- |4: // Skip holes in array part.
- | add RC, 1
-+ |.if not (DUALNUM or SSE)
-+ | mov [BASE+RA*8-8], RC
-+ |.endif
- | jmp <1
- |
- |5: // Traverse hash part.
-@@ -5211,6 +5821,7 @@
- if (!vk) {
- | cmp RB, LJ_TISNUM; jae ->vmeta_for
- }
-+ |.if SSE
- | movsd xmm0, qword FOR_IDX
- | movsd xmm1, qword FOR_STOP
- if (vk) {
-@@ -5223,6 +5834,22 @@
- | ucomisd xmm1, xmm0
- |1:
- | movsd qword FOR_EXT, xmm0
-+ |.else
-+ | fld qword FOR_STOP
-+ | fld qword FOR_IDX
-+ if (vk) {
-+ | fadd qword FOR_STEP // nidx = idx + step
-+ | fst qword FOR_IDX
-+ | fst qword FOR_EXT
-+ | test RB, RB; js >1
-+ } else {
-+ | fst qword FOR_EXT
-+ | jl >1
-+ }
-+ | fxch // Swap lim/(n)idx if step non-negative.
-+ |1:
-+ | fcomparepp
-+ |.endif
- if (op == BC_FORI) {
- |.if DUALNUM
- | jnb <7
-@@ -5250,10 +5877,11 @@
- |2:
- | ins_next
- |.endif
-- |
-+ |.if SSE
- |3: // Invert comparison if step is negative.
- | ucomisd xmm0, xmm1
- | jmp <1
-+ |.endif
- break;
-
- case BC_ITERL: