Mirror of https://github.com/ptitSeb/box64.git (synced 2025-05-09 00:21:32 +08:00)
[DYNAREC] Introduce BOX64_DYNAREC_X87DOUBLE=2 to handle Low Precision x87 ([ARM64_DYNAREC] only for now)
parent 7f569247d5
commit d7eb87129e
@@ -218,6 +218,7 @@ Force the use of float/double for x87 emulation.

* 0: Try to use float when possible for x87 emulation. [Default]
* 1: Only use Double for x87 emulation.
* 2: Check Precision Control low precision on x87 emulation.

### BOX64_EXIT

@@ -339,12 +339,13 @@ Tweak the memory barriers to reduce the performance impact by strong memory emua
* 2 : All in 1, plus disabled the last write barriers.

=item B<BOX64_DYNAREC_X87DOUBLE> =I<0|1>
=item B<BOX64_DYNAREC_X87DOUBLE> =I<0|1|2>

Force the use of float/double for x87 emulation.

* 0 : Try to use float when possible for x87 emulation. [Default]
* 1 : Only use Double for x87 emulation.
* 2 : Check Precision Control low precision on x87 emulation.

=item B<BOX64_EMULATED_LIBS> =I<XXXX|XXXX:YYYY:ZZZZ>

@@ -681,6 +681,11 @@
"key": "1",
"description": "Only use Double for x87 emulation.",
"default": false
},
{
"key": "2",
"description": "Check Precision Control low precision on x87 emulation.",
"default": false
}
]
},
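
For reference, "Precision Control" here is the PC field in bits 8-9 of the x87 control word (0 = single, 2 = double, 3 = extended; the power-on default 0x037F selects extended). Mode 2 only narrows results when that field asks for single precision. A minimal, hedged C sketch of the decoding, using illustrative names rather than box64's own helpers:

```c
#include <stdint.h>
#include <stdio.h>

/* Illustrative only: extract the x87 Precision Control field, the same
 * (cw >> 8) & 3 computation the new ARM64_CHECK_PRECISION macro performs. */
static unsigned x87_precision_control(uint16_t cw) {
    return (cw >> 8) & 3;   /* 0 = 24-bit, 2 = 53-bit, 3 = 64-bit significand */
}

int main(void) {
    printf("default cw 0x037F -> PC = %u\n", x87_precision_control(0x037F)); /* 3: extended */
    printf("low-precision cw 0x007F -> PC = %u\n", x87_precision_control(0x007F)); /* 0: single */
    return 0;
}
```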

@@ -89,7 +89,8 @@ p0-p3 are used to pass scalable predicate arguments to a subroutine and to retur
#define x4 4
#define x5 5
#define x6 6
#define x7 7
#define x87pc 7
// x87 can be a scratch, but check if it's used as x87 PC and restore if needed in that case
// 32bits version of scratch
#define w1 x1
#define w2 x2

@@ -97,7 +98,7 @@ p0-p3 are used to pass scalable predicate arguments to a subroutine and to retur
#define w4 x4
#define w5 x5
#define w6 x6
#define w7 x7
#define w87pc x87pc
// emu is r0
#define xEmu 0
// ARM64 LR
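
The comment above is the key constraint: x7 is aliased to x87pc and now carries the cached Precision Control value between x87 opcodes, so whenever it is reused as a plain scratch (the helper-call changes later in this diff do exactly that), the value has to be regenerated from the emulated control word. A hedged C model of that rule, with hypothetical names (only the cw field mirrors the real x64emu_t member used elsewhere in this diff):

```c
#include <stdint.h>

typedef struct { uint16_t cw; } x64emu_stub_t;   /* stand-in for the real x64emu_t */

/* What x87pc is expected to hold: the PC field of the guest control word. */
static unsigned x87pc_from_cw(const x64emu_stub_t *emu) {
    return (emu->cw >> 8) & 3;
}

/* Hypothetical rule modelled by the call_c()/call_i() changes further down:
 * if the call path clobbered the register caching the PC value, reload it. */
static unsigned x87pc_after_call(const x64emu_stub_t *emu, unsigned cached, int clobbered) {
    return clobbered ? x87pc_from_cw(emu) : cached;
}
```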

@@ -1420,8 +1420,8 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
CSELw(x2, x2, x3, cLT); // x2 is min(lmem, lreg)
// x2 is min length 0-n_packed
MVNw_REG(x4, xZR);
LSLw_REG(x7, x4, x2);
BICw_REG(x1, x1, x7);
LSLw_REG(x87pc, x4, x2);
BICw_REG(x1, x1, x87pc);
LSLw_REG(x4, x4, x5);
ORRw_REG(x1, x1, x4);
ANDw_mask(x1, x1, 0, (u8&1)?7:15);

@@ -1474,6 +1474,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
BFCw(xFlags, F_PF, 1);
}
}
ARM64_CHECK_PRECISION(); // to regen x87 if it has been used
} else {
SETFLAGS(X_ALL, SF_SET_DF);
if(gd>7) // no need to reflect cache as xmm0-xmm7 will be saved before the function call anyway

@@ -56,6 +56,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
FADDS(v1, v1, v2);
} else {
FADDD(v1, v1, v2);
X87_CHECK_PRECISION(v1);
}
if(!BOX64ENV(dynarec_fastround))
x87_restoreround(dyn, ninst, u8);

@@ -77,6 +78,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
FMULS(v1, v1, v2);
} else {
FMULD(v1, v1, v2);
X87_CHECK_PRECISION(v1);
}
if(!BOX64ENV(dynarec_fastround))
x87_restoreround(dyn, ninst, u8);

@@ -135,6 +137,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
FSUBS(v1, v1, v2);
} else {
FSUBD(v1, v1, v2);
X87_CHECK_PRECISION(v1);
}
if(!BOX64ENV(dynarec_fastround))
x87_restoreround(dyn, ninst, u8);

@@ -156,6 +159,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
FSUBS(v1, v2, v1);
} else {
FSUBD(v1, v2, v1);
X87_CHECK_PRECISION(v1);
}
if(!BOX64ENV(dynarec_fastround))
x87_restoreround(dyn, ninst, u8);

@@ -177,6 +181,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
FDIVS(v1, v1, v2);
} else {
FDIVD(v1, v1, v2);
X87_CHECK_PRECISION(v1);
}
if(!BOX64ENV(dynarec_fastround))
x87_restoreround(dyn, ninst, u8);

@@ -198,6 +203,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
FDIVS(v1, v2, v1);
} else {
FDIVD(v1, v2, v1);
X87_CHECK_PRECISION(v1);
}
if(!BOX64ENV(dynarec_fastround))
x87_restoreround(dyn, ninst, u8);

@@ -220,6 +226,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
} else {
FCVT_D_S(s0, s0);
FADDD(v1, v1, s0);
X87_CHECK_PRECISION(v1);
}
if(!BOX64ENV(dynarec_fastround))
x87_restoreround(dyn, ninst, u8);

@@ -237,6 +244,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
} else {
FCVT_D_S(s0, s0);
FMULD(v1, v1, s0);
X87_CHECK_PRECISION(v1);
}
if(!BOX64ENV(dynarec_fastround))
x87_restoreround(dyn, ninst, u8);

@@ -252,6 +260,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
} else {
FCVT_D_S(s0, s0);
FCMPD(v1, s0);
X87_CHECK_PRECISION(v1);
}
FCOM(x1, x2, x3);
break;

@@ -266,6 +275,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
} else {
FCVT_D_S(s0, s0);
FCMPD(v1, s0);
X87_CHECK_PRECISION(v1);
}
FCOM(x1, x2, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);

@@ -283,6 +293,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
} else {
FCVT_D_S(s0, s0);
FSUBD(v1, v1, s0);
X87_CHECK_PRECISION(v1);
}
if(!BOX64ENV(dynarec_fastround))
x87_restoreround(dyn, ninst, u8);

@@ -300,6 +311,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
} else {
FCVT_D_S(s0, s0);
FSUBD(v1, s0, v1);
X87_CHECK_PRECISION(v1);
}
if(!BOX64ENV(dynarec_fastround))
x87_restoreround(dyn, ninst, u8);

@@ -317,6 +329,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
} else {
FCVT_D_S(s0, s0);
FDIVD(v1, v1, s0);
X87_CHECK_PRECISION(v1);
}
if(!BOX64ENV(dynarec_fastround))
x87_restoreround(dyn, ninst, u8);

@@ -334,6 +347,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
} else {
FCVT_D_S(s0, s0);
FDIVD(v1, s0, v1);
X87_CHECK_PRECISION(v1);
}
if(!BOX64ENV(dynarec_fastround))
x87_restoreround(dyn, ninst, u8);
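
All of the double-path arithmetic above now ends with X87_CHECK_PRECISION(v1), which, when the guest asked for single precision, rounds the 53-bit result back down to 24 bits. A standalone C illustration of why that extra rounding step is observable (plain C, not box64 code):

```c
#include <stdio.h>

int main(void) {
    double a = 16777216.0;  /* 2^24: the last point where floats still have unit steps */
    double b = 1.0;

    double kept     = a + b;            /* 16777217.0, what the pure double path keeps */
    double squashed = (float)(a + b);   /* rounds to 16777216.0, matching PC = single */

    printf("double result: %.1f, squashed to float: %.1f\n", kept, squashed);
    return 0;
}
```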

@@ -430,6 +430,7 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
FSQRTS(v1, v1);
} else {
FSQRTD(v1, v1);
X87_CHECK_PRECISION(v1);
}
if(!BOX64ENV(dynarec_fastround))
x87_restoreround(dyn, ninst, u8);

@@ -509,7 +510,7 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
switch((nextop>>3)&7) {
case 0:
INST_NAME("FLD ST0, float[ED]");
X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, BOX64ENV(dynarec_x87double)?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, (BOX64ENV(dynarec_x87double)==1)?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
VLD32(v1, ed, fixedaddress);
if(!ST_IS_F(0)) {

@@ -150,6 +150,7 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
if(!BOX64ENV(dynarec_fastround))
u8 = x87_setround(dyn, ninst, x1, x5, x4);
FADDD(v1, v1, v2);
X87_CHECK_PRECISION(v1);
if(!BOX64ENV(dynarec_fastround))
x87_restoreround(dyn, ninst, u8);
break;

@@ -164,6 +165,7 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
if(!BOX64ENV(dynarec_fastround))
u8 = x87_setround(dyn, ninst, x1, x5, x4);
FMULD(v1, v1, v2);
X87_CHECK_PRECISION(v1);
if(!BOX64ENV(dynarec_fastround))
x87_restoreround(dyn, ninst, u8);
break;

@@ -201,6 +203,7 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
if(!BOX64ENV(dynarec_fastround))
u8 = x87_setround(dyn, ninst, x1, x5, x4);
FSUBD(v1, v1, v2);
X87_CHECK_PRECISION(v1);
if(!BOX64ENV(dynarec_fastround))
x87_restoreround(dyn, ninst, u8);
break;

@@ -215,6 +218,7 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
if(!BOX64ENV(dynarec_fastround))
u8 = x87_setround(dyn, ninst, x1, x5, x4);
FSUBD(v1, v2, v1);
X87_CHECK_PRECISION(v1);
if(!BOX64ENV(dynarec_fastround))
x87_restoreround(dyn, ninst, u8);
break;

@@ -229,6 +233,7 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
if(!BOX64ENV(dynarec_fastround))
u8 = x87_setround(dyn, ninst, x1, x5, x4);
FDIVD(v1, v1, v2);
X87_CHECK_PRECISION(v1);
if(!BOX64ENV(dynarec_fastround))
x87_restoreround(dyn, ninst, u8);
break;

@@ -243,6 +248,7 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
if(!BOX64ENV(dynarec_fastround))
u8 = x87_setround(dyn, ninst, x1, x5, x4);
FDIVD(v1, v2, v1);
X87_CHECK_PRECISION(v1);
if(!BOX64ENV(dynarec_fastround))
x87_restoreround(dyn, ninst, u8);
break;

@@ -54,6 +54,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
FADDS(v1, v1, v2);
} else {
FADDD(v1, v1, v2);
X87_CHECK_PRECISION(v1);
}
if(!BOX64ENV(dynarec_fastround))
x87_restoreround(dyn, ninst, u8);

@@ -75,6 +76,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
FMULS(v1, v1, v2);
} else {
FMULD(v1, v1, v2);
X87_CHECK_PRECISION(v1);
}
if(!BOX64ENV(dynarec_fastround))
x87_restoreround(dyn, ninst, u8);

@@ -94,6 +96,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
FCMPS(v1, v2);
} else {
FCMPD(v1, v2);
X87_CHECK_PRECISION(v1);
}
FCOM(x1, x2, x3);
break;

@@ -112,6 +115,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
FCMPS(v1, v2);
} else {
FCMPD(v1, v2);
X87_CHECK_PRECISION(v1);
}
FCOM(x1, x2, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);

@@ -133,6 +137,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
FSUBS(v1, v2, v1);
} else {
FSUBD(v1, v2, v1);
X87_CHECK_PRECISION(v1);
}
if(!BOX64ENV(dynarec_fastround))
x87_restoreround(dyn, ninst, u8);

@@ -154,6 +159,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
FSUBS(v1, v1, v2);
} else {
FSUBD(v1, v1, v2);
X87_CHECK_PRECISION(v1);
}
if(!BOX64ENV(dynarec_fastround))
x87_restoreround(dyn, ninst, u8);

@@ -175,6 +181,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
FDIVS(v1, v2, v1);
} else {
FDIVD(v1, v2, v1);
X87_CHECK_PRECISION(v1);
}
if(!BOX64ENV(dynarec_fastround))
x87_restoreround(dyn, ninst, u8);

@@ -196,6 +203,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
FDIVS(v1, v1, v2);
} else {
FDIVD(v1, v1, v2);
X87_CHECK_PRECISION(v1);
}
if(!BOX64ENV(dynarec_fastround))
x87_restoreround(dyn, ninst, u8);

@@ -214,6 +222,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
if(!BOX64ENV(dynarec_fastround))
u8 = x87_setround(dyn, ninst, x1, x2, x4);
FADDD(v1, v1, v2);
X87_CHECK_PRECISION(v1);
if(!BOX64ENV(dynarec_fastround))
x87_restoreround(dyn, ninst, u8);
break;

@@ -226,6 +235,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
if(!BOX64ENV(dynarec_fastround))
u8 = x87_setround(dyn, ninst, x1, x2, x4);
FMULD(v1, v1, v2);
X87_CHECK_PRECISION(v1);
if(!BOX64ENV(dynarec_fastround))
x87_restoreround(dyn, ninst, u8);
break;

@@ -257,6 +267,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
if(!BOX64ENV(dynarec_fastround))
u8 = x87_setround(dyn, ninst, x1, x2, x4);
FSUBD(v1, v1, v2);
X87_CHECK_PRECISION(v1);
if(!BOX64ENV(dynarec_fastround))
x87_restoreround(dyn, ninst, u8);
break;

@@ -269,6 +280,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
if(!BOX64ENV(dynarec_fastround))
u8 = x87_setround(dyn, ninst, x1, x2, x4);
FSUBD(v1, v2, v1);
X87_CHECK_PRECISION(v1);
if(!BOX64ENV(dynarec_fastround))
x87_restoreround(dyn, ninst, u8);
break;

@@ -281,6 +293,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
if(!BOX64ENV(dynarec_fastround))
u8 = x87_setround(dyn, ninst, x1, x2, x4);
FDIVD(v1, v1, v2);
X87_CHECK_PRECISION(v1);
if(!BOX64ENV(dynarec_fastround))
x87_restoreround(dyn, ninst, u8);
break;

@@ -293,6 +306,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
if(!BOX64ENV(dynarec_fastround))
u8 = x87_setround(dyn, ninst, x1, x2, x4);
FDIVD(v1, v2, v1);
X87_CHECK_PRECISION(v1);
if(!BOX64ENV(dynarec_fastround))
x87_restoreround(dyn, ninst, u8);
break;

@@ -54,6 +54,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
FADDS(v1, v1, v2);
} else {
FADDD(v1, v1, v2);
X87_CHECK_PRECISION(v1);
}
if(!BOX64ENV(dynarec_fastround))
x87_restoreround(dyn, ninst, u8);

@@ -76,6 +77,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
FMULS(v1, v1, v2);
} else {
FMULD(v1, v1, v2);
X87_CHECK_PRECISION(v1);
}
if(!BOX64ENV(dynarec_fastround))
x87_restoreround(dyn, ninst, u8);

@@ -130,6 +132,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
FSUBS(v1, v2, v1);
} else {
FSUBD(v1, v2, v1);
X87_CHECK_PRECISION(v1);
}
if(!BOX64ENV(dynarec_fastround))
x87_restoreround(dyn, ninst, u8);

@@ -152,6 +155,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
FSUBS(v1, v1, v2);
} else {
FSUBD(v1, v1, v2);
X87_CHECK_PRECISION(v1);
}
if(!BOX64ENV(dynarec_fastround))
x87_restoreround(dyn, ninst, u8);

@@ -174,6 +178,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
FDIVS(v1, v2, v1);
} else {
FDIVD(v1, v2, v1);
X87_CHECK_PRECISION(v1);
}
if(!BOX64ENV(dynarec_fastround))
x87_restoreround(dyn, ninst, u8);

@@ -196,6 +201,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
FDIVS(v1, v1, v2);
} else {
FDIVD(v1, v1, v2);
X87_CHECK_PRECISION(v1);
}
if(!BOX64ENV(dynarec_fastround))
x87_restoreround(dyn, ninst, u8);

@@ -216,6 +222,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
SXTL_32(v2, v2);
SCVTFDD(v2, v2);
FADDD(v1, v1, v2);
X87_CHECK_PRECISION(v1);
break;
case 1:
INST_NAME("FIMUL ST0, word[ED]");

@@ -227,6 +234,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
SXTL_32(v2, v2);
SCVTFDD(v2, v2);
FMULD(v1, v1, v2);
X87_CHECK_PRECISION(v1);
break;
case 2:
INST_NAME("FICOM ST0, word[ED]");

@@ -263,6 +271,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
SXTL_32(v2, v2);
SCVTFDD(v2, v2);
FSUBD(v1, v1, v2);
X87_CHECK_PRECISION(v1);
break;
case 5:
INST_NAME("FISUBR ST0, word[ED]");

@@ -274,6 +283,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
SXTL_32(v2, v2);
SCVTFDD(v2, v2);
FSUBD(v1, v2, v1);
X87_CHECK_PRECISION(v1);
break;
case 6:
INST_NAME("FIDIV ST0, word[ED]");

@@ -285,6 +295,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
SXTL_32(v2, v2);
SCVTFDD(v2, v2);
FDIVD(v1, v1, v2);
X87_CHECK_PRECISION(v1);
break;
case 7:
INST_NAME("FIDIVR ST0, word[ED]");

@@ -296,6 +307,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
SXTL_32(v2, v2);
SCVTFDD(v2, v2);
FDIVD(v1, v2, v1);
X87_CHECK_PRECISION(v1);
break;
}
return addr;

@@ -766,7 +766,7 @@ void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, int save
dyn->insts[ninst].nat_flags_op = NAT_FLAG_OP_UNUSABLE;
#endif
if(savereg==0)
savereg = 7;
savereg = x87pc;
if(saveflags) {
STRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags));
}

@@ -804,6 +804,9 @@ void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, int save
if(saveflags) {
LDRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags));
}
if(reg==x87pc && savereg!=x87pc && dyn->need_x87check) {
ARM64_CHECK_PRECISION(); // regen x87 mask
}
//SET_NODF();
}

@@ -813,7 +816,7 @@ void call_i(dynarec_arm_t* dyn, int ninst, void* fnc)
#if STEP == 0
dyn->insts[ninst].nat_flags_op = NAT_FLAG_OP_UNUSABLE;
#endif
STPx_S7_preindex(x6, x7, xSP, -16);
STPx_S7_preindex(x6, x87pc, xSP, -16);
STPx_S7_preindex(x4, x5, xSP, -16);
STPx_S7_preindex(x2, x3, xSP, -16);
STPx_S7_preindex(xEmu, x1, xSP, -16); // ARM64 stack needs to be 16byte aligned

@@ -823,10 +826,10 @@ void call_i(dynarec_arm_t* dyn, int ninst, void* fnc)
STPx_S7_offset(xRSI, xRDI, xEmu, offsetof(x64emu_t, regs[_SI]));
STPx_S7_offset(xR8, xR9, xEmu, offsetof(x64emu_t, regs[_R8]));
STRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags));
fpu_pushcache(dyn, ninst, x7, 0);
fpu_pushcache(dyn, ninst, x87pc, 0);

TABLE64(x7, (uintptr_t)fnc);
BLR(x7);
TABLE64(x87pc, (uintptr_t)fnc);
BLR(x87pc);
LDPx_S7_postindex(xEmu, x1, xSP, 16);
LDPx_S7_postindex(x2, x3, xSP, 16);
LDPx_S7_postindex(x4, x5, xSP, 16);

@@ -838,8 +841,8 @@ void call_i(dynarec_arm_t* dyn, int ninst, void* fnc)
GO(R8, R9);
#undef GO
LDRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags));
fpu_popcache(dyn, ninst, x7, 0); // savereg will not be used
LDPx_S7_postindex(x6, x7, xSP, 16);
fpu_popcache(dyn, ninst, x87pc, 0); // savereg will not be used
LDPx_S7_postindex(x6, x87pc, xSP, 16);
//SET_NODF();
}

@@ -859,12 +862,12 @@ void call_n(dynarec_arm_t* dyn, int ninst, void* fnc, int w)
if(abs(w)>1) {
MESSAGE(LOG_DUMP, "Getting %d XMM args\n", abs(w)-1);
for(int i=0; i<abs(w)-1; ++i) {
sse_get_reg(dyn, ninst, x7, i, w);
sse_get_reg(dyn, ninst, x3, i, w);
}
}
if(w<0) {
MESSAGE(LOG_DUMP, "Return in XMM0\n");
sse_get_reg_empty(dyn, ninst, x7, 0);
sse_get_reg_empty(dyn, ninst, x3, 0);
}
// prepare regs for native call
MOVx_REG(0, xRDI);

@@ -721,13 +721,13 @@

// CALL will use x7 for the call address. Return value can be put in ret (unless ret is -1)
// R0 will not be pushed/popd if ret is -2
#define CALL(F, ret) call_c(dyn, ninst, F, x7, ret, 1, 0)
#define CALL(F, ret) call_c(dyn, ninst, F, x87pc, ret, 1, 0)
// CALL_ will use x7 for the call address. Return value can be put in ret (unless ret is -1)
// R0 will not be pushed/popd if ret is -2
#define CALL_(F, ret, reg) call_c(dyn, ninst, F, x7, ret, 1, reg)
#define CALL_(F, ret, reg) call_c(dyn, ninst, F, x87pc, ret, 1, reg)
// CALL_S will use x7 for the call address. Return value can be put in ret (unless ret is -1)
// R0 will not be pushed/popd if ret is -2. Flags are not save/restored
#define CALL_S(F, ret) call_c(dyn, ninst, F, x7, ret, 0, 0)
#define CALL_S(F, ret) call_c(dyn, ninst, F, x87pc, ret, 0, 0)
// CALL_ will use x7 for the call address.
// All regs are saved, including scratch. This is use to call internal function that should not change state
#define CALL_I(F) call_i(dyn, ninst, F)

@@ -998,6 +998,21 @@
#define CALLRET_LOOP() NOP
#endif

#ifndef ARM64_CHECK_PRECISION
#define ARM64_CHECK_PRECISION() \
if(dyn->need_x87check) { \
LDRH_U12(x87pc, xEmu, offsetof(x64emu_t, cw)); \
UBFXw(x87pc, x87pc, 8, 2); \
}
#endif
#ifndef X87_CHECK_PRECISION
#define X87_CHECK_PRECISION(A) \
if(dyn->need_x87check) { \
CBNZw(x87pc, 4+8); \
FCVT_S_D(A, A); \
FCVT_D_S(A, A); \
}
#endif
#define STORE_REG(A) STRx_U12(x##A, xEmu, offsetof(x64emu_t, regs[_##A]))
#define STP_REGS(A, B) STPx_S7_offset(x##A, x##B, xEmu, offsetof(x64emu_t, regs[_##A]))
#define LDP_REGS(A, B) LDPx_S7_offset(x##A, x##B, xEmu, offsetof(x64emu_t, regs[_##A]))
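
In plain C terms: ARM64_CHECK_PRECISION keeps (cw >> 8) & 3 in x87pc, and X87_CHECK_PRECISION then skips the FCVT round-trip (the CBNZ jumps over the two following instructions, hence the 4+8 offset) unless that value is 0, i.e. the guest selected single precision. A behaviour-level sketch of what the emitted code does at runtime, not of the emitter macros themselves:

```c
#include <stdint.h>

/* Runtime behaviour modelled by the two macros above (illustrative only). */
static unsigned arm64_check_precision(uint16_t cw) {
    return (cw >> 8) & 3;          /* LDRH cw ; UBFX #8, #2  ->  x87pc */
}

static double x87_check_precision(unsigned x87pc, double v) {
    if (x87pc != 0)                /* CBNZ x87pc, skip */
        return v;                  /* double/extended precision: keep the value */
    return (double)(float)v;       /* FCVT narrow then widen: round to 24 bits */
}
```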

@@ -72,3 +72,6 @@
// mark opcode as "unaligned" possible only if the current address is not marked as already unaligned
#define IF_UNALIGNED(A) if((dyn->insts[ninst].unaligned=is_addr_unaligned(A)))
#define IF_ALIGNED(A) if(!(dyn->insts[ninst].unaligned=is_addr_unaligned(A)))

#define ARM64_CHECK_PRECISION()
#define X87_CHECK_PRECISION(A)

@@ -22,3 +22,8 @@
dyn->insts[ninst].f_exit = dyn->f

#define INST_NAME(name)

#define ARM64_CHECK_PRECISION()
#define X87_CHECK_PRECISION(A) \
if(dyn->need_x87check) \
dyn->need_x87check=2

@@ -171,6 +171,7 @@ typedef struct dynarec_arm_s {
uint8_t always_test;
uint8_t abort; // abort the creation of the block
void* gdbjit_block;
uint32_t need_x87check; // needs x87 precision control check if non-null, or 0 if not
} dynarec_arm_t;

void add_next(dynarec_arm_t *dyn, uintptr_t addr);

@@ -24,11 +24,11 @@ typedef struct dynablock_s {
uint8_t dirty; // if need to be tested as soon as it's created
uint8_t always_test:1;
uint8_t is32bits:1;
int callret_size; // size of the array
int isize;
size_t arch_size; // size of of arch dependant infos
instsize_t* instsize;
void* arch; // arch dependant per inst info (can be NULL)
size_t arch_size; // size of of arch dependant infos
int callret_size; // size of the array
callret_t* callrets; // array of callret return, with NOP / UDF depending if the block is clean or dirty
void* jmpnext; // a branch jmpnext code when block is marked
} dynablock_t;

@@ -35,6 +35,7 @@ extern uint32_t arm64_crc(void* p, uint32_t len);

#define ARCH_NOP 0b11010101000000110010000000011111
#define ARCH_UDF 0xcafe
#define ARCH_PRECISION() ARM64_CHECK_PRECISION()
#elif defined(LA64)

#define instruction_native_t instruction_la64_t

@@ -636,6 +636,11 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit
CancelBlock64(0);
return NULL;
}
#ifdef ARCH_PRECISION
if(BOX64ENV(dynarec_x87double)==2) {
helper.need_x87check = 1;
}
#endif
// basic checks
if(!helper.size) {
dynarec_log(LOG_INFO, "Warning, null-sized dynarec block (%p)\n", (void*)addr);

@@ -768,6 +773,12 @@
CancelBlock64(0);
return NULL;
}
#ifdef ARCH_PRECISION
if(BOX64ENV(dynarec_x87double)==2) {
if(helper.need_x87check==1)
helper.need_x87check = 0;
}
#endif

// pass 2, instruction size
helper.callrets = static_callrets;

@@ -796,7 +807,7 @@
size_t insts_rsize = (helper.insts_size+2)*sizeof(instsize_t);
insts_rsize = (insts_rsize+7)&~7; // round the size...
size_t arch_size = ARCH_SIZE(&helper);
size_t callret_size = helper.callret_size*4;
size_t callret_size = helper.callret_size*sizeof(callret_t);
// ok, now allocate mapped memory, with executable flag on
size_t sz = sizeof(void*) + native_size + helper.table64size*sizeof(uint64_t) + 4*sizeof(void*) + insts_rsize + arch_size + callret_size;
// dynablock_t* block (arm insts) table64 jmpnext code instsize arch callrets

@@ -83,6 +83,11 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr, int alternate, int
break;
}
#endif
#ifdef ARCH_PRECISION
if(!ninst && dyn->need_x87check) {
ARCH_PRECISION();
}
#endif
fpu_propagate_stack(dyn, ninst);
ip = addr;
if (reset_n!=-1) {
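
Taken together, the FillBlock64 and native_pass changes gate the whole feature per block: with BOX64_DYNAREC_X87DOUBLE=2, need_x87check starts at 1, the pass-0 stub of X87_CHECK_PRECISION bumps it to 2 the first time an x87 op is actually encoded, a flag still left at 1 is cleared so blocks without x87 code emit nothing, and blocks that kept it load the control word once, at instruction 0. A hedged C sketch of that life cycle, with hypothetical names rather than the real pass driver:

```c
/* Illustrative model of need_x87check across the dynarec passes. */
typedef struct { int need_x87check; } block_stub_t;

static void block_begin(block_stub_t *b, int x87double_env) {
    b->need_x87check = (x87double_env == 2) ? 1 : 0;   /* requested by the env var */
}

static void pass0_saw_x87_op(block_stub_t *b) {
    if (b->need_x87check)
        b->need_x87check = 2;      /* mirrors the pass-0 X87_CHECK_PRECISION stub */
}

static void pass0_done(block_stub_t *b) {
    if (b->need_x87check == 1)
        b->need_x87check = 0;      /* requested but no x87 op seen: emit no check */
}
```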

@@ -442,7 +442,7 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
switch ((nextop >> 3) & 7) {
case 0:
INST_NAME("FLD ST0, float[ED]");
X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, BOX64ENV(dynarec_x87double) ? EXT_CACHE_ST_D : EXT_CACHE_ST_F);
X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, (BOX64ENV(dynarec_x87double)==1) ? EXT_CACHE_ST_D : EXT_CACHE_ST_F);
addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
FLW(v1, ed, fixedaddress);
if (!ST_IS_F(0)) {

@@ -59,7 +59,7 @@ extern char* ftrace_name;
BOOLEAN(BOX64_DYNAREC_TRACE, dynarec_trace, 0) \
BOOLEAN(BOX64_DYNAREC_WAIT, dynarec_wait, 1) \
BOOLEAN(BOX64_DYNAREC_WEAKBARRIER, dynarec_weakbarrier, 1) \
BOOLEAN(BOX64_DYNAREC_X87DOUBLE, dynarec_x87double, 0) \
INTEGER(BOX64_DYNAREC_X87DOUBLE, dynarec_x87double, 0, 0, 2) \
STRING(BOX64_EMULATED_LIBS, emulated_libs) \
STRING(BOX64_ENV, env) \
STRING(BOX64_ENV1, env1) \

@@ -538,6 +538,13 @@ BOX64_DYNAREC_BIGBLOCK=3
BOX64_DYNAREC_CALLRET=1
BOX64_SHAEXT=0 #buggy openssl version in the game

[gta3.exe]
#BOX64_DYNAREC_SAFEFLAGS=2 #not needed
BOX64_DYNAREC_DIRTY=1
BOX64_DYNAREC_BIGBLOCK=3
BOX64_DYNAREC_CALLRET=1
BOX64_DYNAREC_X87DOUBLE=2

[Hades.exe]
BOX64_DYNAREC_BIGBLOCK=3
BOX64_DYNAREC_CALLRET=1