[DYNAREC] Some more x87 handling improvements backported from box64

This commit is contained in:
ptitSeb 2024-05-19 17:56:59 +02:00
parent b145080d37
commit 4f21d83e62
5 changed files with 47 additions and 7 deletions

View File

@ -366,7 +366,9 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
MESSAGE(LOG_DUMP, "Need Optimization\n");
x87_do_push_empty(dyn, ninst, 0);
x87_forget(dyn, ninst, x1, x2, 1);
s0 = x87_stackcount(dyn, ninst, x3);
CALL(arm_fxtract, -1, 0);
x87_unstackcount(dyn, ninst, x3, s0);
// C1 set only if stack under/overflow occurs
break;
case 0xF5:

View File

@ -268,16 +268,14 @@ uintptr_t dynarecDB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, 0, NULL);
if(PK(0)==0xDB && ((PK(1)>>3)&7)==7) {
// the FLD is immediately followed by an FSTP
LDR_IMM9(x2, ed, 0);
LDR_IMM9(x3, ed, 4);
LDM(ed, (1<<x2)|(1<<x3));
LDRH_IMM8(x14, ed, 8);
// no persistent scratch register, so unroll both instructions here...
MESSAGE(LOG_DUMP, "\tHack: FSTP tbyte\n");
nextop = F8; //0xDB
nextop = F8; //modrm
addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, 0, NULL);
STR_IMM9(x2, ed, 0);
STR_IMM9(x3, ed, 4);
STM(ed, (1<<x2)|(1<<x3));
STRH_IMM8(x14, ed, 8);
} else {
if(box86_x87_no80bits) {

View File

@ -251,7 +251,9 @@ uintptr_t dynarecDF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
X87_PUSH_EMPTY_OR_FAIL(dyn, ninst, x1);
addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, 0, NULL);
if(ed!=x1) {MOV_REG(x1, ed);}
s0 = x87_stackcount(dyn, ninst, x3);
CALL(fpu_fbld, -1, 0);
x87_unstackcount(dyn, ninst, x3, s0);
break;
case 5:
INST_NAME("FILD ST0, i64");
@ -314,7 +316,9 @@ uintptr_t dynarecDF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
x87_forget(dyn, ninst, x1, x2, 0);
addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, 0, NULL);
if(ed!=x1) {MOV_REG(x1, ed);}
s0 = x87_stackcount(dyn, ninst, x3);
CALL(fpu_fbst, -1, 0);
x87_unstackcount(dyn, ninst, x3, s0);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 7: // could be inlined for most thing, but is it usefull?

View File

@ -601,10 +601,10 @@ void grab_fsdata(dynarec_arm_t* dyn, uintptr_t addr, int ninst, int reg)
}
// x87 stuffs
void x87_stackcount(dynarec_arm_t* dyn, int ninst, int scratch)
int x87_stackcount(dynarec_arm_t* dyn, int ninst, int scratch)
{
if(!dyn->n.x87stack)
return;
return 0;
if(dyn->n.mmxcount)
mmx_purgecache(dyn, ninst, 0, scratch);
MESSAGE(LOG_DUMP, "\tSynch x87 Stackcount (%d)\n", dyn->n.x87stack);
@ -631,6 +631,39 @@ void x87_stackcount(dynarec_arm_t* dyn, int ninst, int scratch)
dyn->n.stack_next -= dyn->n.stack;
dyn->n.stack = 0;
MESSAGE(LOG_DUMP, "\t------x87 Stackcount\n");
return a;
}
// Re-apply a cached x87 stack delta that was previously flushed to the emu state.
// Call sites pair it with x87_stackcount() around a CALL():
//     s0 = x87_stackcount(dyn, ninst, x3); CALL(...); x87_unstackcount(dyn, ninst, x3, s0);
//  dyn     : dynarec state; n.x87stack, n.stack and n.stack_next are updated here
//  ninst   : current instruction index, forwarded to mmx_purgecache / emit macros
//  scratch : scratch register used to load/modify/store the emu fields
//  count   : value returned by the matching x87_stackcount(); 0 means nothing to undo
void x87_unstackcount(dynarec_arm_t* dyn, int ninst, int scratch, int count)
{
    if(!count)
        return;
    if(dyn->n.mmxcount)
        mmx_purgecache(dyn, ninst, 0, scratch);
    // log the delta being restored (dyn->n.x87stack is only set to it at the end of this function)
    MESSAGE(LOG_DUMP, "\tSynch x87 Unstackcount (%d)\n", count);
    // emu->fpu_stack -= count (immediate is kept positive by picking ADD or SUB)
    LDR_IMM9(scratch, xEmu, offsetof(x86emu_t, fpu_stack));
    if(count<0) {
        ADD_IMM8(scratch, scratch, -count);
    } else {
        SUB_IMM8(scratch, scratch, count);
    }
    STR_IMM9(scratch, xEmu, offsetof(x86emu_t, fpu_stack));
    // emu->top = (emu->top + count) & 7
    LDR_IMM9(scratch, xEmu, offsetof(x86emu_t, top));
    if(count<0) {
        SUB_IMM8(scratch, scratch, -count);
    } else {
        ADD_IMM8(scratch, scratch, count);
    }
    AND_IMM8(scratch, scratch, 7);
    STR_IMM9(scratch, xEmu, offsetof(x86emu_t, top));
    // restore the cached delta in the dynarec state and account for it in stack_next
    dyn->n.x87stack = count;
    dyn->n.stack = count;
    dyn->n.stack_next += dyn->n.stack;
    MESSAGE(LOG_DUMP, "\t------x87 Unstackcount\n");
}
int neoncache_st_coherency(dynarec_arm_t* dyn, int ninst, int a, int b)

View File

@ -606,6 +606,7 @@ void* arm_next(x86emu_t* emu, uintptr_t addr);
#define x87_forget STEPNAME(x87_forget)
#define x87_reget_st STEPNAME(x87_reget_st)
#define x87_stackcount STEPNAME(x87_stackcount)
#define x87_unstackcount STEPNAME(x87_unstackcount)
#define x87_setround STEPNAME(x87_setround)
#define x87_setround_reset STEPNAME(x87_setround_reset)
#define x87_restoreround STEPNAME(x87_restoreround)
@ -748,7 +749,9 @@ void emit_pf(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4);
// x87 helper
// cache of the local stack counter, to avoid update at every call
void x87_stackcount(dynarec_arm_t* dyn, int ninst, int scratch);
int x87_stackcount(dynarec_arm_t* dyn, int ninst, int scratch);
// restore the local stack counter
void x87_unstackcount(dynarec_arm_t* dyn, int ninst, int scratch, int count);
// fpu push. Return the Dd value to be used
int x87_do_push(dynarec_arm_t* dyn, int ninst, int s1, int t);
// fpu push. Do not allocate a cache register. Needs a scratch register to do x87stack synch (or 0 to not do it)