[ARM64_DYNAREC] Various fixes and improvements to a few random opcodes

This commit is contained in:
ptitSeb 2024-11-15 21:05:41 +01:00
parent 5c49954f69
commit 385a31ec11
9 changed files with 161 additions and 68 deletions

View File

@ -183,6 +183,7 @@ int convert_bitmask(uint64_t bitmask);
// ADD (register form) emitters. Every macro below wraps ADDSUB_REG_gen with its
// 2nd argument fixed to 0 and its 4th argument fixed to 0b00, and differs only in:
//   w    : 1st argument is 0
//   xw   : 1st argument is rex.w (a `rex` must be in scope where the macro expands)
//   z    : 1st argument is rex.is32bits?0:1 (same `rex` requirement)
//   S    : 3rd argument is 1, and the opcode is emitted through FEMIT instead of EMIT
//   _LSL : `lsl` is forwarded as the 6th argument (0 for the other variants)
// NOTE(review): presumably the 1st argument is the A64 `sf` (64-bit operand) bit,
// the 3rd the `S` (update NZCV) bit, 0b00 the LSL shift type and the 6th the shift
// amount applied to Rm -- confirm against the ADDSUB_REG_gen definition.
#define ADDw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(0, 0, 0, 0b00, Rm, 0, Rn, Rd))
#define ADDSw_REG(Rd, Rn, Rm) FEMIT(ADDSUB_REG_gen(0, 0, 1, 0b00, Rm, 0, Rn, Rd))
#define ADDw_REG_LSL(Rd, Rn, Rm, lsl) EMIT(ADDSUB_REG_gen(0, 0, 0, 0b00, Rm, lsl, Rn, Rd))
// Same as ADDw_REG_LSL but with the 3rd argument set to 1 and emitted via FEMIT,
// mirroring how ADDSw_REG relates to ADDw_REG.
#define ADDSw_REG_LSL(Rd, Rn, Rm, lsl) FEMIT(ADDSUB_REG_gen(0, 0, 1, 0b00, Rm, lsl, Rn, Rd))
#define ADDxw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(rex.w, 0, 0, 0b00, Rm, 0, Rn, Rd))
#define ADDz_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(rex.is32bits?0:1, 0, 0, 0b00, Rm, 0, Rn, Rd))
#define ADDSxw_REG(Rd, Rn, Rm) FEMIT(ADDSUB_REG_gen(rex.w, 0, 1, 0b00, Rm, 0, Rn, Rd))

View File

@ -39,6 +39,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
int q0, q1;
int d0, d1;
int s0;
int mask;
uint64_t tmp64u;
int64_t j64;
int64_t fixedaddress;
@ -1643,6 +1644,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
LDxw(x1, x3, fixedaddress);
ed = x1;
}
IFX(X_CF) {
if(rex.w) {
ANDx_mask(x2, gd, 1, 0, 0b00101); //mask=0x000000000000003f
} else {
@ -1650,6 +1652,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
}
LSRxw_REG(x4, ed, x2);
BFIw(xFlags, x4, F_CF, 1);
}
break;
case 0xA4:
nextop = F8;
@ -2044,8 +2047,8 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
IFX(X_CF) {
BFXILxw(xFlags, ed, u8, 1); // inject 1 bit from u8 to F_CF (i.e. pos 0)
}
MOV32w(x4, 1);
ORRxw_REG_LSL(ed, ed, x4, u8);
mask = convert_bitmask_xw(1LL<<u8);
ORRxw_mask(ed, ed, (mask>>12)&1, mask&0x3F, (mask>>6)&0x3F);
if(wback) {
STxw(ed, wback, fixedaddress);
SMWRITE();
@ -2093,8 +2096,8 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
IFX(X_CF) {
BFXILxw(xFlags, ed, u8, 1); // inject 1 bit from u8 to F_CF (i.e. pos 0)
}
MOV32w(x4, 1);
EORxw_REG_LSL(ed, ed, x4, u8);
mask = convert_bitmask_xw(1LL<<u8);
EORxw_mask(ed, ed, (mask>>12)&1, mask&0x3F, (mask>>6)&0x3F);
if(wback) {
STxw(ed, wback, fixedaddress);
SMWRITE();

View File

@ -154,9 +154,9 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
INST_NAME("ADC AX, Iw");
READFLAGS(X_CF);
SETFLAGS(X_ALL, SF_SET_PENDING);
i16 = F16;
u16 = F16;
UXTHw(x1, xRAX);
MOV32w(x2, i16);
MOV32w(x2, u16);
emit_adc16(dyn, ninst, x1, x2, x3, x4);
BFIz(xRAX, x1, 0, 16);
break;
@ -500,7 +500,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
SETFLAGS(X_ALL, SF_SET_PENDING);
GETEW(x1, (opcode==0x81)?2:1);
if(opcode==0x81) i16 = F16S; else i16 = F8S;
MOVZw(x5, i16);
MOVZw(x5, (uint16_t)i16);
emit_adc16(dyn, ninst, x1, x5, x2, x4);
EWBACK;
break;
@ -510,7 +510,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
SETFLAGS(X_ALL, SF_SET_PENDING);
GETEW(x1, (opcode==0x81)?2:1);
if(opcode==0x81) i16 = F16S; else i16 = F8S;
MOVZw(x5, i16);
MOVZw(x5, (uint16_t)i16);
emit_sbb16(dyn, ninst, x1, x5, x2, x4);
EWBACK;
break;
@ -705,8 +705,8 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
break;
case 0x99:
INST_NAME("CWD");
SXTHw(x1, xRAX);
BFXILx(xRDX, x1, 16, 16);
SXTHw(x1, xRAX); // sign extend ax to x1
BFXILx(xRDX, x1, 16, 16); // insert high 16bits of x1 as low DX
break;
case 0x9C:
INST_NAME("PUSHF");

View File

@ -34,6 +34,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
uint8_t eb1, eb2;
int64_t j64;
uint64_t tmp64u, tmp64u2;
int mask;
int v0, v1;
int q0, q1;
int d0, d1;
@ -2328,19 +2329,27 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
LDH(x1, x3, fixedaddress);
ed = x1;
}
IFX(X_CF) {
ANDw_mask(x2, gd, 0, 0b000011); // mask=0x0f
LSRw_REG(x1, ed, x2);
BFIw(xFlags, x1, F_CF, 1);
}
break;
case 0xA4:
INST_NAME("SHLD Ew, Gw, Ib");
SETFLAGS(X_ALL, SF_SET_PENDING);
nextop = F8;
u8 = geted_ib(dyn, addr, ninst, nextop)&0x1f;
if(u8) {
SETFLAGS(X_ALL, SF_SET_PENDING);
GETEW(x1, 1);
GETGW(x2);
u8 = F8;
emit_shld16c(dyn, ninst, ed, gd, u8, x4, x5);
EWBACK;
} else {
FAKEED;
F8;
}
break;
case 0xA5:
nextop = F8;
@ -2390,12 +2399,18 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
case 0xAC:
nextop = F8;
INST_NAME("SHRD Ew, Gw, Ib");
u8 = geted_ib(dyn, addr, ninst, nextop)&0x1f;
if(u8) {
SETFLAGS(X_ALL, SF_SET_PENDING);
GETEW(x1, 1);
GETGW(x2);
u8 = F8;
emit_shrd16c(dyn, ninst, ed, gd, u8, x4, x5);
EWBACK;
} else {
FAKEED;
F8;
}
break;
case 0xAD:
nextop = F8;
@ -2516,8 +2531,8 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
IFX(X_CF) {
BFXILxw(xFlags, ed, u8, 1); // inject 1 bit from u8 to F_CF (i.e. pos 0)
}
MOV32w(x4, 1);
BFIxw(ed, x4, u8, 1);
mask = convert_bitmask_xw(1<<u8);
ORRxw_mask(ed, ed, (mask>>12)&1, mask&0x3F, (mask>>6)&0x3F);
EWBACK;
break;
case 6:
@ -2543,8 +2558,8 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
IFX(X_CF) {
BFXILxw(xFlags, ed, u8, 1); // inject 1 bit from u8 to F_CF (i.e. pos 0)
}
MOV32w(x4, 1);
EORxw_REG_LSL(ed, ed, x4, u8);
mask = convert_bitmask_xw(1<<u8);
EORxw_mask(ed, ed, (mask>>12)&1, mask&0x3F, (mask>>6)&0x3F);
EWBACK;
break;
default:

View File

@ -310,7 +310,7 @@ void emit_add8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
IFX(X_PEND) {
STRB_U12(s1, xEmu, offsetof(x64emu_t, op1));
STRB_U12(s2, xEmu, offsetof(x64emu_t, op2));
SET_DF(s3, d_add8);
SET_DF(s3, d_add8b);
} else IFX(X_ALL) {
SET_DFNONE(s3);
}
@ -318,27 +318,52 @@ void emit_add8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
ORRw_REG(s3, s1, s2); // s3 = op1 | op2
ANDw_REG(s4, s1, s2); // s4 = op1 & op2
}
ADDw_REG(s1, s1, s2);
IFX(X_AF|X_OF) {
BICw_REG(s3, s3, s1); // s3 = (op1 | op2) & ~ res
ORRw_REG(s3, s3, s4); // s3 = (op1 & op2) | ((op1 | op2) & ~ res)
IFX(X_CF|X_SF|X_OF|X_ZF) {
LSLw(s1, s1, 24);
ADDSw_REG_LSL(s1, s1, s2, 24);
IFX(X_AF) {
BICw_REG_LSR(s3, s3, s1, 24); // s3 = (op1 | op2) & ~ res
ORRw_REG(s3, s3, s4); // s3 = (op1 & op2) | ((op1 | op2) & ~ res)
LSRw(s4, s3, 3);
BFIw(xFlags, s4, F_AF, 1); // AF: bc & 0x08
}
IFX(X_OF) {
LSRw(s4, s3, 6);
EORw_REG_LSR(s4, s4, s4, 1);
BFIw(xFlags, s4, F_OF, 1); // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1
IFX(X_ZF) {
IFNATIVE(NF_EQ) {} else {
CSETw(s4, cEQ);
BFIw(xFlags, s4, F_ZF, 1);
}
}
IFX(X_CF) {
BFXILw(xFlags, s1, 8, 1);
IFNATIVE(NF_CF) {} else {
CSETw(s4, cCS);
BFIw(xFlags, s4, F_CF, 1);
}
}
IFX(X_OF) {
IFNATIVE(NF_VF) {} else {
CSETw(s4, cVS);
BFIw(xFlags, s4, F_OF, 1);
}
}
IFX(X_SF) {
IFNATIVE(NF_SF) {} else {
CSETw(s4, cMI);
BFIx(xFlags, s4, F_SF, 1);
}
}
LSRw(s1, s1, 24);
} else {
ADDw_REG(s1, s1, s2);
IFX(X_AF) {
BICw_REG(s3, s3, s1); // s3 = (op1 | op2) & ~ res
ORRw_REG(s3, s3, s4); // s3 = (op1 & op2) | ((op1 | op2) & ~ res)
LSRw(s4, s3, 3);
BFIw(xFlags, s4, F_AF, 1); // AF: bc & 0x08
}
}
IFX(X_PEND) {
STRH_U12(s1, xEmu, offsetof(x64emu_t, res));
STRB_U12(s1, xEmu, offsetof(x64emu_t, res));
}
COMP_ZFSF(s1, 8)
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s4);
}
@ -488,36 +513,60 @@ void emit_add16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
IFX(X_PEND) {
STRH_U12(s1, xEmu, offsetof(x64emu_t, op1));
STRH_U12(s2, xEmu, offsetof(x64emu_t, op2));
SET_DF(s3, d_add16);
SET_DF(s3, d_add16b);
} else IFX(X_ALL) {
SET_DFNONE(s3);
}
IFX(X_AF | X_OF) {
IFX(X_AF) {
ORRw_REG(s3, s1, s2); // s3 = op1 | op2
ANDw_REG(s4, s1, s2); // s4 = op1 & op2
}
ADDw_REG(s1, s1, s2);
IFX(X_AF|X_OF) {
BICw_REG(s3, s3, s1); // s3 = (op1 | op2) & ~ res
ORRw_REG(s3, s3, s4); // s3 = (op1 & op2) | ((op1 | op2) & ~ res)
IFX(X_CF|X_SF|X_OF|X_ZF) {
LSLw(s1, s1, 16);
ADDSw_REG_LSL(s1, s1, s2, 16);
IFX(X_AF) {
BICw_REG_LSR(s3, s3, s1, 16); // s3 = (op1 | op2) & ~ res
ORRw_REG(s3, s3, s4); // s3 = (op1 & op2) | ((op1 | op2) & ~ res)
LSRw(s4, s3, 3);
BFIw(xFlags, s4, F_AF, 1); // AF: bc & 0x08
}
IFX(X_OF) {
LSRw(s4, s3, 14);
EORw_REG_LSR(s4, s4, s4, 1);
BFIw(xFlags, s4, F_OF, 1); // OF: ((bc >> 14) ^ ((bc>>14)>>1)) & 1
IFX(X_ZF) {
IFNATIVE(NF_EQ) {} else {
CSETw(s4, cEQ);
BFIw(xFlags, s4, F_ZF, 1);
}
}
IFX(X_CF) {
BFXILw(xFlags, s1, 16, 1);
IFNATIVE(NF_CF) {} else {
CSETw(s4, cCS);
BFIw(xFlags, s4, F_CF, 1);
}
}
IFX(X_OF) {
IFNATIVE(NF_VF) {} else {
CSETw(s4, cVS);
BFIw(xFlags, s4, F_OF, 1);
}
}
IFX(X_SF) {
IFNATIVE(NF_SF) {} else {
CSETw(s4, cMI);
BFIx(xFlags, s4, F_SF, 1);
}
}
LSRw(s1, s1, 16);
} else {
ADDw_REG(s1, s1, s2);
IFX(X_AF) {
BICw_REG(s3, s3, s1); // s3 = (op1 | op2) & ~ res
ORRw_REG(s3, s3, s4); // s3 = (op1 & op2) | ((op1 | op2) & ~ res)
LSRw(s4, s3, 3);
BFIw(xFlags, s4, F_AF, 1); // AF: bc & 0x08
}
}
IFX(X_PEND) {
STRw_U12(s1, xEmu, offsetof(x64emu_t, res));
STRH_U12(s1, xEmu, offsetof(x64emu_t, res));
}
COMP_ZFSF(s1, 16)
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s4);
}
@ -1133,7 +1182,7 @@ void emit_adc8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
}
// emit ADC8 instruction, from s1, const c, store result in s1 using s3 and s4 as scratch
void emit_adc8c(dynarec_arm_t* dyn, int ninst, int s1, int8_t c, int s3, int s4, int s5)
void emit_adc8c(dynarec_arm_t* dyn, int ninst, int s1, uint8_t c, int s3, int s4, int s5)
{
MAYUSE(s5);
MOV32w(s5, c);
@ -1470,7 +1519,7 @@ void emit_sbb8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
}
// emit SBB8 instruction, from s1, constant c, store result in s1 using s3 and s4 as scratch
void emit_sbb8c(dynarec_arm_t* dyn, int ninst, int s1, int8_t c, int s3, int s4, int s5)
void emit_sbb8c(dynarec_arm_t* dyn, int ninst, int s1, uint8_t c, int s3, int s4, int s5)
{
MAYUSE(s5);
MOV32w(s5, c);

View File

@ -868,8 +868,13 @@ static uint8_t getNativeFlagsUsed(dynarec_arm_t* dyn, int start, uint8_t flags)
used_flags|=dyn->insts[ninst].use_nat_flags_before&flags;
// if the opcode generate flags, return
if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_TOUCH && (start!=ninst)) {
if(used_flags&~dyn->insts[ninst].set_nat_flags) // check partial changes that would destroy flag state
if(used_flags&~dyn->insts[ninst].set_nat_flags) {
// check partial changes that would destroy flag state
if(dyn->insts[ninst].use_nat_flags_before&flags)
return used_flags;
// but also check if there is before needed
return 0;
}
return used_flags;
}
// check if there is a callret barrier

View File

@ -1489,13 +1489,13 @@ void emit_dec8(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4);
void emit_adc32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
//void emit_adc32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
void emit_adc8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
void emit_adc8c(dynarec_arm_t* dyn, int ninst, int s1, int8_t c, int s3, int s4, int s5);
void emit_adc8c(dynarec_arm_t* dyn, int ninst, int s1, uint8_t c, int s3, int s4, int s5);
void emit_adc16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
//void emit_adc16c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
void emit_sbb32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
//void emit_sbb32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
void emit_sbb8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
void emit_sbb8c(dynarec_arm_t* dyn, int ninst, int s1, int8_t c, int s3, int s4, int s5);
void emit_sbb8c(dynarec_arm_t* dyn, int ninst, int s1, uint8_t c, int s3, int s4, int s5);
void emit_sbb16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
//void emit_sbb16c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
void emit_neg32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4);

View File

@ -216,6 +216,15 @@ void UpdateFlags(x64emu_t *emu)
CONDITIONAL_SET_FLAG(XOR2(cc >> 6), F_OF);
CONDITIONAL_SET_FLAG(cc & 0x8, F_AF);
break;
case d_add8b:
CONDITIONAL_SET_FLAG(((uint16_t)emu->op1.u8+emu->op2.u8) & 0x100, F_CF);
CONDITIONAL_SET_FLAG(!emu->res.u8, F_ZF);
CONDITIONAL_SET_FLAG(emu->res.u8 & 0x80, F_SF);
CONDITIONAL_SET_FLAG(PARITY(emu->res.u8), F_PF);
cc = (emu->op1.u8 & emu->op2.u8) | ((~emu->res.u8) & (emu->op1.u8 | emu->op2.u8));
CONDITIONAL_SET_FLAG(XOR2(cc >> 6), F_OF);
CONDITIONAL_SET_FLAG(cc & 0x8, F_AF);
break;
case d_add16:
CONDITIONAL_SET_FLAG(emu->res.u32 & 0x10000, F_CF);
CONDITIONAL_SET_FLAG(!emu->res.u16, F_ZF);
@ -225,6 +234,15 @@ void UpdateFlags(x64emu_t *emu)
CONDITIONAL_SET_FLAG(XOR2(cc >> 14), F_OF);
CONDITIONAL_SET_FLAG(cc & 0x8, F_AF);
break;
case d_add16b:
CONDITIONAL_SET_FLAG(((uint32_t)emu->op1.u16+emu->op2.u16) & 0x10000, F_CF);
CONDITIONAL_SET_FLAG(!emu->res.u16, F_ZF);
CONDITIONAL_SET_FLAG(emu->res.u16 & 0x8000, F_SF);
CONDITIONAL_SET_FLAG(PARITY(emu->res.u8), F_PF);
cc = (emu->op1.u16 & emu->op2.u16) | ((~emu->res.u16) & (emu->op1.u16 | emu->op2.u16));
CONDITIONAL_SET_FLAG(XOR2(cc >> 14), F_OF);
CONDITIONAL_SET_FLAG(cc & 0x8, F_AF);
break;
case d_add32:
CONDITIONAL_SET_FLAG(emu->res.u64 & 0x100000000LL, F_CF);
CONDITIONAL_SET_FLAG(!emu->res.u32, F_ZF);

View File

@ -42,7 +42,9 @@ typedef enum {
typedef enum {
d_none = 0,
d_add8,
d_add8b, // using 8bits res
d_add16,
d_add16b, // using 16bits res
d_add32, // using 64bits res
d_add32b, // using 32bits res
d_add64,