diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index ec16b71b4..6d787a9a9 100644
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -183,6 +183,7 @@ int convert_bitmask(uint64_t bitmask);
 #define ADDw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(0, 0, 0, 0b00, Rm, 0, Rn, Rd))
 #define ADDSw_REG(Rd, Rn, Rm) FEMIT(ADDSUB_REG_gen(0, 0, 1, 0b00, Rm, 0, Rn, Rd))
 #define ADDw_REG_LSL(Rd, Rn, Rm, lsl) EMIT(ADDSUB_REG_gen(0, 0, 0, 0b00, Rm, lsl, Rn, Rd))
+#define ADDSw_REG_LSL(Rd, Rn, Rm, lsl) FEMIT(ADDSUB_REG_gen(0, 0, 1, 0b00, Rm, lsl, Rn, Rd))
 #define ADDxw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(rex.w, 0, 0, 0b00, Rm, 0, Rn, Rd))
 #define ADDz_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(rex.is32bits?0:1, 0, 0, 0b00, Rm, 0, Rn, Rd))
 #define ADDSxw_REG(Rd, Rn, Rm) FEMIT(ADDSUB_REG_gen(rex.w, 0, 1, 0b00, Rm, 0, Rn, Rd))
diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c
index 90376126d..82a98fe06 100644
--- a/src/dynarec/arm64/dynarec_arm64_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_0f.c
@@ -39,6 +39,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
     int q0, q1;
     int d0, d1;
     int s0;
+    int mask;
     uint64_t tmp64u;
     int64_t j64;
     int64_t fixedaddress;
@@ -1643,13 +1644,15 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 LDxw(x1, x3, fixedaddress);
                 ed = x1;
             }
-            if(rex.w) {
-                ANDx_mask(x2, gd, 1, 0, 0b00101);  //mask=0x000000000000003f
-            } else {
-                ANDw_mask(x2, gd, 0, 0b00100);     //mask=0x00000001f
-            }
-            LSRxw_REG(x4, ed, x2);
-            BFIw(xFlags, x4, F_CF, 1);
+            IFX(X_CF) {
+                if(rex.w) {
+                    ANDx_mask(x2, gd, 1, 0, 0b00101);  //mask=0x000000000000003f
+                } else {
+                    ANDw_mask(x2, gd, 0, 0b00100);     //mask=0x00000001f
+                }
+                LSRxw_REG(x4, ed, x2);
+                BFIw(xFlags, x4, F_CF, 1);
+            }
             break;
         case 0xA4:
             nextop = F8;
@@ -2044,8 +2047,8 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     IFX(X_CF) {
                         BFXILxw(xFlags, ed, u8, 1);    // inject 1 bit from u8 to F_CF (i.e. pos 0)
                     }
-                    MOV32w(x4, 1);
-                    ORRxw_REG_LSL(ed, ed, x4, u8);
+                    mask = convert_bitmask_xw(1LL<<u8);
+                    ORRxw_mask(ed, ed, (mask>>12)&1, mask&0x3F, (mask>>6)&0x3F);
                     if(wback) {
                         STxw(ed, wback, fixedaddress);
                         SMWRITE();
@@ -2093,8 +2096,8 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     IFX(X_CF) {
                         BFXILxw(xFlags, ed, u8, 1);    // inject 1 bit from u8 to F_CF (i.e. pos 0)
                     }
-                    MOV32w(x4, 1);
-                    EORxw_REG_LSL(ed, ed, x4, u8);
+                    mask = convert_bitmask_xw(1LL<<u8);
+                    EORxw_mask(ed, ed, (mask>>12)&1, mask&0x3F, (mask>>6)&0x3F);
                    if(wback) {
                         STxw(ed, wback, fixedaddress);
                         SMWRITE();
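
NOTE (editor, not part of the patch): the BTS/BTC hunks above replace a two-instruction
MOV32w plus ORR/EOR-with-shifted-register sequence with a single ORR/EOR taking an
AArch64 logical immediate, precomputed at JIT time by convert_bitmask. The BT hunk also
gates the whole CF computation behind IFX(X_CF), so it is skipped when no later opcode
reads the flag. Below is a minimal sketch of the encoding being exploited for a single
set bit; encode_single_bit64 is an illustrative helper, not box64's convert_bitmask,
and the (N<<12)|(immr<<6)|imms packing is an assumption of this sketch (the patch's
field extraction suggests its helper may pack immr and imms the other way around).

    #include <stdint.h>
    #include <stdio.h>

    /* Encode the single-bit constant 1ULL<<p as an AArch64 logical immediate.
     * A lone set bit is a 64-bit element (N=1) holding a run of exactly one
     * 1-bit (imms=0), rotated right by (64-p)%64 positions (immr). */
    static int encode_single_bit64(unsigned p)
    {
        unsigned N    = 1;              /* 64-bit element size */
        unsigned imms = 0;              /* run length of ones, minus 1 */
        unsigned immr = (64u - p) & 63; /* rotation that lands the bit at p */
        return (int)((N << 12) | (immr << 6) | imms);
    }

    int main(void)
    {
        printf("0x%04x\n", encode_single_bit64(5)); /* 0x1ec0 under this packing */
        return 0;
    }
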
diff --git a/src/dynarec/arm64/dynarec_arm64_66.c b/src/dynarec/arm64/dynarec_arm64_66.c
index c6e209c5f..946716ed0 100644
--- a/src/dynarec/arm64/dynarec_arm64_66.c
+++ b/src/dynarec/arm64/dynarec_arm64_66.c
@@ -154,9 +154,9 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             INST_NAME("ADC AX, Iw");
             READFLAGS(X_CF);
             SETFLAGS(X_ALL, SF_SET_PENDING);
-            i16 = F16;
+            u16 = F16;
             UXTHw(x1, xRAX);
-            MOV32w(x2, i16);
+            MOV32w(x2, u16);
             emit_adc16(dyn, ninst, x1, x2, x3, x4);
             BFIz(xRAX, x1, 0, 16);
             break;
@@ -500,7 +500,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             SETFLAGS(X_ALL, SF_SET_PENDING);
             GETEW(x1, (opcode==0x81)?2:1);
             if(opcode==0x81) i16 = F16S; else i16 = F8S;
-            MOVZw(x5, i16);
+            MOVZw(x5, (uint16_t)i16);
             emit_adc16(dyn, ninst, x1, x5, x2, x4);
             EWBACK;
             break;
@@ -510,7 +510,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             SETFLAGS(X_ALL, SF_SET_PENDING);
             GETEW(x1, (opcode==0x81)?2:1);
             if(opcode==0x81) i16 = F16S; else i16 = F8S;
-            MOVZw(x5, i16);
+            MOVZw(x5, (uint16_t)i16);
             emit_sbb16(dyn, ninst, x1, x5, x2, x4);
             EWBACK;
             break;
@@ -705,8 +705,8 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             break;
         case 0x99:
             INST_NAME("CWD");
-            SXTHw(x1, xRAX);
-            BFXILx(xRDX, x1, 16, 16);
+            SXTHw(x1, xRAX);            // sign extend ax to x1
+            BFXILx(xRDX, x1, 16, 16);   // insert high 16bits of x1 as low DX
             break;
         case 0x9C:
             INST_NAME("PUSHF");
diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c
index e57aedb0d..6bd25c4a7 100644
--- a/src/dynarec/arm64/dynarec_arm64_660f.c
+++ b/src/dynarec/arm64/dynarec_arm64_660f.c
@@ -34,6 +34,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
     uint8_t eb1, eb2;
     int64_t j64;
     uint64_t tmp64u, tmp64u2;
+    int mask;
     int v0, v1;
     int q0, q1;
     int d0, d1;
@@ -2328,19 +2329,27 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 LDH(x1, x3, fixedaddress);
                 ed = x1;
             }
-            ANDw_mask(x2, gd, 0, 0b000011);    // mask=0x0f
-            LSRw_REG(x1, ed, x2);
-            BFIw(xFlags, x1, F_CF, 1);
+            IFX(X_CF) {
+                ANDw_mask(x2, gd, 0, 0b000011);    // mask=0x0f
+                LSRw_REG(x1, ed, x2);
+                BFIw(xFlags, x1, F_CF, 1);
+            }
             break;
         case 0xA4:
             INST_NAME("SHLD Ew, Gw, Ib");
-            SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
-            GETEW(x1, 1);
-            GETGW(x2);
-            u8 = F8;
-            emit_shld16c(dyn, ninst, ed, gd, u8, x4, x5);
-            EWBACK;
+            u8 = geted_ib(dyn, addr, ninst, nextop)&0x1f;
+            if(u8) {
+                SETFLAGS(X_ALL, SF_SET_PENDING);
+                GETEW(x1, 1);
+                GETGW(x2);
+                u8 = F8;
+                emit_shld16c(dyn, ninst, ed, gd, u8, x4, x5);
+                EWBACK;
+            } else {
+                FAKEED;
+                F8;
+            }
             break;
         case 0xA5:
             nextop = F8;
@@ -2390,12 +2399,18 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
         case 0xAC:
             nextop = F8;
             INST_NAME("SHRD Ew, Gw, Ib");
-            SETFLAGS(X_ALL, SF_SET_PENDING);
-            GETEW(x1, 1);
-            GETGW(x2);
-            u8 = F8;
-            emit_shrd16c(dyn, ninst, ed, gd, u8, x4, x5);
-            EWBACK;
+            u8 = geted_ib(dyn, addr, ninst, nextop)&0x1f;
+            if(u8) {
+                SETFLAGS(X_ALL, SF_SET_PENDING);
+                GETEW(x1, 1);
+                GETGW(x2);
+                u8 = F8;
+                emit_shrd16c(dyn, ninst, ed, gd, u8, x4, x5);
+                EWBACK;
+            } else {
+                FAKEED;
+                F8;
+            }
             break;
         case 0xAD:
             nextop = F8;
@@ -2516,8 +2531,8 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                     IFX(X_CF) {
                         BFXILxw(xFlags, ed, u8, 1);    // inject 1 bit from u8 to F_CF (i.e. pos 0)
                     }
-                    MOV32w(x4, 1);
-                    BFIxw(ed, x4, u8, 1);
+                    mask = convert_bitmask_xw(1<<u8);
+                    ORRxw_mask(ed, ed, (mask>>12)&1, mask&0x3F, (mask>>6)&0x3F);
                     EWBACK;
                     break;
                 case 6:
@@ -2543,8 +2558,8 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                     IFX(X_CF) {
                         BFXILxw(xFlags, ed, u8, 1);    // inject 1 bit from u8 to F_CF (i.e. pos 0)
                     }
-                    MOV32w(x4, 1);
-                    EORxw_REG_LSL(ed, ed, x4, u8);
+                    mask = convert_bitmask_xw(1<<u8);
+                    EORxw_mask(ed, ed, (mask>>12)&1, mask&0x3F, (mask>>6)&0x3F);
                     EWBACK;
                     break;
                 default:
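
NOTE (editor, not part of the patch): the SHLD/SHRD Ew, Gw, Ib changes peek at the
immediate with geted_ib before committing to SETFLAGS, because a masked shift count of
zero must leave both the destination and every flag unchanged; the FAKEED/F8 pair just
consumes the ModRM byte and the immediate. A hedged C model of the 16-bit semantics
behind that guard follows (shld16 is an illustrative name, and returning the destination
unchanged for counts above 16, which are undefined for 16-bit operands, is a choice of
this sketch, not documented CPU behavior):

    #include <stdint.h>

    /* Model of 16-bit SHLD: the CPU masks the count to 5 bits first; a zero
     * count writes nothing and leaves all flags untouched. */
    static uint16_t shld16(uint16_t dst, uint16_t src, unsigned count, int* flags_touched)
    {
        count &= 31;                  /* same masking as the patch's &0x1f */
        *flags_touched = (count != 0);
        if (!count || count > 16)     /* count==0: no-op; >16: undefined, keep dst */
            return dst;
        uint32_t wide = ((uint32_t)dst << 16) | src;
        return (uint16_t)(wide >> (16 - count));
    }

    int main(void)
    {
        int touched;
        uint16_t r = shld16(0x1234, 0xABCD, 4, &touched); /* 0x234A, flags touched */
        uint16_t z = shld16(0x1234, 0xABCD, 0, &touched); /* 0x1234, flags untouched */
        return (r == 0x234A && z == 0x1234 && !touched) ? 0 : 1;
    }
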
diff --git a/src/dynarec/arm64/dynarec_arm64_emit_math.c b/src/dynarec/arm64/dynarec_arm64_emit_math.c
index ee3d4ecc6..d8873e761 100644
--- a/src/dynarec/arm64/dynarec_arm64_emit_math.c
+++ b/src/dynarec/arm64/dynarec_arm64_emit_math.c
@@ -310,7 +310,7 @@ void emit_add8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
     IFX(X_PEND) {
         STRB_U12(s1, xEmu, offsetof(x64emu_t, op1));
         STRB_U12(s2, xEmu, offsetof(x64emu_t, op2));
-        SET_DF(s3, d_add8);
+        SET_DF(s3, d_add8b);
     } else IFX(X_ALL) {
         SET_DFNONE(s3);
     }
@@ -318,27 +318,52 @@ void emit_add8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
         ORRw_REG(s3, s1, s2);       // s3 = op1 | op2
         ANDw_REG(s4, s1, s2);       // s4 = op1 & op2
     }
-    ADDw_REG(s1, s1, s2);
-    IFX(X_AF|X_OF) {
-        BICw_REG(s3, s3, s1);       // s3 = (op1 | op2) & ~ res
-        ORRw_REG(s3, s3, s4);       // s3 = (op1 & op2) | ((op1 | op2) & ~ res)
+    IFX(X_CF|X_SF|X_OF|X_ZF) {
+        LSLw(s1, s1, 24);
+        ADDSw_REG_LSL(s1, s1, s2, 24);
         IFX(X_AF) {
+            BICw_REG_LSR(s3, s3, s1, 24);   // s3 = (op1 | op2) & ~ res
+            ORRw_REG(s3, s3, s4);           // s3 = (op1 & op2) | ((op1 | op2) & ~ res)
             LSRw(s4, s3, 3);
             BFIw(xFlags, s4, F_AF, 1);      // AF: bc & 0x08
         }
+        IFX(X_ZF) {
+            IFNATIVE(NF_EQ) {} else {
+                CSETw(s4, cEQ);
+                BFIw(xFlags, s4, F_ZF, 1);
+            }
+        }
+        IFX(X_CF) {
+            IFNATIVE(NF_CF) {} else {
+                CSETw(s4, cCS);
+                BFIw(xFlags, s4, F_CF, 1);
+            }
+        }
         IFX(X_OF) {
-            LSRw(s4, s3, 6);
-            EORw_REG_LSR(s4, s4, s4, 1);
-            BFIw(xFlags, s4, F_OF, 1);      // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1
+            IFNATIVE(NF_VF) {} else {
+                CSETw(s4, cVS);
+                BFIw(xFlags, s4, F_OF, 1);
+            }
+        }
+        IFX(X_SF) {
+            IFNATIVE(NF_SF) {} else {
+                CSETw(s4, cMI);
+                BFIx(xFlags, s4, F_SF, 1);
+            }
+        }
+        LSRw(s1, s1, 24);
+    } else {
+        ADDw_REG(s1, s1, s2);
+        IFX(X_AF) {
+            BICw_REG(s3, s3, s1);       // s3 = (op1 | op2) & ~ res
+            ORRw_REG(s3, s3, s4);       // s3 = (op1 & op2) | ((op1 | op2) & ~ res)
+            LSRw(s4, s3, 3);
+            BFIw(xFlags, s4, F_AF, 1);  // AF: bc & 0x08
         }
     }
-    IFX(X_CF) {
-        BFXILw(xFlags, s1, 8, 1);
-    }
     IFX(X_PEND) {
-        STRH_U12(s1, xEmu, offsetof(x64emu_t, res));
+        STRB_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
-    COMP_ZFSF(s1, 8)
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s4);
     }
@@ -488,36 +513,60 @@ void emit_add16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
     IFX(X_PEND) {
         STRH_U12(s1, xEmu, offsetof(x64emu_t, op1));
         STRH_U12(s2, xEmu, offsetof(x64emu_t, op2));
-        SET_DF(s3, d_add16);
+        SET_DF(s3, d_add16b);
     } else IFX(X_ALL) {
         SET_DFNONE(s3);
     }
-    IFX(X_AF | X_OF) {
+    IFX(X_AF) {
         ORRw_REG(s3, s1, s2);       // s3 = op1 | op2
         ANDw_REG(s4, s1, s2);       // s4 = op1 & op2
     }
-    ADDw_REG(s1, s1, s2);
-
-    IFX(X_AF|X_OF) {
-        BICw_REG(s3, s3, s1);       // s3 = (op1 | op2) & ~ res
-        ORRw_REG(s3, s3, s4);       // s3 = (op1 & op2) | ((op1 | op2) & ~ res)
+    IFX(X_CF|X_SF|X_OF|X_ZF) {
+        LSLw(s1, s1, 16);
+        ADDSw_REG_LSL(s1, s1, s2, 16);
         IFX(X_AF) {
+            BICw_REG_LSR(s3, s3, s1, 16);   // s3 = (op1 | op2) & ~ res
+            ORRw_REG(s3, s3, s4);           // s3 = (op1 & op2) | ((op1 | op2) & ~ res)
             LSRw(s4, s3, 3);
             BFIw(xFlags, s4, F_AF, 1);      // AF: bc & 0x08
         }
+        IFX(X_ZF) {
+            IFNATIVE(NF_EQ) {} else {
+                CSETw(s4, cEQ);
+                BFIw(xFlags, s4, F_ZF, 1);
+            }
+        }
+        IFX(X_CF) {
+            IFNATIVE(NF_CF) {} else {
+                CSETw(s4, cCS);
+                BFIw(xFlags, s4, F_CF, 1);
+            }
+        }
         IFX(X_OF) {
-            LSRw(s4, s3, 14);
-            EORw_REG_LSR(s4, s4, s4, 1);
-            BFIw(xFlags, s4, F_OF, 1);      // OF: ((bc >> 14) ^ ((bc>>14)>>1)) & 1
+            IFNATIVE(NF_VF) {} else {
+                CSETw(s4, cVS);
+                BFIw(xFlags, s4, F_OF, 1);
+            }
+        }
+        IFX(X_SF) {
+            IFNATIVE(NF_SF) {} else {
+                CSETw(s4, cMI);
+                BFIx(xFlags, s4, F_SF, 1);
+            }
+        }
+        LSRw(s1, s1, 16);
+    } else {
+        ADDw_REG(s1, s1, s2);
+        IFX(X_AF) {
+            BICw_REG(s3, s3, s1);       // s3 = (op1 | op2) & ~ res
+            ORRw_REG(s3, s3, s4);       // s3 = (op1 & op2) | ((op1 | op2) & ~ res)
+            LSRw(s4, s3, 3);
+            BFIw(xFlags, s4, F_AF, 1);  // AF: bc & 0x08
        }
     }
-    IFX(X_CF) {
-        BFXILw(xFlags, s1, 16, 1);
-    }
     IFX(X_PEND) {
-        STRw_U12(s1, xEmu, offsetof(x64emu_t, res));
+        STRH_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
-    COMP_ZFSF(s1, 16)
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s4);
     }
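
NOTE (editor, not part of the patch): the emit_add8/emit_add16 rework above is the heart
of this patch. Both operands are shifted to the top of a 32-bit register so that a
single flag-setting ADDS produces NZCV values that coincide with the x86 8/16-bit add
flags (the low 24 or 16 zero bits cannot generate a stray carry), letting ZF/CF/OF/SF be
taken from CSET, or consumed natively via IFNATIVE, instead of being recomputed from a
widened result. A self-contained C sketch of the 8-bit case, with add8_flags as an
illustrative stand-in for the emitted ARM64:

    #include <stdint.h>
    #include <stdio.h>

    /* Shift both 8-bit operands to the top byte of a 32-bit value, so the
     * 32-bit add's carry/overflow are exactly the x86 8-bit CF/OF. */
    static uint8_t add8_flags(uint8_t a, uint8_t b, int* cf, int* of)
    {
        uint32_t wa = (uint32_t)a << 24;              /* LSLw(s1, s1, 24) */
        uint32_t wb = (uint32_t)b << 24;
        uint32_t r  = wa + wb;                        /* ADDSw_REG_LSL(s1, s1, s2, 24) */
        *cf = r < wa;                                 /* native C flag == x86 CF */
        *of = (int32_t)((wa ^ r) & (wb ^ r)) < 0;     /* native V flag == x86 OF */
        return (uint8_t)(r >> 24);                    /* LSRw(s1, s1, 24) recovers res */
    }

    int main(void)
    {
        int cf, of;
        uint8_t r = add8_flags(0x7f, 0x01, &cf, &of);
        printf("res=%02x CF=%d OF=%d\n", r, cf, of);  /* res=80 CF=0 OF=1 */
        return 0;
    }

Here r < wa reproduces the native carry and the sign test on (wa ^ r) & (wb ^ r)
reproduces the native overflow, which is why the emitted code can simply CSET on cCS and
cVS.
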
@@ -1133,7 +1182,7 @@ void emit_adc8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
 }

 // emit ADC8 instruction, from s1, const c, store result in s1 using s3 and s4 as scratch
-void emit_adc8c(dynarec_arm_t* dyn, int ninst, int s1, int8_t c, int s3, int s4, int s5)
+void emit_adc8c(dynarec_arm_t* dyn, int ninst, int s1, uint8_t c, int s3, int s4, int s5)
 {
     MAYUSE(s5);
     MOV32w(s5, c);
@@ -1470,7 +1519,7 @@ void emit_sbb8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
 }

 // emit SBB8 instruction, from s1, constant c, store result in s1 using s3 and s4 as scratch
-void emit_sbb8c(dynarec_arm_t* dyn, int ninst, int s1, int8_t c, int s3, int s4, int s5)
+void emit_sbb8c(dynarec_arm_t* dyn, int ninst, int s1, uint8_t c, int s3, int s4, int s5)
 {
     MAYUSE(s5);
     MOV32w(s5, c);
diff --git a/src/dynarec/arm64/dynarec_arm64_functions.c b/src/dynarec/arm64/dynarec_arm64_functions.c
index dd4749e7b..96a17c21e 100644
--- a/src/dynarec/arm64/dynarec_arm64_functions.c
+++ b/src/dynarec/arm64/dynarec_arm64_functions.c
@@ -868,8 +868,13 @@ static uint8_t getNativeFlagsUsed(dynarec_arm_t* dyn, int start, uint8_t flags)
         used_flags|=dyn->insts[ninst].use_nat_flags_before&flags;
         // if the opcode generate flags, return
         if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_TOUCH && (start!=ninst)) {
-            if(used_flags&~dyn->insts[ninst].set_nat_flags) // check partial changes that would destroy flag state
+            if(used_flags&~dyn->insts[ninst].set_nat_flags) {
+                // check partial changes that would destroy flag state
+                if(dyn->insts[ninst].use_nat_flags_before&flags)
+                    return used_flags;
+                // but also check if there is before needed
                 return 0;
+            }
             return used_flags;
         }
         // check if there is a callret barrier
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index e9fd068ba..5e297221c 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -1489,13 +1489,13 @@ void emit_dec8(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4);
 void emit_adc32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
 //void emit_adc32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
 void emit_adc8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
-void emit_adc8c(dynarec_arm_t* dyn, int ninst, int s1, int8_t c, int s3, int s4, int s5);
+void emit_adc8c(dynarec_arm_t* dyn, int ninst, int s1, uint8_t c, int s3, int s4, int s5);
 void emit_adc16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
 //void emit_adc16c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
 void emit_sbb32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
 //void emit_sbb32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
 void emit_sbb8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
-void emit_sbb8c(dynarec_arm_t* dyn, int ninst, int s1, int8_t c, int s3, int s4, int s5);
+void emit_sbb8c(dynarec_arm_t* dyn, int ninst, int s1, uint8_t c, int s3, int s4, int s5);
 void emit_sbb16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
 //void emit_sbb16c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
 void emit_neg32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4);
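
NOTE (editor, not part of the patch): the int8_t to uint8_t parameter change in
emit_adc8c/emit_sbb8c (and the matching prototypes in dynarec_arm64_helper.h above)
matters because the constant is widened before MOV32w materializes it: a negative int8_t
sign-extends into a 32-bit value the emitter never intended. A two-line demonstration:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        int8_t  s = (int8_t)0x80;   /* arrives as -128 */
        uint8_t u = (uint8_t)0x80;
        printf("as int8_t : 0x%08X\n", (uint32_t)(int32_t)s); /* 0xFFFFFF80 */
        printf("as uint8_t: 0x%08X\n", (uint32_t)u);          /* 0x00000080 */
        return 0;
    }
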
diff --git a/src/emu/x64run_private.c b/src/emu/x64run_private.c
index 6027a7444..a2bc60202 100644
--- a/src/emu/x64run_private.c
+++ b/src/emu/x64run_private.c
@@ -216,6 +216,15 @@ void UpdateFlags(x64emu_t *emu)
         CONDITIONAL_SET_FLAG(XOR2(cc >> 6), F_OF);
         CONDITIONAL_SET_FLAG(cc & 0x8, F_AF);
         break;
+    case d_add8b:
+        CONDITIONAL_SET_FLAG(((uint16_t)emu->op1.u8+emu->op2.u8) & 0x100, F_CF);
+        CONDITIONAL_SET_FLAG(!emu->res.u8, F_ZF);
+        CONDITIONAL_SET_FLAG(emu->res.u8 & 0x80, F_SF);
+        CONDITIONAL_SET_FLAG(PARITY(emu->res.u8), F_PF);
+        cc = (emu->op1.u8 & emu->op2.u8) | ((~emu->res.u8) & (emu->op1.u8 | emu->op2.u8));
+        CONDITIONAL_SET_FLAG(XOR2(cc >> 6), F_OF);
+        CONDITIONAL_SET_FLAG(cc & 0x8, F_AF);
+        break;
     case d_add16:
         CONDITIONAL_SET_FLAG(emu->res.u32 & 0x10000, F_CF);
         CONDITIONAL_SET_FLAG(!emu->res.u16, F_ZF);
@@ -225,6 +234,15 @@ void UpdateFlags(x64emu_t *emu)
         CONDITIONAL_SET_FLAG(XOR2(cc >> 14), F_OF);
         CONDITIONAL_SET_FLAG(cc & 0x8, F_AF);
         break;
+    case d_add16b:
+        CONDITIONAL_SET_FLAG(((uint32_t)emu->op1.u16+emu->op2.u16) & 0x10000, F_CF);
+        CONDITIONAL_SET_FLAG(!emu->res.u16, F_ZF);
+        CONDITIONAL_SET_FLAG(emu->res.u16 & 0x8000, F_SF);
+        CONDITIONAL_SET_FLAG(PARITY(emu->res.u8), F_PF);
+        cc = (emu->op1.u16 & emu->op2.u16) | ((~emu->res.u16) & (emu->op1.u16 | emu->op2.u16));
+        CONDITIONAL_SET_FLAG(XOR2(cc >> 14), F_OF);
+        CONDITIONAL_SET_FLAG(cc & 0x8, F_AF);
+        break;
     case d_add32:
         CONDITIONAL_SET_FLAG(emu->res.u64 & 0x100000000LL, F_CF);
         CONDITIONAL_SET_FLAG(!emu->res.u32, F_ZF);
diff --git a/src/include/regs.h b/src/include/regs.h
index 6589ac2e2..58070c8a7 100644
--- a/src/include/regs.h
+++ b/src/include/regs.h
@@ -42,7 +42,9 @@ typedef enum {
 typedef enum {
     d_none = 0,
     d_add8,
+    d_add8b,    // using 8bits res
     d_add16,
+    d_add16b,   // using 16bits res
     d_add32,    // using 64bits res
     d_add32b,   // using 32bits res
     d_add64,
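
NOTE (editor, not part of the patch): the new d_add8b/d_add16b cases extend box64's
deferred-flags scheme. The JIT stores op1, op2, a defer code, and now a natively sized
result (STRB/STRH rather than a widened store), and UpdateFlags rebuilds the x86 flags
on demand; CF is recomputed from the operands because the stored result no longer
contains the carry-out bit. A hedged sketch of that recovery logic, where pending_add8_t
and the helpers are illustrative and not the x64emu_t layout:

    #include <stdint.h>

    typedef struct { uint8_t op1, op2, res; } pending_add8_t;

    /* CF: re-add the operands in a wider type and take the carry out of bit 7. */
    static int pending_cf(const pending_add8_t* p)
    {
        return (((uint16_t)p->op1 + p->op2) >> 8) & 1;
    }

    /* AF/OF: the borrow-chain trick from UpdateFlags,
     * bc = (op1 & op2) | (~res & (op1 | op2)). */
    static int pending_af(const pending_add8_t* p)
    {
        uint8_t bc = (p->op1 & p->op2) | ((uint8_t)~p->res & (p->op1 | p->op2));
        return (bc >> 3) & 1;                  /* carry out of bit 3 */
    }

    static int pending_of(const pending_add8_t* p)
    {
        uint8_t bc = (p->op1 & p->op2) | ((uint8_t)~p->res & (p->op1 | p->op2));
        return ((bc >> 6) ^ (bc >> 7)) & 1;    /* XOR2(cc >> 6) in the patch */
    }

    int main(void)
    {
        pending_add8_t p = { 0x0f, 0x01, 0x10 };  /* 0x0f + 0x01 = 0x10 */
        return (pending_cf(&p) == 0 && pending_af(&p) == 1 && pending_of(&p) == 0) ? 0 : 1;
    }
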