[DYNAREC][INTERP] Fixed an edge case of CMPXCHG (#3049)

This commit is contained in:
Yang Liu
2025-10-10 20:45:50 +08:00
committed by GitHub
parent 74d4db051b
commit 70ab16a6d6
5 changed files with 43 additions and 26 deletions

View File

@@ -2106,19 +2106,23 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
CMPSxw_REG(xRAX, ed);
}
MOVxw_REG(x1, ed); // save value
Bcond(cNE, 4+4);
Bcond(cNE, 4 + (rex.w ? 4 : 8));
MOVxw_REG(ed, gd);
if (!rex.w) { B_NEXT_nocond; }
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, NULL, 0, 0);
LDxw(x1, wback, fixedaddress);
UFLAG_IF {emit_cmp32(dyn, ninst, rex, xRAX, x1, x3, x4, x5);}
UFLAG_IF {
emit_cmp32(dyn, ninst, rex, xRAX, x1, x3, x4, x5);
}
SUBxw_REG(x4, xRAX, x1);
CBNZxw_MARK(x4);
// EAX == Ed
STxw(gd, wback, fixedaddress);
if (!rex.w) { B_NEXT_nocond; }
MARK;
}
MOVxw_REG(xRAX, x1); // upper part of RAX will be erase on 32bits, no mater what
MOVxw_REG(xRAX, x1);
break;
case 0xB3:

View File

@@ -334,11 +334,13 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
CMPSxw_REG(xRAX, ed);
}
MOVxw_REG(x1, ed); // save value
Bcond(cNE, 4+4);
Bcond(cNE, 4 + (rex.w ? 4 : 8));
MOVxw_REG(ed, gd);
if (!rex.w) { B_NEXT_nocond; }
MOVxw_REG(xRAX, x1);
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
UFLAG_IF { MOVxw_REG(x6, xRAX); }
if(!ALIGNED_ATOMICxw) {
if(cpuext.uscat) {
ANDx_mask(x1, wback, 1, 0, 3); // mask = F
@@ -353,16 +355,17 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
// disabling use of atomics for now, as it seems to make (at least)
// HorizonZeroDawn and Cyberpunk2077 (both from GoG) unstable
// but why?!
if(cpuext.atomics && 0) {
if (rex.w /* RAX should NOT be zero-upped if equal */ && cpuext.atomics && 0) {
UFLAG_IF {
MOVxw_REG(x1, xRAX);
CASALxw(x1, gd, wback);
if(!ALIGNED_ATOMICxw) {
MOVxw_REG(xRAX, x1);
if (!ALIGNED_ATOMICxw) {
B_MARK_nocond;
}
} else {
CASALxw(xRAX, gd, wback);
if(!ALIGNED_ATOMICxw) {
if (!ALIGNED_ATOMICxw) {
B_NEXT_nocond;
}
}
@@ -370,12 +373,14 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
MARKLOCK;
LDAXRxw(x1, wback);
CMPSxw_REG(xRAX, x1);
B_MARK(cNE);
Bcond(cNE, 4 + (rex.w ? 8 : 12));
// EAX == Ed
STLXRxw(x4, gd, wback);
CBNZx_MARKLOCK(x4);
// done
if(!ALIGNED_ATOMICxw) {
if (!rex.w) { B_MARK_nocond; }
MOVxw_REG(xRAX, x1);
if(!ALIGNED_ATOMICxw && rex.w) {
B_MARK_nocond;
}
}
@@ -387,18 +392,17 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
SUBxw_UXTB(x3, x3, x1);
CBNZw_MARK3(x3);
CMPSxw_REG(xRAX, x1);
B_MARK(cNE);
Bcond(cNE, 4 + (rex.w ? 12 : 16));
// EAX == Ed
STLXRB(x4, gd, wback);
CBNZx_MARK3(x4);
STRxw_U12(gd, wback, 0);
if (!rex.w) { B_MARK_nocond; }
MOVxw_REG(xRAX, x1);
}
MARK;
// Common part (and fallback for EAX != Ed)
UFLAG_IF {emit_cmp32(dyn, ninst, rex, xRAX, x1, x3, x4, x5); MOVxw_REG(xRAX, x1);}
else {
if(!ALIGNED_ATOMICxw || !(cpuext.atomics && 0))
MOVxw_REG(xRAX, x1); // upper par of RAX will be erase on 32bits, no mater what
UFLAG_IF {
emit_cmp32(dyn, ninst, rex, x6, x1, x3, x4, x5);
}
}
break;

View File

@@ -175,10 +175,12 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
SUBxw(x2, x1, xRAX);
BNE_MARK2(x2, xZR);
MVxw(ed, gd);
if (!rex.w) { B_NEXT_nocond; }
MARK2;
MVxw(xRAX, x1);
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, 0);
UFLAG_IF { MVxw(x6, xRAX); }
ANDI(x1, wback, (1 << (rex.w + 2)) - 1);
BNEZ_MARK3(x1);
// Aligned
@@ -186,11 +188,13 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
MV(x4, gd);
LLxw(x1, wback, 0);
SUBxw(x3, x1, xRAX);
BNEZ_MARK(x3);
BNEZ(x3, 4 + (rex.w ? 8 : 12));
// EAX == Ed
SCxw(x4, wback, 0);
BEQZ_MARKLOCK(x4);
B_MARK_nocond;
if (!rex.w) { B_MARK_nocond; }
MVxw(xRAX, x1);
if (rex.w) { B_MARK_nocond; }
MARK3;
// Unaligned
ADDI_D(x5, xZR, -(1 << (rex.w + 2)));
@@ -199,14 +203,15 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
LDxw(x1, wback, 0);
LLxw(x6, x5, 0);
SUBxw(x3, x1, xRAX);
BNEZ_MARK(x3);
BNEZ(x3, 4 + (rex.w ? 12 : 16));
// EAX == Ed
SCxw(x6, x5, 0);
BEQZ_MARKLOCK2(x6);
SDxw(gd, wback, 0);
MARK;
UFLAG_IF { emit_cmp32(dyn, ninst, rex, xRAX, x1, x3, x4, x5, x6); }
if (!rex.w) { B_MARK_nocond; }
MVxw(xRAX, x1);
MARK;
UFLAG_IF { emit_cmp32(dyn, ninst, rex, x6, x1, x3, x4, x5, x6); }
}
break;
default:

View File

@@ -249,21 +249,25 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
SUBxw(x2, ed, xRAX);
BNE_MARK2(x2, xZR);
MVxw(ed, gd);
if (!rex.w) { B_NEXT_nocond; }
MARK2;
MVxw(xRAX, x1);
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, 0);
UFLAG_IF { MVxw(x6, xRAX); }
ANDI(x1, wback, (1 << (rex.w + 2)) - 1);
BNEZ_MARK3(x1);
// Aligned
MARKLOCK;
LRxw(x1, wback, 1, 1);
SUBxw(x3, x1, xRAX);
BNEZ_MARK(x3);
BNEZ(x3, 4 + (rex.w ? 8 : 12));
// EAX == Ed
SCxw(x4, gd, wback, 1, 1);
BNEZ_MARKLOCK(x4);
B_MARK_nocond;
if (!rex.w) { B_MARK_nocond; }
MVxw(xRAX, x1);
if (rex.w) { B_MARK_nocond; }
MARK3;
// Unaligned
ANDI(x5, wback, -(1 << (rex.w + 2)));
@@ -271,14 +275,15 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
LDxw(x1, wback, 0);
LRxw(x6, x5, 1, 1);
SUBxw(x3, x1, xRAX);
BNEZ_MARK(x3);
BNEZ(x3, 4 + (rex.w ? 12 : 16));
// EAX == Ed
SCxw(x4, x6, x5, 1, 1);
BNEZ_MARKLOCK2(x4);
SDxw(gd, wback, 0);
MARK;
UFLAG_IF { emit_cmp32(dyn, ninst, rex, xRAX, x1, x3, x4, x5, x6); }
if (!rex.w) { B_MARK_nocond; }
MVxw(xRAX, x1);
MARK;
UFLAG_IF { emit_cmp32(dyn, ninst, rex, x6, x1, x3, x4, x5, x6); }
}
break;
default:

View File

@@ -1418,7 +1418,6 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
ED->q[0] = GD->dword[0];
else
ED->dword[0] = GD->dword[0];
R_RAX = R_EAX; // to erase upper part of RAX
} else {
R_RAX = ED->dword[0];
}