From 70ab16a6d6c54c83a6f9f678c5bbe1c104c6a672 Mon Sep 17 00:00:00 2001
From: Yang Liu
Date: Fri, 10 Oct 2025 20:45:50 +0800
Subject: [PATCH] [DYNAREC][INTERP] Fixed an edge case of CMPXHG (#3049)

---
 src/dynarec/arm64/dynarec_arm64_0f.c | 10 +++++++---
 src/dynarec/arm64/dynarec_arm64_f0.c | 28 ++++++++++++++++------------
 src/dynarec/la64/dynarec_la64_f0.c   | 15 ++++++++++-----
 src/dynarec/rv64/dynarec_rv64_f0.c   | 15 ++++++++++-----
 src/emu/x64run0f.c                   |  1 -
 5 files changed, 43 insertions(+), 26 deletions(-)

diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c
index 8813a577e..0996a5a9f 100644
--- a/src/dynarec/arm64/dynarec_arm64_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_0f.c
@@ -2106,19 +2106,23 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     CMPSxw_REG(xRAX, ed);
                 }
                 MOVxw_REG(x1, ed); // save value
-                Bcond(cNE, 4+4);
+                Bcond(cNE, 4 + (rex.w ? 4 : 8));
                 MOVxw_REG(ed, gd);
+                if (!rex.w) { B_NEXT_nocond; }
             } else {
                 addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, NULL, 0, 0);
                 LDxw(x1, wback, fixedaddress);
-                UFLAG_IF {emit_cmp32(dyn, ninst, rex, xRAX, x1, x3, x4, x5);}
+                UFLAG_IF {
+                    emit_cmp32(dyn, ninst, rex, xRAX, x1, x3, x4, x5);
+                }
                 SUBxw_REG(x4, xRAX, x1);
                 CBNZxw_MARK(x4);
                 // EAX == Ed
                 STxw(gd, wback, fixedaddress);
+                if (!rex.w) { B_NEXT_nocond; }
                 MARK;
             }
-            MOVxw_REG(xRAX, x1); // upper part of RAX will be erase on 32bits, no mater what
+            MOVxw_REG(xRAX, x1);
             break;

         case 0xB3:
diff --git a/src/dynarec/arm64/dynarec_arm64_f0.c b/src/dynarec/arm64/dynarec_arm64_f0.c
index 2e5f82ff7..491bbe3c9 100644
--- a/src/dynarec/arm64/dynarec_arm64_f0.c
+++ b/src/dynarec/arm64/dynarec_arm64_f0.c
@@ -334,11 +334,13 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                             CMPSxw_REG(xRAX, ed);
                         }
                         MOVxw_REG(x1, ed); // save value
-                        Bcond(cNE, 4+4);
+                        Bcond(cNE, 4 + (rex.w ? 4 : 8));
                         MOVxw_REG(ed, gd);
+                        if (!rex.w) { B_NEXT_nocond; }
                         MOVxw_REG(xRAX, x1);
                     } else {
                         addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
+                        UFLAG_IF { MOVxw_REG(x6, xRAX); }
                         if(!ALIGNED_ATOMICxw) {
                             if(cpuext.uscat) {
                                 ANDx_mask(x1, wback, 1, 0, 3); // mask = F
@@ -353,16 +355,17 @@
                         // disabling use of atomics for now, as it seems to make (at least)
                         // HorizonZeroDawn and Cyberpunk2077 (both from GoG) unstable
                         // but why?!
-                        if(cpuext.atomics && 0) {
+                        if (rex.w /* RAX should NOT be zero-upped if equal */ && cpuext.atomics && 0) {
                             UFLAG_IF {
                                 MOVxw_REG(x1, xRAX);
                                 CASALxw(x1, gd, wback);
-                                if(!ALIGNED_ATOMICxw) {
+                                MOVxw_REG(xRAX, x1);
+                                if (!ALIGNED_ATOMICxw) {
                                     B_MARK_nocond;
                                 }
                             } else {
                                 CASALxw(xRAX, gd, wback);
-                                if(!ALIGNED_ATOMICxw) {
+                                if (!ALIGNED_ATOMICxw) {
                                     B_NEXT_nocond;
                                 }
                             }
@@ -370,12 +373,14 @@
                             MARKLOCK;
                             LDAXRxw(x1, wback);
                             CMPSxw_REG(xRAX, x1);
-                            B_MARK(cNE);
+                            Bcond(cNE, 4 + (rex.w ? 8 : 12));
                             // EAX == Ed
                             STLXRxw(x4, gd, wback);
                             CBNZx_MARKLOCK(x4);
                             // done
-                            if(!ALIGNED_ATOMICxw) {
+                            if (!rex.w) { B_MARK_nocond; }
+                            MOVxw_REG(xRAX, x1);
+                            if(!ALIGNED_ATOMICxw && rex.w) {
                                 B_MARK_nocond;
                             }
                         }
@@ -387,18 +392,17 @@
                             SUBxw_UXTB(x3, x3, x1);
                             CBNZw_MARK3(x3);
                             CMPSxw_REG(xRAX, x1);
-                            B_MARK(cNE);
+                            Bcond(cNE, 4 + (rex.w ? 12 : 16));
                             // EAX == Ed
                             STLXRB(x4, gd, wback);
                             CBNZx_MARK3(x4);
                             STRxw_U12(gd, wback, 0);
+                            if (!rex.w) { B_MARK_nocond; }
+                            MOVxw_REG(xRAX, x1);
                         }
                         MARK;
-                        // Common part (and fallback for EAX != Ed)
-                        UFLAG_IF {emit_cmp32(dyn, ninst, rex, xRAX, x1, x3, x4, x5); MOVxw_REG(xRAX, x1);}
-                        else {
-                            if(!ALIGNED_ATOMICxw || !(cpuext.atomics && 0))
-                                MOVxw_REG(xRAX, x1); // upper par of RAX will be erase on 32bits, no mater what
+                        UFLAG_IF {
+                            emit_cmp32(dyn, ninst, rex, x6, x1, x3, x4, x5);
                         }
                     }
                     break;
diff --git a/src/dynarec/la64/dynarec_la64_f0.c b/src/dynarec/la64/dynarec_la64_f0.c
index 30d8d8443..a45508bee 100644
--- a/src/dynarec/la64/dynarec_la64_f0.c
+++ b/src/dynarec/la64/dynarec_la64_f0.c
@@ -175,10 +175,12 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                        SUBxw(x2, x1, xRAX);
                        BNE_MARK2(x2, xZR);
                        MVxw(ed, gd);
+                        if (!rex.w) { B_NEXT_nocond; }
                        MARK2;
                        MVxw(xRAX, x1);
                    } else {
                        addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, 0);
+                        UFLAG_IF { MVxw(x6, xRAX); }
                        ANDI(x1, wback, (1 << (rex.w + 2)) - 1);
                        BNEZ_MARK3(x1);
                        // Aligned
@@ -186,11 +188,13 @@
                        MV(x4, gd);
                        LLxw(x1, wback, 0);
                        SUBxw(x3, x1, xRAX);
-                        BNEZ_MARK(x3);
+                        BNEZ(x3, 4 + (rex.w ? 8 : 12));
                        // EAX == Ed
                        SCxw(x4, wback, 0);
                        BEQZ_MARKLOCK(x4);
-                        B_MARK_nocond;
+                        if (!rex.w) { B_MARK_nocond; }
+                        MVxw(xRAX, x1);
+                        if (rex.w) { B_MARK_nocond; }
                        MARK3;
                        // Unaligned
                        ADDI_D(x5, xZR, -(1 << (rex.w + 2)));
@@ -199,14 +203,15 @@
                        LDxw(x1, wback, 0);
                        LLxw(x6, x5, 0);
                        SUBxw(x3, x1, xRAX);
-                        BNEZ_MARK(x3);
+                        BNEZ(x3, 4 + (rex.w ? 12 : 16));
                        // EAX == Ed
                        SCxw(x6, x5, 0);
                        BEQZ_MARKLOCK2(x6);
                        SDxw(gd, wback, 0);
-                        MARK;
-                        UFLAG_IF { emit_cmp32(dyn, ninst, rex, xRAX, x1, x3, x4, x5, x6); }
+                        if (!rex.w) { B_MARK_nocond; }
                        MVxw(xRAX, x1);
+                        MARK;
+                        UFLAG_IF { emit_cmp32(dyn, ninst, rex, x6, x1, x3, x4, x5, x6); }
                    }
                    break;
                default:
diff --git a/src/dynarec/rv64/dynarec_rv64_f0.c b/src/dynarec/rv64/dynarec_rv64_f0.c
index c7f3f634d..5f1db176e 100644
--- a/src/dynarec/rv64/dynarec_rv64_f0.c
+++ b/src/dynarec/rv64/dynarec_rv64_f0.c
@@ -249,21 +249,25 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                        SUBxw(x2, ed, xRAX);
                        BNE_MARK2(x2, xZR);
                        MVxw(ed, gd);
+                        if (!rex.w) { B_NEXT_nocond; }
                        MARK2;
                        MVxw(xRAX, x1);
                    } else {
                        addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, 0);
+                        UFLAG_IF { MVxw(x6, xRAX); }
                        ANDI(x1, wback, (1 << (rex.w + 2)) - 1);
                        BNEZ_MARK3(x1);
                        // Aligned
                        MARKLOCK;
                        LRxw(x1, wback, 1, 1);
                        SUBxw(x3, x1, xRAX);
-                        BNEZ_MARK(x3);
+                        BNEZ(x3, 4 + (rex.w ? 8 : 12));
                        // EAX == Ed
                        SCxw(x4, gd, wback, 1, 1);
                        BNEZ_MARKLOCK(x4);
-                        B_MARK_nocond;
+                        if (!rex.w) { B_MARK_nocond; }
+                        MVxw(xRAX, x1);
+                        if (rex.w) { B_MARK_nocond; }
                        MARK3;
                        // Unaligned
                        ANDI(x5, wback, -(1 << (rex.w + 2)));
@@ -271,14 +275,15 @@
                        LDxw(x1, wback, 0);
                        LRxw(x6, x5, 1, 1);
                        SUBxw(x3, x1, xRAX);
-                        BNEZ_MARK(x3);
+                        BNEZ(x3, 4 + (rex.w ? 12 : 16));
                        // EAX == Ed
                        SCxw(x4, x6, x5, 1, 1);
                        BNEZ_MARKLOCK2(x4);
                        SDxw(gd, wback, 0);
-                        MARK;
-                        UFLAG_IF { emit_cmp32(dyn, ninst, rex, xRAX, x1, x3, x4, x5, x6); }
+                        if (!rex.w) { B_MARK_nocond; }
                        MVxw(xRAX, x1);
+                        MARK;
+                        UFLAG_IF { emit_cmp32(dyn, ninst, rex, x6, x1, x3, x4, x5, x6); }
                    }
                    break;
                default:
diff --git a/src/emu/x64run0f.c b/src/emu/x64run0f.c
index 9895f3db7..d45050304 100644
--- a/src/emu/x64run0f.c
+++ b/src/emu/x64run0f.c
@@ -1418,7 +1418,6 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
                    ED->q[0] = GD->dword[0];
                else
                    ED->dword[0] = GD->dword[0];
-                R_RAX = R_EAX; // to erase upper part of RAX
            } else {
                R_RAX = ED->dword[0];
            }
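
Illustrative sketch (not part of the upstream patch): the edge case addressed above is the 32-bit (non-REX.W) form of CMPXCHG. When the comparison succeeds, only the destination operand is written, so the upper half of RAX must be left untouched; only when the comparison fails is EAX reloaded from the destination, which zero-extends RAX. That is why the interpreter drops `R_RAX = R_EAX` on the equal path and the dynarecs skip the RAX write-back when `!rex.w` and the values match. The C below is a standalone model of that rule only; the helper name cmpxchg32 and its variables are invented for the example and do not exist in box64, and flag updates are omitted.

#include <stdint.h>
#include <stdio.h>

/* Model of CMPXCHG with a 32-bit operand, as encoded by the patch above. */
static void cmpxchg32(uint64_t *rax, uint32_t *dest, uint32_t src)
{
    if ((uint32_t)*rax == *dest) {
        /* equal: only the destination is written; RAX, including its
           upper 32 bits, is left as-is (no zero-extension) */
        *dest = src;
    } else {
        /* not equal: EAX is loaded from the destination, which
           zero-extends into the upper half of RAX */
        *rax = *dest;
    }
}

int main(void)
{
    uint64_t rax = 0xDEADBEEF00000001ull;
    uint32_t mem = 1;

    cmpxchg32(&rax, &mem, 42);  /* equal case: mem becomes 42, rax keeps its upper half */
    printf("equal:     rax=%016llx mem=%u\n", (unsigned long long)rax, mem);

    cmpxchg32(&rax, &mem, 7);   /* not-equal case: rax becomes 0x2a, zero-extended */
    printf("not equal: rax=%016llx mem=%u\n", (unsigned long long)rax, mem);
    return 0;
}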