From 592af6db3bc9545bc8e5a8ed5440404c04c1d7ba Mon Sep 17 00:00:00 2001 From: Yang Liu Date: Sat, 8 Feb 2025 16:04:50 +0800 Subject: [PATCH] [ARM64_DYNAREC] Added more aligned optim cases for REP MOVSB (#2326) --- .clang-format | 2 +- src/dynarec/arm64/dynarec_arm64_00.c | 2 +- src/dynarec/arm64/dynarec_arm64_66.c | 12 ++++++++++++ src/dynarec/arm64/dynarec_arm64_helper.h | 4 ++++ src/dynarec/arm64/dynarec_arm64_pass0.h | 1 + 5 files changed, 19 insertions(+), 2 deletions(-) diff --git a/.clang-format b/.clang-format index c70cac1a5..6098c201a 100644 --- a/.clang-format +++ b/.clang-format @@ -12,7 +12,7 @@ MaxEmptyLinesToKeep: 2 IndentCaseLabels: true AlignConsecutiveMacros: true WhitespaceSensitiveMacros: ['QUOTE'] -IfMacros: ['IFX', 'IFXORNAT', 'IFX2', 'IFXA', 'IFX_PENDOR0', 'IFXX', 'IFX2X', 'IFXN', 'UFLAG_IF', 'PASS2IF'] +IfMacros: ['IFX', 'IFXORNAT', 'IFX2', 'IFXA', 'IFX_PENDOR0', 'IFXX', 'IFX2X', 'IFXN', 'UFLAG_IF', 'PASS2IF', 'IF_UNALIGNED', 'IF_ALIGNED'] UseTab: Never IndentPPDirectives: None --- diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c index 162a833db..fc54be0e4 100644 --- a/src/dynarec/arm64/dynarec_arm64_00.c +++ b/src/dynarec/arm64/dynarec_arm64_00.c @@ -1675,7 +1675,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("REP MOVSB"); CBZx_NEXT(xRCX); TBNZ_MARK2(xFlags, F_DF); - IF_UNALIGNED(ip) {} else { + IF_ALIGNED(ip) { // special optim for large RCX value on forward case only MARK3; CMPSx_U12(xRCX, 8); diff --git a/src/dynarec/arm64/dynarec_arm64_66.c b/src/dynarec/arm64/dynarec_arm64_66.c index c98902a9e..73767538d 100644 --- a/src/dynarec/arm64/dynarec_arm64_66.c +++ b/src/dynarec/arm64/dynarec_arm64_66.c @@ -779,6 +779,17 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("REP MOVSB"); CBZx_NEXT(xRCX); TBNZ_MARK2(xFlags, F_DF); + IF_ALIGNED (ip) { + // special optim for large RCX value on forward case only + MARK3; + 
CMPSx_U12(xRCX, 8); + B_MARK(cCC); + LDRx_S9_postindex(x1, xRSI, 8); + STRx_S9_postindex(x1, xRDI, 8); + SUBx_U12(xRCX, xRCX, 8); + CBNZx_MARK3(xRCX); + CBZx_MARKLOCK(xRCX); + } MARK; // Part with DF==0 LDRB_S9_postindex(x1, xRSI, 1); STRB_S9_postindex(x1, xRDI, 1); @@ -790,6 +801,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin STRB_S9_postindex(x1, xRDI, -1); SUBx_U12(xRCX, xRCX, 1); CBNZx_MARK2(xRCX); + MARKLOCK; // done } else { INST_NAME("MOVSB"); diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h index b6398a2ba..97f352218 100644 --- a/src/dynarec/arm64/dynarec_arm64_helper.h +++ b/src/dynarec/arm64/dynarec_arm64_helper.h @@ -973,6 +973,10 @@ #define IF_UNALIGNED(A) if(is_addr_unaligned(A)) #endif +#ifndef IF_ALIGNED +#define IF_ALIGNED(A) if (!is_addr_unaligned(A)) +#endif + #define STORE_REG(A) STRx_U12(x##A, xEmu, offsetof(x64emu_t, regs[_##A])) #define STP_REGS(A, B) STPx_S7_offset(x##A, x##B, xEmu, offsetof(x64emu_t, regs[_##A])) #define LDP_REGS(A, B) LDPx_S7_offset(x##A, x##B, xEmu, offsetof(x64emu_t, regs[_##A])) diff --git a/src/dynarec/arm64/dynarec_arm64_pass0.h b/src/dynarec/arm64/dynarec_arm64_pass0.h index 1495503ff..a88fbfa2a 100644 --- a/src/dynarec/arm64/dynarec_arm64_pass0.h +++ b/src/dynarec/arm64/dynarec_arm64_pass0.h @@ -69,3 +69,4 @@ #define INVERT_CARRY_BEFORE(A) dyn->insts[ninst].invert_carry_before = 1 // mark opcode as "unaligned" possible only if the current address is not marked as already unaligned #define IF_UNALIGNED(A) if((dyn->insts[ninst].unaligned=(is_addr_unaligned(A)?0:1))) +#define IF_ALIGNED(A) if((dyn->insts[ninst].unaligned=(is_addr_unaligned(A)?0:1))) // same marking as IF_UNALIGNED: unaligned=1 unless the address is already flagged; body runs when the address is not flagged, like the generic if (!is_addr_unaligned(A)) \ No newline at end of file