[ARM64_DYNAREC] Added DYNAREC_PAUSE option for hint instructions (#2070)

* [ARM64_DYNAREC] Added DYNAREC_PAUSE option for hint instructions

* Use sevl for wfe

* Add docs

* Fix typo

* use switch case
This commit is contained in:
Yang Liu 2024-11-25 18:28:19 +08:00 committed by GitHub
parent c30a392fe8
commit 84608fc581
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 69 additions and 10 deletions

View File

@ -179,6 +179,13 @@ Tweaking the memory barriers to reduce the performance impact by STRONGMEM
* 1 : Use weak barriers to have more performance boost
* 2 : All of 1, plus disable the last write barriers
#### BOX64_DYNAREC_PAUSE *
Enable/Disable x86 PAUSE emulation, which may help the performance of spinlocks
* 0 : Ignore x86 PAUSE instruction (Default)
* 1 : Use YIELD to emulate x86 PAUSE instruction
* 2 : Use WFI to emulate x86 PAUSE instruction
* 3 : Use SEVL+WFE to emulate x86 PAUSE instruction
#### BOX64_DYNAREC_X87DOUBLE *
Force the use of Double for x87 emulation
* 0 : Try to use float when possible for x87 emulation (default, faster)

View File

@ -291,7 +291,7 @@ Define Box64's Dynarec max allowed forward value when building Block.
* 0 : No forward value. When the current block ends, don't try to go further even if there are previous forward jumps
* XXX : Allow up to XXX bytes of gap between the block end and the next forward jump when building a Block (Default: 128)
=item B<BOX64_DYNAREC_STRONGMEM>=I<0|1|2>
=item B<BOX64_DYNAREC_STRONGMEM>=I<0|1|2|3>
Enable/Disable simulation of Strong Memory model
@ -300,7 +300,7 @@ Enable/Disable simulation of Strong Memory model
* 2 : All 1. plus memory barriers on SIMD instructions
* 3 : All 2. plus more memory barriers on a regular basis
=item B<BOX64_DYNAREC_WEAKBARRIER>=I<0|1>
=item B<BOX64_DYNAREC_WEAKBARRIER>=I<0|1|2>
Tweaking the memory barriers to reduce the performance impact by STRONGMEM
@ -308,6 +308,15 @@ Tweaking the memory barriers to reduce the performance impact by STRONGMEM
* 1 : Use weak barriers to have more performance boost
* 2 : All of 1, plus disable the last write barriers
=item B<BOX64_DYNAREC_PAUSE>=I<0|1|2|3>
Enable/Disable x86 PAUSE emulation, which may help the performance of spinlocks
* 0 : Ignore x86 PAUSE instruction (Default)
* 1 : Use YIELD to emulate x86 PAUSE instruction
* 2 : Use WFI to emulate x86 PAUSE instruction
* 3 : Use SEVL+WFE to emulate x86 PAUSE instruction
=item B<BOX64_DYNAREC_X87DOUBLE>=I<0|1>
Force the use of Double for x87 emulation

View File

@ -80,6 +80,7 @@ int box64_dynarec_bigblock = 1;
int box64_dynarec_forward = 128;
int box64_dynarec_strongmem = 0;
int box64_dynarec_weakbarrier = 0;
int box64_dynarec_pause = 0;
int box64_dynarec_x87double = 0;
int box64_dynarec_div0 = 0;
int box64_dynarec_fastnan = 1;
@ -787,6 +788,18 @@ void LoadLogEnv()
if (box64_dynarec_weakbarrier)
printf_log(LOG_INFO, "Dynarec will try to use weaker memory barriers to reduce the performance loss introduce by strong memory emulation\n");
}
#ifdef ARM64
p = getenv("BOX64_DYNAREC_PAUSE");
if (p) {
if (strlen(p) == 1) {
if (p[0] >= '0' && p[0] <= '3')
box64_dynarec_pause = p[0] - '0';
}
if (box64_dynarec_pause)
printf_log(LOG_INFO, "Dynarec will use %s to emulate pause instruction\n",
box64_dynarec_pause == 1 ? "yield" : (box64_dynarec_pause == 2 ? "wfi" : "wfe"));
}
#endif
p = getenv("BOX64_DYNAREC_X87DOUBLE");
if(p) {
if(strlen(p)==1) {

View File

@ -543,6 +543,7 @@ int convert_bitmask(uint64_t bitmask);
// Operand-less hint instructions: each macro passes one fixed 32-bit opcode to EMIT.
// The bit patterns are written out in full so they can be compared directly
// with the mask strings used by the ARM64 printer.
#define WFE EMIT(0b11010101000000110010000001011111)
#define WFI EMIT(0b11010101000000110010000001111111)
#define YIELD EMIT(0b11010101000000110010000000111111)
// SEVL is emitted right before WFE when PAUSE is emulated with BOX64_DYNAREC_PAUSE=3
#define SEVL EMIT(0b11010101000000110010000010111111)
#define CSINC_gen(sf, Rm, cond, Rn, Rd) ((sf)<<31 | 0b11010100<<21 | (Rm)<<16 | (cond)<<12 | 1<<10 | (Rn)<<5 | (Rd))
#define CSINCx(Rd, Rn, Rm, cond) EMIT(CSINC_gen(1, Rm, cond, Rn, Rd))

View File

@ -136,6 +136,10 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
snprintf(buff, sizeof(buff), "YIELD");
return buff;
}
if(isMask(opcode, "11010101000000110010000010111111", &a)) {
snprintf(buff, sizeof(buff), "SEVL");
return buff;
}
// --- LDR / STR
if(isMask(opcode, "f010100011iiiiiii22222nnnnnttttt", &a)) {
int offset = signExtend(imm, 7)<<(2+sf);

View File

@ -986,11 +986,13 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
/* inside the block, cache transform */ \
CacheTransform(dyn, ninst, cacheupd, x1, x2, x3); \
i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size);\
SKIP_SEVL(i32); \
B(i32); \
} \
} else { \
/* inside the block, no cache change */ \
i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size); \
SKIP_SEVL(i32); \
Bcond(YES, i32); \
}
@ -1427,7 +1429,15 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
if(gd==xRAX) {
if (rep == 2) {
INST_NAME("PAUSE");
YIELD;
switch (box64_dynarec_pause) {
case 1: YIELD; break;
case 2: WFI; break;
case 3:
dyn->insts[ninst].wfe = 1;
SEVL;
WFE;
break;
}
} else {
INST_NAME("NOP");
}
@ -2950,11 +2960,13 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
} else { \
CacheTransform(dyn, ninst, cacheupd, x1, x2, x3); \
i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size); \
SKIP_SEVL(i32); \
Bcond(c__, i32); \
} \
} else { \
/* inside the block */ \
i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size); \
SKIP_SEVL(i32); \
if(Z) {CBZz(xRCX, i32);} else {CBNZz(xRCX, i32);}; \
}
case 0xE0:
@ -3161,6 +3173,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
// inside the block
CacheTransform(dyn, ninst, CHECK_CACHE(), x1, x2, x3);
tmp = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size);
SKIP_SEVL(tmp);
if(tmp==4) {
NOP;
} else {

View File

@ -1577,11 +1577,13 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
} else { \
CacheTransform(dyn, ninst, cacheupd, x1, x2, x3); \
i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size); \
SKIP_SEVL(i32); \
B(i32); \
} \
} else { \
/* inside the block */ \
i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size); \
SKIP_SEVL(i32); \
Bcond(YES, i32); \
}

View File

@ -1380,12 +1380,14 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
} else { \
CacheTransform(dyn, ninst, cacheupd, x1, x2, x3); \
i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size);\
SKIP_SEVL(i32); \
B(i32); \
} \
} else { \
/* inside the block */ \
} else { \
/* inside the block */ \
i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size); \
Bcond(YES, i32); \
SKIP_SEVL(i32); \
Bcond(YES, i32); \
}
case 0xE0:
INST_NAME("LOOPNZ (32bits)");

View File

@ -80,12 +80,14 @@ uintptr_t dynarec64_67_32(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int
} else { \
CacheTransform(dyn, ninst, cacheupd, x1, x2, x3); \
i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size);\
SKIP_SEVL(i32); \
B(i32); \
} \
} else { \
/* inside the block */ \
} else { \
/* inside the block */ \
i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size); \
Bcond(YES, i32); \
SKIP_SEVL(i32); \
Bcond(YES, i32); \
}
case 0xE0:
INST_NAME("LOOPNZ (16bits)");

View File

@ -1140,6 +1140,8 @@
#endif
#define CLEARIP() dyn->last_ip=0
// Add 4 bytes (one ARM64 opcode) to the branch offset `val` when the
// instruction this one jumps to has its .wfe flag set, i.e. it was emitted
// as SEVL + WFE. Presumably this makes the branch land on the WFE and skip
// the leading SEVL -- NOTE(review): confirm against the PAUSE emitter.
// Relies on `dyn` and `ninst` being in scope at the expansion site.
// Wrapped in do { } while (0) so that `SKIP_SEVL(x);` is exactly one
// statement and stays safe inside unbraced if/else bodies.
#define SKIP_SEVL(val) do { if (dyn->insts[dyn->insts[ninst].x64.jmp_insts].wfe) (val) += 4; } while (0)
#if STEP < 2
#define PASS2IF(A, B) if(A)
#elif STEP == 2

View File

@ -109,6 +109,7 @@ typedef struct instruction_arm64_s {
uint8_t will_write:2; // [strongmem] will write to memory
uint8_t last_write:1; // [strongmem] the last write in a SEQ
uint8_t lock:1; // [strongmem] lock semantic
uint8_t wfe:1; // opcode uses sevl + wfe
uint8_t set_nat_flags; // 0 or combinaison of native flags define
uint8_t use_nat_flags; // 0 or combinaison of native flags define
uint8_t use_nat_flags_before; // 0 or combinaison of native flags define

View File

@ -28,6 +28,7 @@ extern int box64_dynarec_bigblock;
extern int box64_dynarec_forward;
extern int box64_dynarec_strongmem;
extern int box64_dynarec_weakbarrier;
extern int box64_dynarec_pause;
extern int box64_dynarec_fastnan;
extern int box64_dynarec_fastround;
extern int box64_dynarec_x87double;

View File

@ -155,8 +155,9 @@ ENTRYINT(BOX64_DYNAREC_DUMP, box64_dynarec_dump, 0, 2, 2) \
ENTRYINT(BOX64_DYNAREC_LOG, box64_dynarec_log, 0, 3, 2) \
ENTRYINT(BOX64_DYNAREC_BIGBLOCK, box64_dynarec_bigblock, 0, 3, 2) \
ENTRYSTRING_(BOX64_DYNAREC_FORWARD, box64_dynarec_forward) \
ENTRYINT(BOX64_DYNAREC_STRONGMEM, box64_dynarec_strongmem, 0, 4, 3) \
ENTRYINT(BOX64_DYNAREC_STRONGMEM, box64_dynarec_strongmem, 0, 3, 2) \
ENTRYINT(BOX64_DYNAREC_WEAKBARRIER, box64_dynarec_weakbarrier, 0, 2, 2) \
ENTRYINT(BOX64_DYNAREC_PAUSE, box64_dynarec_pause, 0, 3, 2) \
ENTRYBOOL(BOX64_DYNAREC_X87DOUBLE, box64_dynarec_x87double) \
ENTRYBOOL(BOX64_DYNAREC_DIV0, box64_dynarec_div0) \
ENTRYBOOL(BOX64_DYNAREC_FASTNAN, box64_dynarec_fastnan) \
@ -184,6 +185,7 @@ IGNORE(BOX64_DYNAREC_BIGBLOCK) \
IGNORE(BOX64_DYNAREC_FORWARD) \
IGNORE(BOX64_DYNAREC_STRONGMEM) \
IGNORE(BOX64_DYNAREC_WEAKBARRIER) \
IGNORE(BOX64_DYNAREC_PAUSE) \
IGNORE(BOX64_DYNAREC_X87DOUBLE) \
IGNORE(BOX64_DYNAREC_DIV0) \
IGNORE(BOX64_DYNAREC_FASTNAN) \