diff --git a/CMakeLists.txt b/CMakeLists.txt index b618764c4..741cc0143 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1555,6 +1555,13 @@ add_test(fpu_rounding ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX set_tests_properties(fpu_rounding PROPERTIES ENVIRONMENT "BOX64_DYNAREC_FASTROUND=0") +add_test(x87pc ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX64} + -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests/test32 -D TEST_OUTPUT=tmpfile32.txt + -D TEST_REFERENCE=${CMAKE_SOURCE_DIR}/tests/ref32.txt + -P ${CMAKE_SOURCE_DIR}/runTest.cmake ) + +set_tests_properties(x87pc PROPERTIES ENVIRONMENT "BOX64_DYNAREC_X87DOUBLE=2") + else() add_test(bootSyscall ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX64} diff --git a/src/dynarec/arm64/dynarec_arm64_d8.c b/src/dynarec/arm64/dynarec_arm64_d8.c index f7ccac5e6..32b26566d 100644 --- a/src/dynarec/arm64/dynarec_arm64_d8.c +++ b/src/dynarec/arm64/dynarec_arm64_d8.c @@ -56,9 +56,8 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin FADDS(v1, v1, v2); } else { FADDD(v1, v1, v2); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; @@ -79,9 +78,8 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin FMULS(v1, v1, v2); } else { FMULD(v1, v1, v2); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; @@ -139,9 +137,8 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin FSUBS(v1, v1, v2); } else { FSUBD(v1, v1, v2); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; @@ -162,9 +159,8 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin FSUBS(v1, v2, v1); } else { FSUBD(v1, v2, v1); - X87_CHECK_PRECISION(v1); 
} - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; @@ -185,9 +181,8 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin FDIVS(v1, v1, v2); } else { FDIVD(v1, v1, v2); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; @@ -208,9 +203,8 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin FDIVS(v1, v2, v1); } else { FDIVD(v1, v2, v1); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; @@ -232,9 +226,8 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { FCVT_D_S(s0, s0); FADDD(v1, v1, s0); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; @@ -251,9 +244,8 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { FCVT_D_S(s0, s0); FMULD(v1, v1, s0); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; @@ -299,9 +291,8 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { FCVT_D_S(s0, s0); FSUBD(v1, v1, s0); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; @@ -318,9 +309,8 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { FCVT_D_S(s0, s0); FSUBD(v1, s0, v1); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; @@ -337,9 +327,8 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { 
FCVT_D_S(s0, s0); FDIVD(v1, v1, s0); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; @@ -356,9 +345,8 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { FCVT_D_S(s0, s0); FDIVD(v1, s0, v1); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; diff --git a/src/dynarec/arm64/dynarec_arm64_d9.c b/src/dynarec/arm64/dynarec_arm64_d9.c index cd5a5b787..787b597f7 100644 --- a/src/dynarec/arm64/dynarec_arm64_d9.c +++ b/src/dynarec/arm64/dynarec_arm64_d9.c @@ -430,9 +430,8 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin FSQRTS(v1, v1); } else { FSQRTD(v1, v1); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; diff --git a/src/dynarec/arm64/dynarec_arm64_da.c b/src/dynarec/arm64/dynarec_arm64_da.c index 403877358..8189f43fd 100644 --- a/src/dynarec/arm64/dynarec_arm64_da.c +++ b/src/dynarec/arm64/dynarec_arm64_da.c @@ -151,7 +151,6 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin u8 = x87_setround(dyn, ninst, x1, x5, x4); FADDD(v1, v1, v2); X87_CHECK_PRECISION(v1); - MARK_X87PC(); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; @@ -167,7 +166,6 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin u8 = x87_setround(dyn, ninst, x1, x5, x4); FMULD(v1, v1, v2); X87_CHECK_PRECISION(v1); - MARK_X87PC(); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; @@ -206,7 +204,6 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin u8 = x87_setround(dyn, ninst, x1, x5, x4); FSUBD(v1, v1, v2); X87_CHECK_PRECISION(v1); - MARK_X87PC(); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, 
ninst, u8); break; @@ -222,7 +219,6 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin u8 = x87_setround(dyn, ninst, x1, x5, x4); FSUBD(v1, v2, v1); X87_CHECK_PRECISION(v1); - MARK_X87PC(); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; @@ -238,7 +234,6 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin u8 = x87_setround(dyn, ninst, x1, x5, x4); FDIVD(v1, v1, v2); X87_CHECK_PRECISION(v1); - MARK_X87PC(); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; @@ -254,7 +249,6 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin u8 = x87_setround(dyn, ninst, x1, x5, x4); FDIVD(v1, v2, v1); X87_CHECK_PRECISION(v1); - MARK_X87PC(); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; diff --git a/src/dynarec/arm64/dynarec_arm64_dc.c b/src/dynarec/arm64/dynarec_arm64_dc.c index 3ff95fe61..ccdff0704 100644 --- a/src/dynarec/arm64/dynarec_arm64_dc.c +++ b/src/dynarec/arm64/dynarec_arm64_dc.c @@ -54,9 +54,8 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin FADDS(v1, v1, v2); } else { FADDD(v1, v1, v2); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; @@ -77,9 +76,8 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin FMULS(v1, v1, v2); } else { FMULD(v1, v1, v2); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; @@ -137,9 +135,8 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin FSUBS(v1, v2, v1); } else { FSUBD(v1, v2, v1); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; @@ -160,9 +157,8 @@ uintptr_t dynarec64_DC(dynarec_arm_t* 
dyn, uintptr_t addr, uintptr_t ip, int nin FSUBS(v1, v1, v2); } else { FSUBD(v1, v1, v2); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; @@ -183,9 +179,8 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin FDIVS(v1, v2, v1); } else { FDIVD(v1, v2, v1); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; @@ -206,9 +201,8 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin FDIVS(v1, v1, v2); } else { FDIVD(v1, v1, v2); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; @@ -227,7 +221,6 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin u8 = x87_setround(dyn, ninst, x1, x2, x4); FADDD(v1, v1, v2); X87_CHECK_PRECISION(v1); - MARK_X87PC(); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; @@ -241,7 +234,6 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin u8 = x87_setround(dyn, ninst, x1, x2, x4); FMULD(v1, v1, v2); X87_CHECK_PRECISION(v1); - MARK_X87PC(); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; @@ -274,7 +266,6 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin u8 = x87_setround(dyn, ninst, x1, x2, x4); FSUBD(v1, v1, v2); X87_CHECK_PRECISION(v1); - MARK_X87PC(); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; @@ -288,7 +279,6 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin u8 = x87_setround(dyn, ninst, x1, x2, x4); FSUBD(v1, v2, v1); X87_CHECK_PRECISION(v1); - MARK_X87PC(); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; @@ -302,7 +292,6 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, 
uintptr_t addr, uintptr_t ip, int nin u8 = x87_setround(dyn, ninst, x1, x2, x4); FDIVD(v1, v1, v2); X87_CHECK_PRECISION(v1); - MARK_X87PC(); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; @@ -316,7 +305,6 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin u8 = x87_setround(dyn, ninst, x1, x2, x4); FDIVD(v1, v2, v1); X87_CHECK_PRECISION(v1); - MARK_X87PC(); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; diff --git a/src/dynarec/arm64/dynarec_arm64_de.c b/src/dynarec/arm64/dynarec_arm64_de.c index 2d861f2bd..f1754ebb3 100644 --- a/src/dynarec/arm64/dynarec_arm64_de.c +++ b/src/dynarec/arm64/dynarec_arm64_de.c @@ -54,9 +54,8 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin FADDS(v1, v1, v2); } else { FADDD(v1, v1, v2); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); X87_POP_OR_FAIL(dyn, ninst, x3); @@ -78,9 +77,8 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin FMULS(v1, v1, v2); } else { FMULD(v1, v1, v2); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); X87_POP_OR_FAIL(dyn, ninst, x3); @@ -134,9 +132,8 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin FSUBS(v1, v2, v1); } else { FSUBD(v1, v2, v1); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); X87_POP_OR_FAIL(dyn, ninst, x3); @@ -158,9 +155,8 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin FSUBS(v1, v1, v2); } else { FSUBD(v1, v1, v2); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); X87_POP_OR_FAIL(dyn, ninst, x3); @@ -182,9 +178,8 @@ 
uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin FDIVS(v1, v2, v1); } else { FDIVD(v1, v2, v1); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); X87_POP_OR_FAIL(dyn, ninst, x3); @@ -206,9 +201,8 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin FDIVS(v1, v1, v2); } else { FDIVD(v1, v1, v2); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); X87_POP_OR_FAIL(dyn, ninst, x3); @@ -229,7 +223,6 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SCVTFDD(v2, v2); FADDD(v1, v1, v2); X87_CHECK_PRECISION(v1); - MARK_X87PC(); break; case 1: INST_NAME("FIMUL ST0, word[ED]"); @@ -242,7 +235,6 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SCVTFDD(v2, v2); FMULD(v1, v1, v2); X87_CHECK_PRECISION(v1); - MARK_X87PC(); break; case 2: INST_NAME("FICOM ST0, word[ED]"); @@ -280,7 +272,6 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SCVTFDD(v2, v2); FSUBD(v1, v1, v2); X87_CHECK_PRECISION(v1); - MARK_X87PC(); break; case 5: INST_NAME("FISUBR ST0, word[ED]"); @@ -293,7 +284,6 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SCVTFDD(v2, v2); FSUBD(v1, v2, v1); X87_CHECK_PRECISION(v1); - MARK_X87PC(); break; case 6: INST_NAME("FIDIV ST0, word[ED]"); @@ -306,7 +296,6 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SCVTFDD(v2, v2); FDIVD(v1, v1, v2); X87_CHECK_PRECISION(v1); - MARK_X87PC(); break; case 7: INST_NAME("FIDIVR ST0, word[ED]"); @@ -319,7 +308,6 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SCVTFDD(v2, v2); FDIVD(v1, v2, v1); X87_CHECK_PRECISION(v1); - MARK_X87PC(); break; } return addr; diff --git 
a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h index 7cbb8ff1a..ded76a69c 100644 --- a/src/dynarec/arm64/dynarec_arm64_helper.h +++ b/src/dynarec/arm64/dynarec_arm64_helper.h @@ -1006,16 +1006,14 @@ } #endif #ifndef X87_CHECK_PRECISION -#define X87_CHECK_PRECISION(A) \ - if(dyn->need_x87check) { \ - CBNZw(x87pc, 4+8); \ - FCVT_S_D(A, A); \ - FCVT_D_S(A, A); \ +#define X87_CHECK_PRECISION(A) \ + if (!ST_IS_F(0) && dyn->need_x87check) { \ + CBNZw(x87pc, 4 + 8); \ + FCVT_S_D(A, A); \ + FCVT_D_S(A, A); \ } #endif -#ifndef MARK_X87PC -#define MARK_X87PC() -#endif + #define STORE_REG(A) STRx_U12(x##A, xEmu, offsetof(x64emu_t, regs[_##A])) #define STP_REGS(A, B) STPx_S7_offset(x##A, x##B, xEmu, offsetof(x64emu_t, regs[_##A])) #define LDP_REGS(A, B) LDPx_S7_offset(x##A, x##B, xEmu, offsetof(x64emu_t, regs[_##A])) diff --git a/src/dynarec/arm64/dynarec_arm64_pass0.h b/src/dynarec/arm64/dynarec_arm64_pass0.h index 2bd18ac5b..d1e6d7d89 100644 --- a/src/dynarec/arm64/dynarec_arm64_pass0.h +++ b/src/dynarec/arm64/dynarec_arm64_pass0.h @@ -74,5 +74,7 @@ #define IF_ALIGNED(A) if(!(dyn->insts[ninst].unaligned=is_addr_unaligned(A))) #define NATIVE_RESTORE_X87PC() -#define X87_CHECK_PRECISION(A) -#define MARK_X87PC() if(dyn->need_x87check) dyn->insts[ninst].x87precision = 1 \ No newline at end of file +#define X87_CHECK_PRECISION(A) \ + do { \ + if (dyn->need_x87check) dyn->insts[ninst].x87precision = 1; \ + } while (0) diff --git a/src/dynarec/arm64/dynarec_arm64_pass1.h b/src/dynarec/arm64/dynarec_arm64_pass1.h index 0f904c61c..ba8b11a6e 100644 --- a/src/dynarec/arm64/dynarec_arm64_pass1.h +++ b/src/dynarec/arm64/dynarec_arm64_pass1.h @@ -24,9 +24,11 @@ #define INST_NAME(name) #define NATIVE_RESTORE_X87PC() -#define X87_CHECK_PRECISION(A) \ - do { \ - if (dyn->need_x87check) \ - dyn->need_x87check = 2; \ +#define X87_CHECK_PRECISION(A) \ + do { \ + if (dyn->need_x87check) { \ + dyn->insts[ninst].x87precision = 1; \ + if (!ST_IS_F(0)) \ + 
dyn->need_x87check = 2; \ + } \ } while (0) -#define MARK_X87PC() if(dyn->need_x87check) dyn->insts[ninst].x87precision = 1 \ No newline at end of file diff --git a/src/dynarec/rv64/dynarec_rv64_d8.c b/src/dynarec/rv64/dynarec_rv64_d8.c index 83de0eecd..25a8fed55 100644 --- a/src/dynarec/rv64/dynarec_rv64_d8.c +++ b/src/dynarec/rv64/dynarec_rv64_d8.c @@ -52,9 +52,8 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FADDS(v1, v1, v2); } else { FADDD(v1, v1, v2); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 0xC8 ... 0xCF: @@ -66,9 +65,8 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FMULS(v1, v1, v2); } else { FMULD(v1, v1, v2); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 0xD0 ... 0xD7: @@ -101,9 +99,8 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FSUBS(v1, v1, v2); } else { FSUBD(v1, v1, v2); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 0xE8 ... 0xEF: @@ -115,9 +112,8 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FSUBS(v1, v2, v1); } else { FSUBD(v1, v2, v1); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 0xF0 ... 0xF7: @@ -129,9 +125,8 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FDIVS(v1, v1, v2); } else { FDIVD(v1, v1, v2); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 0xF8 ... 
0xFF: @@ -143,9 +138,8 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FDIVS(v1, v2, v1); } else { FDIVD(v1, v2, v1); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; default: @@ -166,9 +160,8 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { FCVTDS(s0, s0); FADDD(v1, v1, s0); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 1: @@ -183,9 +176,8 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { FCVTDS(s0, s0); FMULD(v1, v1, s0); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 2: @@ -227,9 +219,8 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { FCVTDS(s0, s0); FSUBD(v1, v1, s0); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 5: @@ -244,9 +235,8 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { FCVTDS(s0, s0); FSUBD(v1, s0, v1); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 6: @@ -261,9 +251,8 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { FCVTDS(s0, s0); FDIVD(v1, v1, s0); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 7: @@ -278,9 +267,8 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { FCVTDS(s0, s0); FDIVD(v1, s0, v1); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); 
+ X87_CHECK_PRECISION(v1); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; } diff --git a/src/dynarec/rv64/dynarec_rv64_d9.c b/src/dynarec/rv64/dynarec_rv64_d9.c index 5e2e0e324..a5fb073f3 100644 --- a/src/dynarec/rv64/dynarec_rv64_d9.c +++ b/src/dynarec/rv64/dynarec_rv64_d9.c @@ -349,9 +349,8 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FSQRTS(v1, v1); } else { FSQRTD(v1, v1); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 0xFB: diff --git a/src/dynarec/rv64/dynarec_rv64_da.c b/src/dynarec/rv64/dynarec_rv64_da.c index 884edb88b..729212823 100644 --- a/src/dynarec/rv64/dynarec_rv64_da.c +++ b/src/dynarec/rv64/dynarec_rv64_da.c @@ -119,7 +119,6 @@ uintptr_t dynarec64_DA(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); FADDD(v1, v1, v2); X87_CHECK_PRECISION(v1); - MARK_X87PC(); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 1: @@ -132,7 +131,6 @@ uintptr_t dynarec64_DA(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); FMULD(v1, v1, v2); X87_CHECK_PRECISION(v1); - MARK_X87PC(); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 2: @@ -164,7 +162,6 @@ uintptr_t dynarec64_DA(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); FSUBD(v1, v1, v2); X87_CHECK_PRECISION(v1); - MARK_X87PC(); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 5: @@ -177,7 +174,6 @@ uintptr_t dynarec64_DA(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); FSUBD(v1, v2, v1); X87_CHECK_PRECISION(v1); - MARK_X87PC(); if 
(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 6: @@ -190,7 +186,6 @@ uintptr_t dynarec64_DA(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); FDIVD(v1, v1, v2); X87_CHECK_PRECISION(v1); - MARK_X87PC(); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 7: @@ -203,7 +198,6 @@ uintptr_t dynarec64_DA(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); FDIVD(v1, v2, v1); X87_CHECK_PRECISION(v1); - MARK_X87PC(); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; } diff --git a/src/dynarec/rv64/dynarec_rv64_dc.c b/src/dynarec/rv64/dynarec_rv64_dc.c index 80a7f082b..76d1cea70 100644 --- a/src/dynarec/rv64/dynarec_rv64_dc.c +++ b/src/dynarec/rv64/dynarec_rv64_dc.c @@ -48,9 +48,8 @@ uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FADDS(v1, v1, v2); } else { FADDD(v1, v1, v2); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 0xC8 ... 0xCF: @@ -62,9 +61,8 @@ uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FMULS(v1, v1, v2); } else { FMULD(v1, v1, v2); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 0xD0 ... 0xD7: @@ -97,9 +95,8 @@ uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FSUBS(v1, v2, v1); } else { FSUBD(v1, v2, v1); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 0xE8 ... 
0xEF: @@ -111,9 +108,8 @@ uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FSUBS(v1, v1, v2); } else { FSUBD(v1, v1, v2); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 0xF0 ... 0xF7: @@ -125,9 +121,8 @@ uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FDIVS(v1, v2, v1); } else { FDIVD(v1, v2, v1); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 0xF8 ... 0xFF: @@ -139,9 +134,8 @@ uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FDIVS(v1, v1, v2); } else { FDIVD(v1, v1, v2); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; default: @@ -226,7 +220,6 @@ uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); FDIVD(v1, v2, v1); X87_CHECK_PRECISION(v1); - MARK_X87PC(); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; } diff --git a/src/dynarec/rv64/dynarec_rv64_de.c b/src/dynarec/rv64/dynarec_rv64_de.c index 50b29a5ba..fbb98e0db 100644 --- a/src/dynarec/rv64/dynarec_rv64_de.c +++ b/src/dynarec/rv64/dynarec_rv64_de.c @@ -47,9 +47,8 @@ uintptr_t dynarec64_DE(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FADDS(v1, v1, v2); } else { FADDD(v1, v1, v2); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); X87_POP_OR_FAIL(dyn, ninst, x3); break; @@ -62,9 +61,8 @@ uintptr_t dynarec64_DE(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FMULS(v1, v1, v2); } else { FMULD(v1, v1, v2); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if 
(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); X87_POP_OR_FAIL(dyn, ninst, x3); break; @@ -100,9 +98,8 @@ uintptr_t dynarec64_DE(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FSUBS(v1, v2, v1); } else { FSUBD(v1, v2, v1); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); X87_POP_OR_FAIL(dyn, ninst, x3); break; @@ -115,9 +112,8 @@ uintptr_t dynarec64_DE(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FSUBS(v1, v1, v2); } else { FSUBD(v1, v1, v2); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); X87_POP_OR_FAIL(dyn, ninst, x3); break; @@ -130,9 +126,8 @@ uintptr_t dynarec64_DE(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FDIVS(v1, v2, v1); } else { FDIVD(v1, v2, v1); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); X87_POP_OR_FAIL(dyn, ninst, x3); break; @@ -145,9 +140,8 @@ uintptr_t dynarec64_DE(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FDIVS(v1, v1, v2); } else { FDIVD(v1, v1, v2); - X87_CHECK_PRECISION(v1); } - MARK_X87PC(); + X87_CHECK_PRECISION(v1); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); X87_POP_OR_FAIL(dyn, ninst, x3); break; diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index 9e9e9f42a..689627868 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -849,16 +849,13 @@ } #endif #ifndef X87_CHECK_PRECISION -#define X87_CHECK_PRECISION(A) \ - if (dyn->need_x87check) { \ - BNEZ(x87pc, 4 + 8); \ - FCVTSD(A, A); \ - FCVTDS(A, A); \ +#define X87_CHECK_PRECISION(A) \ + if (!ST_IS_F(0) && dyn->need_x87check) { \ + BNEZ(x87pc, 4 + 8); \ + FCVTSD(A, A); \ + FCVTDS(A, A); \ } #endif -#ifndef MARK_X87PC -#define 
MARK_X87PC() -#endif #define STORE_REG(A) SD(x##A, xEmu, offsetof(x64emu_t, regs[_##A])) #define LOAD_REG(A) LD(x##A, xEmu, offsetof(x64emu_t, regs[_##A])) diff --git a/src/dynarec/rv64/dynarec_rv64_pass0.h b/src/dynarec/rv64/dynarec_rv64_pass0.h index b1f2302c9..f2d38d903 100644 --- a/src/dynarec/rv64/dynarec_rv64_pass0.h +++ b/src/dynarec/rv64/dynarec_rv64_pass0.h @@ -102,6 +102,7 @@ #define IF_ALIGNED(A) if ((dyn->insts[ninst].unaligned = (is_addr_unaligned(A) ? 1 : 0))) #define NATIVE_RESTORE_X87PC() -#define X87_CHECK_PRECISION(A) -#define MARK_X87PC() \ - if (dyn->need_x87check) dyn->insts[ninst].x87precision = 1 +#define X87_CHECK_PRECISION(A) \ + do { \ + if (dyn->need_x87check) dyn->insts[ninst].x87precision = 1; \ + } while (0) diff --git a/src/dynarec/rv64/dynarec_rv64_pass1.h b/src/dynarec/rv64/dynarec_rv64_pass1.h index fd7d24338..1e1cbc946 100644 --- a/src/dynarec/rv64/dynarec_rv64_pass1.h +++ b/src/dynarec/rv64/dynarec_rv64_pass1.h @@ -27,11 +27,11 @@ #define NATIVE_RESTORE_X87PC() -#define X87_CHECK_PRECISION(A) \ - do { \ - if (dyn->need_x87check) \ - dyn->need_x87check = 2; \ +#define X87_CHECK_PRECISION(A) \ + do { \ + if (dyn->need_x87check) { \ + dyn->insts[ninst].x87precision = 1; \ + if (!ST_IS_F(0)) \ + dyn->need_x87check = 2; \ + } \ } while (0) - -#define MARK_X87PC() \ - if (dyn->need_x87check) dyn->insts[ninst].x87precision = 1 diff --git a/tests/ref32.txt b/tests/ref32.txt new file mode 100644 index 000000000..6417fb519 --- /dev/null +++ b/tests/ref32.txt @@ -0,0 +1,3 @@ +"fdivrp" : 3fd5555560000000 +"fsqrt" : 3ff6a09e60000000 +"faddp" : 3fc99999a0000000 diff --git a/tests/test32 b/tests/test32 new file mode 100755 index 000000000..f5cba1d84 Binary files /dev/null and b/tests/test32 differ
diff --git a/tests/test32.c b/tests/test32.c
new file mode 100644
index 000000000..ae123de92
--- /dev/null
+++ b/tests/test32.c
@@ -0,0 +1,58 @@
+// Regression test for the x87 precision-control (PC) field of the FPU
+// control word: each operation below runs with PC cleared to 00b and the
+// stored double is printed as raw bits for comparison with ref32.txt.
+//
+// gcc -mfpmath=387 -o test32 test32.c -lm
+#include <inttypes.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+// Bits 8-9 of the x87 control word select the precision (00b = single).
+enum { X87_CW_PC_MASK = 0x0300 };
+
+// Operands for the tests; volatile so they are loaded from memory at run time.
+volatile double test_values[] = {
+    3.0, 2.0, 0.1
+};
+
+// Control word found at startup, and the same word with the PC field cleared.
+uint16_t original_cw, low_cw;
+
+// Runs INSN_LOAD then INSN_OP with low_cw active, stores ST(0) as a double,
+// restores original_cw and prints the result's bit pattern. INSN_LOAD may use
+// %[val] to refer to test_values[VAL_IDX].
+// The bits are extracted via memcpy (not a pointer cast) to avoid a
+// strict-aliasing violation, and printed with PRIx64 to match uint64_t.
+// NOTE(review): fstl does not pop, so each expansion leaves one value on the
+// x87 stack; harmless for three calls, confirm before adding many more.
+#define TEST_X87(INSN_LOAD, INSN_OP, VAL_IDX)                         \
+    do {                                                              \
+        volatile double result;                                       \
+        volatile double* val_ptr = &test_values[VAL_IDX];             \
+        double result_copy;                                           \
+        uint64_t result_bits;                                         \
+        __asm__ volatile(                                             \
+            "fldcw %[low]\n\t" INSN_LOAD "\n\t" INSN_OP "\n\t"        \
+            "fstl %[res]\n\t"                                         \
+            "fldcw %[orig]\n\t"                                       \
+            : [res] "=m"(result)                                      \
+            : [low] "m"(low_cw),                                      \
+            [orig] "m"(original_cw),                                  \
+            [val] "m"(*val_ptr)                                       \
+            : "st", "st(1)");                                         \
+        result_copy = result;                                         \
+        memcpy(&result_bits, &result_copy, sizeof result_bits);       \
+        printf("%-16s: %016" PRIx64 "\n", #INSN_OP, result_bits);     \
+    } while (0)
+
+int main(void)
+{
+    __asm__ volatile("fstcw %0" : "=m"(original_cw));
+    low_cw = (uint16_t)(original_cw & ~X87_CW_PC_MASK);
+
+    TEST_X87("fld1; fldl %[val]", "fdivrp", 0); // 1.0 / 3.0
+    TEST_X87("fldl %[val]", "fsqrt", 1);        // sqrt(2.0)
+    TEST_X87("fldl %[val]; fldl %[val]", "faddp", 2); // 0.1 + 0.1
+    return 0;
+}