Improved x87 FIST(T)(P) opcode ([ARM64_DYNAREC] too)

This commit is contained in:
ptitSeb 2023-04-28 12:32:17 +02:00
parent dbaee7c49c
commit 2ebde976db
6 changed files with 1294 additions and 213 deletions

View File

@ -178,6 +178,7 @@ uintptr_t dynarec64_DF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
#else
MRS_fpsr(x5);
BFCw(x5, FPSR_IOC, 1); // reset IOC bit
BFCw(x5, FPSR_QC, 1); // reset QC bit
MSR_fpsr(x5);
if(ST_IS_F(0)) {
VFCVTZSs(s0, v1);
@ -185,13 +186,16 @@ uintptr_t dynarec64_DF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
VFCVTZSd(s0, v1);
SQXTN_S_D(s0, s0);
}
SQXTN_H_S(s0, s0);
VST16(s0, wback, fixedaddress);
VMOVSto(x3, s0, 0);
MRS_fpsr(x5); // get back FPSR to check the IOC bit
TBZ_MARK3(x5, FPSR_IOC);
MOV32w(x5, 0x8000);
STH(x5, wback, fixedaddress);
TBNZ_MARK2(x5, FPSR_IOC);
SXTHw(x5, x3); // check if 16bits value is fine
SUBw_REG(x5, x5, x3);
CBZw_MARK3(x5);
MARK2;
MOV32w(x3, 0x8000);
MARK3;
STH(x3, wback, fixedaddress);
#endif
x87_do_pop(dyn, ninst, x3);
break;
@ -212,6 +216,7 @@ uintptr_t dynarec64_DF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
#else
MRS_fpsr(x5);
BFCw(x5, FPSR_IOC, 1); // reset IOC bit
BFCw(x5, FPSR_QC, 1); // reset QC bit
MSR_fpsr(x5);
if(ST_IS_F(0)) {
FRINTXS(s0, v1);
@ -221,13 +226,16 @@ uintptr_t dynarec64_DF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
VFCVTZSd(s0, s0);
SQXTN_S_D(s0, s0);
}
SQXTN_H_S(s0, s0);
VST16(s0, wback, fixedaddress);
VMOVSto(x3, s0, 0);
MRS_fpsr(x5); // get back FPSR to check the IOC bit
TBZ_MARK3(x5, FPSR_IOC);
MOV32w(x5, 0x8000);
STH(x5, wback, fixedaddress);
TBNZ_MARK2(x5, FPSR_IOC);
SXTHw(x5, x3); // check if 16bits value is fine
SUBw_REG(x5, x5, x3);
CBZw_MARK3(x5);
MARK2;
MOV32w(x3, 0x8000);
MARK3;
STH(x3, wback, fixedaddress);
#endif
x87_restoreround(dyn, ninst, u8);
break;
@ -257,13 +265,16 @@ uintptr_t dynarec64_DF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
VFCVTZSd(s0, s0);
SQXTN_S_D(s0, s0);
}
SQXTN_H_S(s0, s0);
VST16(s0, wback, fixedaddress);
VMOVSto(x3, s0, 0);
MRS_fpsr(x5); // get back FPSR to check the IOC bit
TBZ_MARK3(x5, FPSR_IOC);
MOV32w(x5, 0x8000);
STH(x5, wback, fixedaddress);
TBNZ_MARK2(x5, FPSR_IOC);
SXTHw(x5, x3); // check if 16bits value is fine
SUBw_REG(x5, x5, x3);
CBZw_MARK3(x5);
MARK2;
MOV32w(x3, 0x8000);
MARK3;
STH(x3, wback, fixedaddress);
#endif
x87_do_pop(dyn, ninst, x3);
x87_restoreround(dyn, ninst, u8);

View File

@ -125,7 +125,14 @@ uintptr_t RunDD(x64emu_t *emu, rex_t rex, uintptr_t addr)
break;
case 1: /* FISTTP ED qword */
GETE8(0);
*(int64_t*)ED = ST0.d;
if(STll(0).sref==ST(0).sq)
ED->sq[0] = STll(0).sq;
else {
if(isgreater(ST0.d, (double)0x7fffffffffffffffLL) || isless(ST0.d, -(double)0x8000000000000000LL) || !isfinite(ST0.d))
*(uint64_t*)ED = 0x8000000000000000LL;
else
*(int64_t*)ED = ST0.d;
}
fpu_do_pop(emu);
break;
case 2: /* FST double */

View File

@ -175,7 +175,7 @@ uintptr_t RunDF(x64emu_t *emu, rex_t rex, uintptr_t addr)
if(STll(0).sref==ST(0).sq)
ED->sq[0] = STll(0).sq;
else {
if(isgreater(ST0.d, (double)(int64_t)0x7fffffffffffffffLL) || isless(ST0.d, (double)(int64_t)0x8000000000000000LL) || !isfinite(ST0.d))
if(isgreater(ST0.d, (double)0x7fffffffffffffffLL) || isless(ST0.d, -(double)0x8000000000000000LL) || !isfinite(ST0.d))
ED->sq[0] = 0x8000000000000000LL;
else
ED->sq[0] = fpu_round(emu, ST0.d);

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@ -5,6 +5,8 @@
#include <stdint.h>
#include <math.h>
// Build with `gcc -march=core2 -mfpmath=387 -O2 test22.c -o test22`
#if defined(__x86_64__)
uint64_t _fucomip_(double a, double b)
{
@ -30,7 +32,7 @@ uint64_t _fucompp_(double a, double b)
:"=a" (ret):"m"(a), "m"(b):"cc");
return ret;
}
uint64_t _fistpw_(double a)
uint64_t _fistpw_(double a, uint8_t rd)
{
uint16_t ret;
uint16_t t1, t2;
@ -38,15 +40,31 @@ uint64_t _fistpw_(double a)
"fldl %1\n"
"fstcw %2\n"
"mov %2, %%ax\n"
"mov $0x0c, %%ah\n"
"mov %4, %%ah\n"
"mov %%ax, %3\n"
"fldcw %3\n"
"fistp %0\n"
"fldcw %2\n"
:"=m" (ret):"m"(a), "m"(t1), "m"(t2):"cc");
:"=m" (ret):"m"(a), "m"(t1), "m"(t2), "m"(rd):"cc");
return ret;
}
uint64_t _fistpl_(double a)
/*
 * Execute the x87 FISTTP instruction (store integer with truncation, then
 * pop) on `a` with a 16-bit memory destination and return the stored word,
 * zero-extended to 64 bits.
 *
 * `rd` replaces the high byte of the FPU control word while the store runs;
 * the low byte of the current control word is kept. The original control
 * word is reloaded before returning.
 */
uint64_t _fisttpw_(double a, uint8_t rd)
{
    uint16_t ret;
    uint16_t saved_cw, test_cw;
    /*
     * saved_cw/test_cw are written by the asm, so they are outputs, and %ax
     * is used as scratch, so it must be listed as clobbered. The __asm__
     * spelling remains available when compiling in strict ISO C modes.
     */
    __asm__ __volatile__ (
        "fldl %[val]\n"
        "fstcw %[saved]\n"
        "mov %[saved], %%ax\n"
        "mov %[rd], %%ah\n"
        "mov %%ax, %[test]\n"
        "fldcw %[test]\n"
        "fisttp %[ret]\n"
        "fldcw %[saved]\n"
        : [ret] "=m" (ret), [saved] "=m" (saved_cw), [test] "=m" (test_cw)
        : [val] "m" (a), [rd] "m" (rd)
        : "ax", "cc");
    return ret;
}
uint64_t _fistpl_(double a, uint8_t rd)
{
uint32_t ret;
uint16_t t1, t2;
@ -54,15 +72,31 @@ uint64_t _fistpl_(double a)
"fldl %1\n"
"fstcw %2\n"
"mov %2, %%ax\n"
"mov $0x0c, %%ah\n"
"mov %4, %%ah\n"
"mov %%ax, %3\n"
"fldcw %3\n"
"fistpl %0\n"
"fldcw %2\n"
:"=m" (ret):"m"(a), "m"(t1), "m"(t2):"cc");
:"=m" (ret):"m"(a), "m"(t1), "m"(t2), "m"(rd):"cc");
return ret;
}
uint64_t _fistpq_(double a)
/*
 * Execute the x87 FISTTP instruction (store integer with truncation, then
 * pop) on `a` with a 32-bit memory destination and return the stored value,
 * zero-extended to 64 bits.
 *
 * `rd` replaces the high byte of the FPU control word while the store runs;
 * the low byte of the current control word is kept. The original control
 * word is reloaded before returning.
 */
uint64_t _fisttpl_(double a, uint8_t rd)
{
    uint32_t ret;
    uint16_t saved_cw, test_cw;
    /*
     * saved_cw/test_cw are written by the asm, so they are outputs, and %ax
     * is used as scratch, so it must be listed as clobbered. The __asm__
     * spelling remains available when compiling in strict ISO C modes.
     */
    __asm__ __volatile__ (
        "fldl %[val]\n"
        "fstcw %[saved]\n"
        "mov %[saved], %%ax\n"
        "mov %[rd], %%ah\n"
        "mov %%ax, %[test]\n"
        "fldcw %[test]\n"
        "fisttpl %[ret]\n"
        "fldcw %[saved]\n"
        : [ret] "=m" (ret), [saved] "=m" (saved_cw), [test] "=m" (test_cw)
        : [val] "m" (a), [rd] "m" (rd)
        : "ax", "cc");
    return ret;
}
uint64_t _fistpq_(double a, uint8_t rd)
{
uint64_t ret;
uint16_t t1, t2;
@ -70,15 +104,15 @@ uint64_t _fistpq_(double a)
"fldl %1\n"
"fstcw %2\n"
"mov %2, %%ax\n"
"mov $0x0c, %%ah\n"
"mov %4, %%ah\n"
"mov %%ax, %3\n"
"fldcw %3\n"
"fistpq %0\n"
"fldcw %2\n"
:"=m" (ret):"m"(a), "m"(t1), "m"(t2):"cc");
:"=m" (ret):"m"(a), "m"(t1), "m"(t2), "m"(rd):"cc");
return ret;
}
uint64_t _frndint_(double a)
uint64_t _fisttpq_(double a, uint8_t rd)
{
uint64_t ret;
uint16_t t1, t2;
@ -86,13 +120,29 @@ uint64_t _frndint_(double a)
"fldl %1\n"
"fstcw %2\n"
"mov %2, %%ax\n"
"mov $0x0c, %%ah\n"
"mov %4, %%ah\n"
"mov %%ax, %3\n"
"fldcw %3\n"
"fisttpq %0\n"
"fldcw %2\n"
:"=m" (ret):"m"(a), "m"(t1), "m"(t2), "m"(rd):"cc");
return ret;
}
uint64_t _frndint_(double a, uint8_t rd)
{
uint64_t ret;
uint16_t t1, t2;
asm volatile (
"fldl %1\n"
"fstcw %2\n"
"mov %2, %%ax\n"
"mov %4, %%ah\n"
"mov %%ax, %3\n"
"fldcw %3\n"
"frndint\n"
"fstpl %0\n"
"fldcw %2\n"
:"=m" (ret):"m"(a), "m"(t1), "m"(t2):"cc");
:"=m" (ret):"m"(a), "m"(t1), "m"(t2), "m"(rd):"cc");
return ret;
}
#endif
@ -107,6 +157,8 @@ int main(int argc, const char** argv)
{0x0, 0x4082c00000000000LL},
{0x8000000000000000LL, 0x4082c00000000000LL},
{0x8000000000000000LL, 0x0},
{0x40dfffc000000000LL, 0x40e0002000000000LL}, // 0x7fff and 0x8001 as double
{0xc0e0002000000000LL, 0xc0dfffc000000000LL}, // -0x8001 and -0x7fff as double
{0x8000000000000000LL, 0x3ff0000000000000LL},
{0x3ff0000000000000LL, 0x3fe89d9000000000LL},
{0x3ff0000000000000LL, 0x7ff0000000000000LL},
@ -133,14 +185,23 @@ int main(int argc, const char** argv)
za = (flags>>(8+6))&1?'Z':'-';
pa = (flags>>(8+2))&1?'P':'-';
printf("%c%c%c\n", za, pa, ca);
printf("FRNDINT 0x%llx => 0x%llx\n", *(uint64_t*)&a, _frndint_(a));
printf("FRNDINT 0x%llx => 0x%llx\n", *(uint64_t*)&b, _frndint_(b));
printf("FISTP 0x%llx => word: %x\n", *(uint64_t*)&a, _fistpw_(a));
printf("FISTP 0x%llx => word: %x\n", *(uint64_t*)&b, _fistpw_(b));
printf("FISTP 0x%llx => long: %x\n", *(uint64_t*)&a, _fistpl_(a));
printf("FISTP 0x%llx => long: %x\n", *(uint64_t*)&b, _fistpl_(b));
printf("FISTP 0x%llx => quad: %llx\n", *(uint64_t*)&a, _fistpq_(a));
printf("FISTP 0x%llx => quad: %llx\n", *(uint64_t*)&b, _fistpq_(b));
for(int rd=0; rd<3; ++rd) {
printf("Rounding %d\n", rd);
printf(" FRNDINT 0x%llx => 0x%llx\n", *(uint64_t*)&a, _frndint_(a, rd<<2));
printf(" FRNDINT 0x%llx => 0x%llx\n", *(uint64_t*)&b, _frndint_(b, rd<<2));
printf(" FISTP 0x%llx => word: %x\n", *(uint64_t*)&a, _fistpw_(a, rd<<2));
printf(" FISTP 0x%llx => word: %x\n", *(uint64_t*)&b, _fistpw_(b, rd<<2));
printf(" FISTP 0x%llx => long: %x\n", *(uint64_t*)&a, _fistpl_(a, rd<<2));
printf(" FISTP 0x%llx => long: %x\n", *(uint64_t*)&b, _fistpl_(b, rd<<2));
printf(" FISTP 0x%llx => quad: %llx\n", *(uint64_t*)&a, _fistpq_(a, rd<<2));
printf(" FISTP 0x%llx => quad: %llx\n", *(uint64_t*)&b, _fistpq_(b, rd<<2));
printf(" FISTTP 0x%llx => word: %x\n", *(uint64_t*)&a, _fisttpw_(a, rd<<2));
printf(" FISTTP 0x%llx => word: %x\n", *(uint64_t*)&b, _fisttpw_(b, rd<<2));
printf(" FISTTP 0x%llx => long: %x\n", *(uint64_t*)&a, _fisttpl_(a, rd<<2));
printf(" FISTTP 0x%llx => long: %x\n", *(uint64_t*)&b, _fisttpl_(b, rd<<2));
printf(" FISTTP 0x%llx => quad: %llx\n", *(uint64_t*)&a, _fisttpq_(a, rd<<2));
printf(" FISTTP 0x%llx => quad: %llx\n", *(uint64_t*)&b, _fisttpq_(b, rd<<2));
}
}
printf("\nDone\n");
}