mirror of
https://github.com/ptitSeb/box64.git
synced 2025-05-07 15:36:41 +08:00
[ARM64_DYNAREC] Fixed some rarely hit edge cases for the (V)PMULHRSW opcode (and improved tests)
This commit is contained in:
parent
b6b069cf8d
commit
e4da025dc0
@ -2166,6 +2166,10 @@ int convert_bitmask(uint64_t bitmask);
|
||||
// Emits the RHADD_vector encoding with leading arguments (1, 1, 0b01) and operands Vm, Vn, Vd.
// NOTE(review): by its name this is presumably the unsigned rounding halving add on 16-bit lanes
// of a full Q register -- confirm against the RHADD_vector definition.
#define URHADDQ_16(Vd, Vn, Vm) EMIT(RHADD_vector(1, 1, 0b01, Vm, Vn, Vd))
|
||||
// Emits the RHADD_vector encoding with leading arguments (1, 1, 0b10) and operands Vm, Vn, Vd.
// NOTE(review): by its name this is presumably the unsigned rounding halving add on 32-bit lanes
// of a full Q register -- confirm against the RHADD_vector definition.
#define URHADDQ_32(Vd, Vn, Vm) EMIT(RHADD_vector(1, 1, 0b10, Vm, Vn, Vd))
|
||||
|
||||
//SRSHR/URSHR
|
||||
// Builds the raw 32-bit opcode word for the vector SRSHR/URSHR (shift right by immediate) form.
//   fixed bits : 0b011110 at bit 23 and 0b001001 at bit 10
//   Q          : bit 30
//   U          : bit 29
//   immh, immb : bits 22..19 and bits 18..16
//   Rn, Rd     : bits 9..5 and bits 4..0
// Arguments are OR-ed in unmasked, so callers must pass values that fit their field.
#define RSHR(Q, U, immh, immb, Rn, Rd) (0b011110<<23 | 0b001001<<10 | (Q)<<30 | (U)<<29 | (immh)<<19 | (immb)<<16 | (Rn)<<5 | (Rd))
|
||||
// Emits RSHR with Q=1, U=0 for Vn -> Vd, shifting right by `shift`.
// The immediate is passed as (32 - shift): its bits 4..3 go into the low two bits of immh
// (which always has 0b0100 set) and its bits 2..0 go into immb, so immh:immb == 64 - shift
// whenever 32 - shift fits in 5 bits.
// NOTE(review): this presumably restricts `shift` to 1..32 (32-bit lanes) -- confirm with callers.
#define SRSHRQ_32(Vd, Vn, shift) EMIT(RSHR(1, 0, 0b0100 | (((32-(shift))>>3)&0b11), (32-(shift))&0b111, Vn, Vd))
|
||||
|
||||
// QRDMULH Signed saturating (Rounding) Doubling Multiply returning High half
|
||||
// Builds the raw 32-bit opcode word for the vector SQDMULH/SQRDMULH three-register form.
//   fixed bits : 0b01110 at bit 24, bit 21 set, and 0b101101 at bit 10
//   Q          : bit 30
//   U          : bit 29
//   size       : bits 23..22
//   Rm, Rn, Rd : bits 20..16, bits 9..5 and bits 4..0
// Arguments are OR-ed in unmasked, so callers must pass values that fit their field.
#define QDMULH_vector(Q, U, size, Rm, Rn, Rd) (0b01110<<24 | 1<<21 | 0b101101<<10 | (Q)<<30 | (U)<<29 | (size)<<22 | (Rm)<<16 | (Rn)<<5 | (Rd))
|
||||
// Emits QDMULH_vector with Q=0, U=1 and size=0b00 for operands Vm, Vn -> Vd.
// NOTE(review): the Arm ARM appears to list size=0b00 as reserved for SQRDMULH (vector);
// confirm that an 8-bit variant is actually encodable before relying on this macro.
#define SQRDMULH_8(Vd, Vn, Vm) EMIT(QDMULH_vector(0, 1, 0b00, Vm, Vn, Vd))
|
||||
|
@ -1587,6 +1587,28 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
|
||||
snprintf(buff, sizeof(buff), "%cRHADD V%d.%s, V%d.%s, V%d.%s", a.U?'U':'S', Rd, Vd, Rn, Vd, Rm, Vd);
|
||||
return buff;
|
||||
}
|
||||
//S/URSHR
|
||||
if(isMask(opcode, "0QU011110iiiiiii001001nnnnnddddd", &a)) {
|
||||
int shft = 0;
|
||||
int sz = 0;
|
||||
const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "2D"};
|
||||
if(imm&0b1000000) {
|
||||
sz = 3;
|
||||
shft = imm&0b111111;
|
||||
} else if(imm&0b100000) {
|
||||
sz = 2;
|
||||
shft = imm&0b1111;
|
||||
} else if(imm&0b10000) {
|
||||
sz = 1;
|
||||
shft = imm&0b111;
|
||||
} else if(imm&0b1000) {
|
||||
sz = 0;
|
||||
shft = imm&0b111;
|
||||
}
|
||||
const char* Vd = Y[(sz<<1) | a.Q];
|
||||
snprintf(buff, sizeof(buff), "%cRSHR V%d.%s, V%d.%s, #%d", a.U?'U':'S', Rd, Vd, Rn, Vd, shft);
|
||||
return buff;
|
||||
}
|
||||
//SQ(R)DMULH
|
||||
if(isMask(opcode, "0QU01110ff1mmmmm101101nnnnnddddd", &a)) {
|
||||
const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "???"};
|
||||
@ -1762,6 +1784,26 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
|
||||
snprintf(buff, sizeof(buff), "PMULL%s V%d.%s, V%d.%s, V%d.%s", a.Q?"2":"", Rd, Vd, Rn, Vn, Rm, Vn);
|
||||
return buff;
|
||||
}
|
||||
// [S/U]MULL
|
||||
if(isMask(opcode, "0QU01110ff1mmmmm110000nnnnnddddd", &a)) {
|
||||
const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "??"};
|
||||
const char* Z[] = {"8H", "4S", "2D", "??"};
|
||||
int sz = sf;
|
||||
const char* Vn = Y[(sz<<1)|a.Q];
|
||||
const char* Vd = Z[sz];
|
||||
snprintf(buff, sizeof(buff), "%cMULL%s V%d.%s, V%d.%s, V%d.%s", a.U?'U':'S', a.Q?"2":"", Rd, Vd, Rn, Vn, Rm, Vn);
|
||||
return buff;
|
||||
}
|
||||
//XTN(2)
|
||||
if(isMask(opcode, "0Q001110ff100001001010nnnnnddddd", &a)) {
|
||||
const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "??"};
|
||||
const char* Z[] = {"8H", "4S", "2D", "??"};
|
||||
int sz = sf;
|
||||
const char* Vd = Y[(sz<<1)|a.Q];
|
||||
const char* Vn = Z[sz];
|
||||
snprintf(buff, sizeof(buff), "XTN%s V%d.%s, V%d.%s", a.Q?"2":"", Rd, Vd, Rn, Vn);
|
||||
return buff;
|
||||
}
|
||||
|
||||
// DMB
|
||||
if(isMask(opcode, "11010101000000110011nnnn10111111", &a)) {
|
||||
|
@ -677,7 +677,10 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
|
||||
nextop = F8;
|
||||
GETGM(q0);
|
||||
GETEM(q1, 0);
|
||||
SQRDMULH_16(q0, q0, q1);
|
||||
v0 = fpu_get_scratch(dyn, ninst);
|
||||
VSMULL_16(v0, q0, q1);
|
||||
SRSHRQ_32(v0, v0, 15);
|
||||
XTN_16(q0, v0);
|
||||
break;
|
||||
case 0x1C:
|
||||
INST_NAME("PABSB Gm,Em");
|
||||
|
@ -454,7 +454,14 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
|
||||
nextop = F8;
|
||||
GETGX(q0, 1);
|
||||
GETEX(q1, 0, 0);
|
||||
SQRDMULHQ_16(q0, q0, q1);
|
||||
v0 = fpu_get_scratch(dyn, ninst);
|
||||
v1 = fpu_get_scratch(dyn, ninst);
|
||||
VSMULL_16(v0, q0, q1);
|
||||
VSMULL2_16(v1, q0, q1);
|
||||
SRSHRQ_32(v0, v0, 15);
|
||||
SRSHRQ_32(v1, v1, 15);
|
||||
XTN_16(q0, v0);
|
||||
XTN2_16(q0, v1);
|
||||
break;
|
||||
|
||||
case 0x10:
|
||||
|
@ -219,10 +219,17 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
|
||||
case 0x0B:
|
||||
INST_NAME("VPMULHRSW Gx,Vx, Ex");
|
||||
nextop = F8;
|
||||
q0 = fpu_get_scratch(dyn, ninst);
|
||||
q1 = fpu_get_scratch(dyn, ninst);
|
||||
for(int l=0; l<1+vex.l; ++l) {
|
||||
if(!l) { GETGX_empty_VXEX(v0, v2, v1, 0); } else { GETGY_empty_VYEY(v0, v2, v1); }
|
||||
SQRDMULHQ_16(v0, v2, v1);
|
||||
}
|
||||
VSMULL_16(q0, v1, v2);
|
||||
VSMULL2_16(q1, v1, v2);
|
||||
SRSHRQ_32(q0, q0, 15);
|
||||
SRSHRQ_32(q1, q1, 15);
|
||||
XTN_16(v0, q0);
|
||||
XTN2_16(v0, q1);
|
||||
}
|
||||
if(!vex.l) YMM0(gd);
|
||||
break;
|
||||
case 0x0C:
|
||||
|
@ -4,14 +4,14 @@ ucomiss 1.000000, inf => 0x203
|
||||
ucomiss inf, 1.000000 => 0x202
|
||||
ucomiss 1.000000, -inf => 0x202
|
||||
ucomiss -inf, 1.000000 => 0x203
|
||||
ucomiss 1.000000, nan => 0x203
|
||||
ucomiss nan, 1.000000 => 0x203
|
||||
ucomiss 1.000000, nan => 0x247
|
||||
ucomiss nan, 1.000000 => 0x247
|
||||
ucomiss 1.000000, 1.000000 => 0x242
|
||||
ucomiss 1.000000, 1.000000 => 0x242
|
||||
ucomiss inf, inf => 0x242
|
||||
ucomiss -inf, inf => 0x203
|
||||
ucomiss inf, -inf => 0x202
|
||||
ucomiss nan, nan => 0x203
|
||||
ucomiss nan, nan => 0x247
|
||||
minss 1, 2 => 1
|
||||
minss 2, 1 => 1
|
||||
minss -inf, 2 => -inf
|
||||
@ -161,7 +161,9 @@ phsubw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff
|
||||
psignb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0x0 0x80 0x7f 0x0 0x1 0xfe 0xfd 0x0 0xfe 0x84 0x8e 0x52 0xa5 0x0 0xc0 0x32
|
||||
psignw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x1 0x8000 0x8001 0x0 0x1 0xfffe 0xfffd 0x7fff
|
||||
psignd(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0xffffffff 0x80000000 0x7fffffff 0x0
|
||||
pmulhrsw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 ) = 0x0 0x8000 0x7ffe 0x0 0x0 0x0 0x0 0x7ffe
|
||||
pmulhrsw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x1 0x8001 0xffff 0x0 0x0 0xfffe 0x0 0x7ffe
|
||||
pmulhrsw(0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x8000 0x7ffe 0x0 0x0 0x0 0x6200 0x0 0x7ffe
|
||||
pblendvps(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe , 0x1 0x80000000 0x80000005 0xfffe ) = 0xffffffff 0x80000000 0x5 0x0
|
||||
ptestz(0x80000000ffffffff 0x7fffffff , 0x8000000000000001 0xfffffffe00000005 ) = 0
|
||||
ptestc(0x80000000ffffffff 0x7fffffff , 0x8000000000000001 0xfffffffe00000005 ) = 0
|
||||
|
@ -161,7 +161,12 @@ phsubw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff
|
||||
psignb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0x0 0x80 0x7f 0x0 0x1 0xfe 0xfd 0x0 0xfe 0x84 0x8e 0x52 0xa5 0x0 0xc0 0x32
|
||||
psignw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x1 0x8000 0x8001 0x0 0x1 0xfffe 0xfffd 0x7fff
|
||||
psignd(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0xffffffff 0x80000000 0x7fffffff 0x0
|
||||
pmulhrsw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 ) = 0x0 0x8000 0x7ffe 0x0 0x0 0x0 0x0 0x7ffe
|
||||
pmulhrsw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x1 0x8001 0xffff 0x0 0x0 0xfffe 0x0 0x7ffe
|
||||
pmulhrsw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x7ffe 0x20 0x7f00 0x1 0x8000 0xa050 0xfff1 0x8008 ) = 0xffff 0xffe0 0x7eff 0x0 0xffff 0xffff 0x0 0x7ff7
|
||||
pmulhrsw(0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x8000 0x7ffe 0x0 0x0 0x0 0x6200 0x0 0x7ffe
|
||||
pmulhrsw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x7ffe 0x20 0x7f00 0x1 0x8000 0xa050 0xfff1 0x8008 ) = 0xffff 0xffe0 0x7eff 0x0 0xffff 0xffff 0x0 0x7ff7
|
||||
pmulhrsw(0x7ffe 0x20 0x7f00 0x1 0x8000 0xa050 0xfff1 0x8008 , 0x7ffe 0x20 0x7f00 0x1 0x8000 0xa050 0xfff1 0x8008 ) = 0x7ffc 0x0 0x7e02 0x0 0x8000 0x4788 0x0 0x7ff0
|
||||
pblendvps(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe , 0x1 0x80000000 0x80000005 0xfffe ) = 0xffffffff 0x80000000 0x5 0x0
|
||||
ptestz(0x80000000ffffffff 0x7fffffff , 0x8000000000000001 0xfffffffe00000005 ) = 0
|
||||
ptestc(0x80000000ffffffff 0x7fffffff , 0x8000000000000001 0xfffffffe00000005 ) = 0
|
||||
|
BIN
tests/test17
BIN
tests/test17
Binary file not shown.
@ -421,7 +421,9 @@ printf(N " %g, %g => %g\n", b, a, *(float*)&r);
|
||||
GO2(sign, 8, psignb, a128_8, b128_8)
|
||||
GO2(sign, 16, psignw, a128_16, b128_16)
|
||||
GO2(sign, 32, psignd, a128_32, b128_32)
|
||||
GO2(mulhrs, 16, pmulhrsw, a128_16, a128_16)
|
||||
GO2(mulhrs, 16, pmulhrsw, a128_16, b128_16)
|
||||
GO2(mulhrs, 16, pmulhrsw, b128_16, b128_16)
|
||||
GO3PS(blendv, 32, a128_32, b128_32, c128_32)
|
||||
GO2i(testz, a128_32, b128_32)
|
||||
GO2i(testc, a128_32, b128_32)
|
||||
|
BIN
tests/test30
BIN
tests/test30
Binary file not shown.
@ -518,7 +518,12 @@ printf(N " %g, %g => %g\n", b, a, *(float*)&r);
|
||||
GO2(sign, 8, psignb, a128_8, b128_8)
|
||||
GO2(sign, 16, psignw, a128_16, b128_16)
|
||||
GO2(sign, 32, psignd, a128_32, b128_32)
|
||||
GO2(mulhrs, 16, pmulhrsw, a128_16, a128_16)
|
||||
GO2(mulhrs, 16, pmulhrsw, a128_16, b128_16)
|
||||
GO2(mulhrs, 16, pmulhrsw, a128_16, c128_16)
|
||||
GO2(mulhrs, 16, pmulhrsw, b128_16, b128_16)
|
||||
GO2(mulhrs, 16, pmulhrsw, a128_16, c128_16)
|
||||
GO2(mulhrs, 16, pmulhrsw, c128_16, c128_16)
|
||||
GO3PS(blendv, 32, a128_32, b128_32, c128_32)
|
||||
GO2i(testz, a128_32, b128_32)
|
||||
GO2i(testc, a128_32, b128_32)
|
||||
|
Loading…
x
Reference in New Issue
Block a user