[ARM64_DYNAREC] Fixed some (rarely hit) edge cases for (V)PMULHRSW opcode (and improved tests)

This commit is contained in:
ptitSeb 2025-04-25 20:03:58 +02:00
parent b6b069cf8d
commit e4da025dc0
11 changed files with 84 additions and 7 deletions

View File

@ -2166,6 +2166,10 @@ int convert_bitmask(uint64_t bitmask);
#define URHADDQ_16(Vd, Vn, Vm) EMIT(RHADD_vector(1, 1, 0b01, Vm, Vn, Vd))
#define URHADDQ_32(Vd, Vn, Vm) EMIT(RHADD_vector(1, 1, 0b10, Vm, Vn, Vd))
// SRSHR/URSHR (vector, immediate)
// RSHR builds the raw instruction word from its fields: Q at bit 30, U at bit 29,
// immh starting at bit 19, immb starting at bit 16, Rn at bit 5 and Rd at bit 0.
// U=0 is what the signed helper below passes.
#define RSHR(Q, U, immh, immb, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b011110<<23 | (immh)<<19 | (immb)<<16 | 1<<13 | 0<<12 | 1<<10 | (Rn)<<5 | (Rd))
// Emits the signed form with Q=1 on 32-bit elements.
// The shift is not stored directly: immh:immb = 0b0100000 | (32-shift), i.e. 64-shift,
// with the upper two bits of (32-shift) going into immh and the lower three into immb.
// NOTE(review): assumes 1 <= shift <= 32 so that (32-shift) fits in 5 bits - confirm at call sites.
#define SRSHRQ_32(Vd, Vn, shift) EMIT(RSHR(1, 0, 0b0100 | (((32-(shift))>>3)&0b11), (32-(shift))&0b111, Vn, Vd))
// SQ(R)DMULH Signed saturating (Rounding) Doubling Multiply returning High half
// QDMULH_vector builds the raw instruction word: Q at bit 30, U at bit 29, size at bit 22,
// Rm at bit 16, Rn at bit 5 and Rd at bit 0.
#define QDMULH_vector(Q, U, size, Rm, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b10110<<11 | 1<<10 | (Rn)<<5 | (Rd))
// Rounding variant helper: passes Q=0, U=1 and size=0b00.
// NOTE(review): presumably U=1 is what selects the rounding (SQRDMULH) form - confirm against the Arm reference.
#define SQRDMULH_8(Vd, Vn, Vm) EMIT(QDMULH_vector(0, 1, 0b00, Vm, Vn, Vd))

View File

@ -1587,6 +1587,28 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
snprintf(buff, sizeof(buff), "%cRHADD V%d.%s, V%d.%s, V%d.%s", a.U?'U':'S', Rd, Vd, Rn, Vd, Rm, Vd);
return buff;
}
//S/URSHR
if(isMask(opcode, "0QU011110iiiiiii001001nnnnnddddd", &a)) {
    // imm is the 7-bit immh:immb field. Its highest set bit marks the element size,
    // and the bits below that marker hold (esize - shift), exactly as the SRSHRQ_32
    // emitter builds it (0b0100000 | (32-shift)). So the printed shift must be
    // esize minus those low bits, not the low bits themselves.
    int shft = 0;
    int sz = 0;
    const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "2D"};
    if(imm&0b1000000) {
        // 64-bit elements: 6 payload bits
        sz = 3;
        shft = 64 - (imm&0b111111);
    } else if(imm&0b100000) {
        // 32-bit elements: 5 payload bits
        sz = 2;
        shft = 32 - (imm&0b11111);
    } else if(imm&0b10000) {
        // 16-bit elements: 4 payload bits
        sz = 1;
        shft = 16 - (imm&0b1111);
    } else if(imm&0b1000) {
        // 8-bit elements: 3 payload bits
        sz = 0;
        shft = 8 - (imm&0b111);
    }
    // When no size marker is set (immh == 0), sz and shft stay at 0 as before.
    const char* Vd = Y[(sz<<1) | a.Q];
    snprintf(buff, sizeof(buff), "%cRSHR V%d.%s, V%d.%s, #%d", a.U?'U':'S', Rd, Vd, Rn, Vd, shft);
    return buff;
}
//SQ(R)DMULH
if(isMask(opcode, "0QU01110ff1mmmmm101101nnnnnddddd", &a)) {
const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "???"};
@ -1762,6 +1784,26 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
snprintf(buff, sizeof(buff), "PMULL%s V%d.%s, V%d.%s, V%d.%s", a.Q?"2":"", Rd, Vd, Rn, Vn, Rm, Vn);
return buff;
}
// [S/U]MULL(2): source lanes come from the narrow table, destination lanes from the wide one
if(isMask(opcode, "0QU01110ff1mmmmm110000nnnnnddddd", &a)) {
    const char* narrow[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "??"};
    const char* wide[] = {"8H", "4S", "2D", "??"};
    int size = sf;
    // Both sources share the same arrangement, picked by size and Q
    const char* src = narrow[(size<<1)|a.Q];
    const char sign = a.U?'U':'S';
    const char* upper = a.Q?"2":"";
    snprintf(buff, sizeof(buff), "%cMULL%s V%d.%s, V%d.%s, V%d.%s", sign, upper, Rd, wide[size], Rn, src, Rm, src);
    return buff;
}
//XTN(2): destination lanes come from the narrow table, source lanes from the wide one
if(isMask(opcode, "0Q001110ff100001001010nnnnnddddd", &a)) {
    const char* narrow[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "??"};
    const char* wide[] = {"8H", "4S", "2D", "??"};
    int size = sf;
    // Q selects both the "2" suffix and the 128-bit arrangement name of the destination
    const char* upper = a.Q?"2":"";
    snprintf(buff, sizeof(buff), "XTN%s V%d.%s, V%d.%s", upper, Rd, narrow[(size<<1)|a.Q], Rn, wide[size]);
    return buff;
}
// DMB
if(isMask(opcode, "11010101000000110011nnnn10111111", &a)) {

View File

@ -677,7 +677,10 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
nextop = F8;
GETGM(q0);
GETEM(q1, 0);
SQRDMULH_16(q0, q0, q1);
v0 = fpu_get_scratch(dyn, ninst);
VSMULL_16(v0, q0, q1);
SRSHRQ_32(v0, v0, 15);
XTN_16(q0, v0);
break;
case 0x1C:
INST_NAME("PABSB Gm,Em");

View File

@ -454,7 +454,14 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
nextop = F8;
GETGX(q0, 1);
GETEX(q1, 0, 0);
SQRDMULHQ_16(q0, q0, q1);
v0 = fpu_get_scratch(dyn, ninst);
v1 = fpu_get_scratch(dyn, ninst);
VSMULL_16(v0, q0, q1);
VSMULL2_16(v1, q0, q1);
SRSHRQ_32(v0, v0, 15);
SRSHRQ_32(v1, v1, 15);
XTN_16(q0, v0);
XTN2_16(q0, v1);
break;
case 0x10:

View File

@ -219,10 +219,17 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
case 0x0B:
INST_NAME("VPMULHRSW Gx,Vx, Ex");
nextop = F8;
q0 = fpu_get_scratch(dyn, ninst);
q1 = fpu_get_scratch(dyn, ninst);
for(int l=0; l<1+vex.l; ++l) {
if(!l) { GETGX_empty_VXEX(v0, v2, v1, 0); } else { GETGY_empty_VYEY(v0, v2, v1); }
SQRDMULHQ_16(v0, v2, v1);
}
VSMULL_16(q0, v1, v2);
VSMULL2_16(q1, v1, v2);
SRSHRQ_32(q0, q0, 15);
SRSHRQ_32(q1, q1, 15);
XTN_16(v0, q0);
XTN2_16(v0, q1);
}
if(!vex.l) YMM0(gd);
break;
case 0x0C:

View File

@ -4,14 +4,14 @@ ucomiss 1.000000, inf => 0x203
ucomiss inf, 1.000000 => 0x202
ucomiss 1.000000, -inf => 0x202
ucomiss -inf, 1.000000 => 0x203
ucomiss 1.000000, nan => 0x203
ucomiss nan, 1.000000 => 0x203
ucomiss 1.000000, nan => 0x247
ucomiss nan, 1.000000 => 0x247
ucomiss 1.000000, 1.000000 => 0x242
ucomiss 1.000000, 1.000000 => 0x242
ucomiss inf, inf => 0x242
ucomiss -inf, inf => 0x203
ucomiss inf, -inf => 0x202
ucomiss nan, nan => 0x203
ucomiss nan, nan => 0x247
minss 1, 2 => 1
minss 2, 1 => 1
minss -inf, 2 => -inf
@ -161,7 +161,9 @@ phsubw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff
psignb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0x0 0x80 0x7f 0x0 0x1 0xfe 0xfd 0x0 0xfe 0x84 0x8e 0x52 0xa5 0x0 0xc0 0x32
psignw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x1 0x8000 0x8001 0x0 0x1 0xfffe 0xfffd 0x7fff
psignd(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0xffffffff 0x80000000 0x7fffffff 0x0
pmulhrsw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 ) = 0x0 0x8000 0x7ffe 0x0 0x0 0x0 0x0 0x7ffe
pmulhrsw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x1 0x8001 0xffff 0x0 0x0 0xfffe 0x0 0x7ffe
pmulhrsw(0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x8000 0x7ffe 0x0 0x0 0x0 0x6200 0x0 0x7ffe
pblendvps(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe , 0x1 0x80000000 0x80000005 0xfffe ) = 0xffffffff 0x80000000 0x5 0x0
ptestz(0x80000000ffffffff 0x7fffffff , 0x8000000000000001 0xfffffffe00000005 ) = 0
ptestc(0x80000000ffffffff 0x7fffffff , 0x8000000000000001 0xfffffffe00000005 ) = 0

View File

@ -161,7 +161,12 @@ phsubw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff
psignb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0x0 0x80 0x7f 0x0 0x1 0xfe 0xfd 0x0 0xfe 0x84 0x8e 0x52 0xa5 0x0 0xc0 0x32
psignw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x1 0x8000 0x8001 0x0 0x1 0xfffe 0xfffd 0x7fff
psignd(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0xffffffff 0x80000000 0x7fffffff 0x0
pmulhrsw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 ) = 0x0 0x8000 0x7ffe 0x0 0x0 0x0 0x0 0x7ffe
pmulhrsw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x1 0x8001 0xffff 0x0 0x0 0xfffe 0x0 0x7ffe
pmulhrsw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x7ffe 0x20 0x7f00 0x1 0x8000 0xa050 0xfff1 0x8008 ) = 0xffff 0xffe0 0x7eff 0x0 0xffff 0xffff 0x0 0x7ff7
pmulhrsw(0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x8000 0x7ffe 0x0 0x0 0x0 0x6200 0x0 0x7ffe
pmulhrsw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x7ffe 0x20 0x7f00 0x1 0x8000 0xa050 0xfff1 0x8008 ) = 0xffff 0xffe0 0x7eff 0x0 0xffff 0xffff 0x0 0x7ff7
pmulhrsw(0x7ffe 0x20 0x7f00 0x1 0x8000 0xa050 0xfff1 0x8008 , 0x7ffe 0x20 0x7f00 0x1 0x8000 0xa050 0xfff1 0x8008 ) = 0x7ffc 0x0 0x7e02 0x0 0x8000 0x4788 0x0 0x7ff0
pblendvps(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe , 0x1 0x80000000 0x80000005 0xfffe ) = 0xffffffff 0x80000000 0x5 0x0
ptestz(0x80000000ffffffff 0x7fffffff , 0x8000000000000001 0xfffffffe00000005 ) = 0
ptestc(0x80000000ffffffff 0x7fffffff , 0x8000000000000001 0xfffffffe00000005 ) = 0

Binary file not shown.

View File

@ -421,7 +421,9 @@ printf(N " %g, %g => %g\n", b, a, *(float*)&r);
GO2(sign, 8, psignb, a128_8, b128_8)
GO2(sign, 16, psignw, a128_16, b128_16)
GO2(sign, 32, psignd, a128_32, b128_32)
GO2(mulhrs, 16, pmulhrsw, a128_16, a128_16)
GO2(mulhrs, 16, pmulhrsw, a128_16, b128_16)
GO2(mulhrs, 16, pmulhrsw, b128_16, b128_16)
GO3PS(blendv, 32, a128_32, b128_32, c128_32)
GO2i(testz, a128_32, b128_32)
GO2i(testc, a128_32, b128_32)

Binary file not shown.

View File

@ -518,7 +518,12 @@ printf(N " %g, %g => %g\n", b, a, *(float*)&r);
GO2(sign, 8, psignb, a128_8, b128_8)
GO2(sign, 16, psignw, a128_16, b128_16)
GO2(sign, 32, psignd, a128_32, b128_32)
GO2(mulhrs, 16, pmulhrsw, a128_16, a128_16)
GO2(mulhrs, 16, pmulhrsw, a128_16, b128_16)
GO2(mulhrs, 16, pmulhrsw, a128_16, c128_16)
GO2(mulhrs, 16, pmulhrsw, b128_16, b128_16)
GO2(mulhrs, 16, pmulhrsw, a128_16, c128_16)
GO2(mulhrs, 16, pmulhrsw, c128_16, c128_16)
GO3PS(blendv, 32, a128_32, b128_32, c128_32)
GO2i(testz, a128_32, b128_32)
GO2i(testc, a128_32, b128_32)