Correct MMX and SSE PINSRW and PEXTRW

This commit is contained in:
Jonathan Campbell 2024-11-29 14:13:32 -08:00
parent ed6ac88ce6
commit 7fc19cdcc6
3 changed files with 45 additions and 14 deletions

View File

@ -126,8 +126,12 @@ union alignas(8) MMX_reg {
} uw;
static_assert(sizeof(uw) == 8, "MMX packing error");
uint16_t uwa[4]; /* for PSHUFW */
uint8_t uba[8];
uint16_t uwa[4];
uint32_t uda[2];
static_assert(sizeof(uba) == 8, "MMX packing error");
static_assert(sizeof(uwa) == 8, "MMX packing error");
static_assert(sizeof(uda) == 8, "MMX packing error");
static_assert(offsetof(uw_t,w0) == 0, "MMX packing error");
static_assert(offsetof(uw_t,w1) == 2, "MMX packing error");
static_assert(offsetof(uw_t,w2) == 4, "MMX packing error");

View File

@ -421,6 +421,8 @@
};
}
break;
#endif
#if CPU_CORE >= CPU_ARCHTYPE_386
CASE_0F_B(0x19) CASE_0F_B(0x1A) CASE_0F_B(0x1B) CASE_0F_B(0x1C) CASE_0F_B(0x1D) CASE_0F_B(0x1E) CASE_0F_B(0x1F) /* hinting NOPs */
if (CPU_ArchitectureType<CPU_ARCHTYPE_PPROSLOW) goto illegal_opcode;
break;
@ -1770,8 +1772,7 @@
#endif
#if CPU_CORE >= CPU_ARCHTYPE_386
CASE_0F_B(0xc4) /* SSE instruction group */
if (CPU_ArchitectureType<CPU_ARCHTYPE_PENTIUMIII || !CPU_SSE()) goto illegal_opcode;
CASE_0F_B(0xc4) /* MMX/SSE instruction group */
{
GetRM;
uint8_t imm;
@ -1780,14 +1781,27 @@
switch (last_prefix) {
case MP_NONE: /* 0F C4 PINSRW reg, r/m, imm8 */
if (CPU_ArchitectureType<CPU_ARCHTYPE_PENTIUMIII) goto illegal_opcode;
if (rm >= 0xc0) {
imm = Fetchb();
SSE_PINSRW(*reg_mmx[reg],cpu_regs.regs[rm & 7].dword[0],imm);
MMX_PINSRW(*reg_mmx[reg],cpu_regs.regs[rm & 7].dword[0],imm);
} else {
GetEAa;
src = LoadMd(eaa);
imm = Fetchb();
SSE_PINSRW(*reg_mmx[reg],src,imm);
MMX_PINSRW(*reg_mmx[reg],src,imm);
}
break;
case MP_66: /* 66 0F C4 PINSRW reg, r/m, imm8 */
if (CPU_ArchitectureType<CPU_ARCHTYPE_PENTIUMIII || !CPU_SSE()) goto illegal_opcode;
if (rm >= 0xc0) {
imm = Fetchb();
SSE_PINSRW(fpu.xmmreg[reg],cpu_regs.regs[rm & 7].dword[0],imm);
} else {
GetEAa;
src = LoadMd(eaa);
imm = Fetchb();
SSE_PINSRW(fpu.xmmreg[reg],src,imm);
}
break;
default:
@ -1799,7 +1813,6 @@
#if CPU_CORE >= CPU_ARCHTYPE_386
CASE_0F_B(0xc5) /* SSE instruction group */
if (CPU_ArchitectureType<CPU_ARCHTYPE_PENTIUMIII || !CPU_SSE()) goto illegal_opcode;
{
GetRM;
uint8_t imm;
@ -1809,7 +1822,16 @@
case MP_NONE: /* 0F C5 PEXTRW reg, r/m, imm8 */
if (rm >= 0xc0) {
imm = Fetchb();
SSE_PEXTRW(cpu_regs.regs[reg].dword[0],*reg_mmx[rm & 7],imm);
MMX_PEXTRW(cpu_regs.regs[reg].dword[0],*reg_mmx[rm & 7],imm);
} else {
goto illegal_opcode;
}
break;
case MP_66: /* 66 0F C5 PEXTRW reg, r/m, imm8 */
if (CPU_ArchitectureType<CPU_ARCHTYPE_PENTIUMIII || !CPU_SSE()) goto illegal_opcode;
if (rm >= 0xc0) {
imm = Fetchb();
SSE_PEXTRW(cpu_regs.regs[reg].dword[0],fpu.xmmreg[rm & 7],imm);
} else {
goto illegal_opcode;
}

View File

@ -527,17 +527,22 @@ static INLINE void SSE_CMPSS(XMM_Reg &d,const XMM_Reg &s,const uint8_t cf) {
////
static INLINE void SSE_PINSRW(MMX_reg &d,const uint32_t &s,const uint8_t i) {
const uint8_t shf = (i&3u)*16u;
const uint64_t mask = (uint64_t)0xFFFF << (uint64_t)shf;
d.q = (d.q & (~mask)) | (((uint64_t)(s&0xFFFFu)) << (uint64_t)shf);
/* PINSRW (MMX form): store the low 16 bits of s into one 16-bit lane of d.
 *
 * d - destination MMX register; only the selected lane is written.
 * s - source value; bits above the low 16 are discarded.
 * i - immediate operand; only its low two bits are used to pick lane 0..3. */
static INLINE void MMX_PINSRW(MMX_reg &d,const uint32_t &s,const uint8_t i) {
	const unsigned int lane = i & 3u;	/* 4 word lanes in a 64-bit MMX register */
	const uint16_t word = (uint16_t)s;
	d.uwa[lane] = word;
}
/* PINSRW (XMM form): store the low 16 bits of s into one element of d.u16.
 *
 * d - destination XMM register; only the selected element is written.
 * s - source value; truncated to 16 bits before the store.
 * i - immediate operand; only its low three bits are used to pick element 0..7. */
static INLINE void SSE_PINSRW(XMM_Reg &d,const uint32_t &s,const uint8_t i) {
	const unsigned int lane = i & 7u;	/* index masked to 0..7 */
	const uint16_t word = (uint16_t)s;
	d.u16[lane] = word;
}
////
static INLINE void SSE_PEXTRW(uint32_t &d,const MMX_reg &s,const uint8_t i) {
const uint8_t shf = (i&3u)*16u;
d = (s.q >> (uint64_t)shf) & (uint64_t)0xFFFFu;
/* PEXTRW (MMX form): read one 16-bit lane of s into d.
 *
 * d - destination; the whole 32-bit value is overwritten with the selected word.
 * s - source MMX register (not modified).
 * i - immediate operand; only its low two bits are used to pick lane 0..3. */
static INLINE void MMX_PEXTRW(uint32_t &d,const MMX_reg &s,const uint8_t i) {
	const unsigned int lane = i & 3u;	/* 4 word lanes in a 64-bit MMX register */
	const uint32_t word = s.uwa[lane];
	d = word;
}
/* PEXTRW (XMM form): read one element of s.u16 into d.
 *
 * d - destination; the whole 32-bit value is overwritten with the selected element.
 * s - source XMM register (not modified).
 * i - immediate operand; only its low three bits are used to pick element 0..7.
 *
 * NOTE(review): presumably u16 holds unsigned 16-bit lanes, so the upper half
 * of d ends up zero - confirm against the XMM_Reg definition. */
static INLINE void SSE_PEXTRW(uint32_t &d,const XMM_Reg &s,const uint8_t i) {
	const unsigned int lane = i & 7u;	/* index masked to 0..7 */
	const uint32_t word = s.u16[lane];
	d = word;
}
////