diff --git a/include/fpu.h b/include/fpu.h index 8969308e2..e27f88896 100644 --- a/include/fpu.h +++ b/include/fpu.h @@ -126,8 +126,12 @@ union alignas(8) MMX_reg { } uw; static_assert(sizeof(uw) == 8, "MMX packing error"); - uint16_t uwa[4]; /* for PSHUFW */ + uint8_t uba[8]; + uint16_t uwa[4]; + uint32_t uda[2]; + static_assert(sizeof(uba) == 8, "MMX packing error"); static_assert(sizeof(uwa) == 8, "MMX packing error"); + static_assert(sizeof(uda) == 8, "MMX packing error"); static_assert(offsetof(uw_t,w0) == 0, "MMX packing error"); static_assert(offsetof(uw_t,w1) == 2, "MMX packing error"); static_assert(offsetof(uw_t,w2) == 4, "MMX packing error"); diff --git a/src/cpu/core_normal/prefix_0f.h b/src/cpu/core_normal/prefix_0f.h index cd3e8fc62..5f2aea1ea 100644 --- a/src/cpu/core_normal/prefix_0f.h +++ b/src/cpu/core_normal/prefix_0f.h @@ -421,6 +421,8 @@ }; } break; +#endif +#if CPU_CORE >= CPU_ARCHTYPE_386 CASE_0F_B(0x19) CASE_0F_B(0x1A) CASE_0F_B(0x1B) CASE_0F_B(0x1C) CASE_0F_B(0x1D) CASE_0F_B(0x1E) CASE_0F_B(0x1F) /* hinting NOPs */ if (CPU_ArchitectureType= CPU_ARCHTYPE_386 - CASE_0F_B(0xc4) /* SSE instruction group */ - if (CPU_ArchitectureType= 0xc0) { imm = Fetchb(); - SSE_PINSRW(*reg_mmx[reg],cpu_regs.regs[rm & 7].dword[0],imm); + MMX_PINSRW(*reg_mmx[reg],cpu_regs.regs[rm & 7].dword[0],imm); } else { GetEAa; src = LoadMd(eaa); imm = Fetchb(); - SSE_PINSRW(*reg_mmx[reg],src,imm); + MMX_PINSRW(*reg_mmx[reg],src,imm); + } + break; + case MP_66: /* 66 0F C4 PINSRW reg, r/m, imm8 */ + if (CPU_ArchitectureType= 0xc0) { + imm = Fetchb(); + SSE_PINSRW(fpu.xmmreg[reg],cpu_regs.regs[rm & 7].dword[0],imm); + } else { + GetEAa; + src = LoadMd(eaa); + imm = Fetchb(); + SSE_PINSRW(fpu.xmmreg[reg],src,imm); } break; default: @@ -1799,7 +1813,6 @@ #if CPU_CORE >= CPU_ARCHTYPE_386 CASE_0F_B(0xc5) /* SSE instruction group */ - if (CPU_ArchitectureType= 0xc0) { imm = Fetchb(); - SSE_PEXTRW(cpu_regs.regs[reg].dword[0],*reg_mmx[rm & 7],imm); + MMX_PEXTRW(cpu_regs.regs[reg].dword[0],*reg_mmx[rm & 7],imm); + } else { + goto illegal_opcode; + } + break; + case MP_66: /* 66 0F C5 PEXTRW reg, r/m, imm8 */ + if (CPU_ArchitectureType= 0xc0) { + imm = Fetchb(); + SSE_PEXTRW(cpu_regs.regs[reg].dword[0],fpu.xmmreg[rm & 7],imm); } else { goto illegal_opcode; } diff --git a/src/cpu/core_normal/support.h b/src/cpu/core_normal/support.h index cd26d86f4..c80fee902 100644 --- a/src/cpu/core_normal/support.h +++ b/src/cpu/core_normal/support.h @@ -527,17 +527,22 @@ static INLINE void SSE_CMPSS(XMM_Reg &d,const XMM_Reg &s,const uint8_t cf) { //// -static INLINE void SSE_PINSRW(MMX_reg &d,const uint32_t &s,const uint8_t i) { - const uint8_t shf = (i&3u)*16u; - const uint64_t mask = (uint64_t)0xFFFF << (uint64_t)shf; - d.q = (d.q & (~mask)) | (((uint64_t)(s&0xFFFFu)) << (uint64_t)shf); +static INLINE void MMX_PINSRW(MMX_reg &d,const uint32_t &s,const uint8_t i) { + d.uwa[i&3u] = (uint16_t)s; +} + +static INLINE void SSE_PINSRW(XMM_Reg &d,const uint32_t &s,const uint8_t i) { + d.u16[i&7u] = (uint16_t)s; } //// -static INLINE void SSE_PEXTRW(uint32_t &d,const MMX_reg &s,const uint8_t i) { - const uint8_t shf = (i&3u)*16u; - d = (s.q >> (uint64_t)shf) & (uint64_t)0xFFFFu; +static INLINE void MMX_PEXTRW(uint32_t &d,const MMX_reg &s,const uint8_t i) { + d = s.uwa[i&3u]; +} + +static INLINE void SSE_PEXTRW(uint32_t &d,const XMM_Reg &s,const uint8_t i) { + d = s.u16[i&7u]; } ////