[INTERP] Fixes and improvements to SSE/AVX packed shift opcodes

This commit is contained in:
ptitSeb 2025-04-28 11:36:50 +02:00
parent ebaea69204
commit 5a4021d72b
3 changed files with 40 additions and 84 deletions

View File

@ -1971,14 +1971,12 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
nextop = F8;
GETEM(0);
GETGM;
if(EM->q>31) {
for(int i=0; i<2; ++i)
GM->sd[i] = (GM->sd[i]<0)?-1:0;
} else {
if(EM->q>31)
tmp8u = 31;
else
tmp8u = EM->ub[0];
for(int i=0; i<2; ++i)
GM->sd[i] >>= tmp8u;
}
for(int i=0; i<2; ++i)
GM->sd[i] >>= tmp8u;
break;
case 0xE3: /* PAVGW Gm, Em */
nextop = F8;
@ -2085,7 +2083,7 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
else {
tmp8u = EM->ub[0];
for(int i=0; i<4; ++i)
GM->sw[i] <<= tmp8u;
GM->uw[i] <<= tmp8u;
}
break;
case 0xF2: /* PSLLD Gm, Em */
@ -2097,7 +2095,7 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
else {
tmp8u = EM->ub[0];
for(int i=0; i<2; ++i)
GM->sd[i] <<= tmp8u;
GM->ud[i] <<= tmp8u;
}
break;
case 0xF3: /* PSLLQ Gm, Em */

View File

@ -1649,17 +1649,9 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
case 3: /* PSRLDQ Ex, Ib */
tmp8u = F8;
if(tmp8u>15)
{EX->q[0] = EX->q[1] = 0;}
else if (tmp8u!=0) {
tmp8u*=8;
if (tmp8u < 64) {
EX->q[0] = (EX->q[0] >> tmp8u) | (EX->q[1] << (64 - tmp8u));
EX->q[1] = (EX->q[1] >> tmp8u);
} else {
EX->q[0] = EX->q[1] >> (tmp8u - 64);
EX->q[1] = 0;
}
}
EX->u128 = 0;
else if (tmp8u)
EX->u128 >>= (tmp8u<<3);
break;
case 6: /* PSLLQ Ex, Ib */
tmp8u = F8;
@ -1671,17 +1663,9 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
case 7: /* PSLLDQ Ex, Ib */
tmp8u = F8;
if(tmp8u>15)
{EX->q[0] = EX->q[1] = 0;}
else if (tmp8u!=0) {
tmp8u*=8;
if (tmp8u < 64) {
EX->q[1] = (EX->q[1] << tmp8u) | (EX->q[0] >> (64 - tmp8u));
EX->q[0] = (EX->q[0] << tmp8u);
} else {
EX->q[1] = EX->q[0] << (tmp8u - 64);
EX->q[0] = 0;
}
}
EX->u128 = 0;
else if (tmp8u)
EX->u128 <<= (tmp8u<<3);
break;
default:
return 0;

View File

@ -977,29 +977,17 @@ uintptr_t RunAVX_660F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
case 3: /* VPSRLDQ Vx, Ex, Ib */
tmp8u = F8;
if(tmp8u>15) VX->u128 = 0;
else if (tmp8u!=0) {
u8=tmp8u*8;
if (u8 < 64) {
VX->q[0] = (EX->q[0] >> u8) | (EX->q[1] << (64 - u8));
VX->q[1] = (EX->q[1] >> u8);
} else {
VX->q[0] = EX->q[1] >> (u8 - 64);
VX->q[1] = 0;
}
} else VX->u128 = EX->u128;
else if (tmp8u)
VX->u128 = EX->u128 >> (tmp8u<<3);
else
VX->u128 = EX->u128;
if(vex.l) {
GETEY;
if(tmp8u>15) VY->u128 = 0;
else if (tmp8u!=0) {
u8=tmp8u*8;
if (u8 < 64) {
VY->q[0] = (EY->q[0] >> u8) | (EY->q[1] << (64 - u8));
VY->q[1] = (EY->q[1] >> u8);
} else {
VY->q[0] = EY->q[1] >> (u8 - 64);
VY->q[1] = 0;
}
} else VY->u128 = EY->u128;
else if (tmp8u)
VY->u128 = EY->u128 >> (tmp8u<<3);
else
VY->u128 = EY->u128;
}
break;
case 6: /* VPSLLQ Vx, Ex, Ib */
@ -1017,30 +1005,16 @@ uintptr_t RunAVX_660F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
case 7: /* VPSLLDQ Vx, Ex, Ib */
tmp8u = F8;
if(tmp8u>15) VX->u128 = 0;
else if (tmp8u!=0) {
u8=tmp8u<<3;
if (u8 < 64) {
VX->q[1] = (EX->q[1] << u8) | (EX->q[0] >> (64 - u8));
VX->q[0] = (EX->q[0] << u8);
} else {
VX->q[1] = EX->q[0] << (u8 - 64);
VX->q[0] = 0;
}
} else
else if (tmp8u!=0)
VX->u128 = EX->u128 << (tmp8u<<3);
else
VX->u128 = EX->u128;
if(vex.l) {
GETEY;
if(tmp8u>15) VY->u128 = 0;
else if (tmp8u!=0) {
u8=tmp8u<<3;
if (u8 < 64) {
VY->q[1] = (EY->q[1] << u8) | (EY->q[0] >> (64 - u8));
VY->q[0] = (EY->q[0] << u8);
} else {
VY->q[1] = EY->q[0] << (u8 - 64);
VY->q[0] = 0;
}
} else
else if (tmp8u)
VY->u128 = EY->u128 << (tmp8u<<3);
else
VY->u128 = EY->u128;
}
break;
@ -1343,7 +1317,7 @@ uintptr_t RunAVX_660F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
else
{tmp8u=tmp64u; for (int i=0; i<8; ++i) GX->uw[i] = VX->uw[i] >> tmp8u;}
if(vex.l) {
GETEY; GETVY;
GETVY;
if(tmp64u>15) GY->u128 = 0;
else
{tmp8u=tmp64u; for (int i=0; i<8; ++i) GY->uw[i] = VY->uw[i] >> tmp8u;}
@ -1360,7 +1334,7 @@ uintptr_t RunAVX_660F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
else
{tmp8u=tmp64u; for (int i=0; i<4; ++i) GX->ud[i] = VX->ud[i] >> tmp8u;}
if(vex.l) {
GETEY; GETVY;
GETVY;
if(tmp64u>31) GY->u128 = 0;
else
{tmp8u=tmp64u; for (int i=0; i<4; ++i) GY->ud[i] = VY->ud[i] >> tmp8u;}
@ -1377,7 +1351,7 @@ uintptr_t RunAVX_660F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
else
{tmp8u=tmp64u; for (int i=0; i<2; ++i) GX->q[i] = VX->q[i] >> tmp8u;}
if(vex.l) {
GETEY; GETVY;
GETVY;
if(tmp64u>63) GY->u128 = 0;
else
{tmp8u=tmp64u; for (int i=0; i<2; ++i) GY->q[i] = VY->q[i] >> tmp8u;}
@ -1614,7 +1588,7 @@ uintptr_t RunAVX_660F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
for (int i=0; i<8; ++i)
GX->sw[i] = VX->sw[i] >> tmp8u;
if(vex.l) {
GETEY; GETVY;
GETVY;
for (int i=0; i<8; ++i)
GY->sw[i] = VY->sw[i] >> tmp8u;
} else
@ -1880,11 +1854,11 @@ uintptr_t RunAVX_660F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
else
{tmp8u=EX->ub[0]; for (int i=0; i<8; ++i) GX->uw[i] = VX->uw[i]<<tmp8u;}
if(vex.l) {
GETEY; GETVY;
if(EY->q[0]>15)
GETVY;
if(EX->q[0]>15)
GY->u128 = 0;
else
{tmp8u=EY->ub[0]; for (int i=0; i<8; ++i) GY->uw[i] = VY->uw[i]<<tmp8u;}
{for (int i=0; i<8; ++i) GY->uw[i] = VY->uw[i]<<tmp8u;}
} else
GY->u128 = 0;
break;
@ -1897,11 +1871,11 @@ uintptr_t RunAVX_660F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
else
{tmp8u=EX->ub[0]; for (int i=0; i<4; ++i) GX->ud[i] = VX->ud[i]<<tmp8u;}
if(vex.l) {
GETEY; GETVY;
if(EY->q[0]>31)
GETVY;
if(EX->q[0]>31)
GY->u128 = 0;
else
{tmp8u=EY->ub[0]; for (int i=0; i<4; ++i) GY->ud[i] = VY->ud[i]<<tmp8u;}
{for (int i=0; i<4; ++i) GY->ud[i] = VY->ud[i]<<tmp8u;}
} else
GY->u128 = 0;
break;
@ -1914,11 +1888,11 @@ uintptr_t RunAVX_660F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
else
{tmp8u=EX->ub[0]; for (int i=0; i<2; ++i) GX->q[i] = VX->q[i]<<tmp8u;}
if(vex.l) {
GETEY; GETVY;
if(EY->q[0]>63)
GETVY;
if(EX->q[0]>63)
GY->u128 = 0;
else
{tmp8u=EY->ub[0]; for (int i=0; i<2; ++i) GY->q[i] = VY->q[i]<<tmp8u;}
{for (int i=0; i<2; ++i) GY->q[i] = VY->q[i]<<tmp8u;}
} else
GY->u128 = 0;
break;