mirror of
https://github.com/ptitSeb/box86.git
synced 2025-05-08 21:08:57 +08:00
Added better handling of SSE float/double border case ([DYNAREC] too, with BOX86_DYNAREC_FASTNAN)
This commit is contained in:
parent
13689e7eac
commit
926ff0f4f8
@ -132,6 +132,11 @@ Force the use of Double for x87 emulation
|
||||
* 0 : Try to use float when possible for x87 emulation (faster)
|
||||
* 1 : Only use Double for x87 emulation (slower, may be needed for some specific games, like Crysis)
|
||||
|
||||
#### BOX86_DYNAREC_FASTNAN
|
||||
Enable/Disable generation of -NAN
|
||||
* 0 : Generate -NAN like on x86 (Default.)
|
||||
* 1 : Don't do anything special with NAN, to go as fast as possible (was default before this option existed)
|
||||
|
||||
#### BOX86_LIBGL
|
||||
* libXXXX : Sets the name for libGL (defaults to libGL.so.1).
|
||||
* /PATH/TO/libGLXXX : Sets the name and path for libGL
|
||||
|
@ -957,6 +957,7 @@ Op is 20-27
|
||||
#define VCEQQ_0_8(Dd, Dm) EMIT(VCEQ_0_gen(((Dd)>>4)&1, 0, (Dd)&15, 0, 1, ((Dm)>>4)&1, (Dm)&15))
|
||||
#define VCEQQ_0_16(Dd, Dm) EMIT(VCEQ_0_gen(((Dd)>>4)&1, 1, (Dd)&15, 0, 1, ((Dm)>>4)&1, (Dm)&15))
|
||||
#define VCEQQ_0_32(Dd, Dm) EMIT(VCEQ_0_gen(((Dd)>>4)&1, 2, (Dd)&15, 0, 1, ((Dm)>>4)&1, (Dm)&15))
|
||||
#define VCEQQ_0_F32(Dd, Dm) EMIT(VCEQ_0_gen(((Dd)>>4)&1, 2, (Dd)&15, 1, 1, ((Dm)>>4)&1, (Dm)&15))
|
||||
|
||||
#define VCGT_I_gen(U, D, size, Vn, Vd, N, Q, M, Vm) (0b1111<<28 | 0b001<<25 | (U)<<24 | 0<<23 | (D)<<22 | (size)<<20 | (Vn)<<16 | (Vd)<<12 | 0b0011<<8 | (N)<<7 | (Q)<<6 | (M)<<5 | (Vm))
|
||||
#define VCGT_U8(Dd, Dn, Dm) EMIT(VCGT_I_gen(1, ((Dd)>>4)&1, 0, (Dn)&15, (Dd)&15, ((Dn)>>4)&1, 0, ((Dm)>>4)&1, (Dm)&15))
|
||||
|
@ -627,7 +627,24 @@ uintptr_t dynarec0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
|
||||
VMULQ_F32(v1, v2, v2); // v1 = x1²
|
||||
VRSQRTSQ_F32(v1, v1, q0); // v1 = (3-d*X1²)/2
|
||||
VMULQ_F32(v2, v2, v1); // v2 = X1*(3-d*X1²)/2 = X2
|
||||
VMULQ_F32(v0, v2, q0); // v0 = X2*d ~ SQRT(d)
|
||||
if(!box86_dynarec_fastnan) {
|
||||
// need to preserve v0, in case it's == q0
|
||||
VMULQ_F32(v1, v2, q0); // v1 = X2*d ~ SQRT(d)
|
||||
if(v0<dyn->n.fpu_scratch || v0!=q0)
|
||||
q1 = v0; // v0 is a scratch register
|
||||
else
|
||||
q1 = fpu_get_scratch_quad(dyn); // need a new scratch
|
||||
VCEQQ_0_F32(q1, q0); // prepare mask, 111 when == 0.0
|
||||
VMOVQ_H32(v2, 0xff); // prepare +inf
|
||||
VSHRQ_U32(v2, v2, 1); // 0xff000000 -> 0x7f800000 == +inf
|
||||
VCEQQ_32(v2, v2, q0); // v2 mask: 111 when == +inf
|
||||
VORRQ(q1, q1, v2); // q1 mask: 111 when ==0 || == +inf
|
||||
VBICQ(v1, v1, q1); // mask value in result
|
||||
VANDQ(q1, q0, q1); // keep original value that are 0 or +inf
|
||||
VORRQ(v0, v1, q1); // copy over
|
||||
} else {
|
||||
VMULQ_F32(v0, v2, q0); // v0 = X2*d ~ SQRT(d)
|
||||
}
|
||||
break;
|
||||
case 0x52:
|
||||
INST_NAME("RSQRTPS Gx, Ex");
|
||||
|
@ -627,9 +627,28 @@ uintptr_t dynarec660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nins
|
||||
INST_NAME("MULPD Gx, Ex");
|
||||
nextop = F8;
|
||||
GETEX(q0, 0);
|
||||
GETGX(v0, 1);
|
||||
VMUL_F64(v0, v0, q0);
|
||||
VMUL_F64(v0+1, v0+1, q0+1);
|
||||
GETGX(q1, 1);
|
||||
if(!box86_dynarec_fastnan) {
|
||||
VMRS(x14); // get fpscr
|
||||
ORR_IMM8(x3, x14, 0b001, 6); // enable exceptions
|
||||
BIC_IMM8(x3, x3, 0b10011111, 0);
|
||||
VMSR(x3);
|
||||
}
|
||||
VMUL_F64(q1, q1, q0);
|
||||
if(!box86_dynarec_fastnan) {
|
||||
VMRS(x3); // get the FPCSR reg and test FPU execption (invalid operation only)
|
||||
TSTS_IMM8_ROR(x3, 0b00000001, 0);
|
||||
VNEG_F64_cond(cNE, q1, q1);
|
||||
ORR_IMM8(x3, x14, 0b001, 6); // enable exceptions
|
||||
BIC_IMM8(x3, x3, 0b10011111, 0);
|
||||
VMSR(x3);
|
||||
}
|
||||
VMUL_F64(q1+1, q1+1, q0+1);
|
||||
if(!box86_dynarec_fastnan) {
|
||||
VMRS(x3); // get the FPCSR reg and test FPU execption (invalid operation only)
|
||||
TSTS_IMM8_ROR(x3, 0b00000001, 0);
|
||||
VNEG_F64_cond(cNE, q1+1, q1+1);
|
||||
}
|
||||
break;
|
||||
case 0x5A:
|
||||
INST_NAME("CVTPD2PS Gx, Ex");
|
||||
@ -677,9 +696,28 @@ uintptr_t dynarec660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nins
|
||||
INST_NAME("DIVPD Gx, Ex");
|
||||
nextop = F8;
|
||||
GETEX(q0, 0);
|
||||
GETGX(v0, 1);
|
||||
VDIV_F64(v0, v0, q0);
|
||||
VDIV_F64(v0+1, v0+1, q0+1);
|
||||
GETGX(q1, 1);
|
||||
if(!box86_dynarec_fastnan) {
|
||||
VMRS(x14); // get fpscr
|
||||
ORR_IMM8(x3, x14, 0b001, 6); // enable exceptions
|
||||
BIC_IMM8(x3, x3, 0b10011111, 0);
|
||||
VMSR(x3);
|
||||
}
|
||||
VDIV_F64(q1, q1, q0);
|
||||
if(!box86_dynarec_fastnan) {
|
||||
VMRS(x3); // get the FPCSR reg and test FPU execption (invalid operation only)
|
||||
TSTS_IMM8_ROR(x3, 0b00000001, 0);
|
||||
VNEG_F64_cond(cNE, q1, q1);
|
||||
ORR_IMM8(x3, x14, 0b001, 6); // enable exceptions
|
||||
BIC_IMM8(x3, x3, 0b10011111, 0);
|
||||
VMSR(x3);
|
||||
}
|
||||
VDIV_F64(q1+1, q1+1, q0+1);
|
||||
if(!box86_dynarec_fastnan) {
|
||||
VMRS(x3); // get the FPCSR reg and test FPU execption (invalid operation only)
|
||||
TSTS_IMM8_ROR(x3, 0b00000001, 0);
|
||||
VNEG_F64_cond(cNE, q1+1, q1+1);
|
||||
}
|
||||
break;
|
||||
case 0x5F:
|
||||
INST_NAME("MAXPD Gx, Ex");
|
||||
|
@ -163,7 +163,18 @@ uintptr_t dynarecF20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nins
|
||||
gd = (nextop&0x38)>>3;
|
||||
v0 = sse_get_reg(dyn, ninst, x1, gd, 1);
|
||||
GETEX(d0, 0);
|
||||
if(!box86_dynarec_fastnan) {
|
||||
VMRS(x14); // get fpscr
|
||||
ORR_IMM8(x3, x14, 0b001, 6); // enable exceptions
|
||||
BIC_IMM8(x3, x3, 0b10011111, 0);
|
||||
VMSR(x3);
|
||||
}
|
||||
VSQRT_F64(v0, d0);
|
||||
if(!box86_dynarec_fastnan) {
|
||||
VMRS(x3); // get the FPCSR reg and test FPU execption (invalid operation only)
|
||||
TSTS_IMM8_ROR(x3, 0b00000001, 0);
|
||||
VNEG_F64_cond(cNE, v0, v0);
|
||||
}
|
||||
break;
|
||||
|
||||
case 0x58:
|
||||
@ -223,7 +234,18 @@ uintptr_t dynarecF20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nins
|
||||
gd = (nextop&0x38)>>3;
|
||||
v0 = sse_get_reg(dyn, ninst, x1, gd, 1);
|
||||
GETEX(d0, 0);
|
||||
if(!box86_dynarec_fastnan) {
|
||||
VMRS(x14); // get fpscr
|
||||
ORR_IMM8(x3, x14, 0b001, 6); // enable exceptions
|
||||
BIC_IMM8(x3, x3, 0b10011111, 0);
|
||||
VMSR(x3);
|
||||
}
|
||||
VDIV_F64(v0, v0, d0);
|
||||
if(!box86_dynarec_fastnan) {
|
||||
VMRS(x3); // get the FPCSR reg and test FPU execption (invalid operation only)
|
||||
TSTS_IMM8_ROR(x3, 0b00000001, 0);
|
||||
VNEG_F64_cond(cNE, v0, v0);
|
||||
}
|
||||
break;
|
||||
case 0x5F:
|
||||
INST_NAME("MAXSD Gx, Ex");
|
||||
|
@ -449,8 +449,18 @@
|
||||
_0f_0x52: /* RSQRTPS Gx, Ex */
|
||||
nextop = F8;
|
||||
GET_EX;
|
||||
for(int i=0; i<4; ++i)
|
||||
GX.f[i] = 1.0f/sqrtf(EX->f[i]);
|
||||
for(int i=0; i<4; ++i) {
|
||||
if(EX->f[i]==0)
|
||||
GX.f[i] = 1.0f/EX->f[i];
|
||||
else if (EX->f[i]<0)
|
||||
GX.f[i] = NAN;
|
||||
else if (isnan(EX->f[i]))
|
||||
GX.f[i] = EX->f[i];
|
||||
else if (isinf(EX->f[i]))
|
||||
GX.f[i] = 0.0;
|
||||
else
|
||||
GX.f[i] = 1.0f/sqrtf(EX->f[i]);
|
||||
}
|
||||
NEXT;
|
||||
_0f_0x53: /* RCPPS Gx, Ex */
|
||||
nextop = F8;
|
||||
|
@ -53,7 +53,10 @@
|
||||
case 0x51: /* SQRTSD Gx, Ex */
|
||||
nextop = F8;
|
||||
GET_EX;
|
||||
GX.d[0] = sqrt(EX->d[0]);
|
||||
if(EX->d[0]<0.0 )
|
||||
GX.d[0] = -NAN;
|
||||
else
|
||||
GX.d[0] = sqrt(EX->d[0]);
|
||||
break;
|
||||
|
||||
case 0x58: /* ADDSD Gx, Ex */
|
||||
@ -80,13 +83,20 @@
|
||||
case 0x5D: /* MINSD Gx, Ex */
|
||||
nextop = F8;
|
||||
GET_EX;
|
||||
if (isnan(GX.d[0]) || isnan(EX->d[0]) || isless(EX->d[0], GX.d[0]))
|
||||
if (isnan(GX.d[0]) || isnan(EX->d[0]) || (EX->d[0]<GX.d[0]))
|
||||
GX.d[0] = EX->d[0];
|
||||
break;
|
||||
case 0x5E: /* DIVSD Gx, Ex */
|
||||
nextop = F8;
|
||||
GET_EX;
|
||||
#ifndef NOALIGN
|
||||
is_nan = isnan(GX.d[0]) || isnan(EX->d[0]);
|
||||
#endif
|
||||
GX.d[0] /= EX->d[0];
|
||||
#ifndef NOALIGN
|
||||
if(!is_nan && isnan(GX.d[0]))
|
||||
GX.d[0] = -NAN;
|
||||
#endif
|
||||
break;
|
||||
case 0x5F: /* MAXSD Gx, Ex */
|
||||
nextop = F8;
|
||||
|
@ -46,6 +46,9 @@ int Run(x86emu_t *emu, int step)
|
||||
int64_t ll;
|
||||
sse_regs_t *opex, eax1;
|
||||
mmx87_regs_t *opem, eam1;
|
||||
#ifndef NOALIGN
|
||||
int is_nan;
|
||||
#endif
|
||||
|
||||
if(emu->quit)
|
||||
return 0;
|
||||
|
@ -38,6 +38,9 @@ void Run660F(x86emu_t *emu)
|
||||
int32_t tmp32s;
|
||||
sse_regs_t *opex, eax1, *opx2;
|
||||
mmx87_regs_t *opem;
|
||||
#ifndef NOALIGN
|
||||
int is_nan;
|
||||
#endif
|
||||
|
||||
|
||||
opcode = F8;
|
||||
@ -591,8 +594,14 @@ void Run660F(x86emu_t *emu)
|
||||
case 0x51: /* SQRTPD Gx, Ex */
|
||||
nextop = F8;
|
||||
GET_EX;
|
||||
GX.d[0] = sqrt(EX->d[0]);
|
||||
GX.d[1] = sqrt(EX->d[1]);
|
||||
for (int i=0; i<2; ++i) {
|
||||
#ifndef NOALIGN
|
||||
if(EX->d[i]<0.0) // on x86, default nan are negative
|
||||
GX.d[i] = -NAN; // but input NAN are not touched (so sqrt(+nan) -> +nan)
|
||||
else
|
||||
#endif
|
||||
GX.d[i] = sqrt(EX->d[i]);
|
||||
}
|
||||
break;
|
||||
|
||||
case 0x54: /* ANDPD Gx, Ex */
|
||||
@ -628,8 +637,15 @@ void Run660F(x86emu_t *emu)
|
||||
case 0x59: /* MULPD Gx, Ex */
|
||||
nextop = F8;
|
||||
GET_EX;
|
||||
GX.d[0] *= EX->d[0];
|
||||
GX.d[1] *= EX->d[1];
|
||||
for(int i=0; i<2; ++i) {
|
||||
#ifndef NOALIGN
|
||||
// mul generate a -NAN only if doing (+/-)inf * (+/-)0
|
||||
if((isinf(GX.d[i]) && EX->d[i]==0.0) || (isinf(EX->d[i]) && GX.d[i]==0.0))
|
||||
GX.d[i] = -NAN;
|
||||
else
|
||||
#endif
|
||||
GX.d[i] *= EX->d[i];
|
||||
}
|
||||
break;
|
||||
case 0x5A: /* CVTPD2PS Gx, Ex */
|
||||
nextop = F8;
|
||||
@ -685,8 +701,16 @@ void Run660F(x86emu_t *emu)
|
||||
case 0x5E: /* DIVPD Gx, Ex */
|
||||
nextop = F8;
|
||||
GET_EX;
|
||||
GX.d[0] /= EX->d[0];
|
||||
GX.d[1] /= EX->d[1];
|
||||
for (int i=0; i<2; ++i) {
|
||||
#ifndef NOALIGN
|
||||
is_nan = isnan(GX.d[i]) || isnan(EX->d[i]);
|
||||
#endif
|
||||
GX.d[i] /= EX->d[i];
|
||||
#ifndef NOALIGN
|
||||
if(!is_nan && isnan(GX.d[i]))
|
||||
GX.d[i] = -NAN;
|
||||
#endif
|
||||
}
|
||||
break;
|
||||
case 0x5F: /* MAXPD Gx, Ex */
|
||||
nextop = F8;
|
||||
@ -975,11 +999,25 @@ void Run660F(x86emu_t *emu)
|
||||
case 0x7C: /* HADDPD Gx, Ex */
|
||||
nextop = F8;
|
||||
GET_EX;
|
||||
#ifndef NOALIGN
|
||||
is_nan = isnan(GX.d[0]) || isnan(GX.d[1]);
|
||||
#endif
|
||||
GX.d[0] += GX.d[1];
|
||||
#ifndef NOALIGN
|
||||
if(!is_nan && isnan(GX.d[0]))
|
||||
GX.d[0] = -NAN;
|
||||
#endif
|
||||
if(EX==&GX) {
|
||||
GX.d[1] = GX.d[0];
|
||||
} else {
|
||||
#ifndef NOALIGN
|
||||
is_nan = isnan(EX->d[0]) || isnan(EX->d[1]);
|
||||
#endif
|
||||
GX.d[1] = EX->d[0] + EX->d[1];
|
||||
#ifndef NOALIGN
|
||||
if(!is_nan && isnan(GX.d[1]))
|
||||
GX.d[1] = -NAN;
|
||||
#endif
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -17,6 +17,7 @@ extern int box86_dynarec_bigblock;
|
||||
extern int box86_dynarec_strongmem;
|
||||
extern int box86_dynarec_x87double;
|
||||
extern uintptr_t box86_nodynarec_start, box86_nodynarec_end;
|
||||
extern int box86_dynarec_fastnan;
|
||||
#ifdef ARM
|
||||
extern int arm_vfp; // vfp version (3 or 4), with 32 registers is mendatory
|
||||
extern int arm_swap;
|
||||
|
10
src/main.c
10
src/main.c
@ -55,6 +55,7 @@ int box86_dynarec_largest = 0;
|
||||
int box86_dynarec_bigblock = 1;
|
||||
int box86_dynarec_strongmem = 0;
|
||||
int box86_dynarec_x87double = 0;
|
||||
int box86_dynarec_fastnan = 0;
|
||||
uintptr_t box86_nodynarec_start = 0;
|
||||
uintptr_t box86_nodynarec_end = 0;
|
||||
#ifdef ARM
|
||||
@ -340,6 +341,15 @@ void LoadLogEnv()
|
||||
if(box86_dynarec_x87double)
|
||||
printf_log(LOG_INFO, "Dynarec will use only double for x87 emulation\n");
|
||||
}
|
||||
p = getenv("BOX86_DYNAREC_FASTNAN");
|
||||
if(p) {
|
||||
if(strlen(p)==1) {
|
||||
if(p[0]>='0' && p[0]<='1')
|
||||
box86_dynarec_fastnan = p[0]-'0';
|
||||
}
|
||||
if(box86_dynarec_fastnan)
|
||||
printf_log(LOG_INFO, "Dynarec will not try to normalize generated NAN\n");
|
||||
}
|
||||
p = getenv("BOX86_NODYNAREC");
|
||||
if(p) {
|
||||
if (strchr(p,'-')) {
|
||||
|
353
tests/ref17.txt
353
tests/ref17.txt
@ -181,3 +181,356 @@ pmovzxbq(0xffffffffffffffff 0x8000000000000000 ) = 0xff 0xff
|
||||
pmovzxwd(0xffffffff 0x80000000 0x7fffffff 0x0 ) = 0xffff 0xffff 0x0 0x8000
|
||||
pmovzxwq(0xffffffffffffffff 0x8000000000000000 ) = 0xffff 0xffff
|
||||
pmovzxdq(0xffffffffffffffff 0x8000000000000000 ) = 0xffffffff 0xffffffff
|
||||
pminsd(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0xffffffff 0x80000000 0x5 0xfffffffe
|
||||
pmaxsd(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0x1 0x80000000 0x7fffffff 0x0
|
||||
pblendw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 0) = 0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001
|
||||
pblendw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 255) = 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001
|
||||
pblendw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 170) = 0xffff 0x7fff 0x7fff 0xffff 0x1 0x9000 0x3 0x8001
|
||||
pblendw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 2) = 0xffff 0x7fff 0x7fff 0x0 0x1 0x2 0x3 0x8001
|
||||
palignr(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 0) = 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1
|
||||
palignr(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 2) = 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 0xff 0x80
|
||||
palignr(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 7) = 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 0xff 0x80 0x7f 0x0 0x1 0x2 0x3
|
||||
palignr(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 15) = 0x1 0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0
|
||||
palignr(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 16) = 0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32
|
||||
palignr(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 255) = 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0
|
||||
movmskpd(0xffffffffffffffff 0x8000000000000000 ) = 0x3
|
||||
psqrtpd(1 2 ) = 1 1.41421
|
||||
psqrtpd(0 -2 ) = 0 0xfff8000000000000
|
||||
psqrtpd(inf -inf ) = inf 0xfff8000000000000
|
||||
psqrtpd(0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -0
|
||||
andpd(1 2 , 0 -2 ) = 0 2
|
||||
andpd(0 -2 , inf -inf ) = 0 -2
|
||||
andpd(1 2 , 0x7ff8000000000000 -0 ) = 1 0
|
||||
andpd(0 -2 , 0x7ff8000000000000 -0 ) = 0 -0
|
||||
andpd(inf -inf , 0x7ff8000000000000 -0 ) = inf -0
|
||||
andpd(0x7ff8000000000000 -0 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -0
|
||||
andnpd(1 2 , 0 -2 ) = 0 -0
|
||||
andnpd(0 -2 , inf -inf ) = inf 1
|
||||
andnpd(1 2 , 0x7ff8000000000000 -0 ) = 3 -0
|
||||
andnpd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 0
|
||||
andnpd(inf -inf , 0x7ff8000000000000 -0 ) = 1.11254e-308 0
|
||||
andnpd(0x7ff8000000000000 -0 , 0x7ff8000000000000 -0 ) = 0 0
|
||||
orpd(1 2 , 0 -2 ) = 1 -2
|
||||
orpd(0 -2 , inf -inf ) = inf -inf
|
||||
orpd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2
|
||||
orpd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2
|
||||
orpd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf
|
||||
orpd(0x7ff8000000000000 -0 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -0
|
||||
xorpd(1 2 , 0 -2 ) = 1 -0
|
||||
xorpd(0 -2 , inf -inf ) = inf 1
|
||||
xorpd(1 2 , 0x7ff8000000000000 -0 ) = 3 -2
|
||||
xorpd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2
|
||||
xorpd(inf -inf , 0x7ff8000000000000 -0 ) = 1.11254e-308 inf
|
||||
xorpd(0x7ff8000000000000 -0 , 0x7ff8000000000000 -0 ) = 0 0
|
||||
addpd(1 2 , 0 -2 ) = 1 0
|
||||
addpd(0 -2 , inf -inf ) = inf -inf
|
||||
addpd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2
|
||||
addpd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2
|
||||
addpd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf
|
||||
addpd(0x7ff8000000000000 -0 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -0
|
||||
mulpd(1 2 , 0 -2 ) = 0 -4
|
||||
mulpd(0 -2 , inf -inf ) = 0xfff8000000000000 inf
|
||||
mulpd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -0
|
||||
mulpd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 0
|
||||
mulpd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 0xfff8000000000000
|
||||
mulpd(0x7ff8000000000000 -0 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 0
|
||||
subpd(1 2 , 0 -2 ) = 1 4
|
||||
subpd(0 -2 , inf -inf ) = -inf inf
|
||||
subpd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2
|
||||
subpd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2
|
||||
subpd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf
|
||||
subpd(0x7ff8000000000000 -0 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 0
|
||||
minpd(1 2 , 0 -2 ) = 0 -2
|
||||
minpd(0 -2 , inf -inf ) = 0 -inf
|
||||
minpd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -0
|
||||
minpd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2
|
||||
minpd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf
|
||||
minpd(0x7ff8000000000000 -0 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -0
|
||||
divpd(1 2 , 0 -2 ) = inf -1
|
||||
divpd(0 -2 , inf -inf ) = 0 0
|
||||
divpd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf
|
||||
divpd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 inf
|
||||
divpd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 inf
|
||||
divpd(0x7ff8000000000000 -0 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 0xfff8000000000000
|
||||
maxpd(1 2 , 0 -2 ) = 1 2
|
||||
maxpd(0 -2 , inf -inf ) = inf -2
|
||||
maxpd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2
|
||||
maxpd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -0
|
||||
maxpd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -0
|
||||
maxpd(0x7ff8000000000000 -0 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -0
|
||||
punpcklbw(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0xff 0x0 0x80 0x1 0x7f 0x5 0x0 0x15 0x1 0x20 0x2 0x80 0x3 0xff 0x81 0x0
|
||||
punpcklwd(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0xffff 0x8000 0x8000 0x7fff 0x7fff 0xffff 0x0 0xffff
|
||||
punpckldq(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0xffffffff 0x1 0x80000000 0x80000000
|
||||
ppacksswb(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x80ff 0x7f 0x201 0x8003 0x7f80 0xffff 0x8050 0x80fe
|
||||
pcmpgtb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0x0 0x0 0xff 0x0 0x0 0xff 0xff 0x0 0x0 0x0 0xff 0xff 0x0 0x0 0x0 0xff
|
||||
pcmpgtw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0xffff 0x0 0xffff 0xffff 0x0 0xffff 0xffff 0x0
|
||||
pcmpgtd(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0x0 0x0 0xffffffff 0xffffffff
|
||||
packuswb(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x0 0xff 0x201 0x3 0xff00 0x0 0x50 0x0
|
||||
punpckhbw(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0xfe 0x8 0x84 0x7 0x72 0x81 0x52 0x6 0xa5 0xa 0x0 0xf 0xc0 0x10 0x32 0x1
|
||||
punpckhwd(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x1 0x50 0x2 0x9000 0x3 0xfffe 0x8001 0x8001
|
||||
punpckhdq(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0x7fffffff 0x5 0x0 0xfffffffe
|
||||
ppackssdw(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0x8000ffff 0x7fff 0x80000001 0xfffe0005
|
||||
punpcklqdq(0xffffffffffffffff 0x8000000000000000 , 0x1 0x8000000000000000 ) = 0xffffffffffffffff 0x1
|
||||
punpckhqdq(0xffffffffffffffff 0x8000000000000000 , 0x1 0x8000000000000000 ) = 0x8000000000000000 0x8000000000000000
|
||||
pshufd(0xffffffff 0x80000000 0x7fffffff 0x0 0) = 0xffffffff 0xffffffff 0xffffffff 0xffffffff
|
||||
pshufd(0xffffffff 0x80000000 0x7fffffff 0x0 255) = 0x0 0x0 0x0 0x0
|
||||
pshufd(0xffffffff 0x80000000 0x7fffffff 0x0 170) = 0x7fffffff 0x7fffffff 0x7fffffff 0x7fffffff
|
||||
pshufd(0xffffffff 0x80000000 0x7fffffff 0x0 2) = 0x7fffffff 0xffffffff 0xffffffff 0xffffffff
|
||||
psrlw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 0) = 0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001
|
||||
psrlw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 255) = 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0
|
||||
psrlw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 170) = 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0
|
||||
psrlw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 2) = 0x3fff 0x2000 0x1fff 0x0 0x0 0x0 0x0 0x2000
|
||||
psrld(0xffffffff 0x80000000 0x7fffffff 0x0 0) = 0xffffffff 0x80000000 0x7fffffff 0x0
|
||||
psrld(0xffffffff 0x80000000 0x7fffffff 0x0 255) = 0x0 0x0 0x0 0x0
|
||||
psrld(0xffffffff 0x80000000 0x7fffffff 0x0 170) = 0x0 0x0 0x0 0x0
|
||||
psrld(0xffffffff 0x80000000 0x7fffffff 0x0 2) = 0x3fffffff 0x20000000 0x1fffffff 0x0
|
||||
psrlq(0xffffffffffffffff 0x8000000000000000 0) = 0xffffffffffffffff 0x8000000000000000
|
||||
psrlq(0xffffffffffffffff 0x8000000000000000 255) = 0x0 0x0
|
||||
psrlq(0xffffffffffffffff 0x8000000000000000 170) = 0x0 0x0
|
||||
psrlq(0xffffffffffffffff 0x8000000000000000 2) = 0x3fffffffffffffff 0x2000000000000000
|
||||
psraw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 0) = 0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001
|
||||
psraw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 255) = 0xffff 0xffff 0x0 0x0 0x0 0x0 0x0 0xffff
|
||||
psraw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 170) = 0xffff 0xffff 0x0 0x0 0x0 0x0 0x0 0xffff
|
||||
psraw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 2) = 0xffff 0xe000 0x1fff 0x0 0x0 0x0 0x0 0xe000
|
||||
psrad(0xffffffff 0x80000000 0x7fffffff 0x0 0) = 0xffffffff 0x80000000 0x7fffffff 0x0
|
||||
psrad(0xffffffff 0x80000000 0x7fffffff 0x0 255) = 0xffffffff 0xffffffff 0x0 0x0
|
||||
psrad(0xffffffff 0x80000000 0x7fffffff 0x0 170) = 0xffffffff 0xffffffff 0x0 0x0
|
||||
psrad(0xffffffff 0x80000000 0x7fffffff 0x0 2) = 0xffffffff 0xe0000000 0x1fffffff 0x0
|
||||
psllw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 0) = 0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001
|
||||
psllw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 255) = 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0
|
||||
psllw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 170) = 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0
|
||||
psllw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 2) = 0xfffc 0x0 0xfffc 0x0 0x4 0x8 0xc 0x4
|
||||
pslld(0xffffffff 0x80000000 0x7fffffff 0x0 0) = 0xffffffff 0x80000000 0x7fffffff 0x0
|
||||
pslld(0xffffffff 0x80000000 0x7fffffff 0x0 255) = 0x0 0x0 0x0 0x0
|
||||
pslld(0xffffffff 0x80000000 0x7fffffff 0x0 170) = 0x0 0x0 0x0 0x0
|
||||
pslld(0xffffffff 0x80000000 0x7fffffff 0x0 2) = 0xfffffffc 0x0 0xfffffffc 0x0
|
||||
psllq(0xffffffffffffffff 0x8000000000000000 0) = 0xffffffffffffffff 0x8000000000000000
|
||||
psllq(0xffffffffffffffff 0x8000000000000000 255) = 0x0 0x0
|
||||
psllq(0xffffffffffffffff 0x8000000000000000 170) = 0x0 0x0
|
||||
psllq(0xffffffffffffffff 0x8000000000000000 2) = 0xfffffffffffffffc 0x0
|
||||
pcmpeqb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0
|
||||
pcmpeqw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0xffff
|
||||
pcmpeqd(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0x0 0xffffffff 0x0 0x0
|
||||
haddpd(1 2 , 0 -2 ) = 3 -2
|
||||
haddpd(0 -2 , inf -inf ) = -2 0xfff8000000000000
|
||||
haddpd(1 2 , 0x7ff8000000000000 -0 ) = 3 0x7ff8000000000000
|
||||
haddpd(0 -2 , 0x7ff8000000000000 -0 ) = -2 0x7ff8000000000000
|
||||
haddpd(inf -inf , 0x7ff8000000000000 -0 ) = 0xfff8000000000000 0x7ff8000000000000
|
||||
haddpd(0x7ff8000000000000 -0 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 0x7ff8000000000000
|
||||
psrlw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0
|
||||
psrld(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0x0 0x0 0x0 0x0
|
||||
psrlq(0xffffffffffffffff 0x8000000000000000 , 0x1 0x8000000000000000 ) = 0x7fffffffffffffff 0x4000000000000000
|
||||
paddq(0xffffffffffffffff 0x8000000000000000 , 0x1 0x8000000000000000 ) = 0x0 0x0
|
||||
pmullw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x8000 0x8000 0x8001 0x0 0x50 0x2000 0xfffa 0x1
|
||||
psubusb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0xff 0x7f 0x7a 0x0 0x0 0x0 0x0 0x81 0xf6 0x7d 0x0 0x4c 0x9b 0x0 0xb0 0x31
|
||||
psubusw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x7fff 0x1 0x0 0x0 0x0 0x0 0x0 0x0
|
||||
pminub(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0x0 0x1 0x5 0x0 0x1 0x2 0x3 0x0 0x8 0x7 0x72 0x6 0xa 0x0 0x10 0x1
|
||||
pand(0x81030201007f80ff 0x32c000a5527284fe , 0xff802015050100 0x1100f0a06810708 ) = 0x3000000050000 0x2000408
|
||||
paddusb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0xff 0x81 0x84 0x15 0x21 0x82 0xff 0x81 0xff 0x8b 0xf3 0x58 0xaf 0xf 0xd0 0x33
|
||||
paddusw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0xffff 0xffff 0xffff 0xffff 0x51 0x9002 0xffff 0xffff
|
||||
pmaxub(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0xff 0x80 0x7f 0x15 0x20 0x80 0xff 0x81 0xfe 0x84 0x81 0x52 0xa5 0xf 0xc0 0x32
|
||||
pandn(0x81030201007f80ff 0x32c000a5527284fe , 0xff802015050100 0x1100f0a06810708 ) = 0xfc802015000100 0x1100f0a04810300
|
||||
pavgb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0x80 0x41 0x42 0xb 0x11 0x41 0x81 0x41 0x83 0x46 0x7a 0x2c 0x58 0x8 0x68 0x1a
|
||||
psraw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0xffff 0xffff 0x0 0x0 0x0 0x0 0x0 0xffff
|
||||
psrad(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0xffffffff 0xffffffff 0x0 0x0
|
||||
pavgb(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0xc000 0x8000 0xbfff 0x8000 0x29 0x4801 0x8001 0x8001
|
||||
pmulhuw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x7fff 0x3fff 0x7ffe 0x0 0x0 0x1 0x2 0x4001
|
||||
pmulhw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x0 0xc000 0xffff 0x0 0x0 0xffff 0xffff 0x3fff
|
||||
psubsb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0xff 0x80 0x7a 0xeb 0xe1 0x7f 0x4 0x81 0xf6 0x80 0x7f 0x4c 0x9b 0xf1 0xb0 0x31
|
||||
psubsw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x7fff 0x8000 0x7fff 0x1 0xffb1 0x7002 0x5 0x0
|
||||
pminsw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x8000 0x8000 0xffff 0xffff 0x1 0x9000 0xfffe 0x8001
|
||||
por(0x81030201007f80ff 0x32c000a5527284fe , 0xff802015050100 0x1100f0a06810708 ) = 0x81ff8221157f81ff 0x33d00faf56f387fe
|
||||
paddusb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0xff 0x81 0x7f 0x15 0x21 0x82 0x2 0x81 0x6 0x8b 0xf3 0x58 0xaf 0xf 0xd0 0x33
|
||||
paddusw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x8000 0xffff 0x7ffe 0xffff 0x51 0x9002 0x1 0x8000
|
||||
pmaxsw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0xffff 0x7fff 0x7fff 0x0 0x50 0x2 0x3 0x8001
|
||||
pxor(0x81030201007f80ff 0x32c000a5527284fe , 0xff802015050100 0x1100f0a06810708 ) = 0x81fc8221157a81ff 0x33d00faf54f383f6
|
||||
psllw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0
|
||||
pslld(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0x0 0x0 0x0 0x0
|
||||
psllq(0xffffffffffffffff 0x8000000000000000 , 0x1 0x8000000000000000 ) = 0xfffffffffffffffe 0x0
|
||||
pmuludq(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0xffffffff 0x0 0x7ffffffb 0x2
|
||||
pmaddwd(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x0 0xc001 0x8001 0xffff 0x2050 0xffff 0xfffb 0x3ffe
|
||||
psadbw(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0x27 0x4 0x0 0x0 0x0 0x0 0x0 0x0 0x59 0x3 0x0 0x0 0x0 0x0 0x0 0x0
|
||||
psubb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0xff 0x7f 0x7a 0xeb 0xe1 0x82 0x4 0x81 0xf6 0x7d 0xf1 0x4c 0x9b 0xf1 0xb0 0x31
|
||||
psubw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x7fff 0x1 0x8000 0x1 0xffb1 0x7002 0x5 0x0
|
||||
psubd(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0xfffffffe 0x0 0x7ffffffa 0x2
|
||||
psubq(0xffffffffffffffff 0x8000000000000000 , 0x1 0x8000000000000000 ) = 0xfffffffffffffffe 0x0
|
||||
paddb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0xff 0x81 0x84 0x15 0x21 0x82 0x2 0x81 0x6 0x8b 0xf3 0x58 0xaf 0xf 0xd0 0x33
|
||||
paddw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x7fff 0xffff 0x7ffe 0xffff 0x51 0x9002 0x1 0x2
|
||||
paddd(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0x0 0x0 0x80000004 0xfffffffe
|
||||
pmovhlps(1 2 3 -4 , 0 -2 -10 0.5 ) = -10 0.5 3 -4
|
||||
unpcklps(1 2 3 -4 , 0 -2 -10 0.5 ) = 1 0 2 -2
|
||||
unpckhps(1 2 3 -4 , 0 -2 -10 0.5 ) = 3 -10 -4 0.5
|
||||
pmovhps(1 2 3 -4 , 0 -2 -10 0.5 ) = 1 2 0 -2
|
||||
psqrtps(1 2 3 -4 ) = 1 1.41421 1.73205 nan
|
||||
psqrtps(0 -2 -10 0.5 ) = 0 nan nan 0.707107
|
||||
psqrtps(inf -inf -inf 1 ) = inf nan nan 1
|
||||
psqrtps(nan -0 nan inf ) = nan -0 nan inf
|
||||
prcpps(nan -0 nan inf ) = nan -inf nan 0
|
||||
andps(1 2 3 -4 , 0 -2 -10 0.5 ) = 0 2 2 0
|
||||
andps(0 -2 -10 0.5 , inf -inf -inf 1 ) = 0 -2 -8 0.5
|
||||
andps(1 2 3 -4 , nan -0 nan inf ) = 1 0 3 4
|
||||
andps(0 -2 -10 0.5 , nan -0 nan inf ) = 0 -0 -8 0.5
|
||||
andps(inf -inf -inf 1 , nan -0 nan inf ) = inf -0 -inf 1
|
||||
andps(nan -0 nan inf , nan -0 nan inf ) = nan -0 nan inf
|
||||
andnps(1 2 3 -4 , 0 -2 -10 0.5 ) = 0 -0 -2.93874e-38 0.5
|
||||
andnps(0 -2 -10 0.5 , inf -inf -inf 1 ) = inf 1 0.25 1.17549e-38
|
||||
andnps(1 2 3 -4 , nan -0 nan inf ) = 3 -0 -1 0.5
|
||||
andnps(0 -2 -10 0.5 , nan -0 nan inf ) = nan 0 0.375 4
|
||||
andnps(inf -inf -inf 1 , nan -0 nan inf ) = 5.87747e-39 0 5.87747e-39 2
|
||||
andnps(nan -0 nan inf , nan -0 nan inf ) = 0 0 0 0
|
||||
orps(1 2 3 -4 , 0 -2 -10 0.5 ) = 1 -2 -14 -inf
|
||||
orps(0 -2 -10 0.5 , inf -inf -inf 1 ) = inf -inf nan 1
|
||||
orps(1 2 3 -4 , nan -0 nan inf ) = nan -2 nan -inf
|
||||
orps(0 -2 -10 0.5 , nan -0 nan inf ) = nan -2 nan inf
|
||||
orps(inf -inf -inf 1 , nan -0 nan inf ) = nan -inf nan inf
|
||||
orps(nan -0 nan inf , nan -0 nan inf ) = nan -0 nan inf
|
||||
xorps(1 2 3 -4 , 0 -2 -10 0.5 ) = 1 -0 -4.11423e-38 -inf
|
||||
xorps(0 -2 -10 0.5 , inf -inf -inf 1 ) = inf 1 0.3125 1.17549e-38
|
||||
xorps(1 2 3 -4 , nan -0 nan inf ) = 3 -2 -1 -0.5
|
||||
xorps(0 -2 -10 0.5 , nan -0 nan inf ) = nan 2 0.4375 4
|
||||
xorps(inf -inf -inf 1 , nan -0 nan inf ) = 5.87747e-39 inf 5.87747e-39 2
|
||||
xorps(nan -0 nan inf , nan -0 nan inf ) = 0 0 0 0
|
||||
addps(1 2 3 -4 , 0 -2 -10 0.5 ) = 1 0 -7 -3.5
|
||||
addps(0 -2 -10 0.5 , inf -inf -inf 1 ) = inf -inf -inf 1.5
|
||||
addps(1 2 3 -4 , nan -0 nan inf ) = nan 2 nan inf
|
||||
addps(0 -2 -10 0.5 , nan -0 nan inf ) = nan -2 nan inf
|
||||
addps(inf -inf -inf 1 , nan -0 nan inf ) = nan -inf nan inf
|
||||
addps(nan -0 nan inf , nan -0 nan inf ) = nan -0 nan inf
|
||||
mulps(1 2 3 -4 , 0 -2 -10 0.5 ) = 0 -4 -30 -2
|
||||
mulps(0 -2 -10 0.5 , inf -inf -inf 1 ) = nan inf inf 0.5
|
||||
mulps(1 2 3 -4 , nan -0 nan inf ) = nan -0 nan -inf
|
||||
mulps(0 -2 -10 0.5 , nan -0 nan inf ) = nan 0 nan inf
|
||||
mulps(inf -inf -inf 1 , nan -0 nan inf ) = nan nan nan inf
|
||||
mulps(nan -0 nan inf , nan -0 nan inf ) = nan 0 nan inf
|
||||
subps(1 2 3 -4 , 0 -2 -10 0.5 ) = 1 4 13 -4.5
|
||||
subps(0 -2 -10 0.5 , inf -inf -inf 1 ) = -inf inf inf -0.5
|
||||
subps(1 2 3 -4 , nan -0 nan inf ) = nan 2 nan -inf
|
||||
subps(0 -2 -10 0.5 , nan -0 nan inf ) = nan -2 nan -inf
|
||||
subps(inf -inf -inf 1 , nan -0 nan inf ) = nan -inf nan -inf
|
||||
subps(nan -0 nan inf , nan -0 nan inf ) = nan 0 nan nan
|
||||
minps(1 2 3 -4 , 0 -2 -10 0.5 ) = 0 -2 -10 -4
|
||||
minps(0 -2 -10 0.5 , inf -inf -inf 1 ) = 0 -inf -inf 0.5
|
||||
minps(1 2 3 -4 , nan -0 nan inf ) = nan -0 nan -4
|
||||
minps(0 -2 -10 0.5 , nan -0 nan inf ) = nan -2 nan 0.5
|
||||
minps(inf -inf -inf 1 , nan -0 nan inf ) = nan -inf nan 1
|
||||
minps(nan -0 nan inf , nan -0 nan inf ) = nan -0 nan inf
|
||||
divps(1 2 3 -4 , 0 -2 -10 0.5 ) = inf -1 -0.3 -8
|
||||
divps(0 -2 -10 0.5 , inf -inf -inf 1 ) = 0 0 0 0.5
|
||||
divps(1 2 3 -4 , nan -0 nan inf ) = nan -inf nan -0
|
||||
divps(0 -2 -10 0.5 , nan -0 nan inf ) = nan inf nan 0
|
||||
divps(inf -inf -inf 1 , nan -0 nan inf ) = nan inf nan 0
|
||||
divps(nan -0 nan inf , nan -0 nan inf ) = nan nan nan nan
|
||||
maxps(1 2 3 -4 , 0 -2 -10 0.5 ) = 1 2 3 0.5
|
||||
maxps(0 -2 -10 0.5 , inf -inf -inf 1 ) = inf -2 -10 1
|
||||
maxps(1 2 3 -4 , nan -0 nan inf ) = nan 2 nan inf
|
||||
maxps(0 -2 -10 0.5 , nan -0 nan inf ) = nan -0 nan inf
|
||||
maxps(inf -inf -inf 1 , nan -0 nan inf ) = nan -0 nan inf
|
||||
maxps(nan -0 nan inf , nan -0 nan inf ) = nan -0 nan inf
|
||||
shufps(1 2 3 -4 , 0 -2 -10 0.5 , 0) = 1 1 0 0
|
||||
shufps(0 -2 -10 0.5 , inf -inf -inf 1 , 0) = 0 0 inf inf
|
||||
shufps(1 2 3 -4 , nan -0 nan inf , 0) = 1 1 nan nan
|
||||
shufps(0 -2 -10 0.5 , nan -0 nan inf , 0) = 0 0 nan nan
|
||||
shufps(inf -inf -inf 1 , nan -0 nan inf , 0) = inf inf nan nan
|
||||
shufps(nan -0 nan inf , nan -0 nan inf , 0) = nan nan nan nan
|
||||
shufps(1 2 3 -4 , 0 -2 -10 0.5 , 21) = 2 2 -2 0
|
||||
shufps(0 -2 -10 0.5 , inf -inf -inf 1 , 21) = -2 -2 -inf inf
|
||||
shufps(1 2 3 -4 , nan -0 nan inf , 21) = 2 2 -0 nan
|
||||
shufps(0 -2 -10 0.5 , nan -0 nan inf , 21) = -2 -2 -0 nan
|
||||
shufps(inf -inf -inf 1 , nan -0 nan inf , 21) = -inf -inf -0 nan
|
||||
shufps(nan -0 nan inf , nan -0 nan inf , 21) = -0 -0 -0 nan
|
||||
shufps(1 2 3 -4 , 0 -2 -10 0.5 , 255) = -4 -4 0.5 0.5
|
||||
shufps(0 -2 -10 0.5 , inf -inf -inf 1 , 255) = 0.5 0.5 1 1
|
||||
shufps(1 2 3 -4 , nan -0 nan inf , 255) = -4 -4 inf inf
|
||||
shufps(0 -2 -10 0.5 , nan -0 nan inf , 255) = 0.5 0.5 inf inf
|
||||
shufps(inf -inf -inf 1 , nan -0 nan inf , 255) = 1 1 inf inf
|
||||
shufps(nan -0 nan inf , nan -0 nan inf , 255) = inf inf inf inf
|
||||
shufps(1 2 3 -4 , 0 -2 -10 0.5 , 2) = 3 1 0 0
|
||||
shufps(0 -2 -10 0.5 , inf -inf -inf 1 , 2) = -10 0 inf inf
|
||||
shufps(1 2 3 -4 , nan -0 nan inf , 2) = 3 1 nan nan
|
||||
shufps(0 -2 -10 0.5 , nan -0 nan inf , 2) = -10 0 nan nan
|
||||
shufps(inf -inf -inf 1 , nan -0 nan inf , 2) = -inf inf nan nan
|
||||
shufps(nan -0 nan inf , nan -0 nan inf , 2) = nan nan nan nan
|
||||
sqrtsd(1 2 , 1 2 ) = 1 2
|
||||
sqrtsd(1 2 , 0 -2 ) = 0 2
|
||||
sqrtsd(1 2 , inf -inf ) = inf 2
|
||||
sqrtsd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2
|
||||
sqrtsd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2
|
||||
sqrtsd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf
|
||||
sqrtsd(1 2 , 2 1 ) = 1.41421 2
|
||||
sqrtsd(1 2 , -2 0 ) = 0xfff8000000000000 2
|
||||
sqrtsd(1 2 , -inf inf ) = 0xfff8000000000000 2
|
||||
sqrtsd(1 2 , -0 0x7ff8000000000000 ) = -0 2
|
||||
sqrtsd(0 -2 , -0 0x7ff8000000000000 ) = -0 -2
|
||||
sqrtsd(0 -2 , -0 0x7ff8000000000000 ) = -0 -2
|
||||
addsd(1 2 , 1 2 ) = 2 2
|
||||
addsd(1 2 , 0 -2 ) = 1 2
|
||||
addsd(1 2 , inf -inf ) = inf 2
|
||||
addsd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2
|
||||
addsd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2
|
||||
addsd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf
|
||||
addsd(1 2 , 2 1 ) = 3 2
|
||||
addsd(1 2 , -2 0 ) = -1 2
|
||||
addsd(1 2 , -inf inf ) = -inf 2
|
||||
addsd(1 2 , -0 0x7ff8000000000000 ) = 1 2
|
||||
addsd(0 -2 , -0 0x7ff8000000000000 ) = 0 -2
|
||||
addsd(0 -2 , -0 0x7ff8000000000000 ) = 0 -2
|
||||
mulsd(1 2 , 1 2 ) = 1 2
|
||||
mulsd(1 2 , 0 -2 ) = 0 2
|
||||
mulsd(1 2 , inf -inf ) = inf 2
|
||||
mulsd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2
|
||||
mulsd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2
|
||||
mulsd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf
|
||||
mulsd(1 2 , 2 1 ) = 2 2
|
||||
mulsd(1 2 , -2 0 ) = -2 2
|
||||
mulsd(1 2 , -inf inf ) = -inf 2
|
||||
mulsd(1 2 , -0 0x7ff8000000000000 ) = -0 2
|
||||
mulsd(0 -2 , -0 0x7ff8000000000000 ) = -0 -2
|
||||
mulsd(0 -2 , -0 0x7ff8000000000000 ) = -0 -2
|
||||
subsd(1 2 , 1 2 ) = 0 2
|
||||
subsd(1 2 , 0 -2 ) = 1 2
|
||||
subsd(1 2 , inf -inf ) = -inf 2
|
||||
subsd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2
|
||||
subsd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2
|
||||
subsd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf
|
||||
subsd(1 2 , 2 1 ) = -1 2
|
||||
subsd(1 2 , -2 0 ) = 3 2
|
||||
subsd(1 2 , -inf inf ) = inf 2
|
||||
subsd(1 2 , -0 0x7ff8000000000000 ) = 1 2
|
||||
subsd(0 -2 , -0 0x7ff8000000000000 ) = 0 -2
|
||||
subsd(0 -2 , -0 0x7ff8000000000000 ) = 0 -2
|
||||
minsd(1 2 , 1 2 ) = 1 2
|
||||
minsd(1 2 , 0 -2 ) = 0 2
|
||||
minsd(1 2 , inf -inf ) = 1 2
|
||||
minsd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2
|
||||
minsd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2
|
||||
minsd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf
|
||||
minsd(1 2 , 2 1 ) = 1 2
|
||||
minsd(1 2 , -2 0 ) = -2 2
|
||||
minsd(1 2 , -inf inf ) = -inf 2
|
||||
minsd(1 2 , -0 0x7ff8000000000000 ) = -0 2
|
||||
minsd(0 -2 , -0 0x7ff8000000000000 ) = -0 -2
|
||||
minsd(0 -2 , -0 0x7ff8000000000000 ) = -0 -2
|
||||
divsd(1 2 , 1 2 ) = 1 2
|
||||
divsd(1 2 , 0 -2 ) = inf 2
|
||||
divsd(1 2 , inf -inf ) = 0 2
|
||||
divsd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2
|
||||
divsd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2
|
||||
divsd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf
|
||||
divsd(1 2 , 2 1 ) = 0.5 2
|
||||
divsd(1 2 , -2 0 ) = -0.5 2
|
||||
divsd(1 2 , -inf inf ) = -0 2
|
||||
divsd(1 2 , -0 0x7ff8000000000000 ) = -inf 2
|
||||
divsd(0 -2 , -0 0x7ff8000000000000 ) = 0xfff8000000000000 -2
|
||||
divsd(0 -2 , -0 0x7ff8000000000000 ) = 0xfff8000000000000 -2
|
||||
maxsd(1 2 , 1 2 ) = 1 2
|
||||
maxsd(1 2 , 0 -2 ) = 1 2
|
||||
maxsd(1 2 , inf -inf ) = inf 2
|
||||
maxsd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2
|
||||
maxsd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2
|
||||
maxsd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf
|
||||
maxsd(1 2 , 2 1 ) = 2 2
|
||||
maxsd(1 2 , -2 0 ) = 1 2
|
||||
maxsd(1 2 , -inf inf ) = 1 2
|
||||
maxsd(1 2 , -0 0x7ff8000000000000 ) = 1 2
|
||||
maxsd(0 -2 , -0 0x7ff8000000000000 ) = 0 -2
|
||||
maxsd(0 -2 , -0 0x7ff8000000000000 ) = 0 -2
|
||||
|
BIN
tests/test17
BIN
tests/test17
Binary file not shown.
319
tests/test17.c
319
tests/test17.c
@ -1,3 +1,5 @@
|
||||
// build with gcc -march=corei7 -O2 -g -msse -msse2 test17.c -o test17
|
||||
// and -m32 for 32bits version
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <stddef.h>
|
||||
@ -103,10 +105,28 @@ const v128 b128_16 = {.u16 = {
|
||||
const v128 b128_32 = {.u32 = {
|
||||
0x00000001, 0x80000000, 0x00000005, 0xfffffffe
|
||||
}};
|
||||
const v128 b128_64 = {.u64 = {
|
||||
0x0000000000000001LL, 0x8000000000000000LL
|
||||
}};
|
||||
const v128 c128_32 = {.u32 = {
|
||||
0x00000001, 0x80000000, 0x80000005, 0x0000fffe
|
||||
}};
|
||||
|
||||
const v128 a128_pd = {.d64 = { 1.0, 2.0}};
|
||||
const v128 b128_pd = {.d64 = { 0.0, -2.0}};
|
||||
const v128 c128_pd = {.d64 = { INFINITY, -INFINITY}};
|
||||
const v128 d128_pd = {.d64 = { NAN, -0.0}};
|
||||
const v128 a128_ps = {.f32 = { 1.0, 2.0, 3.0, -4.0}};
|
||||
const v128 b128_ps = {.f32 = { 0.0, -2.0, -10.0, 0.5}};
|
||||
const v128 c128_ps = {.f32 = { INFINITY, -INFINITY, -INFINITY, 1.0}};
|
||||
const v128 d128_ps = {.f32 = { NAN, -0.0, -NAN, INFINITY}};
|
||||
|
||||
// Return a vector holding the two double lanes of `a` in exchanged order.
v128 reverse_pd(v128 a) {
    // Immediate 1: result lane 0 comes from the upper lane of the first
    // operand, result lane 1 from the lower lane of the second operand.
    v128 swapped = { .md = _mm_shuffle_pd(a.md, a.md, 1) };
    return swapped;
}
|
||||
|
||||
void print_8(v128 v) {
|
||||
for(int i=0; i<16; ++i)
|
||||
printf("0x%x ", v.u8[i]);
|
||||
@ -123,6 +143,22 @@ void print_64(v128 v) {
|
||||
for(int i=0; i<2; ++i)
|
||||
printf("0x%llx ", v.u64[i]);
|
||||
}
|
||||
#define print_128 print_64
|
||||
// Print the four float lanes of v, each followed by a space.
// Any NaN lane is printed as the fixed text "nan", whatever its sign bit or
// payload; every other value is formatted with "%g".
void print_ps(v128 v) {
    for(int i=0; i<4; ++i) {
        // isnan() is the standard type-generic macro from <math.h>; it replaces
        // the non-standard isnanf() and matches what print_pd already uses.
        if(isnan(v.f32[i])) {
            printf("nan ");
        } else {
            printf("%g ", v.f32[i]);
        }
    }
}
|
||||
// Print the two double lanes of v, each followed by a space.
// A NaN lane is shown as its raw 64-bit pattern in hex (read through the u64
// view of the same storage), so sign and payload stay visible; every other
// value is formatted with "%g".
void print_pd(v128 v) {
    int lane = 0;
    while(lane < 2) {
        if(!isnan(v.d64[lane])) {
            printf("%g ", v.d64[lane]);
        } else {
            printf("0x%llx ", v.u64[lane]);
        }
        ++lane;
    }
}
|
||||
#define print_sd print_pd
|
||||
|
||||
int main(int argc, const char** argv)
|
||||
{
|
||||
@ -226,13 +262,32 @@ printf(N " %g, %g => %g\n", b, a, *(float*)&r);
|
||||
|
||||
#define GO1(A, N, C) \
|
||||
a128.mm = _mm_##A##_epi##N(a128_##N.mm); \
|
||||
printf("%s(", #C); print_##N(a128_##N); \
|
||||
printf("%s(", #C); print_##N(a128_##N); \
|
||||
printf(") = "); print_##N(a128); printf("\n");
|
||||
#define GO1C(A, N, C, A1, I) \
|
||||
a128.mm = _mm_##A##_epi##N(A1.mm, I); \
|
||||
printf("%s(", #C); print_##N(A1); \
|
||||
printf("%d) = ", I); print_##N(a128); printf("\n");
|
||||
#define GO2(A, N, C, A1, A2) \
|
||||
a128.mm = _mm_##A##_epi##N(A1.mm, A2.mm); \
|
||||
printf("%s(", #C); print_##N(A1); \
|
||||
printf("%s(", #C); print_##N(A1); \
|
||||
printf(", "); print_##N(A2); \
|
||||
printf(") = "); print_##N(a128); printf("\n");
|
||||
#define GO2u(A, N, C, A1, A2) \
|
||||
a128.mm = _mm_##A##_epu##N(A1.mm, A2.mm); \
|
||||
printf("%s(", #C); print_##N(A1); \
|
||||
printf(", "); print_##N(A2); \
|
||||
printf(") = "); print_##N(a128); printf("\n");
|
||||
#define GO2f(A, C, A1, A2) \
|
||||
a128.mm = _mm_##A##_si128(A1.mm, A2.mm); \
|
||||
printf("%s(", #C); print_128(A1); \
|
||||
printf(", "); print_128(A2); \
|
||||
printf(") = "); print_128(a128); printf("\n");
|
||||
#define GO2C(A, N, C, A1, A2, I) \
|
||||
a128.mm = _mm_##A##_epi##N(A1.mm, A2.mm, I); \
|
||||
printf("%s(", #C); print_##N(A1); \
|
||||
printf(", "); print_##N(A2); \
|
||||
printf("%d) = ", I); print_##N(a128); printf("\n");
|
||||
#define GO2i(A, A1, A2) \
|
||||
i = _mm_##A##_si128(A1.mm, A2.mm); \
|
||||
printf("p%s(", #A); print_64(A1); \
|
||||
@ -244,6 +299,106 @@ printf(N " %g, %g => %g\n", b, a, *(float*)&r);
|
||||
printf(", "); print_##N(A2); \
|
||||
printf(", "); print_##N(A3); \
|
||||
printf(") = "); print_##N(a128); printf("\n");
|
||||
#define GO1ipd(A, C, A1) \
|
||||
i = _mm_##A##_pd(A1.md); \
|
||||
printf("%s(", #C); print_64(A1); \
|
||||
printf(") = 0x%x\n", i);
|
||||
#define GO1pd(A, C, A1) \
|
||||
a128.md = _mm_##A##_pd(A1.md); \
|
||||
printf("%s(", #C); print_pd(A1); \
|
||||
printf(") = "); print_pd(a128); printf("\n");
|
||||
#define GO2pd(A, C, A1, A2) \
|
||||
a128.md = _mm_##A##_pd(A1.md, A2.md); \
|
||||
printf("%s(", #C); print_pd(A1); \
|
||||
printf(", "); print_pd(A2); \
|
||||
printf(") = "); print_pd(a128); printf("\n");
|
||||
#define GO2Cpd(A, C, A1, A2, I) \
|
||||
a128.md = _mm_##A##_pd(A1.md, A2.md, I); \
|
||||
printf("%s(", #C); print_pd(A1); \
|
||||
printf(", "); print_pd(A2); \
|
||||
printf(", %d) = ", I); print_pd(a128); printf("\n");
|
||||
#define GO1isd(A, C, A1) \
|
||||
i = _mm_##A##_sd(A1.md); \
|
||||
printf("%s(", #C); print_64(A1); \
|
||||
printf(") = 0x%x\n", i);
|
||||
#define GO1sd(A, C, A1) \
|
||||
a128.md = _mm_##A##_sd(A1.md); \
|
||||
printf("%s(", #C); print_sd(A1); \
|
||||
printf(") = "); print_sd(a128); printf("\n");
|
||||
#define GO2sd(A, C, A1, A2) \
|
||||
a128.md = _mm_##A##_sd(A1.md, A2.md); \
|
||||
printf("%s(", #C); print_sd(A1); \
|
||||
printf(", "); print_sd(A2); \
|
||||
printf(") = "); print_sd(a128); printf("\n");
|
||||
#define GO2Csd(A, C, A1, A2, I) \
|
||||
a128.md = _mm_##A##_sd(A1.md, A2.md, I); \
|
||||
printf("%s(", #C); print_sd(A1); \
|
||||
printf(", "); print_sd(A2); \
|
||||
printf(", %d) = ", I); print_sd(a128); printf("\n");
|
||||
#define GO1ips(A, C, A1) \
|
||||
i = _mm_##A##_ps(A1.mf); \
|
||||
printf("%s(", #C); print_32(A1); \
|
||||
printf(") = 0x%x\n", i);
|
||||
#define GO1ps(A, C, A1) \
|
||||
a128.mf = _mm_##A##_ps(A1.mf); \
|
||||
printf("%s(", #C); print_ps(A1); \
|
||||
printf(") = "); print_ps(a128); printf("\n");
|
||||
#define GO2ps(A, C, A1, A2) \
|
||||
a128.mf = _mm_##A##_ps(A1.mf, A2.mf); \
|
||||
printf("%s(", #C); print_ps(A1); \
|
||||
printf(", "); print_ps(A2); \
|
||||
printf(") = "); print_ps(a128); printf("\n");
|
||||
#define GO2Cps(A, C, A1, A2, I) \
|
||||
a128.mf = _mm_##A##_ps(A1.mf, A2.mf, I); \
|
||||
printf("%s(", #C); print_ps(A1); \
|
||||
printf(", "); print_ps(A2); \
|
||||
printf(", %d) = ", I); print_ps(a128); printf("\n");
|
||||
|
||||
#define MULITGO2pd(A, B) \
|
||||
GO2pd(A, B, a128_pd, b128_pd) \
|
||||
GO2pd(A, B, b128_pd, c128_pd) \
|
||||
GO2pd(A, B, a128_pd, d128_pd) \
|
||||
GO2pd(A, B, b128_pd, d128_pd) \
|
||||
GO2pd(A, B, c128_pd, d128_pd) \
|
||||
GO2pd(A, B, d128_pd, d128_pd)
|
||||
|
||||
#define MULITGO2Cpd(A, B, I) \
|
||||
GO2Cpd(A, B, a128_pd, b128_pd, I) \
|
||||
GO2Cpd(A, B, b128_pd, c128_pd, I) \
|
||||
GO2Cpd(A, B, a128_pd, d128_pd, I) \
|
||||
GO2Cpd(A, B, b128_pd, d128_pd, I) \
|
||||
GO2Cpd(A, B, c128_pd, d128_pd, I) \
|
||||
GO2Cpd(A, B, d128_pd, d128_pd, I)
|
||||
|
||||
#define MULITGO2ps(A, B) \
|
||||
GO2ps(A, B, a128_ps, b128_ps) \
|
||||
GO2ps(A, B, b128_ps, c128_ps) \
|
||||
GO2ps(A, B, a128_ps, d128_ps) \
|
||||
GO2ps(A, B, b128_ps, d128_ps) \
|
||||
GO2ps(A, B, c128_ps, d128_ps) \
|
||||
GO2ps(A, B, d128_ps, d128_ps)
|
||||
|
||||
#define MULITGO2Cps(A, B, I) \
|
||||
GO2Cps(A, B, a128_ps, b128_ps, I) \
|
||||
GO2Cps(A, B, b128_ps, c128_ps, I) \
|
||||
GO2Cps(A, B, a128_ps, d128_ps, I) \
|
||||
GO2Cps(A, B, b128_ps, d128_ps, I) \
|
||||
GO2Cps(A, B, c128_ps, d128_ps, I) \
|
||||
GO2Cps(A, B, d128_ps, d128_ps, I)
|
||||
|
||||
// MULTIGO2sd(A, B): expand to twelve GO2sd(A, B, x, y) invocations, i.e. run
// the scalar-double intrinsic _mm_<A>_sd (printed under the label B) over a
// fixed list of operand pairs built from a128_pd .. d128_pd.
// The first six rows pass the second operand as-is; the last six pass it
// through reverse_pd() so the other lane of each constant is exercised too.
// No trailing semicolons: each GO2sd expansion already ends with one.
// NOTE(review): the final two rows are identical (b128_pd, reverse_pd(d128_pd)).
// By analogy with rows 5 and 6 the last one was presumably meant to be
// (c128_pd, reverse_pd(d128_pd)) - confirm; changing it requires regenerating
// the reference output, which currently contains the duplicated line.
#define MULTIGO2sd(A, B) \
|
||||
GO2sd(A, B, a128_pd, a128_pd) \
|
||||
GO2sd(A, B, a128_pd, b128_pd) \
|
||||
GO2sd(A, B, a128_pd, c128_pd) \
|
||||
GO2sd(A, B, a128_pd, d128_pd) \
|
||||
GO2sd(A, B, b128_pd, d128_pd) \
|
||||
GO2sd(A, B, c128_pd, d128_pd) \
|
||||
GO2sd(A, B, a128_pd, reverse_pd(a128_pd)) \
|
||||
GO2sd(A, B, a128_pd, reverse_pd(b128_pd)) \
|
||||
GO2sd(A, B, a128_pd, reverse_pd(c128_pd)) \
|
||||
GO2sd(A, B, a128_pd, reverse_pd(d128_pd)) \
|
||||
GO2sd(A, B, b128_pd, reverse_pd(d128_pd)) \
|
||||
GO2sd(A, B, b128_pd, reverse_pd(d128_pd))
|
||||
|
||||
|
||||
GO2(shuffle, 8, pshufb, a128_8, b128_8)
|
||||
@ -275,5 +430,165 @@ printf(N " %g, %g => %g\n", b, a, *(float*)&r);
|
||||
GO1(cvtepu16, 32, pmovzxwd);
|
||||
GO1(cvtepu16, 64, pmovzxwq);
|
||||
GO1(cvtepu32, 64, pmovzxdq);
|
||||
GO2(min, 32, pminsd, a128_32, b128_32)
|
||||
GO2(max, 32, pmaxsd, a128_32, b128_32)
|
||||
GO2C(blend, 16, pblendw, a128_16, b128_16, 0)
|
||||
GO2C(blend, 16, pblendw, a128_16, b128_16, 0xff)
|
||||
GO2C(blend, 16, pblendw, a128_16, b128_16, 0xaa)
|
||||
GO2C(blend, 16, pblendw, a128_16, b128_16, 2)
|
||||
GO2C(alignr, 8, palignr, a128_8, b128_8, 0)
|
||||
GO2C(alignr, 8, palignr, a128_8, b128_8, 2)
|
||||
GO2C(alignr, 8, palignr, a128_8, b128_8, 7)
|
||||
GO2C(alignr, 8, palignr, a128_8, b128_8, 15)
|
||||
GO2C(alignr, 8, palignr, a128_8, b128_8, 16)
|
||||
GO2C(alignr, 8, palignr, a128_8, b128_8, 0xff)
|
||||
GO1ipd(movemask, movmskpd, a128_64)
|
||||
GO1pd(sqrt, psqrtpd, a128_pd)
|
||||
GO1pd(sqrt, psqrtpd, b128_pd)
|
||||
GO1pd(sqrt, psqrtpd, c128_pd)
|
||||
GO1pd(sqrt, psqrtpd, d128_pd)
|
||||
MULITGO2pd(and, andpd)
|
||||
MULITGO2pd(andnot, andnpd)
|
||||
MULITGO2pd(or, orpd)
|
||||
MULITGO2pd(xor, xorpd)
|
||||
MULITGO2pd(add, addpd)
|
||||
MULITGO2pd(mul, mulpd)
|
||||
MULITGO2pd(sub, subpd)
|
||||
MULITGO2pd(min, minpd)
|
||||
MULITGO2pd(div, divpd)
|
||||
MULITGO2pd(max, maxpd)
|
||||
GO2(unpacklo, 8, punpcklbw, a128_8, b128_8)
|
||||
GO2(unpacklo, 16, punpcklwd, a128_16, b128_16)
|
||||
GO2(unpacklo, 32, punpckldq, a128_32, b128_32)
|
||||
GO2(packs, 16, ppacksswb, a128_16, b128_16)
|
||||
GO2(cmpgt, 8, pcmpgtb, a128_8, b128_8)
|
||||
GO2(cmpgt, 16, pcmpgtw, a128_16, b128_16)
|
||||
GO2(cmpgt, 32, pcmpgtd, a128_32, b128_32)
|
||||
GO2(packus, 16, packuswb, a128_16, b128_16)
|
||||
GO2(unpackhi, 8, punpckhbw, a128_8, b128_8)
|
||||
GO2(unpackhi, 16, punpckhwd, a128_16, b128_16)
|
||||
GO2(unpackhi, 32, punpckhdq, a128_32, b128_32)
|
||||
GO2(packs, 32, ppackssdw, a128_32, b128_32)
|
||||
GO2(unpacklo, 64, punpcklqdq, a128_64, b128_64)
|
||||
GO2(unpackhi, 64, punpckhqdq, a128_64, b128_64)
|
||||
GO1C(shuffle, 32, pshufd, a128_32, 0)
|
||||
GO1C(shuffle, 32, pshufd, a128_32, 0xff)
|
||||
GO1C(shuffle, 32, pshufd, a128_32, 0xaa)
|
||||
GO1C(shuffle, 32, pshufd, a128_32, 2)
|
||||
GO1C(srli, 16, psrlw, a128_16, 0)
|
||||
GO1C(srli, 16, psrlw, a128_16, 0xff)
|
||||
GO1C(srli, 16, psrlw, a128_16, 0xaa)
|
||||
GO1C(srli, 16, psrlw, a128_16, 2)
|
||||
GO1C(srli, 32, psrld, a128_32, 0)
|
||||
GO1C(srli, 32, psrld, a128_32, 0xff)
|
||||
GO1C(srli, 32, psrld, a128_32, 0xaa)
|
||||
GO1C(srli, 32, psrld, a128_32, 2)
|
||||
GO1C(srli, 64, psrlq, a128_64, 0)
|
||||
GO1C(srli, 64, psrlq, a128_64, 0xff)
|
||||
GO1C(srli, 64, psrlq, a128_64, 0xaa)
|
||||
GO1C(srli, 64, psrlq, a128_64, 2)
|
||||
GO1C(srai, 16, psraw, a128_16, 0)
|
||||
GO1C(srai, 16, psraw, a128_16, 0xff)
|
||||
GO1C(srai, 16, psraw, a128_16, 0xaa)
|
||||
GO1C(srai, 16, psraw, a128_16, 2)
|
||||
GO1C(srai, 32, psrad, a128_32, 0)
|
||||
GO1C(srai, 32, psrad, a128_32, 0xff)
|
||||
GO1C(srai, 32, psrad, a128_32, 0xaa)
|
||||
GO1C(srai, 32, psrad, a128_32, 2)
|
||||
GO1C(slli, 16, psllw, a128_16, 0)
|
||||
GO1C(slli, 16, psllw, a128_16, 0xff)
|
||||
GO1C(slli, 16, psllw, a128_16, 0xaa)
|
||||
GO1C(slli, 16, psllw, a128_16, 2)
|
||||
GO1C(slli, 32, pslld, a128_32, 0)
|
||||
GO1C(slli, 32, pslld, a128_32, 0xff)
|
||||
GO1C(slli, 32, pslld, a128_32, 0xaa)
|
||||
GO1C(slli, 32, pslld, a128_32, 2)
|
||||
GO1C(slli, 64, psllq, a128_64, 0)
|
||||
GO1C(slli, 64, psllq, a128_64, 0xff)
|
||||
GO1C(slli, 64, psllq, a128_64, 0xaa)
|
||||
GO1C(slli, 64, psllq, a128_64, 2)
|
||||
GO2(cmpeq, 8, pcmpeqb, a128_8, b128_8)
|
||||
GO2(cmpeq, 16, pcmpeqw, a128_16, b128_16)
|
||||
GO2(cmpeq, 32, pcmpeqd, a128_32, b128_32)
|
||||
MULITGO2pd(hadd, haddpd)
|
||||
GO2(srl, 16, psrlw, a128_16, b128_16)
|
||||
GO2(srl, 32, psrld, a128_32, b128_32)
|
||||
GO2(srl, 64, psrlq, a128_64, b128_64)
|
||||
GO2(add, 64, paddq, a128_64, b128_64)
|
||||
GO2(mullo, 16, pmullw, a128_16, b128_16)
|
||||
GO2u(subs, 8, psubusb, a128_8, b128_8)
|
||||
GO2u(subs, 16, psubusw, a128_16, b128_16)
|
||||
GO2u(min, 8, pminub, a128_8, b128_8)
|
||||
GO2f(and, pand, a128_8, b128_8)
|
||||
GO2u(adds, 8, paddusb, a128_8, b128_8)
|
||||
GO2u(adds, 16, paddusw, a128_16, b128_16)
|
||||
GO2u(max, 8, pmaxub, a128_8, b128_8)
|
||||
GO2f(andnot, pandn, a128_8, b128_8)
|
||||
GO2u(avg, 8, pavgb, a128_8, b128_8)
|
||||
GO2(sra, 16, psraw, a128_16, b128_16)
|
||||
GO2(sra, 32, psrad, a128_32, b128_32)
|
||||
GO2u(avg, 16, pavgb, a128_16, b128_16)
|
||||
GO2u(mulhi, 16, pmulhuw, a128_16, b128_16)
|
||||
GO2(mulhi, 16, pmulhw, a128_16, b128_16)
|
||||
GO2(subs, 8, psubsb, a128_8, b128_8)
|
||||
GO2(subs, 16, psubsw, a128_16, b128_16)
|
||||
GO2(min, 16, pminsw, a128_16, b128_16)
|
||||
GO2f(or, por, a128_8, b128_8)
|
||||
GO2(adds, 8, paddusb, a128_8, b128_8)
|
||||
GO2(adds, 16, paddusw, a128_16, b128_16)
|
||||
GO2(max, 16, pmaxsw, a128_16, b128_16)
|
||||
GO2f(xor, pxor, a128_8, b128_8)
|
||||
GO2(sll, 16, psllw, a128_16, b128_16)
|
||||
GO2(sll, 32, pslld, a128_32, b128_32)
|
||||
GO2(sll, 64, psllq, a128_64, b128_64)
|
||||
GO2u(mul, 32, pmuludq, a128_32, b128_32)
|
||||
GO2(madd, 16, pmaddwd, a128_16, b128_16)
|
||||
GO2u(sad, 8, psadbw, a128_8, b128_8)
|
||||
GO2(sub, 8, psubb, a128_8, b128_8)
|
||||
GO2(sub, 16, psubw, a128_16, b128_16)
|
||||
GO2(sub, 32, psubd, a128_32, b128_32)
|
||||
GO2(sub, 64, psubq, a128_64, b128_64)
|
||||
GO2(add, 8, paddb, a128_8, b128_8)
|
||||
GO2(add, 16, paddw, a128_16, b128_16)
|
||||
GO2(add, 32, paddd, a128_32, b128_32)
|
||||
GO2ps(movehl, pmovhlps, a128_ps, b128_ps)
|
||||
GO2ps(unpacklo, unpcklps, a128_ps, b128_ps)
|
||||
GO2ps(unpackhi, unpckhps, a128_ps, b128_ps)
|
||||
GO2ps(movelh, pmovhps, a128_ps, b128_ps)
|
||||
GO1ps(sqrt, psqrtps, a128_ps)
|
||||
GO1ps(sqrt, psqrtps, b128_ps)
|
||||
GO1ps(sqrt, psqrtps, c128_ps)
|
||||
GO1ps(sqrt, psqrtps, d128_ps)
|
||||
//GO1ps(rsqrt, prsqrtps, a128_ps) // difference in precision
|
||||
//GO1ps(rsqrt, prsqrtps, b128_ps) // same
|
||||
//GO1ps(rsqrt, prsqrtps, c128_ps) // same
|
||||
//GO1ps(rsqrt, prsqrtps, d128_ps) // difference in the handling of NAN, (-)0, and INF in Dynarec
|
||||
//GO1ps(rcp, prcpps, a128_ps) // difference in precision
|
||||
//GO1ps(rcp, prcpps, b128_ps) // difference in precision
|
||||
//GO1ps(rcp, prcpps, c128_ps) // difference in precision
|
||||
GO1ps(rcp, prcpps, d128_ps)
|
||||
MULITGO2ps(and, andps)
|
||||
MULITGO2ps(andnot, andnps)
|
||||
MULITGO2ps(or, orps)
|
||||
MULITGO2ps(xor, xorps)
|
||||
MULITGO2ps(add, addps)
|
||||
MULITGO2ps(mul, mulps)
|
||||
MULITGO2ps(sub, subps)
|
||||
MULITGO2ps(min, minps)
|
||||
MULITGO2ps(div, divps)
|
||||
MULITGO2ps(max, maxps)
|
||||
// MULITGO2Cps(cmp, cmpps, 0) // use avx for some reason
|
||||
MULITGO2Cps(shuffle, shufps, 0)
|
||||
MULITGO2Cps(shuffle, shufps, 0x15)
|
||||
MULITGO2Cps(shuffle, shufps, 0xff)
|
||||
MULITGO2Cps(shuffle, shufps, 0x02)
|
||||
MULTIGO2sd(sqrt, sqrtsd)
|
||||
MULTIGO2sd(add, addsd)
|
||||
MULTIGO2sd(mul, mulsd)
|
||||
MULTIGO2sd(sub, subsd)
|
||||
MULTIGO2sd(min, minsd)
|
||||
MULTIGO2sd(div, divsd)
|
||||
MULTIGO2sd(max, maxsd)
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user