// box64/src/dynarec/arm64/dynarec_arm64_f0.c
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <errno.h>
#include "debug.h"
#include "box64context.h"
#include "box64cpu.h"
#include "emu/x64emu_private.h"
#include "x64emu.h"
#include "box64stack.h"
#include "callback.h"
#include "emu/x64run_private.h"
#include "x64trace.h"
#include "dynarec_native.h"
#include "arm64_printer.h"
#include "dynarec_arm64_private.h"
#include "../dynarec_helper.h"
#include "dynarec_arm64_functions.h"
uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog)
{
(void)ip; (void)need_epilog;
uint8_t opcode = F8;
uint8_t nextop;
uint8_t gd, ed, u8;
uint8_t wback, wb1, wb2, eb1, eb2, gb1, gb2;
int32_t i32;
int64_t i64, j64;
int64_t fixedaddress;
int unscaled, mask;
MAYUSE(eb1);
MAYUSE(eb2);
MAYUSE(gb1);
MAYUSE(gb2);
MAYUSE(wb1);
MAYUSE(wb2);
MAYUSE(j64);
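// Consume any stray F2/F3 (REPNE/REP) prefixes, keeping only the last one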
while((opcode==0xF2) || (opcode==0xF3)) {
rep = opcode-0xF1;
opcode = F8;
}
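// Segment-override prefixes (26/2E/36/3E) are ignored here and simply skipped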
while((opcode==0x36) || (opcode==0x2e) || (opcode==0x3E) || (opcode==0x26)) {
opcode = F8;
}
GETREX();
//SKIPTEST(x1); // DYNAREC_TEST doesn't work, by nature, on atomic operations
switch(opcode) {
case 0x00:
INST_NAME("LOCK ADD Eb, Gb");
SETFLAGS(X_ALL, SF_SET_PENDING);
nextop = F8;
GETGB(x2);
if (MODREG) {
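// Register operand: no memory access, so LOCK is a no-op; extract the byte, add, re-insert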
if(rex.rex) {
wback = TO_NAT((nextop & 0x07) + (rex.b << 3));
wb2 = 0;
} else {
wback = (nextop&7);
wb2 = (wback>>2);
wback = TO_NAT(wback & 3);
}
UBFXw(x1, wback, wb2*8, 8);
emit_add8(dyn, ninst, x1, x2, x4, x3);
BFIx(wback, x1, wb2*8, 8);
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
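// With LSE atomics, LDADDALB adds and returns the old byte for flag computation;
// STADDLB is enough when flags are not needed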
if(cpuext.atomics) {
UFLAG_IF {
LDADDALB(x2, x1, wback);
emit_add8(dyn, ninst, x1, x2, x4, x5);
} else {
STADDLB(x2, wback);
}
} else {
MARKLOCK;
LDAXRB(x1, wback);
emit_add8(dyn, ninst, x1, x2, x4, x5);
STLXRB(x4, x1, wback);
CBNZx_MARKLOCK(x4);
}
}
break;
case 0x01:
INST_NAME("LOCK ADD Ed, Gd");
SETFLAGS(X_ALL, SF_SET_PENDING);
nextop = F8;
GETGD;
if(MODREG) {
ed = TO_NAT((nextop & 7) + (rex.b << 3));
emit_add32(dyn, ninst, rex, ed, gd, x3, x4);
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
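// Unless unaligned atomics are safe, branch to the byte-locked fallback when the
// access crosses a 16-byte granule (uscat) or is not naturally aligned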
if(!ALIGNED_ATOMICxw) {
if(cpuext.uscat) {
ANDx_mask(x1, wback, 1, 0, 3); // mask = F
CMPSw_U12(x1, 16-(1<<(2+rex.w)));
B_MARK(cGT);
} else {
TSTx_mask(wback, 1, 0, 1+rex.w); // mask=3 or 7
B_MARK(cNE);
}
}
if(cpuext.atomics) {
UFLAG_IF {
LDADDALxw(gd, x1, wback);
} else {
STADDLxw(gd, wback);
}
} else {
MARKLOCK;
LDAXRxw(x1, wback);
ADDxw_REG(x4, x1, gd);
STLXRxw(x3, x4, wback);
CBNZx_MARKLOCK(x3);
}
if(!ALIGNED_ATOMICxw) {
B_MARK2_nocond;
MARK; // unaligned fallback: lock only the first byte with an exclusive monitor
LDRxw_U12(x1, wback, 0);
LDAXRB(x4, wback);
SUBxw_UXTB(x4, x4, x1); // subtract the first byte only, to check it did not change
CBNZw_MARK(x4); // jump if different
ADDxw_REG(x4, x1, gd);
STLXRB(x3, x4, wback);
CBNZx_MARK(x3);
STRxw_U12(x4, wback, 0); // put the whole value
}
MARK2;
UFLAG_IF {
emit_add32(dyn, ninst, rex, x1, gd, x3, x4);
}
}
break;
case 0x08:
INST_NAME("LOCK OR Eb, Gb");
SETFLAGS(X_ALL, SF_SET_PENDING);
nextop = F8;
GETGB(x2);
if (MODREG) {
if(rex.rex) {
wback = TO_NAT((nextop & 0x07) + (rex.b << 3));
wb2 = 0;
} else {
wback = (nextop&7);
wb2 = (wback>>2);
wback = TO_NAT(wback & 3);
}
UBFXw(x1, wback, wb2*8, 8);
emit_or8(dyn, ninst, x1, x2, x4, x3);
BFIx(wback, x1, wb2*8, 8);
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
if(cpuext.atomics) {
LDSETALB(x2, x1, wback);
UFLAG_IF {
emit_or8(dyn, ninst, x1, x2, x4, x5);
}
} else {
MARKLOCK;
LDAXRB(x1, wback);
emit_or8(dyn, ninst, x1, x2, x4, x5);
STLXRB(x4, x1, wback);
CBNZx_MARKLOCK(x4);
}
}
break;
case 0x09:
INST_NAME("LOCK OR Ed, Gd");
SETFLAGS(X_ALL, SF_SET_PENDING);
nextop = F8;
GETGD;
if(MODREG) {
ed = TO_NAT((nextop & 7) + (rex.b << 3));
emit_or32(dyn, ninst, rex, ed, gd, x3, x4);
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
if(cpuext.atomics) {
LDSETALxw(gd, x1, wback);
UFLAG_IF {
emit_or32(dyn, ninst, rex, x1, gd, x3, x4);
}
} else {
MARKLOCK;
LDAXRxw(x1, wback);
emit_or32(dyn, ninst, rex, x1, gd, x3, x4);
STLXRxw(x3, x1, wback);
CBNZx_MARKLOCK(x3);
}
}
break;
case 0x0F:
nextop = F8;
switch(nextop) {
case 0xAB:
INST_NAME("LOCK BTS Ed, Gd");
if(!BOX64ENV(dynarec_safeflags)) {
SETFLAGS(X_ALL&~X_ZF, SF_SUBSET);
} else {
SETFLAGS(X_CF, SF_SUBSET);
}
nextop = F8;
GETGD;
if(MODREG) {
ed = TO_NAT((nextop & 7) + (rex.b << 3));
wback = 0;
if(rex.w) {
ANDx_mask(x2, gd, 1, 0, 0b00101); //mask=0x000000000000003f
} else {
ANDw_mask(x2, gd, 0, 0b00100); //mask=0x00000001f
}
LSRxw_REG(x4, ed, x2);
if(rex.w) {
ANDSx_mask(x4, x4, 1, 0, 0); //mask=1
} else {
ANDSw_mask(x4, x4, 0, 0); //mask=1
}
IFX(X_CF) {
BFIw(xFlags, x4, F_CF, 1);
}
MOV32w(x4, 1);
LSLxw_REG(x4, x4, x2);
ORRxw_REG(ed, ed, x4);
} else {
// Will fetch only 1 byte, to avoid alignment issues
ANDw_mask(x2, gd, 0, 0b00010); //mask=0x000000007
addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
ASRxw(x1, gd, 3); // r1 = (gd>>3)
if(!rex.w && !rex.is32bits) {SXTWx(x1, x1);}
ADDz_REG_LSL(x3, wback, x1, 0); //(&ed)+=r1;
ed = x1;
wback = x3;
MOV32w(x5, 1);
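// OR the single-bit byte mask into memory; the old byte that comes back gives CF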
if(cpuext.atomics) {
LSLw_REG(x4, x5, x2);
LDSETALB(x4, x4, wback);
IFX(X_CF) {
LSRw_REG(x4, x4, x2);
BFIw(xFlags, x4, F_CF, 1);
}
} else {
MARKLOCK;
LDAXRB(ed, wback);
LSRw_REG(x4, ed, x2);
IFX(X_CF) {
BFIw(xFlags, x4, F_CF, 1);
}
LSLw_REG(x4, x5, x2);
ORRw_REG(ed, ed, x4);
STLXRB(x4, ed, wback);
CBNZw_MARKLOCK(x4);
}
}
break;
case 0xB0:
switch(rep) {
case 0:
INST_NAME("LOCK CMPXCHG Eb, Gb");
SETFLAGS(X_ALL, SF_SET_PENDING);
nextop = F8;
GETGB(x1);
UBFXx(x6, xRAX, 0, 8);
if(MODREG) {
if(rex.rex) {
wback = TO_NAT((nextop & 7) + (rex.b << 3));
wb2 = 0;
} else {
wback = (nextop&7);
wb2 = (wback>>2)*8;
wback = TO_NAT(wback & 3);
}
UBFXx(x2, wback, wb2, 8);
wb1 = 0;
ed = x2;
UFLAG_IF {emit_cmp8(dyn, ninst, x6, ed, x3, x4, x5);}
SUBxw_REG(x6, x6, x2);
CBNZxw_MARK2(x6);
BFIx(wback, gd, wb2, 8);
MARK2;
BFIx(xRAX, x2, 0, 8);
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
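// CASALB compares the byte at [wback] with AL (x6) and stores gd on match;
// x6 receives the old memory value either way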
if(cpuext.atomics) {
UFLAG_IF {
MOVw_REG(x2, x6);
CASALB(x6, gd, wback);
emit_cmp8(dyn, ninst, x2, x6, x3, x4, x5);
} else {
CASALB(x6, gd, wback);
}
BFIx(xRAX, x6, 0, 8);
} else {
MARKLOCK;
LDAXRB(x2, wback);
CMPSxw_REG(x6, x2);
B_MARK(cNE);
// EAX == Ed
STLXRB(x4, gd, wback);
CBNZx_MARKLOCK(x4);
// done
MARK;
UFLAG_IF {emit_cmp8(dyn, ninst, x6, x2, x3, x4, x5);}
BFIx(xRAX, x2, 0, 8);
}
}
break;
default:
DEFAULT;
}
break;
case 0xB1:
switch(rep) {
case 0:
INST_NAME("LOCK CMPXCHG Ed, Gd");
SETFLAGS(X_ALL, SF_SET_PENDING);
nextop = F8;
GETGD;
if(MODREG) {
ed = TO_NAT((nextop & 7) + (rex.b << 3));
wback = 0;
UFLAG_IF {
emit_cmp32(dyn, ninst, rex, xRAX, ed, x3, x4, x5);
} else {
CMPSxw_REG(xRAX, ed);
}
MOVxw_REG(x1, ed); // save value
Bcond(cNE, 4 + (rex.w ? 4 : 8));
MOVxw_REG(ed, gd);
if (!rex.w) { B_NEXT_nocond; }
MOVxw_REG(xRAX, x1);
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
UFLAG_IF { MOVxw_REG(x6, xRAX); }
if(!ALIGNED_ATOMICxw) {
if(cpuext.uscat) {
ANDx_mask(x1, wback, 1, 0, 3); // mask = F
CMPSw_U12(x1, 16-(1<<(2+rex.w)));
B_MARK3(cGT);
} else {
TSTx_mask(wback, 1, 0, 1+rex.w); // mask=3 or 7
B_MARK3(cNE);
}
}
// Aligned version
// disabling use of atomics for now, as it seems to make (at least)
// HorizonZeroDawn and Cyberpunk2077 (both from GoG) unstable
// but why?!
if (rex.w /* RAX should NOT be zero-upped if equal */ && cpuext.atomics && 0) {
UFLAG_IF {
MOVxw_REG(x1, xRAX);
CASALxw(x1, gd, wback);
MOVxw_REG(xRAX, x1);
if (!ALIGNED_ATOMICxw) {
B_MARK_nocond;
}
} else {
CASALxw(xRAX, gd, wback);
if (!ALIGNED_ATOMICxw) {
B_NEXT_nocond;
}
}
} else {
MARKLOCK;
LDAXRxw(x1, wback);
CMPSxw_REG(xRAX, x1);
Bcond(cNE, 4 + (rex.w ? 8 : 12));
// EAX == Ed
STLXRxw(x4, gd, wback);
CBNZx_MARKLOCK(x4);
// done
if (!rex.w) { B_MARK_nocond; }
MOVxw_REG(xRAX, x1);
if (!ALIGNED_ATOMICxw) {
B_MARK_nocond;
}
}
if(!ALIGNED_ATOMICxw) {
// Unaligned version
MARK3;
LDRxw_U12(x1, wback, 0);
LDAXRB(x3, wback); // dummy read, to arm the write...
SUBxw_UXTB(x3, x3, x1);
CBNZw_MARK3(x3);
CMPSxw_REG(xRAX, x1);
Bcond(cNE, 4 + (rex.w ? 12 : 16));
// EAX == Ed
STLXRB(x4, gd, wback);
CBNZx_MARK3(x4);
STRxw_U12(gd, wback, 0);
if (!rex.w) { B_MARK_nocond; }
MOVxw_REG(xRAX, x1);
}
MARK;
UFLAG_IF {
emit_cmp32(dyn, ninst, rex, x6, x1, x3, x4, x5);
}
}
break;
default:
DEFAULT;
}
break;
case 0xB3:
INST_NAME("LOCK BTR Ed, Gd");
if(!BOX64ENV(dynarec_safeflags)) {
SETFLAGS(X_ALL&~X_ZF, SF_SUBSET);
} else {
SETFLAGS(X_CF, SF_SUBSET);
}
nextop = F8;
GETGD;
if(MODREG) {
ed = TO_NAT((nextop & 7) + (rex.b << 3));
wback = 0;
if(rex.w) {
ANDx_mask(x2, gd, 1, 0, 0b00101); //mask=0x000000000000003f
} else {
ANDw_mask(x2, gd, 0, 0b00100); //mask=0x00000001f
}
IFX(X_CF) {
LSRxw_REG(x4, ed, x2);
BFIw(xFlags, x4, F_CF, 1);
}
MOV32w(x4, 1);
LSLxw_REG(x4, x4, x2);
BICxw_REG(ed, ed, x4);
} else {
// Will fetch only 1 byte, to avoid alignment issues
ANDw_mask(x2, gd, 0, 0b00010); //mask=0x000000007
addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
ASRx(x1, gd, 3); // r1 = (gd>>3); there might be an issue for negative 32-bit values here
if(!rex.w && !rex.is32bits) {SXTWx(x1, x1);}
ADDz_REG_LSL(x3, wback, x1, 0); //(&ed)+=r1;
ed = x1;
wback = x3;
MOV32w(x5, 1);
if(cpuext.atomics) {
LSLw_REG(x4, x5, x2);
LDCLRALB(x4, x4, wback);
IFX(X_CF) {
LSRw_REG(x4, x4, x2);
BFIw(xFlags, x4, F_CF, 1);
}
} else {
MARKLOCK;
LDAXRB(ed, wback);
IFX(X_CF) {
LSRw_REG(x4, ed, x2);
BFIw(xFlags, x4, F_CF, 1);
}
LSLw_REG(x4, x5, x2);
BICw_REG(ed, ed, x4);
STLXRB(x4, ed, wback);
CBNZw_MARKLOCK(x4);
}
}
break;
case 0xBA:
nextop = F8;
switch((nextop>>3)&7) {
case 4:
INST_NAME("LOCK BT Ed, Ib");
if(!BOX64ENV(dynarec_safeflags)) {
SETFLAGS(X_ALL&~X_ZF, SF_SUBSET);
} else {
SETFLAGS(X_CF, SF_SUBSET);
}
gd = x2;
if(MODREG) {
ed = TO_NAT((nextop & 7) + (rex.b << 3));
u8 = F8;
u8&=rex.w?0x3f:0x1f;
IFX(X_CF) {
BFXILxw(xFlags, ed, u8, 1); // inject 1 bit from u8 to F_CF (i.e. pos 0)
}
} else {
// Will fetch only 1 byte, to avoid alignment issues
addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 1);
u8 = F8;
if(u8>>3) {
ADDx_U12(x3, wback, u8>>3);
wback = x3;
}
MARKLOCK;
LDAXRB(x1, wback);
ed = x1;
wback = x3;
IFX(X_CF) {
BFXILxw(xFlags, x1, u8&7, 1); // inject 1 bit from u8 to F_CF (i.e. pos 0)
}
}
break;
case 5:
INST_NAME("LOCK BTS Ed, Ib");
if(!BOX64ENV(dynarec_safeflags)) {
SETFLAGS(X_ALL&~X_ZF, SF_SUBSET);
} else {
SETFLAGS(X_CF, SF_SUBSET);
}
if(MODREG) {
ed = TO_NAT((nextop & 7) + (rex.b << 3));
wback = 0;
u8 = F8;
u8&=(rex.w?0x3f:0x1f);
IFX(X_CF) {
BFXILxw(xFlags, ed, u8, 1); // inject 1 bit from u8 to F_CF (i.e. pos 0)
}
mask = convert_bitmask_xw(1LL<<u8);
ORRxw_mask(ed, ed, (mask>>12)&1, mask&0x3F, (mask>>6)&0x3F);
} else {
// Will fetch only 1 byte, to avoid alignment issues
addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 1);
u8 = F8;
if(u8>>3) {
ADDx_U12(x3, wback, u8>>3);
wback = x3;
}
ed = x1;
if(cpuext.atomics) {
MOV32w(x4, 1<<(u8&7));
LDSETB(x4, x4, wback);
IFX(X_CF) {
BFXILw(xFlags, x4, u8&7, 1); // inject 1 bit from u8 to F_CF (i.e. pos 0)
}
} else {
MARKLOCK;
LDAXRB(ed, wback);
IFX(X_CF) {
BFXILw(xFlags, ed, u8&7, 1); // inject 1 bit from u8 to F_CF (i.e. pos 0)
}
mask = convert_bitmask_xw(1LL<<(u8&7));
ORRxw_mask(ed, ed, (mask>>12)&1, mask&0x3F, (mask>>6)&0x3F);
STLXRB(x4, ed, wback);
CBNZw_MARKLOCK(x4);
}
}
break;
case 6:
INST_NAME("LOCK BTR Ed, Ib");
if(!BOX64ENV(dynarec_safeflags)) {
SETFLAGS(X_ALL&~X_ZF, SF_SUBSET);
} else {
SETFLAGS(X_CF, SF_SUBSET);
}
if(MODREG) {
ed = TO_NAT((nextop & 7) + (rex.b << 3));
wback = 0;
u8 = F8;
u8&=(rex.w?0x3f:0x1f);
IFX(X_CF) {
BFXILxw(xFlags, ed, u8, 1); // inject 1 bit from u8 to F_CF (i.e. pos 0)
}
BFCxw(ed, u8, 1);
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 1);
u8 = F8;
if(u8>>3) {
ADDx_U12(x3, wback, u8>>3);
wback = x3;
}
ed = x1;
if(cpuext.atomics) {
MOV32w(x4, 1<<(u8&7));
LDCLRALB(x4, x4, wback);
IFX(X_CF) {
BFXILw(xFlags, x4, u8&7, 1); // inject 1 bit from u8 to F_CF (i.e. pos 0)
}
} else {
MARKLOCK;
LDAXRB(ed, wback);
IFX(X_CF) {
BFXILw(xFlags, ed, u8&7, 1); // inject 1 bit from u8 to F_CF (i.e. pos 0)
}
BFCw(ed, u8&7, 1);
STLXRB(x4, ed, wback);
CBNZw_MARKLOCK(x4);
}
}
break;
case 7:
INST_NAME("LOCK BTC Ed, Ib");
if(!BOX64ENV(dynarec_safeflags)) {
SETFLAGS(X_ALL&~X_ZF, SF_SUBSET);
} else {
SETFLAGS(X_CF, SF_SUBSET);
}
if(MODREG) {
ed = TO_NAT((nextop & 7) + (rex.b << 3));
wback = 0;
u8 = F8;
u8&=(rex.w?0x3f:0x1f);
IFX(X_CF) {
BFXILxw(xFlags, ed, u8, 1); // inject 1 bit from u8 to F_CF (i.e. pos 0)
}
MOV32w(x4, 1);
EORxw_REG_LSL(ed, ed, x4, u8);
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 1);
u8 = F8;
if(u8>>3) {
ADDx_U12(x3, wback, u8>>3);
wback = x3;
}
ed = x1;
if(cpuext.atomics) {
MOV32w(x4, 1<<(u8&7));
LDEORALB(x4, x4, wback);
IFX(X_CF) {
BFXILw(xFlags, x4, u8&7, 1); // inject 1 bit from u8 to F_CF (i.e. pos 0)
}
} else {
MARKLOCK;
LDAXRB(ed, wback);
IFX(X_CF) {
BFXILw(xFlags, ed, u8&7, 1); // inject 1 bit from u8 to F_CF (i.e. pos 0)
}
mask = convert_bitmask_xw(1LL<<(u8&7));
EORxw_mask(ed, ed, (mask>>12)&1, mask&0x3F, (mask>>6)&0x3F);
STLXRB(x4, ed, wback);
CBNZw_MARKLOCK(x4);
}
}
break;
default:
DEFAULT;
}
break;
case 0xBB:
INST_NAME("LOCK BTC Ed, Gd");
if(!BOX64ENV(dynarec_safeflags)) {
SETFLAGS(X_ALL&~X_ZF, SF_SUBSET);
} else {
SETFLAGS(X_CF, SF_SUBSET);
}
nextop = F8;
GETGD;
if(MODREG) {
ed = TO_NAT((nextop & 7) + (rex.b << 3));
wback = 0;
if(rex.w) {
ANDx_mask(x2, gd, 1, 0, 0b00101); //mask=0x000000000000003f
} else {
ANDw_mask(x2, gd, 0, 0b00100); //mask=0x00000001f
}
IFX(X_CF) {
LSRxw_REG(x4, ed, x2);
BFIw(xFlags, x4, F_CF, 1);
}
MOV32w(x4, 1);
LSLxw_REG(x4, x4, x2);
EORxw_REG(ed, ed, x4);
} else {
// Will fetch only 1 byte, to avoid alignment issues
ANDw_mask(x2, gd, 0, 0b00010); //mask=0x000000007
addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
ASRx(x1, gd, 3); // r1 = (gd>>3); there might be an issue for negative 32-bit values here
if(!rex.w && !rex.is32bits) {SXTWx(x1, x1);}
ADDz_REG_LSL(x3, wback, x1, 0); //(&ed)+=r1;
ed = x1;
wback = x3;
MOV32w(x5, 1);
if(cpuext.atomics) {
LSLw_REG(x4, x5, x2);
LDEORALB(x4, x4, wback);
IFX(X_CF) {
LSRw_REG(x4, x4, x2);
BFIw(xFlags, x4, F_CF, 1);
}
} else {
MARKLOCK;
LDAXRB(ed, wback);
IFX(X_CF) {
LSRw_REG(x4, ed, x2);
BFIw(xFlags, x4, F_CF, 1);
}
LSLw_REG(x4, x5, x2);
EORw_REG(ed, ed, x4);
STLXRB(x4, ed, wback);
CBNZw_MARKLOCK(x4);
}
}
break;
case 0xC0:
switch(rep) {
case 0:
INST_NAME("LOCK XADD Eb, Gb");
SETFLAGS(X_ALL, SF_SET_PENDING);
nextop = F8;
GETGB(x1);
if(MODREG) {
ed = TO_NAT((nextop & 7) + (rex.b << 3));
GETEB(x2, 0);
gd = x2; ed = x1; // swap gd/ed
emit_add8(dyn, ninst, x1, x2, x4, x5);
GBBACK; // gb gets x2 (old ed)
EBBACK; // eb gets x1 (sum)
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
if(cpuext.atomics) {
UFLAG_IF {
MOVxw_REG(x3, gd);
LDADDALB(x3, gd, wback);
emit_add8(dyn, ninst, x3, gd, x4, x5);
} else {
LDADDALB(gd, gd, wback);
}
GBBACK;
} else {
MARKLOCK;
LDAXRB(x5, wback);
ADDw_REG(x4, x5, gd);
STLXRB(x3, x4, wback);
CBNZx_MARKLOCK(x3);
IFX(X_ALL|X_PEND) {
MOVxw_REG(x2, x5);
emit_add8(dyn, ninst, x2, gd, x3, x4);
}
BFIz(gb1, x5, gb2, 8);
}
}
break;
default:
DEFAULT;
}
break;
case 0xC1:
switch(rep) {
case 0:
INST_NAME("LOCK XADD Ed, Gd");
SETFLAGS(X_ALL, SF_SET_PENDING);
nextop = F8;
GETGD;
if(MODREG) {
ed = TO_NAT((nextop & 7) + (rex.b << 3));
MOVxw_REG(x1, ed);
MOVxw_REG(ed, gd);
MOVxw_REG(gd, x1);
emit_add32(dyn, ninst, rex, ed, gd, x3, x4);
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
if(!ALIGNED_ATOMICxw) {
if(cpuext.uscat) {
ANDx_mask(x1, wback, 1, 0, 3); // mask = F
CMPSw_U12(x1, 16-(1<<(2+rex.w)));
B_MARK(cGT);
} else {
TSTx_mask(wback, 1, 0, 1+rex.w); // mask=3 or 7
B_MARK(cNE); // unaligned
}
}
if(cpuext.atomics) {
UFLAG_IF {
LDADDALxw(gd, x1, wback);
} else {
LDADDALxw(gd, gd, wback);
}
} else {
MARKLOCK;
LDAXRxw(x1, wback);
ADDxw_REG(x4, x1, gd);
STLXRxw(x3, x4, wback);
CBNZx_MARKLOCK(x3);
}
if(!ALIGNED_ATOMICxw) {
UFLAG_IF {
B_MARK2_nocond;
} else {
if(!cpuext.atomics) MOVxw_REG(gd, x1);
B_NEXT_nocond;
}
MARK;
LDRxw_U12(x1, wback, 0);
LDAXRB(x4, wback);
SUBxw_UXTB(x4, x4, x1);
CBNZw_MARK(x4);
ADDxw_REG(x4, x1, gd);
STLXRB(x3, x4, wback);
CBNZx_MARK(x3);
STRxw_U12(x4, wback, 0);
}
MARK2;
UFLAG_IF {
MOVxw_REG(x3, x1);
emit_add32(dyn, ninst, rex, x3, gd, x4, x5);
MOVxw_REG(gd, x1);
} else if(!cpuext.atomics || !ALIGNED_ATOMICxw) {
MOVxw_REG(gd, x1);
}
}
break;
default:
DEFAULT;
}
break;
case 0xC7:
// rep has no impact here
nextop = F8;
switch((nextop>>3)&7) {
case 1:
if (rex.w) {
INST_NAME("LOCK CMPXCHG16B Gq, Eq");
} else {
INST_NAME("LOCK CMPXCHG8B Gq, Eq");
}
SETFLAGS(X_ZF, SF_SUBSET);
addr = geted(dyn, addr, ninst, nextop, &wback, x1, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
if(!ALIGNED_ATOMICxw) {
if(cpuext.uscat) {
if(rex.w) {
TSTx_mask(wback, 1, 0, 3);
B_MARK2(cNE);
} else {
ANDx_mask(x2, wback, 1, 0, 3); // mask = F
CMPSw_U12(x2, 8);
B_MARK2(cGT);
}
} else {
TSTx_mask(wback, 1, 0, 2+rex.w); // mask=7 or F
B_MARK2(cNE); // unaligned
}
}
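// CASPALxw compares the RAX:RDX pair with memory and stores RBX:RCX on match,
// as a single atomic operation; ZF comes from the comparison result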
if(cpuext.atomics) {
MOVx_REG(x2, xRAX);
MOVx_REG(x3, xRDX);
MOVx_REG(x4, xRBX);
MOVx_REG(x5, xRCX);
CASPALxw(x2, x4, wback);
UFLAG_IF {
CMPSxw_REG(x2, xRAX);
CCMPxw(x3, xRDX, 0, cEQ);
IFNATIVE(NF_EQ) {} else {CSETw(x1, cEQ);}
}
MOVx_REG(xRAX, x2);
MOVx_REG(xRDX, x3);
if(!ALIGNED_ATOMICxw) {
B_MARK3_nocond;
}
} else {
MARKLOCK;
LDAXPxw(x2, x3, wback);
CMPSxw_REG(xRAX, x2);
CCMPxw(xRDX, x3, 0, cEQ);
B_MARK(cNE); // EAX!=Ed[0] || EDX!=Ed[1]
STLXPxw(x4, xRBX, xRCX, wback);
CBNZx_MARKLOCK(x4);
UFLAG_IF {
IFNATIVE(NF_EQ) {} else {MOV32w(x1, 1);}
}
B_MARK3_nocond;
MARK;
STLXPxw(x4, x2, x3, wback); // write back, to be sure it was "atomic"
CBNZx_MARKLOCK(x4);
MOVxw_REG(xRAX, x2);
MOVxw_REG(xRDX, x3);
UFLAG_IF {
IFNATIVE(NF_EQ) {} else {MOV32w(x1, 0);}
}
if(!ALIGNED_ATOMICxw) {
B_MARK3_nocond;
}
}
if(!ALIGNED_ATOMICxw) {
MARK2;
LDPxw_S7_offset(x2, x3, wback, 0);
LDAXRB(x5, wback);
SUBxw_UXTB(x5, x5, x2);
CBNZw_MARK2(x5);
CMPSxw_REG(xRAX, x2);
CCMPxw(xRDX, x3, 0, cEQ);
B_MARKSEG(cNE); // EAX!=Ed[0] || EDX!=Ed[1]
STLXRB(x4, xRBX, wback);
CBNZx_MARK2(x4);
STPxw_S7_offset(xRBX, xRCX, wback, 0);
UFLAG_IF {
IFNATIVE(NF_EQ) {} else {MOV32w(x1, 1);}
}
B_MARK3_nocond;
MARKSEG;
STLXRB(x4, x5, wback); //write back
CBNZx_MARK2(x4);
MOVxw_REG(xRAX, x2);
MOVxw_REG(xRDX, x3);
UFLAG_IF {
IFNATIVE(NF_EQ) {} else {MOV32w(x1, 0);}
}
}
MARK3;
UFLAG_IF {
IFNATIVE(NF_EQ) {} else {BFIw(xFlags, x1, F_ZF, 1);}
}
break;
default:
DEFAULT;
}
break;
default:
DEFAULT;
}
break;
case 0x10:
INST_NAME("LOCK ADC Eb, Gb");
READFLAGS(X_CF);
SETFLAGS(X_ALL, SF_SET_PENDING);
nextop = F8;
GETGB(x2);
if (MODREG) {
if(rex.rex) {
wback = TO_NAT((nextop & 0x07) + (rex.b << 3));
wb2 = 0;
} else {
wback = (nextop&7);
wb2 = (wback>>2);
wback = TO_NAT(wback & 3);
}
UBFXw(x1, wback, wb2*8, 8);
emit_adc8(dyn, ninst, x1, x2, x4, x5);
BFIx(wback, x1, wb2*8, 8);
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
MARKLOCK;
LDAXRB(x1, wback);
emit_adc8(dyn, ninst, x1, x2, x4, x5);
STLXRB(x4, x1, wback);
CBNZx_MARKLOCK(x4);
}
break;
case 0x11:
INST_NAME("LOCK ADC Ed, Gd");
READFLAGS(X_CF);
SETFLAGS(X_ALL, SF_SET_PENDING);
nextop = F8;
GETGD;
if(MODREG) {
ed = TO_NAT((nextop & 7) + (rex.b << 3));
emit_adc32(dyn, ninst, rex, ed, gd, x3, x4);
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
MARKLOCK;
LDAXRxw(x1, wback);
emit_adc32(dyn, ninst, rex, x1, gd, x4, x5);
STLXRxw(x4, x1, wback);
CBNZx_MARKLOCK(x4);
}
break;
case 0x20:
INST_NAME("LOCK AND Eb, Gb");
SETFLAGS(X_ALL, SF_SET_PENDING);
nextop = F8;
GETGD;
if(MODREG) {
GETEB(x1, 0);
GETGB(x2);
emit_and8(dyn, ninst, x1, x2, x4, x5);
EBBACK;
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
GETGB(x5);
if(cpuext.atomics) {
MVNxw_REG(x1, gd);
UFLAG_IF {
LDCLRALB(x1, x1, wback);
emit_and8(dyn, ninst, x1, gd, x3, x4);
} else {
STCLRLB(x1, wback);
}
} else {
MARKLOCK;
LDAXRB(x1, wback);
emit_and8(dyn, ninst, x1, gd, x3, x4);
STLXRB(x3, x1, wback);
CBNZx_MARKLOCK(x3);
}
}
break;
case 0x21:
INST_NAME("LOCK AND Ed, Gd");
SETFLAGS(X_ALL, SF_SET_PENDING);
nextop = F8;
GETGD;
if(MODREG) {
ed = TO_NAT((nextop & 7) + (rex.b << 3));
emit_and32(dyn, ninst, rex, ed, gd, x3, x4);
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
if(cpuext.atomics) {
MVNxw_REG(x1, gd);
UFLAG_IF {
LDCLRALxw(x1, x1, wback);
emit_and32(dyn, ninst, rex, x1, gd, x3, x4);
} else {
STCLRLxw(x1, wback);
}
} else {
MARKLOCK;
LDAXRxw(x1, wback);
emit_and32(dyn, ninst, rex, x1, gd, x3, x4);
STLXRxw(x3, x1, wback);
CBNZx_MARKLOCK(x3);
}
}
break;
case 0x29:
INST_NAME("LOCK SUB Ed, Gd");
SETFLAGS(X_ALL, SF_SET_PENDING);
nextop = F8;
GETGD;
if(MODREG) {
ed = TO_NAT((nextop & 7) + (rex.b << 3));
emit_sub32(dyn, ninst, rex, ed, gd, x3, x4);
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
if(!ALIGNED_ATOMICxw) {
if(cpuext.uscat) {
ANDx_mask(x1, wback, 1, 0, 3); // mask = F
CMPSw_U12(x1, 16-(1<<(2+rex.w)));
B_MARK(cGT);
} else {
TSTx_mask(wback, 1, 0, 1+rex.w); // mask=3 or 7
B_MARK(cNE);
}
}
if(cpuext.atomics && 0) { // disabled because 0x80000000 has no negative counterpart
NEGxw_REG(x1, gd);
UFLAG_IF {
LDADDALxw(x1, x1, wback);
} else {
STADDLxw(x1, wback);
}
} else {
MARKLOCK;
LDAXRxw(x1, wback);
SUBxw_REG(x4, x1, gd);
STLXRxw(x3, x4, wback);
CBNZx_MARKLOCK(x3);
}
if(!ALIGNED_ATOMICxw) {
UFLAG_IF {
B_MARK2_nocond;
} else {
B_NEXT_nocond;
}
MARK; // unaligned fallback: lock only the first byte with an exclusive monitor
LDRxw_U12(x1, wback, 0);
LDAXRB(x4, wback);
SUBxw_UXTB(x4, x4, x1);
CBNZw_MARK(x4);
SUBxw_REG(x4, x1, gd);
STLXRB(x3, x4, wback);
CBNZx_MARK(x3);
STRxw_U12(x4, wback, 0); // put the whole value
}
UFLAG_IF {
MARK2;
emit_sub32(dyn, ninst, rex, x1, gd, x3, x4);
}
}
break;
case 0x31:
INST_NAME("LOCK XOR Ed, Gd");
SETFLAGS(X_ALL, SF_SET_PENDING);
nextop = F8;
GETGD;
if (MODREG) {
ed = TO_NAT((nextop & 7) + (rex.b << 3));
emit_xor32(dyn, ninst, rex, ed, gd, x3, x4);
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
if(!ALIGNED_ATOMICxw) {
if(cpuext.uscat) {
ANDx_mask(x1, wback, 1, 0, 3); // mask = F
CMPSw_U12(x1, 16-(1<<(2+rex.w)));
B_MARK(cGT);
} else {
TSTx_mask(wback, 1, 0, 1+rex.w); // mask=3 or 7
B_MARK(cNE);
}
}
if(cpuext.atomics) {
UFLAG_IF {
LDEORALxw(gd, x1, wback);
} else {
STEORLxw(gd, wback);
}
} else {
MARKLOCK;
LDAXRxw(x1, wback);
EORxw_REG(x4, x1, gd);
STLXRxw(x3, x4, wback);
CBNZx_MARKLOCK(x3);
}
if(!ALIGNED_ATOMICxw) {
B_MARK2_nocond;
MARK; // unaligned fallback: lock only the first byte with an exclusive monitor
LDRxw_U12(x1, wback, 0);
LDAXRB(x4, wback);
SUBxw_UXTB(x4, x4, x1);
CBNZw_MARK(x4);
EORxw_REG(x4, x1, gd);
STLXRB(x3, x4, wback);
CBNZx_MARK(x3);
STRxw_U12(x4, wback, 0); // put the whole value
}
MARK2;
UFLAG_IF {
emit_xor32(dyn, ninst, rex, x1, gd, x3, x4);
}
}
break;
case 0x66:
return dynarec64_66F0(dyn, addr, ip, ninst, rex, rep, ok, need_epilog);
case 0x80:
nextop = F8;
switch((nextop>>3)&7) {
case 0: //ADD
INST_NAME("ADD Eb, Ib");
SETFLAGS(X_ALL, SF_SET_PENDING);
if(MODREG) {
GETEB(x1, 1);
u8 = F8;
emit_add8c(dyn, ninst, x1, u8, x2, x4);
wb1 = 0;
EBBACK;
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x5, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 1);
u8 = F8;
wb1 = 1;
if(cpuext.atomics) {
MOV32w(x2, u8);
UFLAG_IF {
LDADDALB(x2, x1, wback);
emit_add8(dyn, ninst, x1, x2, x3, x4);
} else {
STADDB(x2, wback);
}
} else {
MARKLOCK;
LDAXRB(x1, wback);
emit_add8c(dyn, ninst, x1, u8, x2, x4);
STLXRB(x3, x1, wback);
CBNZx_MARKLOCK(x3);
}
}
break;
case 1: //OR
INST_NAME("OR Eb, Ib");
SETFLAGS(X_ALL, SF_SET_PENDING);
if(MODREG) {
GETEB(x1, 1);
u8 = F8;
emit_or8c(dyn, ninst, x1, u8, x2, x4);
wb1 = 0;
EBBACK;
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x5, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 1);
u8 = F8;
wb1 = 1;
if(cpuext.atomics) {
MOV32w(x2, u8);
UFLAG_IF {
LDSETALB(x2, x1, wback);
emit_or8(dyn, ninst, x1, x2, x3, x4);
} else {
STSETLB(x2, wback);
}
} else {
MARKLOCK;
LDAXRB(x1, wback);
emit_or8c(dyn, ninst, x1, u8, x2, x4);
STLXRB(x3, x1, wback);
CBNZx_MARKLOCK(x3);
}
}
break;
case 2: //ADC
INST_NAME("ADC Eb, Ib");
READFLAGS(X_CF);
SETFLAGS(X_ALL, SF_SET_PENDING);
if(MODREG) {
GETEB(x1, 1);
u8 = F8;
emit_adc8c(dyn, ninst, x1, u8, x2, x4, x5);
wb1 = 0;
EBBACK;
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x5, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 1);
u8 = F8;
wb1 = 1;
MARKLOCK;
LDAXRB(x1, wback);
emit_adc8c(dyn, ninst, x1, u8, x2, x4, x3);
STLXRB(x3, x1, wback);
CBNZx_MARKLOCK(x3);
}
break;
case 3: //SBB
INST_NAME("SBB Eb, Ib");
READFLAGS(X_CF);
SETFLAGS(X_ALL, SF_SET_PENDING);
if(MODREG) {
GETEB(x1, 1);
u8 = F8;
emit_sbb8c(dyn, ninst, x1, u8, x2, x4, x5);
wb1 = 0;
EBBACK;
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x5, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 1);
u8 = F8;
wb1 = 1;
MARKLOCK;
LDAXRB(x1, wback);
emit_sbb8c(dyn, ninst, x1, u8, x2, x4, x3);
STLXRB(x3, x1, wback);
CBNZx_MARKLOCK(x3);
}
break;
case 4: //AND
INST_NAME("AND Eb, Ib");
SETFLAGS(X_ALL, SF_SET_PENDING);
if(MODREG) {
GETEB(x1, 1);
u8 = F8;
emit_and8c(dyn, ninst, x1, u8, x2, x4);
wb1 = 0;
EBBACK;
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x5, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 1);
u8 = F8;
wb1 = 1;
if(cpuext.atomics) {
MOV32w(x2, ~u8);
UFLAG_IF {
LDCLRALB(x2, x1, wback);
emit_and8c(dyn, ninst, x1, u8, x2, x4);
} else {
STCLRLB(x2, wback);
}
} else {
MARKLOCK;
LDAXRB(x1, wback);
emit_and8c(dyn, ninst, x1, u8, x2, x4);
STLXRB(x3, x1, wback);
CBNZx_MARKLOCK(x3);
}
}
break;
case 5: //SUB
INST_NAME("SUB Eb, Ib");
SETFLAGS(X_ALL, SF_SET_PENDING);
if(MODREG) {
GETEB(x1, 1);
u8 = F8;
emit_sub8c(dyn, ninst, x1, u8, x2, x4, x5);
wb1 = 0;
EBBACK;
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x5, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 1);
u8 = F8;
wb1 = 1;
if(cpuext.atomics) {
MOV32w(x2, -u8);
UFLAG_IF {
LDADDALB(x2, x1, wback);
emit_sub8c(dyn, ninst, x1, u8, x2, x4, x3);
} else {
STADDLB(x2, wback);
}
} else {
MARKLOCK;
LDAXRB(x1, wback);
emit_sub8c(dyn, ninst, x1, u8, x2, x4, x3);
STLXRB(x3, x1, wback);
CBNZx_MARKLOCK(x3);
}
}
break;
case 6: //XOR
INST_NAME("XOR Eb, Ib");
SETFLAGS(X_ALL, SF_SET_PENDING);
if(MODREG) {
GETEB(x1, 1);
u8 = F8;
emit_xor8c(dyn, ninst, x1, u8, x2, x4);
wb1 = 0;
EBBACK;
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x5, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 1);
u8 = F8;
wb1 = 1;
if(cpuext.atomics) {
MOV32w(x2, u8);
UFLAG_IF {
LDEORALB(x2, x1, wback);
emit_xor8(dyn, ninst, x1, x2, x3, x4);
} else {
STEORLB(x2, wback);
}
} else {
MARKLOCK;
LDAXRB(x1, wback);
emit_xor8c(dyn, ninst, x1, u8, x2, x4);
STLXRB(x3, x1, wback);
CBNZx_MARKLOCK(x3);
}
}
break;
case 7: //CMP
INST_NAME("CMP Eb, Ib");
SETFLAGS(X_ALL, SF_SET_PENDING);
GETEB(x1, 1);
u8 = F8;
if(u8) {
MOV32w(x2, u8);
emit_cmp8(dyn, ninst, x1, x2, x3, x4, x5);
} else {
emit_cmp8_0(dyn, ninst, x1, x3, x4);
}
break;
default:
DEFAULT;
}
break;
case 0x81:
case 0x83:
nextop = F8;
switch((nextop>>3)&7) {
case 0: //ADD
if(opcode==0x81) {
INST_NAME("LOCK ADD Ed, Id");
} else {
INST_NAME("LOCK ADD Ed, Ib");
}
SETFLAGS(X_ALL, SF_SET_PENDING);
if(MODREG) {
if(opcode==0x81) i64 = F32S; else i64 = F8S;
ed = TO_NAT((nextop & 7) + (rex.b << 3));
MOV64xw(x5, i64);
emit_add32(dyn, ninst, rex, ed, x5, x3, x4);
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, (opcode==0x81)?4:1);
if(opcode==0x81) i64 = F32S; else i64 = F8S;
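// Immediates that don't fit the 12-bit ADD/SUB encoding (|i64| >= 0x1000) are materialized in x5 up front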
if((i64<=-0x1000) || (i64>=0x1000)) {
MOV64xw(x5, i64);
}
if(!ALIGNED_ATOMICxw) {
if(cpuext.uscat) {
ANDx_mask(x1, wback, 1, 0, 3); // mask = F
CMPSw_U12(x1, 16-(1<<(2+rex.w)));
B_MARK(cGT);
} else {
TSTx_mask(wback, 1, 0, 1+rex.w); // mask=3 or 7
B_MARK(cNE);
}
}
if(cpuext.atomics) {
if((i64>-0x1000) && (i64<0x1000)) {
MOV64xw(x5, i64);
}
UFLAG_IF {
LDADDALxw(x5, x1, wback);
} else {
STADDLxw(x5, wback);
}
} else {
MARKLOCK;
LDAXRxw(x1, wback);
if(i64>=0 && i64<0x1000) {
ADDxw_U12(x4, x1, i64);
} else if(i64<0 && i64>-0x1000) {
SUBxw_U12(x4, x1, -i64);
} else {
ADDxw_REG(x4, x1, x5);
}
STLXRxw(x3, x4, wback);
CBNZx_MARKLOCK(x3);
}
if(!ALIGNED_ATOMICxw) {
B_MARK2_nocond;
MARK; // unaligned fallback: lock only the first byte with an exclusive monitor
LDRxw_U12(x1, wback, 0);
LDAXRB(x4, wback);
SUBxw_UXTB(x4, x4, x1);
CBNZw_MARK(x4);
if(i64>=0 && i64<0x1000) {
ADDxw_U12(x4, x1, i64);
} else if(i64<0 && i64>-0x1000) {
SUBxw_U12(x4, x1, -i64);
} else {
ADDxw_REG(x4, x1, x5);
}
STLXRB(x3, x4, wback);
CBNZx_MARK(x3);
STRxw_U12(x4, wback, 0); // put the whole value
}
MARK2;
UFLAG_IF {
if((i64<=-0x1000) || (i64>=0x1000)) {
emit_add32(dyn, ninst, rex, x1, x5, x3, x4);
} else {
emit_add32c(dyn, ninst, rex, x1, i64, x3, x4, x5);
}
}
}
break;
case 1: //OR
if(opcode==0x81) {INST_NAME("LOCK OR Ed, Id");} else {INST_NAME("LOCK OR Ed, Ib");}
SETFLAGS(X_ALL, SF_SET_PENDING);
if(MODREG) {
if(opcode==0x81) i64 = F32S; else i64 = F8S;
ed = TO_NAT((nextop & 7) + (rex.b << 3));
emit_or32c(dyn, ninst, rex, ed, i64, x3, x4);
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, (opcode==0x81)?4:1);
if(opcode==0x81) i64 = F32S; else i64 = F8S;
if(wback==xRSP && !i64) {
// "LOCK OR dword [rsp], 0" is the __faststorefence idiom: a store barrier is enough
DMB_ST();
} else {
if(cpuext.atomics) {
MOV64xw(x5, i64);
UFLAG_IF {
LDSETALxw(x5, x1, wback);
emit_or32(dyn, ninst, rex, x1, x5, x3, x4);
} else {
STSETLxw(x5, wback);
}
} else {
MARKLOCK;
LDAXRxw(x1, wback);
emit_or32c(dyn, ninst, rex, x1, i64, x3, x4);
STLXRxw(x3, x1, wback);
CBNZx_MARKLOCK(x3);
}
}
}
break;
case 2: //ADC
if(opcode==0x81) {INST_NAME("LOCK ADC Ed, Id");} else {INST_NAME("LOCK ADC Ed, Ib");}
READFLAGS(X_CF);
SETFLAGS(X_ALL, SF_SET_PENDING);
if(MODREG) {
if(opcode==0x81) i64 = F32S; else i64 = F8S;
ed = TO_NAT((nextop & 7) + (rex.b << 3));
MOV64xw(x5, i64);
emit_adc32(dyn, ninst, rex, ed, x5, x3, x4);
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, (opcode==0x81)?4:1);
if(opcode==0x81) i64 = F32S; else i64 = F8S;
MOV64xw(x5, i64);
MARKLOCK;
LDAXRxw(x1, wback);
emit_adc32(dyn, ninst, rex, x1, x5, x3, x4);
STLXRxw(x3, x1, wback);
CBNZx_MARKLOCK(x3);
}
break;
case 3: //SBB
if(opcode==0x81) {INST_NAME("LOCK SBB Ed, Id");} else {INST_NAME("LOCK SBB Ed, Ib");}
READFLAGS(X_CF);
SETFLAGS(X_ALL, SF_SET_PENDING);
if(MODREG) {
if(opcode==0x81) i64 = F32S; else i64 = F8S;
ed = TO_NAT((nextop & 7) + (rex.b << 3));
MOV64xw(x5, i64);
emit_sbb32(dyn, ninst, rex, ed, x5, x3, x4);
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, (opcode==0x81)?4:1);
if(opcode==0x81) i64 = F32S; else i64 = F8S;
MOV64xw(x5, i64);
MARKLOCK;
LDAXRxw(x1, wback);
emit_sbb32(dyn, ninst, rex, x1, x5, x3, x4);
STLXRxw(x3, x1, wback);
CBNZx_MARKLOCK(x3);
}
break;
case 4: //AND
if(opcode==0x81) {INST_NAME("LOCK AND Ed, Id");} else {INST_NAME("LOCK AND Ed, Ib");}
SETFLAGS(X_ALL, SF_SET_PENDING);
if(MODREG) {
if(opcode==0x81) i64 = F32S; else i64 = F8S;
ed = TO_NAT((nextop & 7) + (rex.b << 3));
MOV64xw(x5, i64);
emit_and32(dyn, ninst, rex, ed, x5, x3, x4);
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, (opcode==0x81)?4:1);
if(opcode==0x81) i64 = F32S; else i64 = F8S;
if(cpuext.atomics) {
MOV64xw(x5, ~i64);
UFLAG_IF {
LDCLRALxw(x5, x1, wback);
MVNxw_REG(x5, x5);
emit_and32(dyn, ninst, rex, x1, x5, x3, x4);
} else {
STCLRLxw(x5, wback);
}
} else {
MARKLOCK;
LDAXRxw(x1, wback);
emit_and32c(dyn, ninst, rex, x1, i64, x3, x4);
STLXRxw(x3, x1, wback);
CBNZx_MARKLOCK(x3);
}
}
break;
case 5: //SUB
if(opcode==0x81) {INST_NAME("LOCK SUB Ed, Id");} else {INST_NAME("LOCK SUB Ed, Ib");}
SETFLAGS(X_ALL, SF_SET_PENDING);
if(MODREG) {
if(opcode==0x81) i64 = F32S; else i64 = F8S;
ed = TO_NAT((nextop & 7) + (rex.b << 3));
emit_sub32c(dyn, ninst, rex, ed, i64, x3, x4, x5);
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, (opcode==0x81)?4:1);
if(opcode==0x81) i64 = F32S; else i64 = F8S;
if((i64<=-0x1000) || (i64>=0x1000)) {
MOV64xw(x5, i64);
}
if(!ALIGNED_ATOMICxw) {
if(cpuext.uscat) {
ANDx_mask(x1, wback, 1, 0, 3); // mask = F
CMPSw_U12(x1, 16-(1<<(2+rex.w)));
B_MARK(cGT);
} else {
TSTx_mask(wback, 1, 0, 1+rex.w); // mask=3 or 7
B_MARK(cNE);
}
}
if(cpuext.atomics) {
if((i64>-0x1000) && (i64<0x1000)) {
MOV64xw(x5, -i64);
} else {
NEGxw_REG(x5, x5);
}
UFLAG_IF {
LDADDALxw(x5, x1, wback);
if((i64<=-0x1000) || (i64>=0x1000))
NEGxw_REG(x5, x5);
} else {
STADDLxw(x5, wback);
}
} else {
MARKLOCK;
LDAXRxw(x1, wback);
if(i64>=0 && i64<0x1000) {
SUBxw_U12(x4, x1, i64);
} else if(i64<0 && i64>-0x1000) {
ADDxw_U12(x4, x1, -i64);
} else {
SUBxw_REG(x4, x1, x5);
}
STLXRxw(x3, x4, wback);
CBNZx_MARKLOCK(x3);
}
if(!ALIGNED_ATOMICxw) {
UFLAG_IF {
B_MARK2_nocond;
} else {
B_NEXT_nocond;
}
MARK; // unaligned fallback: lock only the first byte with an exclusive monitor
LDRxw_U12(x1, wback, 0);
LDAXRB(x4, wback);
SUBxw_UXTB(x4, x4, x1);
CBNZw_MARK(x4);
if(i64>=0 && i64<0x1000) {
SUBxw_U12(x4, x1, i64);
} else if(i64<0 && i64>-0x1000) {
ADDxw_U12(x4, x1, -i64);
} else {
SUBxw_REG(x4, x1, x5);
}
STLXRB(x3, x4, wback);
CBNZx_MARK(x3);
STRxw_U12(x4, wback, 0); // put the whole value
}
UFLAG_IF {
MARK2;
if((i64<=-0x1000) || (i64>=0x1000)) {
emit_sub32(dyn, ninst, rex, x1, x5, x3, x4);
} else {
emit_sub32c(dyn, ninst, rex, x1, i64, x3, x4, x5);
}
}
}
break;
case 6: //XOR
if(opcode==0x81) {INST_NAME("LOCK XOR Ed, Id");} else {INST_NAME("LOCK XOR Ed, Ib");}
SETFLAGS(X_ALL, SF_SET_PENDING);
if(MODREG) {
if(opcode==0x81) i64 = F32S; else i64 = F8S;
ed = TO_NAT((nextop & 7) + (rex.b << 3));
MOV64xw(x5, i64);
emit_xor32(dyn, ninst, rex, ed, x5, x3, x4);
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, (opcode==0x81)?4:1);
if(opcode==0x81) i64 = F32S; else i64 = F8S;
if(cpuext.atomics) {
MOV64xw(x5, i64);
UFLAG_IF {
LDEORALxw(x5, x1, wback);
emit_xor32(dyn, ninst, rex, x1, x5, x3, x4);
} else {
STEORLxw(x5, wback);
}
} else {
MARKLOCK;
LDAXRxw(x1, wback);
emit_xor32c(dyn, ninst, rex, x1, i64, x3, x4);
STLXRxw(x3, x1, wback);
CBNZx_MARKLOCK(x3);
}
}
break;
case 7: //CMP
if(opcode==0x81) {INST_NAME("(LOCK) CMP Ed, Id");} else {INST_NAME("(LOCK) CMP Ed, Ib");}
SETFLAGS(X_ALL, SF_SET_PENDING);
GETED((opcode==0x81)?4:1);
// No need to LOCK, this is readonly
if(opcode==0x81) i64 = F32S; else i64 = F8S;
if(i64) {
MOV64xw(x5, i64);
emit_cmp32(dyn, ninst, rex, ed, x5, x3, x4, x6);
} else {
emit_cmp32_0(dyn, ninst, rex, ed, x3, x4);
}
break;
}
break;
case 0x86:
INST_NAME("LOCK XCHG Eb, Gb");
// Do the swap
nextop = F8;
if(MODREG) {
GETGB(x4);
if(rex.rex) {
ed = TO_NAT((nextop & 7) + (rex.b << 3));
eb1 = ed;
eb2 = 0;
} else {
ed = (nextop&7);
eb1 = TO_NAT(ed & 3);
eb2 = ((ed&4)<<1);
}
UBFXw(x1, eb1, eb2, 8);
// do the swap: x4 -> ed, x1 -> gd
BFIx(gb1, x1, gb2, 8);
BFIx(eb1, x4, eb2, 8);
} else {
GETGB(x4);
addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
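// SWPALB swaps the byte atomically, returning the old value in x1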
if(cpuext.atomics) {
SWPALB(x4, x1, ed);
} else {
MARKLOCK;
// do the swap with exclusive locking
LDAXRB(x1, ed);
// do the swap: x4 -> strb(ed), x1 -> gd
STLXRB(x3, x4, ed);
CBNZx_MARKLOCK(x3);
}
BFIx(gb1, x1, gb2, 8);
}
break;
case 0x87:
INST_NAME("LOCK XCHG Ed, Gd");
nextop = F8;
if(MODREG) {
GETGD;
GETED(0);
MOVxw_REG(x1, gd);
MOVxw_REG(gd, ed);
MOVxw_REG(ed, x1);
} else {
GETGD;
addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
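// SWPALxw swaps atomically when LSE is available; otherwise use an LL/SC loop,
// with the byte-locked path as fallback for unaligned addresses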
if(!ALIGNED_ATOMICxw) {
if(cpuext.uscat) {
ANDx_mask(x1, ed, 1, 0, 3); // mask = F
CMPSw_U12(x1, 16-(1<<(2+rex.w)));
B_MARK(cGT);
} else {
TSTx_mask(ed, 1, 0, 1+rex.w); // mask=3 or 7
B_MARK(cNE);
}
}
if(cpuext.atomics) {
SWPALxw(gd, gd, ed);
if(!ALIGNED_ATOMICxw) {
B_NEXT_nocond;
}
} else {
MARKLOCK;
LDAXRxw(x1, ed);
STLXRxw(x3, gd, ed);
CBNZx_MARKLOCK(x3);
if(!ALIGNED_ATOMICxw) {
B_MARK2_nocond;
}
}
if(!ALIGNED_ATOMICxw) {
MARK;
LDRxw_U12(x1, ed, 0);
LDAXRB(x4, ed);
SUBxw_UXTB(x4, x4, x1);
CBNZw_MARK(x4);
STLXRB(x3, gd, ed);
CBNZx_MARK(x3);
STRxw_U12(gd, ed, 0);
MARK2;
}
if(!ALIGNED_ATOMICxw || !cpuext.atomics) {
MOVxw_REG(gd, x1);
}
}
break;
case 0xF6:
nextop = F8;
switch((nextop>>3)&7) {
case 0:
case 1:
INST_NAME("LOCK TEST Eb, Ib");
SETFLAGS(X_ALL, SF_SET_PENDING);
GETEB(x1, 1);
u8 = F8;
emit_test8c(dyn, ninst, x1, u8, x3, x4, x5);
break;
case 2:
INST_NAME("LOCK NOT Eb");
if(MODREG) {
GETEB(x1, 0);
MVNw_REG(x1, x1);
EBBACK;
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
if(cpuext.atomics) {
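// NOT is EOR with all-ones: STEORLB with 0xff flips every bit atomically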
MOV32w(x1, 0xff);
STEORLB(x1, wback);
} else {
MARKLOCK;
LDAXRB(x1, wback);
MVNw_REG(x1, x1);
STLXRB(x3, x1, wback);
CBNZx_MARKLOCK(x3);
}
}
break;
default:
DEFAULT;
}
break;
case 0xF7:
nextop = F8;
switch((nextop>>3)&7) {
case 2:
INST_NAME("LOCK NOT Ed");
if(MODREG) {
GETED(0);
MVNxw_REG(ed, ed);
WBACK;
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
if(cpuext.atomics) {
MOV64x(x1, ~0LL);
STEORLxw(x1, wback);
} else {
MARKLOCK;
LDAXRxw(x1, wback);
MVNxw_REG(x1, x1);
STLXRxw(x3, x1, wback);
CBNZx_MARKLOCK(x3);
}
}
break;
default:
DEFAULT;
}
break;
case 0xFE:
nextop = F8;
switch((nextop>>3)&7)
{
case 0: // INC Eb
INST_NAME("LOCK INC Eb");
SETFLAGS(X_ALL&~X_CF, SF_SUBSET);
if(MODREG) {
GETEB(x1, 0);
emit_inc8(dyn, ninst, x1, x2, x4);
EBBACK;
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
MARKLOCK;
if(cpuext.atomics) {
MOV32w(x3, 1);
UFLAG_IF {
LDADDALB(x3, x1, wback);
emit_inc8(dyn, ninst, x1, x3, x4);
} else {
STADDLB(x3, wback);
}
} else {
LDAXRB(x1, wback);
emit_inc8(dyn, ninst, x1, x3, x4);
STLXRB(x3, x1, wback);
CBNZx_MARKLOCK(x3);
}
}
break;
case 1: //DEC Eb
INST_NAME("LOCK DEC Eb");
SETFLAGS(X_ALL&~X_CF, SF_SUBSET);
if(MODREG) {
GETEB(x1, 0);
emit_dec8(dyn, ninst, x1, x2, x4);
EBBACK;
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
if(cpuext.atomics) {
MOV32w(x3, -1);
UFLAG_IF {
LDADDALB(x3, x1, wback);
emit_dec8(dyn, ninst, x1, x3, x4);
} else {
STADDLB(x3, wback);
}
} else {
MARKLOCK;
LDAXRB(x1, wback);
emit_dec8(dyn, ninst, x1, x3, x4);
STLXRB(x3, x1, wback);
CBNZx_MARKLOCK(x3);
}
}
break;
default:
DEFAULT;
}
break;
case 0xFF:
nextop = F8;
switch((nextop>>3)&7)
{
case 0: // INC Ed
INST_NAME("LOCK INC Ed");
SETFLAGS(X_ALL&~X_CF, SF_SUBSET);
if(MODREG) {
ed = TO_NAT((nextop & 7) + (rex.b << 3));
emit_inc32(dyn, ninst, rex, ed, x3, x4);
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
if(!ALIGNED_ATOMICxw) {
if(cpuext.uscat) {
ANDx_mask(x1, wback, 1, 0, 3); // mask = F
CMPSw_U12(x1, 16-(1<<(2+rex.w)));
B_MARK(cGT);
} else {
TSTx_mask(wback, 1, 0, 1+rex.w); // mask=3 or 7
B_MARK(cNE); // unaligned
}
}
if(cpuext.atomics) {
MOV32w(x3, 1);
UFLAG_IF {
LDADDALxw(x3, x1, wback);
} else {
STADDLxw(x3, wback);
}
} else {
MARKLOCK;
LDAXRxw(x1, wback);
ADDxw_U12(x4, x1, 1);
STLXRxw(x3, x4, wback);
CBNZx_MARKLOCK(x3);
}
if(!ALIGNED_ATOMICxw) {
UFLAG_IF {
B_MARK2_nocond;
} else {
B_NEXT_nocond;
}
MARK;
LDRxw_U12(x1, wback, 0);
LDAXRB(x4, wback);
SUBxw_UXTB(x4, x4, x1);
CBNZw_MARK(x4);
ADDxw_U12(x4, x1, 1);
STLXRB(x3, x4, wback);
CBNZw_MARK(x3);
STRxw_U12(x4, wback, 0);
}
UFLAG_IF {
MARK2;
emit_inc32(dyn, ninst, rex, x1, x3, x4);
}
}
break;
case 1: //DEC Ed
INST_NAME("LOCK DEC Ed");
SETFLAGS(X_ALL&~X_CF, SF_SUBSET);
if(MODREG) {
ed = TO_NAT((nextop & 7) + (rex.b << 3));
emit_dec32(dyn, ninst, rex, ed, x3, x4);
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
if(!ALIGNED_ATOMICxw) {
if(cpuext.uscat) {
ANDx_mask(x1, wback, 1, 0, 3); // mask = F
CMPSw_U12(x1, 16-(1<<(2+rex.w)));
B_MARK(cGT);
} else {
TSTx_mask(wback, 1, 0, 1+rex.w); // mask=3 or 7
B_MARK(cNE); // unaligned
}
}
if(cpuext.atomics) {
MOV64xw(x3, -1);
UFLAG_IF {
LDADDALxw(x3, x1, wback);
} else {
STADDLxw(x3, wback);
}
} else {
MARKLOCK;
LDAXRxw(x1, wback);
SUBxw_U12(x4, x1, 1);
STLXRxw(x3, x4, wback);
CBNZx_MARKLOCK(x3);
}
if(!ALIGNED_ATOMICxw) {
UFLAG_IF {
B_MARK2_nocond;
} else {
B_NEXT_nocond;
}
MARK;
LDRxw_U12(x1, wback, 0);
LDAXRB(x4, wback);
SUBxw_UXTB(x4, x4, x1);
CBNZw_MARK(x4);
SUBxw_U12(x4, x1, 1);
STLXRB(x3, x4, wback);
CBNZw_MARK(x3);
STRxw_U12(x4, wback, 0);
}
UFLAG_IF {
MARK2;
emit_dec32(dyn, ninst, rex, x1, x3, x4);
}
}
break;
default:
DEFAULT;
}
break;
default:
DEFAULT;
}
return addr;
}