// box64/src/dynarec/arm64/dynarec_arm64_functions.c

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <math.h>
#include <signal.h>
#include <sys/types.h>
#include <unistd.h>
#include "debug.h"
#include "box64context.h"
#include "box64cpu.h"
#include "emu/x64emu_private.h"
#include "x64emu.h"
#include "box64stack.h"
#include "callback.h"
#include "emu/x64run_private.h"
#include "emu/x87emu_private.h"
#include "x64trace.h"
#include "dynarec_native.h"
#include "dynarec_arm64_private.h"
#include "dynarec_arm64_functions.h"
#include "custommem.h"
#include "bridge.h"
#include "gdbjit.h"
#include "perfmap.h"
// Get an FPU scratch reg
int fpu_get_scratch(dynarec_arm_t* dyn, int ninst)
{
int ret = SCRATCH0 + dyn->n.fpu_scratch++;
if(dyn->n.ymm_used) printf_log(LOG_INFO, "Warning, getting a scratch register after getting some YMM at inst=%d\n", ninst);
if(dyn->n.neoncache[ret].t==NEON_CACHE_YMMR || dyn->n.neoncache[ret].t==NEON_CACHE_YMMW) {
// should only happen in step 0...
dyn->insts[ninst].purge_ymm |= (1<<dyn->n.neoncache[ret].n); // mark as purged
dyn->n.neoncache[ret].v = 0; // reset it
}
return ret;
}
// Get 2 consecutive FPU scratch regs
int fpu_get_double_scratch(dynarec_arm_t* dyn, int ninst)
{
int ret = SCRATCH0 + dyn->n.fpu_scratch;
if(dyn->n.ymm_used) printf_log(LOG_INFO, "Warning, getting a double scratch register after getting some YMM at inst=%d\n", ninst);
if(dyn->n.neoncache[ret].t==NEON_CACHE_YMMR || dyn->n.neoncache[ret].t==NEON_CACHE_YMMW) {
// should only happen in step 0...
dyn->insts[ninst].purge_ymm |= (1<<dyn->n.neoncache[ret].n); // mark as purged
dyn->n.neoncache[ret].v = 0; // reset it
}
if(dyn->n.neoncache[ret+1].t==NEON_CACHE_YMMR || dyn->n.neoncache[ret+1].t==NEON_CACHE_YMMW) {
// should only happen in step 0...
dyn->insts[ninst].purge_ymm |= (1<<dyn->n.neoncache[ret+1].n); // mark as purged
dyn->n.neoncache[ret+1].v = 0; // reset it
}
dyn->n.fpu_scratch+=2;
return ret;
}
// Reset scratch regs counter
void fpu_reset_scratch(dynarec_arm_t* dyn)
{
dyn->n.fpu_scratch = 0;
dyn->n.ymm_used = 0;
dyn->n.ymm_regs = 0;
dyn->n.ymm_write = 0;
dyn->n.ymm_removed = 0;
dyn->n.xmm_write = 0;
dyn->n.xmm_removed = 0;
}
// Get an x87 double reg
int fpu_get_reg_x87(dynarec_arm_t* dyn, int ninst, int t, int n)
{
int i=X870;
while (dyn->n.fpuused[i]) ++i;
if(dyn->n.neoncache[i].t==NEON_CACHE_YMMR || dyn->n.neoncache[i].t==NEON_CACHE_YMMW) {
// should only happen in step 0...
dyn->insts[ninst].purge_ymm |= (1<<dyn->n.neoncache[i].n); // mark as purged
dyn->n.neoncache[i].v = 0; // reset it
}
dyn->n.fpuused[i] = 1;
dyn->n.neoncache[i].n = n;
dyn->n.neoncache[i].t = t;
dyn->n.news |= (1<<i);
return i; // return a Dx
}
// Free an FPU double reg
void fpu_free_reg(dynarec_arm_t* dyn, int reg)
{
// TODO: check upper limit?
dyn->n.fpuused[reg] = 0;
if(dyn->n.neoncache[reg].t==NEON_CACHE_YMMR || dyn->n.neoncache[reg].t==NEON_CACHE_YMMW) {
dyn->n.ymm_removed |= 1<<dyn->n.neoncache[reg].n;
if(dyn->n.neoncache[reg].t==NEON_CACHE_YMMW)
dyn->n.ymm_write |= 1<<dyn->n.neoncache[reg].n;
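// remember which neon reg held this YMM in a 4-bit nibble of ymm_regs (8 and up = scratch reg, below 8 = EMM reg), so neoncacheUnwind can re-create it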
if(reg>SCRATCH0)
dyn->n.ymm_regs |= (8LL+reg-SCRATCH0)<<(dyn->n.neoncache[reg].n*4);
else
dyn->n.ymm_regs |= ((uint64_t)(reg-EMM0))<<(dyn->n.neoncache[reg].n*4);
}
if(dyn->n.neoncache[reg].t==NEON_CACHE_XMMR || dyn->n.neoncache[reg].t==NEON_CACHE_XMMW) {
dyn->n.xmm_removed |= 1<<dyn->n.neoncache[reg].n;
if(dyn->n.neoncache[reg].t==NEON_CACHE_XMMW)
dyn->n.xmm_write |= 1<<dyn->n.neoncache[reg].n;
}
if(dyn->n.neoncache[reg].t!=NEON_CACHE_ST_F && dyn->n.neoncache[reg].t!=NEON_CACHE_ST_D && dyn->n.neoncache[reg].t!=NEON_CACHE_ST_I64)
dyn->n.neoncache[reg].v = 0;
if(dyn->n.fpu_scratch && reg==SCRATCH0+dyn->n.fpu_scratch-1)
--dyn->n.fpu_scratch;
}
// Get an MMX double reg
int fpu_get_reg_emm(dynarec_arm_t* dyn, int ninst, int emm)
{
int ret = EMM0 + emm;
if(dyn->n.neoncache[ret].t==NEON_CACHE_YMMR || dyn->n.neoncache[ret].t==NEON_CACHE_YMMW) {
// should only happen in step 0...
dyn->insts[ninst].purge_ymm |= (1<<dyn->n.neoncache[ret].n); // mark as purged
dyn->n.neoncache[ret].v = 0; // reset it
}
dyn->n.fpuused[ret] = 1;
dyn->n.neoncache[ret].t = NEON_CACHE_MM;
dyn->n.neoncache[ret].n = emm;
dyn->n.news |= (1<<(ret));
return ret;
}
// Get an XMM quad reg
int fpu_get_reg_xmm(dynarec_arm_t* dyn, int t, int xmm)
{
int i;
if(xmm>7) {
i = XMM8 + xmm - 8;
} else {
i = XMM0 + xmm;
}
dyn->n.fpuused[i] = 1;
dyn->n.neoncache[i].t = t;
dyn->n.neoncache[i].n = xmm;
dyn->n.news |= (1<<i);
return i;
}
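// Try to map YMM 'ymm' into neon reg 'reg' with access type t: reuse the slot if it already holds that YMM (upgrading read to write), claim it if free, else return -1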
int internal_mark_ymm(dynarec_arm_t* dyn, int t, int ymm, int reg)
{
if((dyn->n.neoncache[reg].t==NEON_CACHE_YMMR) || (dyn->n.neoncache[reg].t==NEON_CACHE_YMMW)) {
if(dyn->n.neoncache[reg].n == ymm) {
// already there!
if(t==NEON_CACHE_YMMW)
dyn->n.neoncache[reg].t=t;
return reg;
}
} else if(!dyn->n.neoncache[reg].v) {
// found a slot!
dyn->n.neoncache[reg].t=t;
dyn->n.neoncache[reg].n=ymm;
dyn->n.news |= (1<<reg);
return reg;
}
return -1;
}
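// Return 1 if the YMM cached in neon reg 'reg' must be kept: its number is one of k1/k2/k3, or it is marked as used by the current opcode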
int is_ymm_to_keep(dynarec_arm_t* dyn, int reg, int k1, int k2, int k3)
{
if((k1!=-1) && (dyn->n.neoncache[reg].n==k1))
return 1;
if((k2!=-1) && (dyn->n.neoncache[reg].n==k2))
return 1;
if((k3!=-1) && (dyn->n.neoncache[reg].n==k3))
return 1;
if((dyn->n.neoncache[reg].t==NEON_CACHE_YMMR || dyn->n.neoncache[reg].t==NEON_CACHE_YMMW) && (dyn->n.ymm_used&(1<<dyn->n.neoncache[reg].n)))
return 1;
return 0;
}
// Reset fpu regs counter
static void fpu_reset_reg_neoncache(neoncache_t* n)
{
n->fpu_reg = 0;
for (int i=0; i<32; ++i) {
n->fpuused[i]=0;
n->neoncache[i].v = 0;
}
n->ymm_regs = 0;
n->ymm_removed = 0;
n->ymm_used = 0;
n->ymm_write = 0;
n->xmm_removed = 0;
n->xmm_write = 0;
}
void fpu_reset_reg(dynarec_arm_t* dyn)
{
fpu_reset_reg_neoncache(&dyn->n);
}
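// If 'a' is NEON_CACHE_ST_I64, promote ST(st) to double and return NEON_CACHE_ST_D instead; otherwise return 'a' unchanged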
int neoncache_no_i64(dynarec_arm_t* dyn, int ninst, int st, int a)
{
if(a==NEON_CACHE_ST_I64) {
neoncache_promote_double(dyn, ninst, st);
return NEON_CACHE_ST_D;
}
return a;
}
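// Get the cache type (ST_F / ST_D / ST_I64) of ST(a) at instruction ninst, following a recorded swap; defaults to ST_D if not cached yet (the _current variant checks the live cache state instead)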
int neoncache_get_st(dynarec_arm_t* dyn, int ninst, int a)
{
if (dyn->insts[ninst].n.swapped) {
if(dyn->insts[ninst].n.combined1 == a)
a = dyn->insts[ninst].n.combined2;
else if(dyn->insts[ninst].n.combined2 == a)
a = dyn->insts[ninst].n.combined1;
}
for(int i=0; i<24; ++i)
if((dyn->insts[ninst].n.neoncache[i].t==NEON_CACHE_ST_F
|| dyn->insts[ninst].n.neoncache[i].t==NEON_CACHE_ST_D
|| dyn->insts[ninst].n.neoncache[i].t==NEON_CACHE_ST_I64)
&& dyn->insts[ninst].n.neoncache[i].n==a)
return dyn->insts[ninst].n.neoncache[i].t;
// not in the cache yet, so will be fetched...
return NEON_CACHE_ST_D;
}
int neoncache_get_current_st(dynarec_arm_t* dyn, int ninst, int a)
{
(void)ninst;
if(!dyn->insts)
return NEON_CACHE_ST_D;
for(int i=0; i<24; ++i)
if((dyn->n.neoncache[i].t==NEON_CACHE_ST_F
|| dyn->n.neoncache[i].t==NEON_CACHE_ST_D
|| dyn->n.neoncache[i].t==NEON_CACHE_ST_I64)
&& dyn->n.neoncache[i].n==a)
return dyn->n.neoncache[i].t;
// not in the cache yet, so will be fetched...
return NEON_CACHE_ST_D;
}
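// The *st_f* helpers below return the neon register index caching ST(a) (as float, or float/i64), or -1 if not cached; the _noback and _current variants only differ in which cache state they inspect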
int neoncache_get_st_f(dynarec_arm_t* dyn, int ninst, int a)
{
/*if(a+dyn->insts[ninst].n.stack_next-st<0)
// The STx has been pushed at the end of instruction, so stop going back
return -1;*/
for(int i=0; i<24; ++i)
if(dyn->insts[ninst].n.neoncache[i].t==NEON_CACHE_ST_F
&& dyn->insts[ninst].n.neoncache[i].n==a)
return i;
return -1;
}
int neoncache_get_st_f_i64(dynarec_arm_t* dyn, int ninst, int a)
{
/*if(a+dyn->insts[ninst].n.stack_next-st<0)
// The STx has been pushed at the end of instruction, so stop going back
return -1;*/
for(int i=0; i<24; ++i)
if((dyn->insts[ninst].n.neoncache[i].t==NEON_CACHE_ST_I64 || dyn->insts[ninst].n.neoncache[i].t==NEON_CACHE_ST_F)
&& dyn->insts[ninst].n.neoncache[i].n==a)
return i;
return -1;
}
int neoncache_get_st_f_noback(dynarec_arm_t* dyn, int ninst, int a)
{
for(int i=0; i<24; ++i)
if(dyn->insts[ninst].n.neoncache[i].t==NEON_CACHE_ST_F
&& dyn->insts[ninst].n.neoncache[i].n==a)
return i;
return -1;
}
int neoncache_get_st_f_i64_noback(dynarec_arm_t* dyn, int ninst, int a)
{
for(int i=0; i<24; ++i)
if((dyn->insts[ninst].n.neoncache[i].t==NEON_CACHE_ST_I64 || dyn->insts[ninst].n.neoncache[i].t==NEON_CACHE_ST_F)
&& dyn->insts[ninst].n.neoncache[i].n==a)
return i;
return -1;
}
int neoncache_get_current_st_f(dynarec_arm_t* dyn, int a)
{
for(int i=0; i<24; ++i)
if(dyn->n.neoncache[i].t==NEON_CACHE_ST_F
&& dyn->n.neoncache[i].n==a)
return i;
return -1;
}
int neoncache_get_current_st_f_i64(dynarec_arm_t* dyn, int a)
{
for(int i=0; i<24; ++i)
if((dyn->n.neoncache[i].t==NEON_CACHE_ST_I64 || dyn->n.neoncache[i].t==NEON_CACHE_ST_F)
&& dyn->n.neoncache[i].n==a)
return i;
return -1;
}
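// x87 double "promotion": when an ST value cached as single-precision (or as i64) turns out to need double precision,
// retag it NEON_CACHE_ST_D everywhere it lives, walking backward through the instructions (and across combined/swapped pairs),
// adjusting the ST index by each instruction's stack pushes/pops and stopping at barriers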
static void neoncache_promote_double_forward(dynarec_arm_t* dyn, int ninst, int maxinst, int a);
static void neoncache_promote_double_internal(dynarec_arm_t* dyn, int ninst, int maxinst, int a);
static void neoncache_promote_double_combined(dynarec_arm_t* dyn, int ninst, int maxinst, int a)
{
if(a == dyn->insts[ninst].n.combined1 || a == dyn->insts[ninst].n.combined2) {
if(a == dyn->insts[ninst].n.combined1) {
a = dyn->insts[ninst].n.combined2;
} else
a = dyn->insts[ninst].n.combined1;
int i = neoncache_get_st_f_i64_noback(dyn, ninst, a);
//if(dyn->need_dump) dynarec_log(LOG_NONE, "neoncache_promote_double_combined, ninst=%d combined%c %d i=%d (stack:%d/%d)\n", ninst, (a == dyn->insts[ninst].n.combined2)?'2':'1', a ,i, dyn->insts[ninst].n.stack_push, -dyn->insts[ninst].n.stack_pop);
if(i>=0) {
dyn->insts[ninst].n.neoncache[i].t = NEON_CACHE_ST_D;
if(dyn->insts[ninst].x87precision) dyn->need_x87check = 2;
if(!dyn->insts[ninst].n.barrier)
neoncache_promote_double_internal(dyn, ninst-1, maxinst, a-dyn->insts[ninst].n.stack_push);
// go forward if combined is not pop'd
if(a-dyn->insts[ninst].n.stack_pop>=0)
if(!((ninst+1<dyn->size) && dyn->insts[ninst+1].n.barrier))
neoncache_promote_double_forward(dyn, ninst+1, maxinst, a-dyn->insts[ninst].n.stack_pop);
}
}
}
static void neoncache_promote_double_internal(dynarec_arm_t* dyn, int ninst, int maxinst, int a)
{
while(ninst>=0) {
a+=dyn->insts[ninst].n.stack_pop; // adjust Stack depth: add pop'd ST (going backward)
int i = neoncache_get_st_f_i64(dyn, ninst, a);
//if(dyn->need_dump) dynarec_log(LOG_NONE, "neoncache_promote_double_internal, ninst=%d, a=%d st=%d:%d, i=%d\n", ninst, a, dyn->insts[ninst].n.stack, dyn->insts[ninst].n.stack_next, i);
if(i<0) return;
dyn->insts[ninst].n.neoncache[i].t = NEON_CACHE_ST_D;
if(dyn->insts[ninst].x87precision) dyn->need_x87check = 2;
// check combined propagation too
if(dyn->insts[ninst].n.combined1 || dyn->insts[ninst].n.combined2) {
if(dyn->insts[ninst].n.swapped) {
//if(dyn->need_dump) dynarec_log(LOG_NONE, "neoncache_promote_double_internal, ninst=%d swapped %d/%d vs %d with st %d\n", ninst, dyn->insts[ninst].n.combined1 ,dyn->insts[ninst].n.combined2, a, dyn->insts[ninst].n.stack);
if (a==dyn->insts[ninst].n.combined1)
a = dyn->insts[ninst].n.combined2;
else if (a==dyn->insts[ninst].n.combined2)
a = dyn->insts[ninst].n.combined1;
} else {
//if(dyn->need_dump) dynarec_log(LOG_NONE, "neoncache_promote_double_internal, ninst=%d combined %d/%d vs %d with st %d\n", ninst, dyn->insts[ninst].n.combined1 ,dyn->insts[ninst].n.combined2, a, dyn->insts[ninst].n.stack);
neoncache_promote_double_combined(dyn, ninst, maxinst, a);
}
}
a-=dyn->insts[ninst].n.stack_push; // adjust Stack depth: remove push'd ST (going backward)
--ninst;
if(ninst<0 || a<0 || dyn->insts[ninst].n.barrier)
return;
}
}
static void neoncache_promote_double_forward(dynarec_arm_t* dyn, int ninst, int maxinst, int a)
{
while((ninst!=-1) && (ninst<maxinst) && (a>=0)) {
a+=dyn->insts[ninst].n.stack_push; // adjust Stack depth: add push'd ST (going forward)
if((dyn->insts[ninst].n.combined1 || dyn->insts[ninst].n.combined2) && dyn->insts[ninst].n.swapped) {
//if(dyn->need_dump) dynarec_log(LOG_NONE, "neoncache_promote_double_forward, ninst=%d swapped %d/%d vs %d with st %d\n", ninst, dyn->insts[ninst].n.combined1 ,dyn->insts[ninst].n.combined2, a, dyn->insts[ninst].n.stack);
if (a==dyn->insts[ninst].n.combined1)
a = dyn->insts[ninst].n.combined2;
else if (a==dyn->insts[ninst].n.combined2)
a = dyn->insts[ninst].n.combined1;
}
int i = neoncache_get_st_f_i64_noback(dyn, ninst, a);
//if(dyn->need_dump) dynarec_log(LOG_NONE, "neoncache_promote_double_forward, ninst=%d, a=%d st=%d:%d(%d/%d), i=%d\n", ninst, a, dyn->insts[ninst].n.stack, dyn->insts[ninst].n.stack_next, dyn->insts[ninst].n.stack_push, -dyn->insts[ninst].n.stack_pop, i);
if(i<0) return;
dyn->insts[ninst].n.neoncache[i].t = NEON_CACHE_ST_D;
if(dyn->insts[ninst].x87precision) dyn->need_x87check = 2;
// check combined propagation too
if((dyn->insts[ninst].n.combined1 || dyn->insts[ninst].n.combined2) && !dyn->insts[ninst].n.swapped) {
//if(dyn->need_dump) dynarec_log(LOG_NONE, "neoncache_promote_double_forward, ninst=%d combined %d/%d vs %d with st %d\n", ninst, dyn->insts[ninst].n.combined1 ,dyn->insts[ninst].n.combined2, a, dyn->insts[ninst].n.stack);
neoncache_promote_double_combined(dyn, ninst, maxinst, a);
}
a-=dyn->insts[ninst].n.stack_pop; // adjust Stack depth: remove pop'd ST (going forward)
if(dyn->insts[ninst].x64.has_next && !dyn->insts[ninst].n.barrier)
++ninst;
else
ninst=-1;
}
if(ninst==maxinst)
neoncache_promote_double(dyn, ninst, a);
}
void neoncache_promote_double(dynarec_arm_t* dyn, int ninst, int a)
{
int i = neoncache_get_current_st_f_i64(dyn, a);
//if(dyn->need_dump) dynarec_log(LOG_NONE, "neoncache_promote_double, ninst=%d a=%d st=%d i=%d\n", ninst, a, dyn->n.stack, i);
if(i<0) return;
dyn->n.neoncache[i].t = NEON_CACHE_ST_D;
dyn->insts[ninst].n.neoncache[i].t = NEON_CACHE_ST_D;
if(dyn->insts[ninst].x87precision) dyn->need_x87check = 2;
// check combined propagation too
if(dyn->n.combined1 || dyn->n.combined2) {
if(dyn->n.swapped) {
//if(dyn->need_dump) dynarec_log(LOG_NONE, "neoncache_promote_double, ninst=%d swapped! %d/%d vs %d\n", ninst, dyn->n.combined1 ,dyn->n.combined2, a);
if(dyn->n.combined1 == a)
a = dyn->n.combined2;
else if(dyn->n.combined2 == a)
a = dyn->n.combined1;
} else {
//if(dyn->need_dump) dynarec_log(LOG_NONE, "neoncache_promote_double, ninst=%d combined! %d/%d vs %d\n", ninst, dyn->n.combined1 ,dyn->n.combined2, a);
if(dyn->n.combined1 == a)
neoncache_promote_double(dyn, ninst, dyn->n.combined2);
else if(dyn->n.combined2 == a)
neoncache_promote_double(dyn, ninst, dyn->n.combined1);
}
}
a-=dyn->insts[ninst].n.stack_push; // adjust Stack depth: remove push'd ST (going backward)
if(!ninst || a<0) return;
neoncache_promote_double_internal(dyn, ninst-1, ninst, a);
}
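// Record that ST(a) and ST(b) are used together by the current opcode and return the common cache type: ST_F only if both are already floats, ST_D otherwise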
int neoncache_combine_st(dynarec_arm_t* dyn, int ninst, int a, int b)
{
dyn->n.combined1=a;
dyn->n.combined2=b;
if( neoncache_get_current_st(dyn, ninst, a)==NEON_CACHE_ST_F
&& neoncache_get_current_st(dyn, ninst, b)==NEON_CACHE_ST_F )
return NEON_CACHE_ST_F;
// don't combine i64, it's only for load/store
/*if( neoncache_get_current_st(dyn, ninst, a)==NEON_CACHE_ST_I64
&& neoncache_get_current_st(dyn, ninst, b)==NEON_CACHE_ST_I64 )
return NEON_CACHE_ST_I64;*/
return NEON_CACHE_ST_D;
}
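// Return 1 if nothing needs flushing at the end of ninst: stack_next is 0 and only ST entries about to be popped remain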
static int isCacheEmpty(dynarec_native_t* dyn, int ninst) {
if(dyn->insts[ninst].n.stack_next) {
return 0;
}
for(int i=0; i<24; ++i)
if(dyn->insts[ninst].n.neoncache[i].v) { // there is something at ninst for i
if(!(
(dyn->insts[ninst].n.neoncache[i].t==NEON_CACHE_ST_F
|| dyn->insts[ninst].n.neoncache[i].t==NEON_CACHE_ST_D
|| dyn->insts[ninst].n.neoncache[i].t==NEON_CACHE_ST_I64)
&& dyn->insts[ninst].n.neoncache[i].n<dyn->insts[ninst].n.stack_pop))
return 0;
}
return 1;
}
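// Return 1 if the cache state at the end of ninst does not match what its jump target expects, so a transform (flush/reload) is needed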
int fpuCacheNeedsTransform(dynarec_arm_t* dyn, int ninst) {
int i2 = dyn->insts[ninst].x64.jmp_insts;
if(i2<0)
return 1;
if((dyn->insts[i2].x64.barrier&BARRIER_FLOAT))
// if the barrier has already been applied, no transform is needed
return ((dyn->insts[ninst].x64.barrier&BARRIER_FLOAT))?0:(isCacheEmpty(dyn, ninst)?0:1);
int ret = 0;
if(!i2) { // just purge
if(dyn->insts[ninst].n.stack_next)
return 1;
if(dyn->insts[ninst].ymm0_out)
return 1;
for(int i=0; i<32 && !ret; ++i)
if(dyn->insts[ninst].n.neoncache[i].v) { // there is something at ninst for i
if(!(
(dyn->insts[ninst].n.neoncache[i].t==NEON_CACHE_ST_F
|| dyn->insts[ninst].n.neoncache[i].t==NEON_CACHE_ST_D
|| dyn->insts[ninst].n.neoncache[i].t==NEON_CACHE_ST_I64)
&& dyn->insts[ninst].n.neoncache[i].n<dyn->insts[ninst].n.stack_pop))
ret = 1;
}
return ret;
}
// Check if ninst can be compatible to i2
if(dyn->insts[ninst].n.stack_next != dyn->insts[i2].n.stack-dyn->insts[i2].n.stack_push) {
return 1;
}
if(dyn->insts[ninst].ymm0_out && (dyn->insts[ninst].ymm0_out&~dyn->insts[i2].ymm0_in))
return 1;
neoncache_t cache_i2 = dyn->insts[i2].n;
neoncacheUnwind(&cache_i2);
for(int i=0; i<32; ++i) {
if(dyn->insts[ninst].n.neoncache[i].v) { // there is something at ninst for i
if(!cache_i2.neoncache[i].v) { // but there is nothing at i2 for i
ret = 1;
} else if(dyn->insts[ninst].n.neoncache[i].v!=cache_i2.neoncache[i].v) { // there is something different
if(dyn->insts[ninst].n.neoncache[i].n!=cache_i2.neoncache[i].n) { // not the same x64 reg
ret = 1;
}
else if(dyn->insts[ninst].n.neoncache[i].t == NEON_CACHE_XMMR && cache_i2.neoncache[i].t == NEON_CACHE_XMMW)
{/* nothing */ }
else if(dyn->insts[ninst].n.neoncache[i].t == NEON_CACHE_YMMR && cache_i2.neoncache[i].t == NEON_CACHE_YMMW)
{/* nothing */ }
else
ret = 1;
}
} else if(cache_i2.neoncache[i].v)
ret = 1;
}
return ret;
}
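// Rewind the per-instruction changes of a neoncache (swap, newly created regs, stack push/pop) to recover the state at instruction entry,
// then rebuild the x87/MMX/SSE tracking info from the neoncache slots and re-create the XMM/YMM entries removed during the instruction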
void neoncacheUnwind(neoncache_t* cache)
{
if(cache->swapped) {
// unswap
int a = -1;
int b = -1;
// in neoncache
for(int j=0; j<24 && ((a==-1) || (b==-1)); ++j)
if((cache->neoncache[j].t == NEON_CACHE_ST_D || cache->neoncache[j].t == NEON_CACHE_ST_F || cache->neoncache[j].t == NEON_CACHE_ST_I64)) {
if(cache->neoncache[j].n == cache->combined1)
a = j;
else if(cache->neoncache[j].n == cache->combined2)
b = j;
}
if(a!=-1 && b!=-1) {
int tmp = cache->neoncache[a].n;
cache->neoncache[a].n = cache->neoncache[b].n;
cache->neoncache[b].n = tmp;
}
// done
cache->swapped = 0;
cache->combined1 = cache->combined2 = 0;
}
if(cache->news) {
// remove the newly created neoncache
for(int i=0; i<32; ++i)
if(cache->news&(1<<i))
cache->neoncache[i].v = 0;
cache->news = 0;
}
if(cache->stack_push) {
// unpush
for(int j=0; j<24; ++j) {
if((cache->neoncache[j].t == NEON_CACHE_ST_D || cache->neoncache[j].t == NEON_CACHE_ST_F || cache->neoncache[j].t == NEON_CACHE_ST_I64)) {
if(cache->neoncache[j].n<cache->stack_push)
cache->neoncache[j].v = 0;
else
cache->neoncache[j].n-=cache->stack_push;
}
}
cache->x87stack-=cache->stack_push;
cache->tags>>=(cache->stack_push*2);
cache->stack-=cache->stack_push;
if(cache->pushed>=cache->stack_push)
cache->pushed-=cache->stack_push;
else
cache->pushed = 0;
cache->stack_push = 0;
}
cache->x87stack+=cache->stack_pop;
cache->stack_next = cache->stack;
if(cache->stack_pop) {
if(cache->poped>=cache->stack_pop)
cache->poped-=cache->stack_pop;
else
cache->poped = 0;
cache->tags<<=(cache->stack_pop*2);
}
cache->stack_pop = 0;
cache->barrier = 0;
// And now, rebuild the x87cache info with neoncache
cache->mmxcount = 0;
cache->fpu_scratch = 0;
cache->fpu_reg = 0;
for(int i=0; i<8; ++i) {
cache->x87cache[i] = -1;
cache->mmxcache[i] = -1;
cache->x87reg[i] = 0;
cache->ssecache[i*2].v = -1;
cache->ssecache[i*2+1].v = -1;
}
int x87reg = 0;
for(int i=0; i<32; ++i) {
if(cache->neoncache[i].v) {
cache->fpuused[i] = 1;
switch (cache->neoncache[i].t) {
case NEON_CACHE_MM:
cache->mmxcache[cache->neoncache[i].n] = i;
++cache->mmxcount;
++cache->fpu_reg;
break;
case NEON_CACHE_XMMR:
case NEON_CACHE_XMMW:
cache->ssecache[cache->neoncache[i].n].reg = i;
cache->ssecache[cache->neoncache[i].n].write = (cache->neoncache[i].t==NEON_CACHE_XMMW)?1:0;
++cache->fpu_reg;
break;
case NEON_CACHE_YMMR:
case NEON_CACHE_YMMW:
cache->fpuused[i] = 0; // YMM does not mark the fpu reg as used
break;
case NEON_CACHE_ST_F:
case NEON_CACHE_ST_D:
case NEON_CACHE_ST_I64:
cache->x87cache[x87reg] = cache->neoncache[i].n;
cache->x87reg[x87reg] = i;
++x87reg;
++cache->fpu_reg;
break;
case NEON_CACHE_SCR:
cache->fpuused[i] = 0;
cache->neoncache[i].v = 0;
break;
}
} else {
cache->fpuused[i] = 0;
}
}
// add back removed XMM
if(cache->xmm_removed) {
for(int i=0; i<16; ++i)
if(cache->xmm_removed&(1<<i)) {
int reg = (i<8)?(XMM0+i):(XMM8+i-8);
cache->neoncache[reg].t = (cache->xmm_write&(1<<i))?NEON_CACHE_XMMW:NEON_CACHE_XMMR;
cache->neoncache[reg].n = i;
}
cache->xmm_write = cache->xmm_removed = 0;
}
// add back removed YMM
if(cache->ymm_removed) {
for(int i=0; i<16; ++i)
if(cache->ymm_removed&(1<<i)) {
int reg = cache->ymm_regs>>(i*4)&15;
if(reg>7)
reg = reg - 8 + SCRATCH0;
else
reg = reg + EMM0;
//if(cache->neoncache[reg].v) // this is normal when a ymm is purged to make space for another one
// printf_log(LOG_INFO, "Warning, recreating YMM%d on non empty slot %s", i, getCacheName(cache->neoncache[reg].t, cache->neoncache[reg].n));
cache->neoncache[reg].t = (cache->ymm_write&(1<<i))?NEON_CACHE_YMMW:NEON_CACHE_YMMR;
cache->neoncache[reg].n = i;
}
cache->ymm_regs = 0;
cache->ymm_write = cache->ymm_removed = 0;
}
cache->ymm_used = 0;
}
#define F8 *(uint8_t*)(addr++)
#define F32S64 (uint64_t)(int64_t)*(int32_t*)(addr+=4, addr-4)
// Check if ED will have the correct parity (i.e. alignment), without emitting anything. Parity is 2 for DWORD or 3 for QWORD
int getedparity(dynarec_arm_t* dyn, int ninst, uintptr_t addr, uint8_t nextop, int parity, int delta)
{
(void)dyn; (void)ninst;
uint32_t tested = (1<<parity)-1;
if((nextop&0xC0)==0xC0)
return 0; // direct register, no parity...
if(!(nextop&0xC0)) {
if((nextop&7)==4) {
uint8_t sib = F8;
int sib_reg = (sib>>3)&7;
if((sib&0x7)==5) {
uint64_t tmp = F32S64;
if (sib_reg!=4) {
// if XXXXXX+reg<<N then checking the parity of XXXXXX and N should be enough
return ((tmp&tested)==0 && (sib>>6)>=parity)?1:0;
} else {
// just a constant...
return (tmp&tested)?0:1;
}
} else {
if(sib_reg==4 && parity<3)
return 0; // simple [reg]
// don't try [reg1 + reg2<<N], unless reg1 is ESP
return ((sib&0x7)==4 && (sib>>6)>=parity)?1:0;
}
} else if((nextop&7)==5) {
uint64_t tmp = F32S64;
tmp+=addr+delta;
return (tmp&tested)?0:1;
} else {
return 0;
}
} else {
return 0; // Form [reg1 + reg2<<N + XXXXXX]
}
}
#undef F8
#undef F32S64
const char* getCacheName(int t, int n)
{
static char buff[20];
switch(t) {
case NEON_CACHE_ST_D: sprintf(buff, "ST%d", n); break;
case NEON_CACHE_ST_F: sprintf(buff, "st%d", n); break;
case NEON_CACHE_ST_I64: sprintf(buff, "STi%d", n); break;
case NEON_CACHE_MM: sprintf(buff, "MM%d", n); break;
case NEON_CACHE_XMMW: sprintf(buff, "XMM%d", n); break;
case NEON_CACHE_XMMR: sprintf(buff, "xmm%d", n); break;
case NEON_CACHE_YMMW: sprintf(buff, "YMM%d", n); break;
case NEON_CACHE_YMMR: sprintf(buff, "ymm%d", n); break;
case NEON_CACHE_SCR: sprintf(buff, "Scratch"); break;
case NEON_CACHE_NONE: buff[0]='\0'; break;
}
return buff;
}
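// x86-64 register names and the ARM64 registers the dynarec maps them to, used to annotate the disassembly in the GDB JIT output (16-bit and 8-bit forms are annotated with the full 64-bit register)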
static register_mapping_t register_mappings[] = {
{ "rax", "x10" },
{ "eax", "w10" },
{ "ax", "x10" },
{ "ah", "x10" },
{ "al", "x10" },
{ "rcx", "x11" },
{ "ecx", "w11" },
{ "cx", "x11" },
{ "ch", "x11" },
{ "cl", "x11" },
{ "rdx", "x12" },
{ "edx", "w12" },
{ "dx", "x12" },
{ "dh", "x12" },
{ "dl", "x12" },
{ "rbx", "x13" },
{ "ebx", "w13" },
{ "bx", "x13" },
{ "bh", "x13" },
{ "bl", "x13" },
{ "rsi", "x14" },
{ "esi", "w14" },
{ "si", "x14" },
{ "sil", "x14" },
{ "rdi", "x15" },
{ "edi", "w15" },
{ "di", "x15" },
{ "dil", "x15" },
{ "rsp", "x16" },
{ "esp", "w16" },
{ "sp", "x16" },
{ "spl", "x16" },
{ "rbp", "x17" },
{ "ebp", "w17" },
{ "bp", "x17" },
{ "bpl", "x17" },
{ "r8", "x18" },
{ "r8d", "w18" },
{ "r8w", "x18" },
{ "r8b", "x18" },
{ "r9", "x19" },
{ "r9d", "w19" },
{ "r9w", "x19" },
{ "r9b", "x19" },
{ "r10", "x20" },
{ "r10d", "w20" },
{ "r10w", "x20" },
{ "r10b", "x20" },
{ "r11", "x21" },
{ "r11d", "w21" },
{ "r11w", "x21" },
{ "r11b", "x21" },
{ "r12", "x22" },
{ "r12d", "w22" },
{ "r12w", "x22" },
{ "r12b", "x22" },
{ "r13", "x23" },
{ "r13d", "w23" },
{ "r13w", "x23" },
{ "r13b", "x23" },
{ "r14", "x24" },
{ "r14d", "w24" },
{ "r14w", "x24" },
{ "r14b", "x24" },
{ "r15", "x25" },
{ "r15d", "w25" },
{ "r15w", "x25" },
{ "r15b", "x25" },
{ "rip", "x27" },
};
void printf_x64_instruction(dynarec_native_t* dyn, zydis_dec_t* dec, instruction_x64_t* inst, const char* name);
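// Pass-3 debug output for one instruction: build a line describing flags state, native-flags usage, predecessors, jump targets and neoncache contents, then emit it to the dynarec dump, the GDB JIT block and/or the perf map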
void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name, rex_t rex)
{
if (!dyn->need_dump && !BOX64ENV(dynarec_gdbjit) && !BOX64ENV(dynarec_perf_map)) return;
static char buf[256];
int length = sprintf(buf, "barrier=%d state=%d/%d/%d(%d:%d->%d:%d), %s=%X/%X, use=%X, need=%X/%X, sm=%d(%d/%d)",
dyn->insts[ninst].x64.barrier,
dyn->insts[ninst].x64.state_flags,
dyn->f.pending,
dyn->f.dfnone,
dyn->insts[ninst].f_entry.pending,
dyn->insts[ninst].f_entry.dfnone,
dyn->insts[ninst].f_exit.pending,
dyn->insts[ninst].f_exit.dfnone,
dyn->insts[ninst].x64.may_set ? "may" : "set",
dyn->insts[ninst].x64.set_flags,
dyn->insts[ninst].x64.gen_flags,
dyn->insts[ninst].x64.use_flags,
dyn->insts[ninst].x64.need_before,
dyn->insts[ninst].x64.need_after,
dyn->smwrite, dyn->insts[ninst].will_write, dyn->insts[ninst].last_write);
if (dyn->insts[ninst].nat_flags_op) {
if (dyn->insts[ninst].nat_flags_op == NAT_FLAG_OP_TOUCH && dyn->insts[ninst].before_nat_flags)
length += sprintf(buf + length, " NF:%d/read:%x", dyn->insts[ninst].nat_flags_op, dyn->insts[ninst].before_nat_flags);
else
length += sprintf(buf + length, " NF:%d", dyn->insts[ninst].nat_flags_op);
}
if (dyn->insts[ninst].use_nat_flags || dyn->insts[ninst].set_nat_flags || dyn->insts[ninst].need_nat_flags) {
length += sprintf(buf + length, " nf:%hhx/%hhx/%hhx", dyn->insts[ninst].set_nat_flags, dyn->insts[ninst].use_nat_flags, dyn->insts[ninst].need_nat_flags);
}
if (dyn->insts[ninst].invert_carry)
length += sprintf(buf + length, " CI");
if (dyn->insts[ninst].gen_inverted_carry)
length += sprintf(buf + length, " gic");
if (dyn->insts[ninst].before_nat_flags & NF_CF) {
length += sprintf(buf + length, " %ccb", dyn->insts[ninst].normal_carry_before ? 'n' : 'i');
}
if (dyn->insts[ninst].need_nat_flags & NF_CF) {
length += sprintf(buf + length, " %cc", dyn->insts[ninst].normal_carry ? 'n' : 'i');
}
if (dyn->insts[ninst].pred_sz) {
length += sprintf(buf + length, ", pred=");
for (int ii = 0; ii < dyn->insts[ninst].pred_sz; ++ii)
length += sprintf(buf + length, "%s%d", ii ? "/" : "", dyn->insts[ninst].pred[ii]);
}
if (!dyn->insts[ninst].x64.alive)
length += sprintf(buf + length, " not executed");
if (dyn->insts[ninst].x64.jmp && dyn->insts[ninst].x64.jmp_insts >= 0) {
length += sprintf(buf + length, ", jmp=%d", dyn->insts[ninst].x64.jmp_insts);
}
if (dyn->insts[ninst].x64.jmp && dyn->insts[ninst].x64.jmp_insts == -1)
length += sprintf(buf + length, ", jmp=out");
if (dyn->insts[ninst].x64.has_callret)
length += sprintf(buf + length, ", callret");
if (dyn->last_ip) {
length += sprintf(buf + length, ", last_ip=%p", (void*)dyn->last_ip);
}
for (int ii = 0; ii < 32; ++ii) {
switch (dyn->insts[ninst].n.neoncache[ii].t) {
case NEON_CACHE_ST_D: length += sprintf(buf + length, " D%d:%s", ii, getCacheName(dyn->insts[ninst].n.neoncache[ii].t, dyn->insts[ninst].n.neoncache[ii].n)); break;
case NEON_CACHE_ST_F: length += sprintf(buf + length, " S%d:%s", ii, getCacheName(dyn->insts[ninst].n.neoncache[ii].t, dyn->insts[ninst].n.neoncache[ii].n)); break;
case NEON_CACHE_ST_I64: length += sprintf(buf + length, " D%d:%s", ii, getCacheName(dyn->insts[ninst].n.neoncache[ii].t, dyn->insts[ninst].n.neoncache[ii].n)); break;
case NEON_CACHE_MM: length += sprintf(buf + length, " D%d:%s", ii, getCacheName(dyn->insts[ninst].n.neoncache[ii].t, dyn->insts[ninst].n.neoncache[ii].n)); break;
case NEON_CACHE_XMMW: length += sprintf(buf + length, " Q%d:%s", ii, getCacheName(dyn->insts[ninst].n.neoncache[ii].t, dyn->insts[ninst].n.neoncache[ii].n)); break;
case NEON_CACHE_XMMR: length += sprintf(buf + length, " Q%d:%s", ii, getCacheName(dyn->insts[ninst].n.neoncache[ii].t, dyn->insts[ninst].n.neoncache[ii].n)); break;
case NEON_CACHE_YMMW: length += sprintf(buf + length, " Q%d:%s", ii, getCacheName(dyn->insts[ninst].n.neoncache[ii].t, dyn->insts[ninst].n.neoncache[ii].n)); break;
case NEON_CACHE_YMMR: length += sprintf(buf + length, " Q%d:%s", ii, getCacheName(dyn->insts[ninst].n.neoncache[ii].t, dyn->insts[ninst].n.neoncache[ii].n)); break;
// case NEON_CACHE_SCR: length += sprintf(buf + length, " D%d:%s", ii, getCacheName(dyn->insts[ninst].n.neoncache[ii].t, dyn->insts[ninst].n.neoncache[ii].n)); break;
case NEON_CACHE_NONE:
default: break;
}
}
if (memcmp(dyn->insts[ninst].n.neoncache, dyn->n.neoncache, sizeof(dyn->n.neoncache))) {
length += sprintf(buf + length, " %s(Change:", (dyn->need_dump > 1) ? "\e[1;91m" : "");
for (int ii = 0; ii < 32; ++ii)
if (dyn->insts[ninst].n.neoncache[ii].v != dyn->n.neoncache[ii].v) {
length += sprintf(buf + length, " V%d:%s", ii, getCacheName(dyn->n.neoncache[ii].t, dyn->n.neoncache[ii].n));
length += sprintf(buf + length, "->%s", getCacheName(dyn->insts[ninst].n.neoncache[ii].t, dyn->insts[ninst].n.neoncache[ii].n));
}
length += sprintf(buf + length, ")%s", (dyn->need_dump > 1) ? "\e[0;32m" : "");
}
if (dyn->insts[ninst].n.ymm_used) {
length += sprintf(buf + length, " ymmUsed=%04x", dyn->insts[ninst].n.ymm_used);
}
if (dyn->ymm_zero || dyn->insts[ninst].ymm0_add || dyn->insts[ninst].ymm0_sub || dyn->insts[ninst].ymm0_out) {
length += sprintf(buf + length, " ymm0=(%04x/%04x+%04x-%04x=%04x)", dyn->ymm_zero, dyn->insts[ninst].ymm0_in, dyn->insts[ninst].ymm0_add, dyn->insts[ninst].ymm0_sub, dyn->insts[ninst].ymm0_out);
}
if (dyn->insts[ninst].purge_ymm) {
length += sprintf(buf + length, " purgeYmm=%04x", dyn->insts[ninst].purge_ymm);
}
if (dyn->n.stack || dyn->insts[ninst].n.stack_next || dyn->insts[ninst].n.x87stack) {
length += sprintf(buf + length, " X87:%d/%d(+%d/-%d)%d", dyn->n.stack, dyn->insts[ninst].n.stack_next, dyn->insts[ninst].n.stack_push, dyn->insts[ninst].n.stack_pop, dyn->insts[ninst].n.x87stack);
}
if (dyn->insts[ninst].n.combined1 || dyn->insts[ninst].n.combined2) {
length += sprintf(buf + length, " %s:%d/%d", dyn->insts[ninst].n.swapped ? "SWP" : "CMB", dyn->insts[ninst].n.combined1, dyn->insts[ninst].n.combined2);
}
if (dyn->need_dump) {
printf_x64_instruction(dyn, rex.is32bits ? my_context->dec32 : my_context->dec, &dyn->insts[ninst].x64, name);
dynarec_log(LOG_NONE, "%s%p: %d emitted opcodes, inst=%d, %s%s\n",
(dyn->need_dump > 1) ? "\e[32m" : "",
(void*)(dyn->native_start + dyn->insts[ninst].address), dyn->insts[ninst].size / 4, ninst, buf, (dyn->need_dump > 1) ? "\e[m" : "");
}
if (BOX64ENV(dynarec_gdbjit)) {
static char buf2[512];
if (BOX64ENV(dynarec_gdbjit) > 1) {
sprintf(buf2, "; %d: %d opcodes, %s", ninst, dyn->insts[ninst].size / 4, buf);
dyn->gdbjit_block = GdbJITBlockAddLine(dyn->gdbjit_block, (dyn->native_start + dyn->insts[ninst].address), buf2);
}
zydis_dec_t* dec = rex.is32bits ? my_context->dec32 : my_context->dec;
const char* inst_name = name;
if (dec) {
inst_name = DecodeX64Trace(dec, dyn->insts[ninst].x64.addr, 0);
x64disas_add_register_mapping_annotations(buf2, inst_name, register_mappings, sizeof(register_mappings) / sizeof(register_mappings[0]));
inst_name = buf2;
}
dyn->gdbjit_block = GdbJITBlockAddLine(dyn->gdbjit_block, (dyn->native_start + dyn->insts[ninst].address), inst_name);
}
if (BOX64ENV(dynarec_perf_map) && BOX64ENV(dynarec_perf_map_fd) != -1) {
writePerfMap(dyn->insts[ninst].x64.addr, dyn->native_start + dyn->insts[ninst].address, dyn->insts[ninst].size / 4, name);
}
}
void print_opcode(dynarec_native_t* dyn, int ninst, uint32_t opcode)
{
dynarec_log_prefix(0, LOG_NONE, "\t%08x\t%s\n", opcode, arm64_print(opcode, (uintptr_t)dyn->block));
}
static void x87_reset(neoncache_t* n)
{
for (int i=0; i<8; ++i)
n->x87cache[i] = -1;
n->tags = 0;
n->x87stack = 0;
n->stack = 0;
n->stack_next = 0;
n->stack_pop = 0;
n->stack_push = 0;
n->combined1 = n->combined2 = 0;
n->swapped = 0;
n->barrier = 0;
n->pushed = 0;
n->poped = 0;
for(int i=0; i<24; ++i)
if(n->neoncache[i].t == NEON_CACHE_ST_F
|| n->neoncache[i].t == NEON_CACHE_ST_D
|| n->neoncache[i].t == NEON_CACHE_ST_I64)
n->neoncache[i].v = 0;
}
static void mmx_reset(neoncache_t* n)
{
n->mmxcount = 0;
for (int i=0; i<8; ++i)
n->mmxcache[i] = -1;
}
static void sse_reset(neoncache_t* n)
{
for (int i=0; i<16; ++i)
n->ssecache[i].v = -1;
for (int i=0; i<32; ++i)
if(n->neoncache[i].t==NEON_CACHE_YMMR || n->neoncache[i].t==NEON_CACHE_YMMW)
n->neoncache[i].v = 0;
}
void fpu_reset(dynarec_native_t* dyn)
{
x87_reset(&dyn->n);
mmx_reset(&dyn->n);
sse_reset(&dyn->n);
fpu_reset_reg(dyn);
dyn->ymm_zero = 0;
}
void fpu_reset_ninst(dynarec_native_t* dyn, int ninst)
{
x87_reset(&dyn->insts[ninst].n);
mmx_reset(&dyn->insts[ninst].n);
sse_reset(&dyn->insts[ninst].n);
fpu_reset_reg_neoncache(&dyn->insts[ninst].n);
}
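// Return 1 if the x87 tag bits for ST(st) are non-zero (register marked freed/empty)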
int fpu_is_st_freed(dynarec_native_t* dyn, int ninst, int st)
{
return (dyn->n.tags&(0b11<<(st*2)))?1:0;
}
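// Record native-flag usage for instruction ninst: flags are marked as generated if the instruction sets x86 flags (and 'before' is 0), otherwise as consumed, before or during the instruction; returns the flag mask unchanged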
uint8_t mark_natflag(dynarec_arm_t* dyn, int ninst, uint8_t flag, int before)
{
if(dyn->insts[ninst].x64.set_flags && !before) {
dyn->insts[ninst].set_nat_flags |= flag;
//if(dyn->insts[ninst].x64.use_flags) {
// dyn->insts[ninst].use_nat_flags |= flag;
//}
} else {
if(before)
dyn->insts[ninst].use_nat_flags_before |= flag;
else
dyn->insts[ninst].use_nat_flags |= flag;
}
return flag;
}
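// Translate x86 flag bits to the corresponding ARM64 native flags (X_ZF->NF_EQ, X_SF->NF_SF, X_OF->NF_VF, X_CF->NF_CF); AF and PF have no native equivalent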
uint8_t flag2native(uint8_t flags)
{
uint8_t ret = 0;
#ifdef ARM64
if(flags&X_ZF) ret|=NF_EQ;
if(flags&X_SF) ret|=NF_SF;
if(flags&X_OF) ret|=NF_VF;
if(flags&X_CF) ret|=NF_CF;
#else
// no native flags on rv64 or la64
#endif
return ret;
}
int flagIsNative(uint8_t flags)
{
if(flags&(X_AF|X_PF)) return 0;
return 1;
}
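// Starting at the flag-generating instruction 'start', walk forward (following jumps whose target has a single predecessor) and compute which of the natively generated 'flags' are actually consumed before being clobbered or regenerated; returns the used subset, or 0 if native flags cannot be carried through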
static uint8_t getNativeFlagsUsed(dynarec_arm_t* dyn, int start, uint8_t flags)
{
// propagate and check which flags are actually used
uint8_t used_flags = 0;
int nat_flags_used = 0;
int ninst = start;
while(ninst<dyn->size) {
//printf_log(LOG_INFO, "getNativeFlagsUsed ninst:%d/%d, flags=%x, used_flags=%x(%d), nat_flags_op_before:%x, nat_flags_op:%x, need_after:%x set_nat_flags:%x nat_flags_used:%x(%x)\n", ninst, start, flags, used_flags, nat_flags_used, dyn->insts[ninst].nat_flags_op_before, dyn->insts[ninst].nat_flags_op, flag2native(dyn->insts[ninst].x64.need_after),dyn->insts[ninst].set_nat_flags, dyn->insts[ninst].use_nat_flags, dyn->insts[ninst].use_nat_flags_before);
// check if this is an opcode that generate flags but consume flags before
if(dyn->insts[ninst].nat_flags_op_before)
return 0;
// check if nat flags are used "before"
if(dyn->insts[ninst].use_nat_flags_before) {
// check if the gen flags are compatible
if(dyn->insts[ninst].use_nat_flags_before&~flags)
return 0;
nat_flags_used = 1;
used_flags|=dyn->insts[ninst].use_nat_flags_before&flags;
}
// if the opcode generate flags, return
if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_TOUCH && (start!=ninst)) {
if(!nat_flags_used)
return 0;
if(used_flags&~dyn->insts[ninst].set_nat_flags) {
// check partial changes that would destroy flag state
if(dyn->insts[ninst].use_nat_flags_before&flags)
return used_flags;
// check if flags are all refreshed, then it's ok
if((used_flags&dyn->insts[ninst].set_nat_flags)==used_flags)
return used_flags;
// incompatible
return 0;
}
return used_flags;
}
// check if there is a callret barrier
if(dyn->insts[ninst].x64.has_callret)
return 0;
// check if nat flags are used
if(dyn->insts[ninst].use_nat_flags) {
// check if the gen flags are compatible
if(dyn->insts[ninst].use_nat_flags&~flags)
return 0;
nat_flags_used = 1;
used_flags |= dyn->insts[ninst].use_nat_flags&flags;
}
if(ninst!=start && dyn->insts[ninst].x64.use_flags) {
// some flags not compatible with native, partial use not allowed
if(flag2native(dyn->insts[ninst].x64.use_flags)!=dyn->insts[ninst].use_nat_flags)
return 0;
// check if flags are used, but not the natives ones
//if(dyn->insts[ninst].use_nat_flags&~used_flags)
// return 0;
}
// check if flags are generated without native option
if((start!=ninst) && dyn->insts[ninst].x64.gen_flags && (flag2native(dyn->insts[ninst].x64.gen_flags&dyn->insts[ninst].x64.need_after)&used_flags)) {
if(used_flags&~flag2native(dyn->insts[ninst].x64.gen_flags&dyn->insts[ninst].x64.need_after))
return 0; // partial coverage, not supported for now (TODO: this might be fixable)
else
return nat_flags_used?used_flags:0; // full coverage... End of propagation
}
// check if flags are still needed
if(!(flag2native(dyn->insts[ninst].x64.need_after)&flags))
return nat_flags_used?used_flags:0;
// check if flags are destroyed, cancel the use then
if(dyn->insts[ninst].nat_flags_op && (start!=ninst))
return 0;
// update used flags
//used_flags |= (flag2native(dyn->insts[ninst].x64.need_after)&flags);
// go next
if(!dyn->insts[ninst].x64.has_next) {
// check if it's a jump to an opcode with only 1 pred, then just follow the jump
int jmp = dyn->insts[ninst].x64.jmp_insts;
if(dyn->insts[ninst].x64.jmp && (jmp!=-1) && (getNominalPred(dyn, jmp)==ninst))
ninst = jmp;
else
return nat_flags_used?used_flags:0;
} else
++ninst;
}
return nat_flags_used?used_flags:0;
}
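// For an instruction that generates flags natively, find which native flags are really used downstream and mark every instruction on the path with need_nat_flags (tracking normal vs inverted carry), until the flags are regenerated or no longer needed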
static void propagateNativeFlags(dynarec_arm_t* dyn, int start)
{
int ninst = start;
// those are the flags generated by the opcode and used later on
uint8_t flags = dyn->insts[ninst].set_nat_flags&flag2native(dyn->insts[ninst].x64.need_after);
// check if they are actually used before starting
//printf_log(LOG_INFO, "propagateNativeFlags called for start=%d, flags=%x, will need:%x\n", start, flags, flag2native(dyn->insts[ninst].x64.need_after));
if(!flags) return;
// also check if some native flags are used but not generated here
if(flag2native(dyn->insts[ninst].x64.use_flags)&~flags) return;
uint8_t used_flags = getNativeFlagsUsed(dyn, start, flags);
//printf_log(LOG_INFO, " will use:%x, carry:%d, generate inverted carry:%d\n", used_flags, used_flags&NF_CF, dyn->insts[ninst].gen_inverted_carry);
if(!used_flags) return; // the flags wont be used, so just cancel
int nc = dyn->insts[ninst].gen_inverted_carry?0:1;
int carry = used_flags&NF_CF;
// propagate
while(ninst<dyn->size) {
// check if this is an opcode that generate flags but consume flags before
if((start!=ninst) && dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_TOUCH) {
if(dyn->insts[ninst].use_nat_flags_before) {
dyn->insts[ninst].before_nat_flags |= used_flags;
if(carry) dyn->insts[ninst].normal_carry_before = nc;
}
// if the opcode generate flags, return
return;
}
// check if flags are generated without native option
if((start!=ninst) && dyn->insts[ninst].x64.gen_flags && (flag2native(dyn->insts[ninst].x64.gen_flags&dyn->insts[ninst].x64.need_after)&used_flags))
return;
// mark the opcode
uint8_t use_flags = flag2native(dyn->insts[ninst].x64.need_before|dyn->insts[ninst].x64.need_after);
if(dyn->insts[ninst].x64.use_flags) use_flags |= flag2native(dyn->insts[ninst].x64.use_flags); // should not change anything
//printf_log(LOG_INFO, " marking ninst=%d with %x | %x&%x => %x\n", ninst, dyn->insts[ninst].need_nat_flags, used_flags, use_flags, dyn->insts[ninst].need_nat_flags | (used_flags&use_flags));
dyn->insts[ninst].need_nat_flags |= used_flags&use_flags;
if(carry) dyn->insts[ninst].normal_carry = nc;
if(carry && dyn->insts[ninst].invert_carry) nc = 0;
// check if flags are still needed
if(!(flag2native(dyn->insts[ninst].x64.need_after)&used_flags))
return;
// go next
if(!dyn->insts[ninst].x64.has_next) {
// check if it's a jump to an opcode with only 1 pred, then just follow the jump
int jmp = dyn->insts[ninst].x64.jmp_insts;
if(dyn->insts[ninst].x64.jmp && (jmp!=-1) && (getNominalPred(dyn, jmp)==ninst))
ninst = jmp;
else
return;
} else
++ninst;
}
}
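// Native-flags analysis pass: propagate native-flag usage forward from every instruction that touches the host flags and generates x86 flags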
void updateNativeFlags(dynarec_native_t* dyn)
{
if(!BOX64ENV(dynarec_nativeflags))
return;
// forward check if native flags are used
for(int ninst=0; ninst<dyn->size; ++ninst)
if(flag2native(dyn->insts[ninst].x64.gen_flags) && (dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_TOUCH)) {
propagateNativeFlags(dyn, ninst);
}
}
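// Reset the native-flags bookkeeping recorded for instruction ninst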
void rasNativeState(dynarec_arm_t* dyn, int ninst)
{
dyn->insts[ninst].nat_flags_op = dyn->insts[ninst].set_nat_flags = dyn->insts[ninst].use_nat_flags = dyn->insts[ninst].need_nat_flags = 0;
}
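// Return 1 if the native flags held at the end of ninst do not match what its jump target expects: missing flags, flags only available as x86 flags at the target, or a carry-polarity (normal vs inverted) mismatch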
int nativeFlagsNeedsTransform(dynarec_arm_t* dyn, int ninst)
{
int jmp = dyn->insts[ninst].x64.jmp_insts;
if(jmp<0)
return 0;
if(!dyn->insts[ninst].x64.need_after || !dyn->insts[jmp].x64.need_before)
return 0;
if(dyn->insts[ninst].set_nat_flags)
return 0;
uint8_t flags_before = dyn->insts[ninst].need_nat_flags;
uint8_t nc_before = dyn->insts[ninst].normal_carry;
if(dyn->insts[ninst].invert_carry)
nc_before = 0;
uint8_t flags_after = dyn->insts[jmp].need_nat_flags;
uint8_t nc_after = dyn->insts[jmp].normal_carry;
if(dyn->insts[jmp].nat_flags_op==NAT_FLAG_OP_TOUCH) {
flags_after = dyn->insts[jmp].before_nat_flags;
nc_after = dyn->insts[jmp].normal_carry_before;
}
uint8_t flags_x86 = flag2native(dyn->insts[jmp].x64.need_before);
flags_x86 &= ~flags_after;
if((flags_before&NF_CF) && (flags_after&NF_CF) && (nc_before!=nc_after))
return 1;
// all of flags_after must be present, and none of the remaining flags_x86
if(((flags_before&flags_after)!=flags_after) || (flags_before&flags_x86))
return 1;
return 0;
}
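// Save a copy of the cache state of instruction ninst and unwind the live one back to its entry state; fpu_unwind_restore puts the saved copy back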
void fpu_save_and_unwind(dynarec_arm_t* dyn, int ninst, neoncache_t* cache)
{
memcpy(cache, &dyn->insts[ninst].n, sizeof(neoncache_t));
neoncacheUnwind(&dyn->insts[ninst].n);
}
void fpu_unwind_restore(dynarec_arm_t* dyn, int ninst, neoncache_t* cache)
{
memcpy(&dyn->insts[ninst].n, cache, sizeof(neoncache_t));
}