[RV64_DYNAREC] Added codegen for unaligned stores (#2289)

* [RV64_DYNAREC] Added another special SIGBUS case

* [RV64_DYNAREC] Added codegen for unaligned stores
This commit is contained in:
xctan 2025-01-24 15:10:45 +08:00 committed by GitHub
parent 7099774a3c
commit 8c1ffca530
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 357 additions and 22 deletions

View File

@ -961,6 +961,7 @@ if(RV64_DYNAREC)
${DYNAREC_SRC}
"${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_functions.c"
"${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_arch.c"
"${BOX64_ROOT}/src/dynarec/rv64/rv64_printer.c"
"${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_jmpnext.c"
"${BOX64_ROOT}/src/rv64detect.c"

View File

@ -74,6 +74,7 @@
#include "rv64/rv64_printer.h"
#include "rv64/dynarec_rv64_private.h"
#include "rv64/dynarec_rv64_functions.h"
#include "rv64/dynarec_rv64_arch.h"
// The limit here is the unconditional jump, whose offset is a signed 21-bit value
#define MAXBLOCK_SIZE ((1<<20)-200)
@ -81,11 +82,11 @@
#define UPDATE_SPECIFICS(A)
#define PREUPDATE_SPECIFICS(A) updateNativeFlags(A)
#define ARCH_SIZE(A) 0
#define ARCH_FILL(A, B) {}
#define ARCH_SIZE(A) get_size_arch(A)
#define ARCH_FILL(A, B) populate_arch(A, B)
#define ARCH_ADJUST(A, B, C, D) {}
#define STOP_NATIVE_FLAGS(A, B) {}
#define ARCH_UNALIGNED(A, B) 0
#define ARCH_UNALIGNED(A, B) arch_unaligned(A, B)
#else
#error Unsupported platform
#endif

View File

@ -404,8 +404,20 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
if (MODREG) { // reg <= reg
MVxw(TO_NAT((nextop & 7) + (rex.b << 3)), gd);
} else { // mem <= reg
addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, &lock, 1, 0);
SDxw(gd, ed, fixedaddress);
IF_UNALIGNED(ip) {
addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, &lock, (1 << (2 + rex.w)) - 1, 0);
for (int i = 0; i < (1 << (2 + rex.w)); i++) {
if (i == 0) {
SB(gd, ed, fixedaddress);
} else {
SRLI(x3, gd, i * 8);
SB(x3, ed, fixedaddress + i);
}
}
} else {
addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, &lock, 1, 0);
SDxw(gd, ed, fixedaddress);
}
SMWRITELOCK(lock);
}
break;

View File

@ -392,14 +392,32 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
ed = TO_NAT((nextop & 7) + (rex.b << 3));
MOV64xw(ed, i64);
} else { // mem <= i32
addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, &lock, 1, 4);
i64 = F32S;
if (i64) {
MOV64x(x3, i64);
ed = x3;
} else
ed = xZR;
SDxw(ed, wback, fixedaddress);
IF_UNALIGNED(ip) {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, &lock, (1 << (2 + rex.w)) - 1, 4);
i64 = F32S;
if (i64) {
MOV64x(x4, i64);
ed = x4;
} else
ed = xZR;
for (int i = 0; i < (1 << (2 + rex.w)); i++) {
if (i == 0 || ed == xZR) {
SB(ed, wback, fixedaddress + i);
} else {
SRLI(x3, ed, i * 8);
SB(x3, wback, fixedaddress + i);
}
}
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, &lock, 1, 4);
i64 = F32S;
if (i64) {
MOV64x(x3, i64);
ed = x3;
} else
ed = xZR;
SDxw(ed, wback, fixedaddress);
}
SMWRITELOCK(lock);
}
break;

View File

@ -180,11 +180,33 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
INST_NAME("MOVUPS Ex,Gx");
nextop = F8;
GETGX();
GETEX(x2, 0, 8);
LD(x3, gback, gdoffset + 0);
LD(x4, gback, gdoffset + 8);
SD(x3, wback, fixedaddress + 0);
SD(x4, wback, fixedaddress + 8);
IF_UNALIGNED(ip) {
GETEX(x2, 0, 15);
LD(x3, gback, gdoffset + 0);
LD(x4, gback, gdoffset + 8);
for (int i = 0; i < 8; i++) {
if (i == 0) {
SB(x3, wback, fixedaddress);
} else {
SRLI(x5, x3, i * 8);
SB(x5, wback, fixedaddress + i);
}
}
for (int i = 0; i < 8; i++) {
if (i == 0) {
SB(x4, wback, fixedaddress + 8);
} else {
SRLI(x5, x4, i * 8);
SB(x5, wback, fixedaddress + i + 8);
}
}
} else {
GETEX(x2, 0, 8);
LD(x3, gback, gdoffset + 0);
LD(x4, gback, gdoffset + 8);
SD(x3, wback, fixedaddress + 0);
SD(x4, wback, fixedaddress + 8);
}
if (!MODREG)
SMWRITE2();
break;

View File

@ -0,0 +1,231 @@
#include <stddef.h>
#include <stdio.h>
#include <signal.h>
#include <ucontext.h>
#include <string.h>
#include "debug.h"
#include "dynablock.h"
#include "x64emu.h"
#include "emu/x64emu_private.h"
#include "x64run.h"
#include "emu/x64run_private.h"
#include "dynarec/dynablock_private.h"
#include "dynarec_rv64_arch.h"
#include "dynarec_rv64_functions.h"
#include "dynarec_rv64_private.h"
// X-macro listing every optional state section of an arch record.
// Each user defines GO(A) before expanding SUPER() and #undef's it afterwards.
// Order might be important (it fixes the order of the presence bits and of the
// payloads that follow them), so define SUPER for the right one.
// The last entry deliberately has no trailing backslash, so the macro ends here
// and cannot absorb whatever line comes next.
#define SUPER() \
    GO(flags)   \
    GO(x87)     \
    GO(mmx)     \
    GO(sse)     \
    GO(ymm)
/* Payload attached to a record when its "flags" section is present. */
typedef struct arch_flags_s {
    uint8_t ignore : 1; /* single-bit marker; nothing in the visible code sets it yet */
} arch_flags_t;
// Codes for the type held in an x87 STx slot
// (presumably double / float / 64-bit integer, judging by the suffixes -- TODO confirm).
#define X87_ST_D 0
#define X87_ST_F 1
#define X87_ST_I64 2
// Register index bases: the x87 and MMX ranges both start 16 entries after XMM0.
// The expansions are parenthesized so they keep their value inside larger
// expressions (e.g. 2 * X870).
#define XMM0 0
#define X870 (XMM0 + 16)
#define EMM0 (XMM0 + 16)
/* Payload attached to a record when its "x87" section is present. */
typedef struct arch_x87_s {
    int8_t   delta;    /* up to +/-7 */
    uint8_t  x87;      /* one bit per STx: set when that STx is present */
    uint16_t x87_type; /* two bits per STx: its type */
    uint32_t x87_pos;  /* four bits per STx: its position (three would be enough) */
} arch_x87_t;
/* Payload attached to a record when its "mmx" section is present. */
typedef struct arch_mmx_s {
    uint8_t mmx; /* one bit per MMX register: set when that register is present */
} arch_mmx_t;
/* Payload attached to a record when its "sse" section is present. */
typedef struct arch_sse_s {
    uint16_t sse; /* one bit per SSE register: set when that register is present */
} arch_sse_t;
/* Payload attached to a record when its "ymm" section is present. */
typedef struct arch_ymm_s {
    uint16_t ymm0;    /* 1bit for ymm0 (presumably one "zeroed upper half" bit per register -- TODO confirm) */
    uint16_t ymm;     /* one bit per YMM register: set when that register is present */
    uint64_t ymm_pos; /* four bits per present YMM register: its position */
} arch_ymm_t;
// Header of one packed arch record. The payloads of the sections whose presence
// bit is set are stored right after this header, in SUPER() order.
typedef struct arch_arch_s
{
// one presence bit per section listed in SUPER()
#define GO(A) uint16_t A:1;
SUPER()
#undef GO
uint16_t unaligned:1; // copy of the "unaligned" marker of the instruction(s) this record describes
uint16_t seq:10; // how many additional consecutive instructions share the same values (0 = this record covers a single instruction)
} arch_arch_t;
// Unpacked, fixed-size description of one instruction's state. It is built per
// instruction and compared bytewise (memcmp) against the previous one to detect
// runs of identical state.
typedef struct arch_build_s
{
// one presence bit per section listed in SUPER()
#define GO(A) uint8_t A:1;
SUPER()
#undef GO
uint8_t unaligned; // copy of the instruction's "unaligned" marker
// payload of every section listed in SUPER(), named <section>_ ; always allocated here, copied out only when the matching presence bit is set
#define GO(A) arch_##A##_t A##_;
SUPER()
#undef GO
} arch_build_t;
/*
 * Describe the state of instruction `ninst` of `dyn` into `*arch`.
 * The structure is fully zeroed first, so padding bytes are deterministic for
 * the bytewise comparisons done by the callers. For now only the "unaligned"
 * marker is filled in; every SUPER() section stays cleared.
 * Returns the number of markers that are set (0 when the description is empty).
 */
static int arch_build(dynarec_rv64_t* dyn, int ninst, arch_build_t* arch)
{
    memset(arch, 0, sizeof(arch_build_t));
    // todo: fill in the SUPER() sections
    // opcode can handle unaligned
    arch->unaligned = dyn->insts[ninst].unaligned;

    int present = 0;
#define GO(A) present += arch->A;
    SUPER()
#undef GO
    present += arch->unaligned;
    return present;
}
/*
 * Compute how many bytes the packed arch records of `dyn` need.
 * Consecutive instructions with a bytewise-identical description share one
 * record, up to the capacity of the 10-bit run counter. Every record costs one
 * header plus the payload of each section that is present.
 * Returns 0 when the block has no instruction, or when a single record covers
 * everything and that record is empty.
 */
size_t get_size_arch(dynarec_rv64_t* dyn)
{
    if (!dyn->size)
        return 0;

    const int max_run = (1 << 10) - 1; // largest value the 10-bit seq field can hold
    arch_build_t cur = {0};
    arch_build_t prev = {0};
    size_t total = 0;
    int run = 0;
    int records = 0;
    int last_present = 0;

    for (int n = 0; n < dyn->size; ++n) {
        last_present = arch_build(dyn, n, &cur);
        // the very first instruction always opens a record
        if (n && (run < max_run) && !memcmp(&cur, &prev, sizeof(arch_build_t))) {
            // same state as the previous instruction: the current record just grows
            ++run;
            continue;
        }
        // a new record starts here
        run = 0;
        ++records;
        memcpy(&prev, &cur, sizeof(arch_build_t));
        total += sizeof(arch_arch_t);
#define GO(A) total += cur.A ? sizeof(arch_##A##_t) : 0;
        SUPER()
#undef GO
    }

    // empty, no flags, no nothing
    if (records == 1 && !last_present)
        return 0;
    return total;
}
/*
 * Serialize `build` into the packed record starting at `arch`.
 * Writes the header (presence bits, unaligned marker, seq reset to 0), then
 * appends the payload of each present section right after it, in SUPER() order.
 * The caller must provide enough room behind `arch` for those payloads.
 */
static void build_next(arch_arch_t* arch, arch_build_t* build)
{
#define GO(A) arch->A = build->A;
    SUPER()
#undef GO
    arch->unaligned = build->unaligned;
    arch->seq = 0;
    // Byte cursor just past the header. A byte pointer is used because
    // arithmetic on void* is a GNU extension, not ISO C.
    uint8_t* p = (uint8_t*)arch + sizeof(arch_arch_t);
#define GO(A)                                          \
    if (arch->A) {                                     \
        memcpy(p, &build->A##_, sizeof(arch_##A##_t)); \
        p += sizeof(arch_##A##_t);                     \
    }
    SUPER()
#undef GO
}
/*
 * Size in bytes of the packed record at `arch`: its header plus the payload of
 * every section whose presence bit is set.
 */
static int sizeof_arch(arch_arch_t* arch)
{
    int total = sizeof(arch_arch_t);
#define GO(A) total += arch->A ? sizeof(arch_##A##_t) : 0;
    SUPER()
#undef GO
    return total;
}
/*
 * Write the packed arch records of `dyn` into the buffer `p`.
 * Grouping follows the same rule as get_size_arch(): an instruction whose
 * description is bytewise identical to the previous one extends the current
 * record (its seq counter is bumped) until the 10-bit counter is full;
 * otherwise a new record is emitted right after the previous one.
 * `p` must be large enough for all the records written.
 */
void populate_arch(dynarec_rv64_t* dyn, void* p)
{
    const int max_run = (1 << 10) - 1; // largest value the 10-bit seq field can hold
    arch_build_t cur = {0};
    arch_build_t prev = {0};
    arch_arch_t* rec = p;    // record currently being extended
    arch_arch_t* vacant = p; // where the next record will be written
    int run = 0;

    for (int n = 0; n < dyn->size; ++n) {
        arch_build(dyn, n, &cur);
        // the very first instruction always opens a record
        if (n && (run < max_run) && !memcmp(&cur, &prev, sizeof(arch_build_t))) {
            // same sequence, increment
            rec->seq = ++run;
            continue;
        }
        rec = vacant;
        build_next(rec, &cur);
        run = 0;
        memcpy(&prev, &cur, sizeof(arch_build_t));
        vacant = (arch_arch_t*)((uintptr_t)rec + sizeof_arch(rec));
    }
}
int getX64AddressInst(dynablock_t* db, uintptr_t x64pc); // defined in signal.c
/*
 * Locate the arch record describing the instruction that precedes `x64pc` in
 * block `db`, and log the records walked on the way.
 * Does nothing when the block carries no arch data or when the address is not
 * found in the block. When the address maps to the first instruction there is
 * no previous state to look up, so only CHECK_FLAGS(emu) is run.
 * The actual adjustment of `emu` / `p` from the located record is still to be
 * written (see the todo at the end).
 */
void adjust_arch(dynablock_t* db, x64emu_t* emu, ucontext_t* p, uintptr_t x64pc)
{
    if(!db->arch_size || !db->arch)
        return;
    int ninst = getX64AddressInst(db, x64pc);
    dynarec_log(LOG_INFO, "adjust_arch(...), db=%p, x64pc=%p, nints=%d", db, (void*)x64pc, ninst);
    if(ninst<0) {
        dynarec_log(LOG_INFO, "\n");
        return;
    }
    if(ninst==0) {
        dynarec_log(LOG_INFO, "\n");
        CHECK_FLAGS(emu);
        return;
    }
    // look for state at ninst-1
    arch_arch_t* arch = db->arch;
    arch_arch_t* next = arch;
    #define GO(A) arch_##A##_t* A = NULL;
    SUPER()
    #undef GO
    // i counts the instructions covered by the records visited so far. The
    // record covering instruction ninst-1 is the first one that brings this
    // count to at least ninst, so the walk must go on while i<ninst; stopping
    // at i<ninst-1 would select the record of instruction ninst-2 instead.
    int i = 0;
    while(i<ninst) {
        arch = next;
        i += 1+arch->seq;
        dynarec_log(LOG_INFO, "[ seq=%d%s%s%s%s%s ] ", arch->seq, arch->flags?" Flags":"", arch->x87?" x87":"", arch->mmx?" MMX":"", arch->sse?" SSE":"", arch->ymm?" YMM":"");
        next = (arch_arch_t*)((uintptr_t)next + sizeof_arch(arch));
    }
    // point each section pointer at its payload inside the selected record
    // (the pointer stays NULL when the section is absent)
    int sz = sizeof(arch_arch_t);
    #define GO(A)                                       \
    if(arch->A) {                                       \
        A = (arch_##A##_t*)((uintptr_t)arch + sz);      \
        sz+=sizeof(arch_##A##_t);                       \
    }
    SUPER()
    #undef GO
    // todo
    dynarec_log(LOG_INFO, "\n");
}
/*
 * Tell whether the instruction at `x64pc` in block `db` is marked "unaligned",
 * i.e. can be re-generated for the unaligned special case.
 * Returns 0 when the block carries no arch data or when the address is not
 * found in the block; otherwise returns the unaligned bit of the record that
 * covers the instruction.
 */
int arch_unaligned(dynablock_t* db, uintptr_t x64pc)
{
    if (!db->arch_size || !db->arch)
        return 0;

    const int ninst = getX64AddressInst(db, x64pc);
    if (ninst < 0)
        return 0;

    // look for state at ninst
    arch_arch_t* rec;
    arch_arch_t* cursor = db->arch;
    int last_covered = -1; // index of the last instruction covered so far
    // ninst is >= 0 here, so at least one record is always visited
    do {
        rec = cursor;
        last_covered += 1 + rec->seq;
        cursor = (arch_arch_t*)((uintptr_t)cursor + sizeof_arch(rec));
    } while (last_covered < ninst);

    return rec->unaligned;
}

View File

@ -0,0 +1,20 @@
#ifndef __DYNAREC_RV_ARCH_H__
#define __DYNAREC_RV_ARCH_H__
#include <stddef.h>
#include <ucontext.h>
#include "x64emu.h"
#include "box64context.h"
#include "dynarec.h"
#include "dynarec_rv64_private.h"
// Get the size, in bytes, of the arch-specific info needed for the block being built
// (can be 0 when there is nothing to store)
size_t get_size_arch(dynarec_rv64_t* dyn);
// Populate the array: write the arch-specific info of dyn into the buffer p,
// which must be at least get_size_arch(dyn) bytes long
void populate_arch(dynarec_rv64_t* dyn, void* p);
// Adjust flags and more for the emulated state, using the arch-specific info
// stored in db for the x64 address x64pc
void adjust_arch(dynablock_t* db, x64emu_t* emu, ucontext_t* p, uintptr_t x64pc);
// Get whether the instruction at x64pc can be regenerated for unaligned access
// (returns 0 when db has no arch-specific info or x64pc is not found in db)
int arch_unaligned(dynablock_t* db, uintptr_t x64pc);
#endif // __DYNAREC_RV_ARCH_H__

View File

@ -398,8 +398,30 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
INST_NAME("MOVDQU Ex,Gx");
nextop = F8;
GETGX();
GETEX(x2, 0, 8);
SSE_LOOP_MV_Q2(x3);
IF_UNALIGNED(ip) {
GETEX(x2, 0, 15);
LD(x3, gback, gdoffset + 0);
LD(x4, gback, gdoffset + 8);
for (int i = 0; i < 8; i++) {
if (i == 0) {
SB(x3, wback, fixedaddress);
} else {
SRLI(x5, x3, i * 8);
SB(x5, wback, fixedaddress + i);
}
}
for (int i = 0; i < 8; i++) {
if (i == 0) {
SB(x4, wback, fixedaddress + 8);
} else {
SRLI(x5, x4, i * 8);
SB(x5, wback, fixedaddress + i + 8);
}
}
} else {
GETEX(x2, 0, 8);
SSE_LOOP_MV_Q2(x3);
}
if (!MODREG) SMWRITE2();
break;
case 0xAE:

View File

@ -814,6 +814,10 @@
#define IFX2X(A, B) if ((dyn->insts[ninst].x64.gen_flags == (A) || dyn->insts[ninst].x64.gen_flags == (B) || dyn->insts[ninst].x64.gen_flags == ((A) | (B))))
#define IFXN(A, B) if ((dyn->insts[ninst].x64.gen_flags & (A) && !(dyn->insts[ninst].x64.gen_flags & (B))))
// Default IF_UNALIGNED, used only when nothing defined it earlier:
// the statement that follows runs when is_addr_unaligned(A) is non-zero.
// Note the macro expands to a bare `if(...)`, so an `else` may follow the guarded statement.
#ifndef IF_UNALIGNED
#define IF_UNALIGNED(A) if(is_addr_unaligned(A))
#endif
#define STORE_REG(A) SD(x##A, xEmu, offsetof(x64emu_t, regs[_##A]))
#define LOAD_REG(A) LD(x##A, xEmu, offsetof(x64emu_t, regs[_##A]))

View File

@ -93,3 +93,6 @@
else if (dyn->vector_sew == VECTOR_SEWNA && (set)) \
dyn->vector_sew = VECTOR_SEW8; \
} while (0)
// Mark the opcode as "unaligned possible" only if the current address is not already marked as unaligned:
// stores 1 into dyn->insts[ninst].unaligned when is_addr_unaligned(A) returns 0, and 0 otherwise.
// The stored value is also the condition, so the guarded statement runs only in the first case.
#define IF_UNALIGNED(A) if((dyn->insts[ninst].unaligned=(is_addr_unaligned(A)?0:1)))

View File

@ -132,6 +132,7 @@ typedef struct instruction_rv64_s {
uint8_t nat_flags_carry:1;
uint8_t nat_flags_sign:1;
uint8_t nat_flags_needsign:1;
uint8_t unaligned:1; // this opcode can be re-generated for unaligned special case
uint8_t nat_flags_op1;
uint8_t nat_flags_op2;
flagcache_t f_exit; // flags status at end of instruction

View File

@ -1000,14 +1000,14 @@ int sigbus_specialcases(siginfo_t* info, void * ucntx, void* pc, void* _fpsimd,
uint32_t funct3 = GET_FIELD(inst, 14, 12);
uint32_t opcode = GET_FIELD(inst, 6, 0);
if ((opcode == 0b0100011 || opcode == 0b0100111 /* F */) && (funct3 == 0b010 /* (F)SW */ || funct3 == 0b011 /* (F)SD */)) {
if ((opcode == 0b0100011 || opcode == 0b0100111 /* F */) && (funct3 == 0b010 /* (F)SW */ || funct3 == 0b011 /* (F)SD */ || funct3 == 0b001 /* SH */)) {
int val = (inst >> 20) & 0x1f;
int dest = (inst >> 15) & 0x1f;
int64_t imm = (GET_FIELD(inst, 31, 25) << 5) | (GET_FIELD(inst, 11, 7));
imm = SIGN_EXT(imm, 12);
volatile uint8_t *addr = (void *)(p->uc_mcontext.__gregs[dest] + imm);
uint64_t value = opcode == 0b0100011 ? p->uc_mcontext.__gregs[val] : p->uc_mcontext.__fpregs.__d.__f[val<<1];
for(int i = 0; i < (funct3 == 0b010 ? 4 : 8); ++i) {
for(int i = 0; i < (funct3 == 0b010 ? 4 : funct3 == 0b011 ? 8 : 2); ++i) {
addr[i] = (value >> (i * 8)) & 0xff;
}
p->uc_mcontext.__gregs[0] += 4; // pc += 4