OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

Compare Revisions

  • This comparison shows the changes necessary to convert path / from Rev 1480 to Rev 1481

Rev 1480 → Rev 1481

/trunk/or1ksim/cpu/or32/dyn_rec_stubs.c
153,6 → 153,7
recompile_page(dp);
 
fprintf(stderr, "Recompiled page length: %i\n", dp->host_len);
fprintf(stderr, "Recompiled to: %p\n", dp->host_page);
fprintf(stderr, "Dumping reced page to disk...\n");
 
f = fopen(argv[2], "w");
170,8 → 171,10
printf("--- Recompiled or disassembly end ---\n");
*/
 
dump_xrefs(dp, stdout);
 
printf("--- Recompiled offsets ---\n");
for(i = 0; i < (PAGE_LEN / 4); i++)
printf("%"PRIxADDR": %x\n", i * 4, dp->locs[i] - dp->host_page);
printf("--- Recompiled offsets end ---\n");
destruct_automata();
 
return 0;
252,9 → 255,13
return NULL;
}
 
static struct dev_memarea dummy_area = {
delayr: 1,
};
 
struct dev_memarea *verify_memoryarea(oraddr_t addr)
{
return NULL;
return &dummy_area;
}
 
void sim_done (void)
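
For reference, the `delayr: 1' line above uses GCC's old `field:' initializer syntax. A minimal sketch of the same stub written with standard C99 designated initializers (any other members of struct dev_memarea are simply left zero-initialized):

static struct dev_memarea dummy_area = {
  .delayr = 1,  /* the only field the recompiler stubs care about */
};
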
/trunk/or1ksim/cpu/or32/dyn32_defs.h
25,11 → 25,13
unsigned int num_ops_param;
unsigned int ops_param_len;
unsigned int *ops_param;
int jump_local; /* Is this instruction a page-local jump */
unsigned int *jump_local_loc; /* Points to the parameter that holds the location of the jump */
struct x_ref *xref; /* Cross-reference of the page-local jump */
unsigned int jump_local; /* Parameter index that holds the location of the jump */
oraddr_t jump_local_loc; /* Location to jump to (relative to the start of the page) */
unsigned int not_jump_loc; /* Location to jump if not jumping (l.bf/l.bnf) */
int xref; /* Is this location cross referenced? */
oraddr_t insn_addr; /* Physical address of the instruction */
unsigned int reg_t[3]; /* Which registers are in the temporaries? */
unsigned int reg_t[3]; /* Which registers are in the temporaries (before the instruction)? */
unsigned int reg_t_d[3]; /* Which registers are in the temporaries (after the instruction)? */
struct op_queue *prev;
struct op_queue *next;
};
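
As a hypothetical illustration of the reg_t/reg_t_d convention above (the register numbers are invented): entry i names the GPR currently cached in host temporary ti, and 32, the out-of-range register number used throughout dyn_rec.c, marks an empty temporary.

/* Hypothetical snapshot around one recompiled instruction */
unsigned int reg_t_example[3]   = { 3, 4, 32 }; /* before: t0=r3, t1=r4, t2 empty */
unsigned int reg_t_d_example[3] = { 3, 4,  5 }; /* after: the result register r5
                                                 * was additionally placed in t2 */
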
/trunk/or1ksim/cpu/or32/op_support.c
45,14 → 45,19
#include "rec_i386.h"
 
/* Stuff that is really a `micro' operation but is rather big (or for some other
* reason (like calling exit()) */
* reason like calling exit()) */
 
void upd_reg_from_t(oraddr_t pc)
void upd_reg_from_t(oraddr_t pc, int bound)
{
int reg;
 
reg = cpu_state.curr_page->ts[(pc & (PAGE_SIZE - 1)) / 2];
pc = ((pc & (PAGE_SIZE - 1)) / 4);
 
if(bound) {
reg = cpu_state.curr_page->ts_bound[pc + 1];
} else
reg = cpu_state.curr_page->ts_during[pc];
 
if(reg & 0x1f)
cpu_state.reg[reg & 0x1f] = cpu_state.t0;
 
65,8 → 70,8
 
void op_support_nop_exit(void)
{
upd_reg_from_t(get_pc());
PRINTF("exit(%"PRIdREG")\n", cpu_state.reg[3]);
upd_reg_from_t(get_pc(), 0);
PRINTF("exit(%"PRIxREG")\n", cpu_state.reg[3]);
fprintf(stderr, "@reset : cycles %lld, insn #%lld\n",
runtime.sim.reset_cycles, runtime.cpu.reset_instructions);
fprintf(stderr, "@exit : cycles %lld, insn #%lld\n", runtime.sim.cycles,
77,8 → 82,10
/* FIXME: Implement emulation of a stalled cpu
if (config.debug.gdb_enabled)
set_stall_state (1);
else
runtime.sim.cont_run = 0;
else {
handle_sim_command();
sim_done();
}
*/
exit(0);
}
94,19 → 101,19
 
void op_support_nop_printf(void)
{
upd_reg_from_t(get_pc());
upd_reg_from_t(get_pc(), 0);
simprintf(cpu_state.reg[4], cpu_state.reg[3]);
}
 
void op_support_nop_report(void)
{
upd_reg_from_t(get_pc());
upd_reg_from_t(get_pc(), 0);
PRINTF("report(0x%"PRIxREG");\n", cpu_state.reg[3]);
}
 
void op_support_nop_report_imm(int imm)
{
upd_reg_from_t(get_pc());
upd_reg_from_t(get_pc(), 0);
PRINTF("report %i (0x%"PRIxREG");\n", imm, cpu_state.reg[3]);
}
 
118,9 → 125,11
void do_jump(oraddr_t addr)
{
struct dyn_page *target_dp;
struct x_ref *xref;
oraddr_t phys_page;
 
/* Temporaries are always shipped out */
cpu_state.ts_current = 1;
 
/* The pc is set to the location of the jump in op_set_pc_preemt(_check) and
* then it is incremented by 4 when the scheduler is run. If a scheduled job
* so happens to raise an exception cpu_state.delay_insn will still be set and
156,21 → 165,6
if(phys_page < 0x100)
target_dp->dirty = 1;
 
/* Check if this location is cross-referenced */
if(!(xref = find_host_x_ref(target_dp->xrefs, phys_page))) {
target_dp->dirty = 1;
xref = add_to_xrefs(target_dp, phys_page);
if(cpu_state.curr_page)
add_to_held_xrefs(cpu_state.curr_page, xref);
} else {
/* Only increment reference count if this page didn't already */
if(cpu_state.curr_page && !find_held_x_ref(cpu_state.curr_page->held_xrefs,
phys_page)) {
xref->ref++;
add_to_held_xrefs(cpu_state.curr_page, xref);
}
}
 
if(target_dp->dirty)
recompile_page(target_dp);
 
186,160 → 180,28
 
cpu_state.ts_current = 0;
 
/* Initially this (and do_rfe/handle_except) returned the address that we
* should jump to and then the recompiled code performed the jump. This was
* no problem if the jump was truly an inter-page jump or if the location
* didn't need recompilation. If the jump is page local and the page needs
* recompilation, there is a very high probability that the page will move in
* memory, and then the return address on the stack will point to memory
* that has already been freed, sometimes leading to crashes */
/* Initially this returned the address that we should jump to and then the
* recompiled code performed the jump. This was no problem if the jump was
* truly an inter-page jump or if the location didn't need recompilation. If
* the jump is page local and the page needs recompilation, there is a very
* high probability that the page will move in memory, and then the return
* address on the stack will point to memory that has already been
* freed, sometimes leading to crashes */
/* This looks like it could really be simpler, but no it can't. The only
* issue here is the stack: it has to be unwound. This function is called
* from except_handle, which generally ends up quite high on the stack... */
or_longjmp(xref->dyn_addr);
enter_dyn_code(phys_page, target_dp);
}
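
The comment above is easier to follow with a small, self-contained analogy. This is not or1ksim's actual or_longjmp/enter_dyn_code (those are architecture specific); it only sketches why control is transferred by unwinding the host stack back to a dispatch point instead of returning a target address up through except_handle:

#include <setjmp.h>
#include <stdio.h>

static jmp_buf dispatch_point;    /* saved at the recompiler's dispatch loop */

static void deep_exception_path(void)
{
  /* ...imagine several frames of except_handle/scheduler between here and
   * the dispatch loop... */
  longjmp(dispatch_point, 1);     /* unwind them all in one go */
}

int main(void)
{
  if(setjmp(dispatch_point) == 0)
    deep_exception_path();
  /* Here the real recompiler would jump into the recompiled page */
  printf("back at the dispatch point with a clean stack\n");
  return 0;
}
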
 
/* l.rfe is a hard instruction to emulate. One could just call
* do_jump(cpu_state.sprs[SPR_EPCR_BASE]), but then the location that we jump to
* will get cross referenced, and because the page that contains the exception
* handlers is very rarely marked as dirty it will accumulate a lot of held
* cross references over time. */
void do_rfe(void)
/* Wrapper around analysis() that contains all the recompiler specific stuff */
void op_support_analysis(void)
{
struct dyn_page *target_dp;
struct x_ref *xref;
oraddr_t phys_page;
int already_held = 0;
 
set_pc(cpu_state.sprs[SPR_EPCR_BASE]);
 
phys_page = immu_translate(cpu_state.sprs[SPR_EPCR_BASE]);
 
/* Same reason as in do_jump() */
runtime.sim.mem_cycles = 0;
 
/* op_do_sched has run by the time this is run, which makes the pc point to
* the instruction after l.rfe. */
printf("Returning from exception to %"PRIxADDR" from %"PRIxADDR"\n",
phys_page, cpu_state.sprs[SPR_PPC]);
 
target_dp = find_dynd_page(phys_page);
 
if(!target_dp)
target_dp = new_dp(phys_page);
 
/* Since writes to the 0x0-0xff range do not dirtyfy a page, recompile the 0x0
* page if the jump is to that location */
if(phys_page < 0x100)
target_dp->dirty = 1;
 
/* Check if this location is cross-referenced */
if(!(xref = find_host_x_ref(target_dp->xrefs, phys_page))) {
xref = add_to_xrefs(target_dp, phys_page);
/* Calling dirtyfy_page is really tempting, but if we get to the situation where
* the l.rfe instruction and the location to which it returns are on the
* same page then all the exception cross references will get removed and
* this will result in excessive recompilations of this page */
target_dp->dirty = 1;
 
/* There is a lot of code (especially in Linux) that does loops like this:
* int a;
* // Stuff such that b ends up on another page than a
* int b;
* for(i = 0; i < (some big value); i++) {
* a = b;
* // Some more stuff
* }
* Here a DTLB miss will happen on every access to a and b, and l.rfe will
* always return to the same locations, but since the previous l.rfe to this
* page was to a different location the page will get recompiled each time a
* or b is accessed. This is why the last NUM_RFE_HELD returns are `cached'.
*/
if(++cpu_state.rfe_held_xref_pos == NUM_RFE_HELD)
cpu_state.rfe_held_xref_pos = 0;
 
if(cpu_state.rfe_held_xrefs[cpu_state.rfe_held_xref_pos])
cpu_state.rfe_held_xrefs[cpu_state.rfe_held_xref_pos]->ref--;
 
cpu_state.rfe_held_xrefs[cpu_state.rfe_held_xref_pos] = xref;
} else {
/* Make sure we increase this cross reference's reference count, since it is
* decremented below. */
xref->ref++;
already_held = 1;
}
 
if(target_dp->dirty)
recompile_page(target_dp);
 
if(already_held)
xref->ref--;
 
cpu_state.curr_page = target_dp;
 
/* FIXME: If the page is backed by more than one type of memory, this will
* produce wrong results */
if(cpu_state.sprs[SPR_SR] & SPR_SR_IME)
/* Add the mmu hit delay to the cycle counter */
upd_cycles_dec(target_dp->delayr - config.immu.hitdelay);
upd_sim_cycles();
if(ADDR_PAGE(cpu_state.pc) != cpu_state.pc)
upd_reg_from_t(cpu_state.pc - (cpu_state.delay_insn ? 4 : 0), 0);
else
upd_cycles_dec(target_dp->delayr);
 
cpu_state.ts_current = 0;
 
/* See the comment at the end of do_jump */
or_longjmp(xref->dyn_addr);
upd_reg_from_t(cpu_state.pc, 0);
runtime.cpu.instructions++;
analysis(&cpu_state.iqueue);
}
 
/* Handles an exception. */
void handle_except(oraddr_t except)
{
struct dyn_page *target_dp;
struct x_ref *xref;
 
/* NOTE: It is known when this code will be run. It is therefore not
* necessary to plough through cpu_state.curr_page->ts to store the
* temporaries. On the other hand, except_handle is also called from the
* scheduler, so we don't know when it is called and we can't move the
* temporaries to their permanent storage in the recompiled code. */
 
/* op_do_sched has run by the time we run this, which makes the pc point to
* the next instruction. */
printf("Exception %"PRIxADDR" (%s) from %"PRIxADDR"\n", except,
except_name(except), get_pc() - 4);
 
set_pc(except);
 
target_dp = find_dynd_page(except);
 
if(!target_dp)
target_dp = new_dp(except);
 
/* Check if this location is cross-referenced */
if(!(xref = find_host_x_ref(target_dp->xrefs, except))) {
/* See the comment in do_rfe for why dirtyfy_page is not called */
target_dp->dirty = 1;
xref = add_to_xrefs(target_dp, except);
} else {
/* If this cross reference is scheduled for removal increment its reference
* count */
if(!xref->ref)
xref->ref++;
}
 
if(target_dp->dirty)
recompile_page(target_dp);
 
cpu_state.curr_page = target_dp;
 
/* FIXME: If the page is backed by more than one type of memory, this will
* produce wrong results */
/* Address translation is disabled above (no need to add hitdelay) */
upd_cycles_dec(target_dp->delayr);
 
cpu_state.ts_current = 0;
 
/* See the comment at the end of do_jump */
or_longjmp(xref->dyn_addr);
}
 
/trunk/or1ksim/cpu/or32/op.c
51,9 → 51,6
#include "op_i386.h"
 
/* FIXME: Move this */
void analysis (struct iqueue_entry *current);
 
/* FIXME: Move this */
#define PAGE_LEN 8192
 
/*
117,7 → 114,42
do_scheduler();
}
 
/* Helper function. Hopefully it will get inlined */
/* do_scheduler wrapper for instructions that are in the delay slot */
void do_sched_wrap_delay(void)
{
save_t_temporary();
upd_sim_cycles();
env->ts_current = 1;
/* The PC gets set to the location of the jump, but do_sched increments it,
* so pull it back here to point to the right location again. This could be
* done in op_add_pc/op_set_pc_pc_delay but that would enlarge the recompiled
* code. */
//env->pc -= 4;
do_scheduler();
env->ts_current = 0;
}
 
void enter_dyn_code(oraddr_t addr, struct dyn_page *dp)
{
uint16_t reg;
 
addr &= PAGE_SIZE - 1;
addr >>= 2;
 
reg = dp->ts_bound[addr];
 
if(reg & 0x1f)
t0 = cpu_state.reg[reg & 0x1f];
 
if((reg >> 5) & 0x1f)
t1 = cpu_state.reg[(reg >> 5) & 0x1f];
 
if((reg >> 10) & 0x1f)
t2 = cpu_state.reg[(reg >> 10) & 0x1f];
 
or_longjmp(dp->locs[addr]);
}
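
A minimal sketch of the 3 x 5-bit packing that enter_dyn_code above (and upd_reg_from_t in op_support.c) unpacks. Each field of a ts_bound/ts_during entry names the GPR cached in t0/t1/t2; 0 means the temporary is empty, which is unambiguous because r0 is never kept in a temporary:

#include <assert.h>
#include <stdint.h>

static uint16_t pack_ts(unsigned int t0_gpr, unsigned int t1_gpr, unsigned int t2_gpr)
{
  uint16_t w = 0;
  if(t0_gpr < 32) w |= t0_gpr;        /* bits 0-4:   GPR backed by t0 */
  if(t1_gpr < 32) w |= t1_gpr << 5;   /* bits 5-9:   GPR backed by t1 */
  if(t2_gpr < 32) w |= t2_gpr << 10;  /* bits 10-14: GPR backed by t2 */
  return w;
}

int main(void)
{
  uint16_t w = pack_ts(3, 31, 32);    /* t0=r3, t1=r31, t2 empty */
  assert((w & 0x1f) == 3);
  assert(((w >> 5) & 0x1f) == 31);
  assert(((w >> 10) & 0x1f) == 0);    /* an empty slot reads back as 0 */
  return 0;
}
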
 
__or_dynop void op_t0_imm(void)
{
t0 = OP_PARAM1;
178,24 → 210,14
t2 = t1;
}
 
__or_dynop void op_set_pc_delay_t0(void)
__or_dynop void op_set_pc_pc_delay(void)
{
env->pc_delay = t0;
env->delay_insn = 1;
env->sprs[SPR_PPC] = get_pc();
/* pc_delay is pulled back by 4 since immediately after this is run, the
* scheduler runs, which also increments it by 4 */
set_pc(env->pc_delay - 4);
}
 
__or_dynop void op_set_pc_delay_t1(void)
{
env->pc_delay = t1;
env->delay_insn = 1;
}
 
__or_dynop void op_set_pc_delay_t2(void)
{
env->pc_delay = t2;
env->delay_insn = 1;
}
 
__or_dynop void op_set_pc_delay_imm(void)
{
env->pc_delay = get_pc() + (orreg_t)OP_PARAM1;
216,46 → 238,35
 
__or_dynop void op_do_jump(void)
{
do_jump(get_pc());
}
 
__or_dynop void op_do_jump_delay(void)
{
do_jump(env->pc_delay);
}
 
/* Only used to handle the branch instructions, i.e. l.bf and l.bnf */
__or_dynop void op_do_jump_check(void)
__or_dynop void op_clear_delay_insn(void)
{
if(env->delay_insn) {
env->delay_insn = 0;
do_jump(env->pc_delay);
}
env->delay_insn = 0;
}
 
/* Only used to jump out to the next page */
__or_dynop void op_do_jump_pc(void)
__or_dynop void op_set_delay_insn(void)
{
do_jump(get_pc());
env->delay_insn = 1;
}
 
__or_dynop void op_clear_delay_insn(void)
__or_dynop void op_check_delay_slot(void)
{
env->delay_insn = 0;
if(!env->delay_insn)
OP_JUMP(OP_PARAM1);
}
 
__or_dynop void op_jmp_imm(void)
{
env->ts_current = 0;
set_pc(env->pc_delay);
OP_JUMP(OP_PARAM1);
}
 
__or_dynop void op_jmp_imm_check(void)
{
if(env->delay_insn) {
env->ts_current = 0;
env->delay_insn = 0;
set_pc(env->pc_delay);
OP_JUMP(OP_PARAM1);
}
}
 
__or_dynop void op_set_flag(void)
{
env->sprs[SPR_SR] |= SPR_SR_F;
266,19 → 277,41
env->sprs[SPR_SR] &= ~SPR_SR_F;
}
 
/* Used for the l.bf instruction. Therefore if the flag is not set, jump over
* all the jumping stuff */
__or_dynop void op_check_flag(void)
{
if(!(env->sprs[SPR_SR] & SPR_SR_F)) {
HANDLE_SCHED(do_sched_wrap, "no_sched_chk_flg");
OP_JUMP(OP_PARAM1);
}
}
 
/* Used for l.bf if the delay slot instruction is on another page */
__or_dynop void op_check_flag_delay(void)
{
if(env->sprs[SPR_SR] & SPR_SR_F) {
env->delay_insn = 1;
env->pc_delay = get_pc() + (orreg_t)OP_PARAM1;
env->delay_insn = 1;
}
}
 
/* Used for the l.bnf instruction. Therefore if the flag is set, jump over all
* the jumping stuff */
__or_dynop void op_check_not_flag(void)
{
if(env->sprs[SPR_SR] & SPR_SR_F) {
HANDLE_SCHED(do_sched_wrap, "no_sched_chk_not_flg");
OP_JUMP(OP_PARAM1);
}
}
 
/* Used for l.bnf if the delay slot instruction is on another page */
__or_dynop void op_check_not_flag_delay(void)
{
if(!(env->sprs[SPR_SR] & SPR_SR_F)) {
env->delay_insn = 1;
env->pc_delay = get_pc() + (orreg_t)OP_PARAM1;
env->delay_insn = 1;
}
}
 
287,22 → 320,12
env->ts_current = 1;
}
 
__or_dynop void op_set_pc_preemt(void)
__or_dynop void op_add_pc(void)
{
env->ts_current = 1;
env->sprs[SPR_PPC] = get_pc();
set_pc(env->pc_delay);
/* FIXME: Optimise */
set_pc(get_pc() + OP_PARAM1);
}
 
__or_dynop void op_set_pc_preemt_check(void)
{
if(env->delay_insn) {
env->ts_current = 1;
env->sprs[SPR_PPC] = get_pc();
set_pc(env->pc_delay);
}
}
 
__or_dynop void op_nop_exit(void)
{
upd_sim_cycles();
315,7 → 338,7
{
upd_sim_cycles();
op_support_nop_reset();
handle_except(EXCEPT_RESET);
do_jump(EXCEPT_RESET);
}
 
__or_dynop void op_nop_printf(void)
347,7 → 370,7
/* Do exception */
env->sprs[SPR_EEAR_BASE] = get_pc() - 4;
env->delay_insn = 0;
handle_except(EXCEPT_ILLEGAL);
do_jump(EXCEPT_ILLEGAL);
}
}
 
357,7 → 380,7
if(!t0) {
/* Do exception */
env->sprs[SPR_EEAR_BASE] = get_pc();
handle_except(EXCEPT_ILLEGAL);
do_jump(EXCEPT_ILLEGAL);
}
}
 
367,7 → 390,7
/* Do exception */
env->sprs[SPR_EEAR_BASE] = get_pc() - 4;
env->delay_insn = 0;
handle_except(EXCEPT_ILLEGAL);
do_jump(EXCEPT_ILLEGAL);
}
}
 
377,7 → 400,7
if(!t1) {
/* Do exception */
env->sprs[SPR_EEAR_BASE] = get_pc();
handle_except(EXCEPT_ILLEGAL);
do_jump(EXCEPT_ILLEGAL);
}
}
 
387,7 → 410,7
/* Do exception */
env->sprs[SPR_EEAR_BASE] = get_pc() - 4;
env->delay_insn = 0;
handle_except(EXCEPT_ILLEGAL);
do_jump(EXCEPT_ILLEGAL);
}
}
 
396,7 → 419,7
if(!t2) {
/* Do exception */
env->sprs[SPR_EEAR_BASE] = get_pc();
handle_except(EXCEPT_ILLEGAL);
do_jump(EXCEPT_ILLEGAL);
}
}
 
405,9 → 428,8
env->iqueue.insn_index = OP_PARAM1;
env->iqueue.insn = OP_PARAM2;
env->iqueue.insn_addr = get_pc();
upd_sim_cycles();
runtime.cpu.instructions++;
analysis(&env->iqueue);
save_t_temporary();
op_support_analysis();
FORCE_RET;
}
 
998,23 → 1020,14
env->reg[LINK_REGNO] = get_pc() + 8;
}
 
__or_dynop void op_set_rfe_pc(void)
{
set_pc(env->sprs[SPR_EPCR_BASE] - 4);
}
 
__or_dynop void op_prep_rfe(void)
{
env->sprs[SPR_SR] = env->sprs[SPR_ESR_BASE] | SPR_SR_FO;
env->sprs[SPR_PPC] = get_pc();
env->ts_current = 1;
set_pc(env->sprs[SPR_EPCR_BASE] - 4);
}
 
__or_dynop void op_rfe(void)
{
do_rfe();
FORCE_RET;
}
 
static inline void prep_except(oraddr_t epcr_base)
{
env->sprs[SPR_EPCR_BASE] = epcr_base;
1042,50 → 1055,59
__or_dynop void op_prep_sys_delay(void)
{
env->delay_insn = 0;
env->ts_current = 1;
prep_except(get_pc() - 4);
set_pc(EXCEPT_SYSCALL - 4);
}
 
__or_dynop void op_prep_sys(void)
{
env->ts_current = 1;
prep_except(get_pc() + 4);
set_pc(EXCEPT_SYSCALL - 4);
}
 
__or_dynop void op_prep_trap_delay(void)
{
env->ts_current = 1;
env->delay_insn = 0;
prep_except(get_pc() - 4);
set_pc(EXCEPT_TRAP - 4);
}
 
__or_dynop void op_prep_trap(void)
{
env->ts_current = 1;
prep_except(get_pc());
set_pc(EXCEPT_TRAP - 4);
}
 
__or_dynop void op_do_except(void)
{
handle_except(OP_PARAM1);
}
 
/* FIXME: This `instruction' should be split up the way the l.trap and l.sys
* instructions are */
__or_dynop void op_illegal_delay(void)
{
env->delay_insn = 0;
env->ts_current = 1;
env->sprs[SPR_EEAR_BASE] = get_pc() - 4;
handle_except(EXCEPT_ILLEGAL);
do_jump(EXCEPT_ILLEGAL - 4);
}
 
__or_dynop void op_illegal(void)
{
env->sprs[SPR_EEAR_BASE] = get_pc();
handle_except(EXCEPT_ILLEGAL);
do_jump(EXCEPT_ILLEGAL);
}
 
__or_dynop void op_do_sched(void)
{
handle_sched();
HANDLE_SCHED(do_sched_wrap, "no_sched");
}
 
__or_dynop void op_do_sched_delay(void)
{
HANDLE_SCHED(do_sched_wrap_delay, "no_sched_delay");
}
 
__or_dynop void op_macc(void)
{
env->sprs[SPR_MACLO] = 0;
/trunk/or1ksim/cpu/or32/op_support.h
22,8 → 22,7
void op_support_nop_printf(void);
void op_support_nop_report(void);
void op_support_nop_report_imm(int imm);
void op_support_analysis(void);
void do_jump(oraddr_t addr);
void do_rfe(void);
void handle_except(oraddr_t except);
 
void upd_reg_from_t(oraddr_t pc);
void upd_reg_from_t(oraddr_t pc, int bound);
/trunk/or1ksim/cpu/or32/dyn_rec.c
288,7 → 288,6
static void *sigsegv_addr = NULL;
 
void dyn_ret_stack_prot(void);
void dump_held_xrefs(struct dyn_page *dp, FILE *f);
 
void dyn_sigsegv_debug(int u, siginfo_t *siginf, void *dat)
{
337,27 → 336,6
}
sigsegv_state++;
case 2:
/* Dump the x-refs to disk */
for(dp = cpu_state.dyn_pages; dp; dp = dp->next) {
printf("Dumping cross references of 0x%"PRIxADDR" to disk\n", dp->or_page);
 
sprintf(filen, "or_xref.%"PRIxADDR, dp->or_page);
if(!(f = fopen(filen, "w"))) {
fprintf(stderr, "Unable to open %s to dump cross references to: %s\n",
filen, strerror(errno));
continue;
}
 
fprintf(f, "Cross references in the page:\n");
dump_xrefs(dp, f);
 
fprintf(f, "\nCross references held by this page:\n");
dump_held_xrefs(dp, f);
 
fclose(f);
}
sigsegv_state++;
case 3:
/* Dump the contents of the stack */
printf("Stack dump: ");
fflush(stdout);
380,36 → 358,11
fflush(stdout);
}
sigsegv_state++;
case 4:
case 3:
sim_done();
}
}
 
void dump_xrefs(struct dyn_page *dp, FILE *f)
{
struct x_ref *xref;
 
fprintf(f, "--- Cross reference dump for %"PRIxADDR" at %p ---\n",
dp->or_page, dp->host_page);
for(xref = dp->xrefs; xref; xref = xref->next) {
fprintf(f, "x-refed or location: 0x%"PRIxADDR", host-location: %p, ref: %i\n",
xref->or_addr, xref->dyn_addr, xref->ref);
}
fprintf(f, "--- Cross reference dump end ---\n");
}
 
void dump_held_xrefs(struct dyn_page *dp, FILE *f)
{
struct x_ref **xrefs;
 
fprintf(f, "--- Held cross reference dump for %"PRIxADDR" at %p ---\n",
dp->or_page, dp->host_page);
for(xrefs = dp->held_xrefs; *xrefs; xrefs++)
fprintf(f, "Holds an x-ref to 0x%"PRIxADDR", host-location: %p, ref: %i\n",
(*xrefs)->or_addr, (*xrefs)->dyn_addr, (*xrefs)->ref);
fprintf(f, "--- Held cross reference dump end ---\n");
}
 
static void add_to_dp(struct dyn_page *new)
{
struct dyn_page *cur;
433,12 → 386,8
struct dyn_page *dp = malloc(sizeof(struct dyn_page));
dp->or_page = ADDR_PAGE(page);
 
/* Allocate xref terminator */
dp->xrefs = NULL;
dp->locs = malloc(sizeof(void *) * (PAGE_LEN / 4));
 
dp->held_xrefs = malloc(sizeof(struct x_ref *));
dp->held_xrefs[0] = NULL;
 
dp->host_len = 0;
dp->host_page = NULL;
dp->dirty = 1;
462,51 → 411,6
return NULL;
}
 
/* Finds the dynamically recompiled location of the given or address */
struct x_ref *find_host_x_ref(struct x_ref *x_refs, oraddr_t addr)
{
/* FIXME: Optimise this by knowing that the x_refs array is ordered */
while(x_refs && (x_refs->or_addr != addr)) x_refs = x_refs->next;
 
return x_refs;
}
 
static void remove_xref(struct dyn_page *dp, struct x_ref *xref)
{
struct x_ref *prev_xref;
 
if(dp->xrefs == xref) {
dp->xrefs = xref->next;
free(xref);
return;
}
 
prev_xref = dp->xrefs;
while(prev_xref->next != xref)
prev_xref = prev_xref->next;
 
prev_xref->next = xref->next;
free(xref);
}
 
struct x_ref *find_held_x_ref(struct x_ref **held_xrefs, oraddr_t or_addr)
{
/* FIXME: Order this list in add_to_held_xrefs below and optimise this */
while(*held_xrefs && ((*held_xrefs)->or_addr != or_addr)) held_xrefs++;
return *held_xrefs;
}
 
void add_to_held_xrefs(struct dyn_page *dp, struct x_ref *xref)
{
unsigned int i;
 
for(i = 0; dp->held_xrefs[i]; i++);
 
dp->held_xrefs = realloc(dp->held_xrefs, sizeof(struct x_ref *) * (i + 2));
dp->held_xrefs[i] = xref;
dp->held_xrefs[++i] = NULL;
}
 
/* This is called whenever the immu is either enabled/disabled or reconfigured
* while enabled. This checks if an itlb miss would occur and updates the immu
* hit delay counter */
563,12 → 467,13
oraddr_t pc = get_pc();
 
if(!cpu_state.ts_current)
upd_reg_from_t(pc);
upd_reg_from_t(pc, 0);
 
if(add_normal && do_stats) {
cpu_state.iqueue.insn_addr = pc;
cpu_state.iqueue.insn = eval_insn_direct(pc, &brk, 1);
cpu_state.iqueue.insn_index = insn_decode(cpu_state.iqueue.insn);
runtime.cpu.instructions++;
analysis(&cpu_state.iqueue);
}
 
585,18 → 490,10
/* Signals a page as dirty */
void dirtyfy_page(struct dyn_page *dp)
{
struct x_ref **held_xrefs;
struct x_ref *xref;
oraddr_t check;
 
printf("Dirtyfying page 0x%"PRIxADDR"\n", dp->or_page);
 
/* decrease the reference counts of the xrefs that we hold */
for(held_xrefs = dp->held_xrefs; *held_xrefs; held_xrefs++)
(*held_xrefs)->ref--;
dp->held_xrefs = realloc(dp->held_xrefs, sizeof(struct x_ref *));
dp->held_xrefs[0] = NULL;
 
dp->dirty = 1;
 
/* If the execution is currently in the page that was touched then recompile
604,21 → 501,12
check = cpu_state.delay_insn ? cpu_state.pc_delay : get_pc() + 4;
if(ADDR_PAGE(check) == dp->or_page) {
run_sched_out_of_line(1);
if(!(xref = find_host_x_ref(dp->xrefs, check))) {
xref = add_to_xrefs(dp, check);
add_to_held_xrefs(dp, xref);
} else {
if(!find_held_x_ref(dp->held_xrefs, check)) {
add_to_held_xrefs(dp, xref);
xref->ref++;
}
}
recompile_page(dp);
 
cpu_state.delay_insn = 0;
 
/* Jump out to the next instruction */
or_longjmp(xref->dyn_addr);
do_jump(check);
}
}
 
626,6 → 514,16
{
int i;
 
/* Before taking the temporaries out, temporarily remove the op_do_sched
* operation so that dyn_page->ts_bound is correct before the
* scheduler runs */
if(end && opq->num_ops && (opq->ops[opq->num_ops - 1] == op_do_sched_indx)) {
opq->num_ops--;
ship_gprs_out_t(opq, end, reg_t);
gen_op_do_sched(opq, 1);
return;
}
 
for(i = 0; i < NUM_T_REGS; i++) {
if(reg_t[i] < 32)
gen_op_move_gpr_t[i][reg_t[i]](opq, end);
692,12 → 590,17
opq->num_ops++;
}
 
static void gen_op_mark_loc(struct op_queue *opq, int end)
{
add_to_opq(opq, end, op_mark_loc_indx);
}
 
/* Adds a parameter to the opq */
void add_to_op_params(struct op_queue *opq, int end, unsigned long param)
{
if(opq->num_ops_param == opq->ops_param_len) {
opq->ops_param_len += OPS_ENLARGE_BY * sizeof(int);
if(!(opq->ops_param = realloc(opq->ops_param, opq->ops_param_len))) {
opq->ops_param_len += OPS_ENLARGE_BY;
if(!(opq->ops_param = realloc(opq->ops_param, opq->ops_param_len * sizeof(int)))) {
fprintf(stderr, "OOM\n");
exit(1);
}
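
The hunk above keeps ops_param_len counted in elements rather than bytes, so the capacity check against num_ops_param and the realloc size now use consistent units. A minimal generic sketch of that growth pattern (the helper name and the OPS_ENLARGE_BY value here are made up for illustration; the real constant lives in the recompiler's headers):

#include <stdlib.h>

#define OPS_ENLARGE_BY 16  /* assumed value, for illustration only */

/* Grow a parameter array by OPS_ENLARGE_BY elements; *len_elems always counts
 * elements, and bytes are computed only at the realloc call. */
static unsigned int *grow_ops_param(unsigned int *buf, unsigned int *len_elems)
{
  unsigned int *tmp;

  *len_elems += OPS_ENLARGE_BY;
  if(!(tmp = realloc(buf, *len_elems * sizeof(unsigned int))))
    exit(1);  /* mirror the recompiler's OOM handling */
  return tmp;
}

int main(void)
{
  unsigned int len = 0;
  unsigned int *params = grow_ops_param(NULL, &len); /* realloc(NULL, n) acts as malloc */
  params[0] = 42;
  free(params);
  return 0;
}
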
735,7 → 638,6
{
struct sigaction sigact;
struct op_queue *opq;
struct x_ref *xref;
unsigned int i;
 
cpu_state.opqs = NULL;
752,6 → 654,7
opq->ops = NULL;
opq->ops_param_len = 0;
opq->ops_param = NULL;
opq->xref = 0;
 
if(cpu_state.opqs)
cpu_state.opqs->prev = opq;
762,18 → 665,6
 
opq->prev = NULL;
 
/* Allocate the x-ref structures that will be used for the infinite loop
* instruction (l.j 0). Allocate a whole page's worth just to make sure that
* we will have enough */
for(i = 0; i < (PAGE_LEN / 4); i++) {
if(!(xref = malloc(sizeof(struct x_ref)))) {
fprintf(stderr, "Out-of-memory while allocateing x-ref structures\n");
exit(1);
}
xref->next = cpu_state.inf_xrefs;
cpu_state.inf_xrefs = xref;
}
 
/* Just some value that we'll use as the base for our stack */
rec_stack_base = get_sp();
 
787,14 → 678,7
if(sigaction(SIGSEGV, &sigact, NULL))
printf("WARN: Unable to install SIGSEGV handler! Don't expect to be able to debug the recompiler.\n");
 
/* Allocate memory for the rfe cross reference cache */
if(!(cpu_state.rfe_held_xrefs = malloc(sizeof(struct xref *) * NUM_RFE_HELD))) {
printf("OOM\n");
exit(1);
}
cpu_state.rfe_held_xref_pos = 0;
memset(cpu_state.rfe_held_xrefs, 0, sizeof(struct xref *) * NUM_RFE_HELD);
 
/* Do architecture specific initialisation */
init_dyn_rec();
 
/* FIXME: Find a better place for this */
807,189 → 691,93
printf("Recompile engine up and running\n");
}
 
/* rec_page is a physical address */
void recompile_page(struct dyn_page *dyn)
/* Adds code to the opq for the instruction pointed to by addr */
static void recompile_insn(struct op_queue *opq, oraddr_t addr, int delay_insn)
{
unsigned int j, k;
unsigned int reg_t[NUM_T_REGS];
unsigned int insn_index;
unsigned int pres_t[NUM_T_REGS]; /* Which temporary to preserve */
unsigned int insn_index;
int delay_insn = 0; /* Is the next instruction to be decoded in a delay slot*/
enum insn_type delay_insn_type = 0;
uint32_t insn;
orreg_t param[3];
int i, j, k;
int param_t[3]; /* Which temporary the parameters reside in */
int param_r[3]; /* is parameter a register */
orreg_t param[3];
int param_num;
struct op_queue *opq = NULL;
oraddr_t rec_addr = dyn->or_page;
oraddr_t rec_page = dyn->or_page;
struct x_ref *xref;
uint32_t insn;
int breakp;
struct dyn_page *prev_dp;
 
struct insn_op_struct *opd;
 
/* The start of the next page */
rec_page += PAGE_LEN;
breakp = 0;
insn = eval_insn(addr, &breakp);
 
printf("Recompileing page %"PRIxADDR"\n", rec_addr);
fflush(stdout);
/* FIXME: If a breakpoint is set at this location, insert exception code */
if(breakp) {
fprintf(stderr, "FIXME: Insert breakpoint code\n");
}
 
/* Mark all temporaries as not containing a register */
for(j = 0; j < NUM_T_REGS; j++)
reg_t[j] = 32; /* Out-of-range registers */
insn_index = insn_decode(insn);
 
dyn->delayr = -verify_memoryarea(rec_addr)->delayr;
/* Copy over the state of the temporaries to the next opq */
memcpy(opq->reg_t_d, opq->reg_t, sizeof(opq->reg_t));
 
dyn->carrys_delay_slot = 0;
/* Check if we have an illegal instruction */
if(insn_index == -1) {
gen_l_invalid(opq, NULL, NULL, delay_insn);
return;
}
 
/* Check if the previous page carries a delay slot over to this page */
if((prev_dp = find_dynd_page(rec_addr - PAGE_LEN)))
delay_insn = prev_dp->carrys_delay_slot;
/* If we are recompiling an instruction that has a delay slot and is itself in
* a delay slot, ignore it. This is undefined behaviour. */
if(delay_insn && ((or32_opcodes[insn_index].func_unit == it_jump) ||
(or32_opcodes[insn_index].func_unit == it_branch)))
return;
 
for(opq = cpu_state.opqs; rec_addr < rec_page; rec_addr += 4, opq = opq->next) {
opq->num_ops = 0;
opq->num_ops_param = 0;
opq->jump_local = 0;
/* figure out instruction operands */
for(i = 0; i < NUM_T_REGS; i++)
pres_t[i] = 0;
 
opq->insn_addr = rec_addr;
param_t[0] = T_NONE;
param_t[1] = T_NONE;
param_t[2] = T_NONE;
param_r[0] = 0;
param_r[1] = 0;
param_r[2] = 0;
param_num = 0;
 
breakp = 0;
insn = eval_insn(rec_addr, &breakp);
opd = op_start[insn_index];
while(1) {
param[param_num] = eval_operand_val(insn, opd);
 
/* FIXME: If a breakpoint is set at this location, insert exception code */
if(breakp) {
fprintf(stderr, "FIXME: Insert breakpoint code\n");
}
 
insn_index = insn_decode(insn);
 
/* FIXME: Optimise this by knowing that dyn->x_refs is ordered (ie. Don't
* call find_host_x_ref) */
/* Check if this location is cross referenced */
if((xref = find_host_x_ref(dyn->xrefs, rec_addr))) {
/* If the x-refs reference count reached zero remove it */
if(xref->ref) {
/* If the current address is cross-referenced, the temporaries shall be
* in an undefined state, so we must assume that no registers reside in
* them */
/* Ship out the current set of registers from the temporaries */
if(opq->prev) {
ship_gprs_out_t(opq->prev, 1, reg_t);
if(opd->type & OPTYPE_REG) {
/* check which temporary the register is in, if any */
for(i = 0; i < NUM_T_REGS; i++) {
if(opq->reg_t_d[i] == param[param_num]) {
param_t[param_num] = i;
pres_t[i] = 1;
}
for(j = 0; j < NUM_T_REGS; j++)
reg_t[j] = 32;
} else {
/* Remove x-ref */
remove_xref(dyn, xref);
}
}
 
memcpy(opq->reg_t, reg_t, sizeof(reg_t));
param_num++;
while(!(opd->type & OPTYPE_OP)) opd++;
if(opd->type & OPTYPE_LAST)
break;
opd++;
}
 
/* Check if we have an illegal instruction */
if(insn_index == -1) {
gen_l_invalid(opq, param_t, param, delay_insn);
if(delay_insn) {
/* There is no need to do any jump handling stuff as the instruction
* will generate an exception */
if(opq->prev->jump_local == 2) {
opq->prev->xref->next = cpu_state.inf_xrefs;
cpu_state.inf_xrefs = opq->prev->xref;
}
opq->prev->jump_local = 0;
delay_insn = 0;
/* Jump instructions are special since they have a delay slot and thus they
* need to control the exact operation sequence. Special-case these here to
* avoid having loads of if(.func_unit != it_jump && != it_branch) below */
if((or32_opcodes[insn_index].func_unit == it_jump) ||
(or32_opcodes[insn_index].func_unit == it_branch)) {
/* Ship the jump-to register out (if it exists). It requires special
* handling, which is done in gen_j_reg. */
for(i = 0; i < NUM_T_REGS; i++) {
if(pres_t[i]) {
gen_op_move_gpr_t[i][opq->reg_t_d[i]](opq->prev, 1);
opq->reg_t_d[i] = 32;
opq->reg_t[i] = 32;
}
continue;
}
 
/* figure out instruction operands */
for(j = 0; j < NUM_T_REGS; j++)
pres_t[j] = 0;
 
param_t[0] = T_NONE;
param_t[1] = T_NONE;
param_t[2] = T_NONE;
param_r[0] = 0;
param_r[1] = 0;
param_r[2] = 0;
param_num = 0;
 
opd = op_start[insn_index];
while(1) {
param[param_num] = eval_operand_val(insn, opd);
 
if(opd->type & OPTYPE_REG) {
/* check which temporary the register is in, if any */
for(j = 0; j < NUM_T_REGS; j++) {
if(reg_t[j] == param[param_num]) {
param_t[param_num] = j;
pres_t[j] = 1;
}
}
}
 
param_num++;
while(!(opd->type & OPTYPE_OP)) opd++;
if(opd->type & OPTYPE_LAST)
break;
opd++;
}
 
opd = op_start[insn_index];
 
/* Before an exception takes place, all registers must be stored. */
if((or32_opcodes[insn_index].func_unit == it_exception)) {
if(opq->prev) {
ship_gprs_out_t(opq->prev, 1, reg_t);
for(j = 0; j < NUM_T_REGS; j++) {
opq->prev->reg_t[j] = 32;
reg_t[j] = 32;
}
}
}
 
for(j = 0; j < param_num; j++, opd++) {
while(!(opd->type & OPTYPE_OP)) opd++;
if(!(opd->type & OPTYPE_REG))
continue;
 
/* Never, ever, move r0 into a temporary */
if(!param[j])
continue;
 
/* Check if this register has been moved into a temporary in a previous
* operand */
for(k = 0; k < NUM_T_REGS; k++) {
if(reg_t[k] == param[j]) {
/* Yes, this register is already in a temporary */
pres_t[k] = 1;
reg_t[k] = param[j];
param_t[j] = k;
break;
}
}
if(k != NUM_T_REGS)
continue;
 
if((param_t[j] != T_NONE))
continue;
 
/* Search for an unused temporary */
k = find_unused_t(pres_t, reg_t);
if(reg_t[k] < 32) {
gen_op_move_gpr_t[k][reg_t[k]](opq->prev, 1);
opq->reg_t[k] = 32;
}
pres_t[k] = 1;
reg_t[k] = param[j];
param_t[j] = k;
/* FIXME: Only generate code to move the register into a temporary if it
* is used as a source operand */
gen_op_move_t_gpr[k][reg_t[k]](opq, 1);
}
 
/* FIXME: Do this in a more elegant way */
if(!strncmp(or32_opcodes[insn_index].name, "l.jal", 5)) {
/* In the case of an l.jal instruction, make sure that LINK_REGNO is not in
998,229 → 786,235
* after the delay slot instruction has executed and so it overwrites the
* `return address'. */
for(k = 0; k < NUM_T_REGS; k++) {
if(reg_t[k] == LINK_REGNO) {
if(opq->reg_t_d[k] == LINK_REGNO) {
gen_op_move_gpr_t[k][LINK_REGNO](opq, 1);
reg_t[k] = 32;
opq->reg_t[k] = 32;
opq->reg_t_d[k] = 32;
break;
}
}
}
 
/* Store the state of the temporaries into dyn->ts */
dyn->ts[(rec_addr & (PAGE_LEN - 1)) / 2] = 0;
if(reg_t[0] < 32)
dyn->ts[(rec_addr & (PAGE_LEN - 1)) / 2] = reg_t[0];
if(reg_t[1] < 32)
dyn->ts[(rec_addr & (PAGE_LEN - 1)) / 2] |= reg_t[1] << 5;
if(reg_t[2] < 32)
dyn->ts[(rec_addr & (PAGE_LEN - 1)) / 2] |= reg_t[2] << 10;
/* Jump instructions don't have a disposition */
or32_opcodes[insn_index].exec(opq, param_t, param, delay_insn);
 
/* To get the execution log correct for instructions like l.lwz r4,0(r4) the
* effective address needs to be calculated before the instruction is
* simulated */
if(do_stats) {
/* Find any disposition in the instruction */
opd = op_start[insn_index];
for(j = 0; j < param_num; j++, opd++) {
while(!(opd->type & OPTYPE_OP)) opd++;
if(!(opd->type & OPTYPE_DIS))
continue;
/* Analysis is done by the individual jump instructions */
/* Jump instructions don't touch runtime.sim.mem_cycles */
/* Jump instructions run their own scheduler */
return;
}
 
if(!param[j + 1])
gen_op_store_insn_ea(opq, 1, param[j]);
else
calc_insn_ea_table[param_t[j + 1]](opq, 1, param[j]);
/* Before an exception takes place, all registers must be stored. */
if((or32_opcodes[insn_index].func_unit == it_exception)) {
if(opq->prev) {
ship_gprs_out_t(opq->prev, 1, opq->reg_t_d);
for(i = 0; i < NUM_T_REGS; i++) {
opq->reg_t_d[i] = 32;
opq->reg_t[i] = 32;
}
}
}
 
or32_opcodes[insn_index].exec(opq, param_t, param, delay_insn);
opd = op_start[insn_index];
 
/* If any sort of analysis is done, store all temporaries and run
* analysis() */
if(do_stats) {
ship_gprs_out_t(opq, 1, reg_t);
for(j = 0; j < NUM_T_REGS; j++)
reg_t[j] = 32;
for(j = 0; j < param_num; j++, opd++) {
while(!(opd->type & OPTYPE_OP)) opd++;
if(!(opd->type & OPTYPE_REG))
continue;
 
gen_op_analysis(opq, 1, insn_index, insn);
}
/* Never, ever, move r0 into a temporary */
if(!param[j])
continue;
 
/* The call to join_mem_cycles() could be put into the individual operations
* that emulate the load/store instructions, but then it would be added to
* the cycle counter before analysis() is called, which is not how the complex
* execution model does it. */
if((or32_opcodes[insn_index].func_unit == it_load) ||
(or32_opcodes[insn_index].func_unit == it_store))
gen_op_join_mem_cycles(opq, 1);
 
/* If a delay-slotted instruction is in the delay slot, avoid doing a jump on
* the first delay-slotted instruction. The problem with not doing the above
* is that the 0x00000000 instruction is a jump instruction, which is used
* for padding, and if there ends up being a jump instruction directly after
* some padding and the code jumps to this location (as with the mmu test)
* the jump instruction will set cpu_state.pc_delay, but code will get
* generated after the jump instruction and before the delay slot
* instruction to check cpu_state.pc_delay and jump out if it is set, and so
* we end up jumping out to the padding instruction. With some thought, the
* 0x00000000 opcode could really have been encoded as some arithmetic
* operation that would end up nop-ing (or better yet, as the l.nop 0
* instruction itself) */
/* If we come up to a page-local jump that is skipped by the case below
* because it is the delay slot of another delay-slotted instruction,
* opq->prev->jump_local will remain set to 1; fix this by resetting it now */
if(delay_insn && ((or32_opcodes[insn_index].func_unit == it_jump) ||
(or32_opcodes[insn_index].func_unit == it_branch))) {
/* We don't generate code to do the relocation so there will be none.
* Avoid having a reference to it */
/* Also remove the cross reference to it */
if(opq->prev) {
if(opq->prev->jump_local == 2) {
opq->prev->xref->next = cpu_state.inf_xrefs;
cpu_state.inf_xrefs = opq->prev->xref;
/* Check if this register has been moved into a temporary in a previous
* operand */
for(k = 0; k < NUM_T_REGS; k++) {
if(opq->reg_t_d[k] == param[j]) {
/* Yes, this register is already in a temporary */
if(or32_opcodes[insn_index].func_unit != it_jump) {
pres_t[k] = 1;
param_t[j] = k;
}
opq->prev->jump_local = 0;
break;
}
delay_insn = 0;
}
if(k != NUM_T_REGS)
continue;
 
/* In the case of an instruction in the delay slot the pc must be updated
* before op_do_sched runs, because if it so happens to generate an exception
* it will think that we are still executing the delay slot instruction
* which in fact we have just executed, and then SPR_EPCR_BASE will end up
* pointing to the delay slot instruction, which is wrong. If the delay
* slot instruction is an exception instruction (l.trap/l.sys) the exception
* must appear to have been generated in the delay slot */
if(delay_insn && (or32_opcodes[insn_index].func_unit != it_exception)) {
if(xref || (delay_insn_type == it_branch))
gen_op_set_pc_preemt_check(opq, 1);
else /* delay_insn_type == it_jump */
gen_op_set_pc_preemt(opq, 1);
/* Move temporaries to their permanent storage */
ship_gprs_out_t(opq, 1, reg_t);
if(param_t[j] != T_NONE)
continue;
 
/* Search for an unused temporary */
k = find_unused_t(pres_t, opq->reg_t_d);
if(opq->reg_t_d[k] < 32) {
/* FIXME: Only ship the temporary out if it has been used as a destination
* register */
gen_op_move_gpr_t[k][opq->reg_t_d[k]](opq->prev, 1);
opq->reg_t[k] = 32;
opq->reg_t_d[k] = 32;
}
pres_t[k] = 1;
opq->reg_t_d[k] = param[j];
param_t[j] = k;
/* FIXME: Only generate code to move the register into a temporary if it
* is used as a source operand */
gen_op_move_t_gpr[k][opq->reg_t_d[k]](opq, 0);
}
 
/* Same reason as for the above case */
if(or32_opcodes[insn_index].func_unit == it_exception) {
/* FIXME: Do the instruction switch below in a more elegant way */
if(!strcmp(or32_opcodes[insn_index].name, "l.rfe")) {
gen_op_set_rfe_pc(opq, 1);
} else if(!strcmp(or32_opcodes[insn_index].name, "l.sys")) {
gen_op_set_except_pc(opq, 1, EXCEPT_SYSCALL - 4);
} else { /* or32_opcodes[insn_index].name == "l.trap" */
gen_op_set_except_pc(opq, 1, EXCEPT_TRAP - 4);
}
gen_op_set_ts_current(opq, 1);
/* To get the execution log correct for instructions like l.lwz r4,0(r4) the
* effective address needs to be calculated before the instruction is
* simulated */
if(do_stats) {
/* Find any disposition in the instruction */
opd = op_start[insn_index];
for(j = 0; j < param_num; j++, opd++) {
while(!(opd->type & OPTYPE_OP)) opd++;
if(!(opd->type & OPTYPE_DIS))
continue;
 
if(!param[j + 1])
gen_op_store_insn_ea(opq, 1, param[j]);
else
calc_insn_ea_table[param_t[j + 1]](opq, 1, param[j]);
}
}
 
or32_opcodes[insn_index].exec(opq, param_t, param, delay_insn);
 
if(or32_opcodes[insn_index].func_unit != it_exception) {
if(do_stats)
gen_op_analysis(opq, 1, insn_index, insn);
}
 
/* The call to join_mem_cycles() could be put into the individual operations
* that emulate the load/store instructions, but then it would be added to
* the cycle counter before analysis() is called, which is not how the complex
* execution model does it. */
if((or32_opcodes[insn_index].func_unit == it_load) ||
(or32_opcodes[insn_index].func_unit == it_store))
gen_op_join_mem_cycles(opq, 1);
 
/* Delay slot instructions get a special scheduler, thus don't generate it
* here */
if((or32_opcodes[insn_index].func_unit != it_exception) && !delay_insn)
gen_op_do_sched(opq, 1);
}
 
/* If this is an exception instruction then we still need to perform the
* exception */
if(or32_opcodes[insn_index].func_unit == it_exception) {
/* FIXME: Do the instruction switch below in a more elegant way */
if(!strcmp(or32_opcodes[insn_index].name, "l.rfe")) {
gen_op_rfe(opq, 1);
} else if(!strcmp(or32_opcodes[insn_index].name, "l.sys")) {
gen_op_do_except(opq, 1, EXCEPT_SYSCALL);
} else { /* or32_opcodes[insn_index].name == "l.trap" */
gen_op_do_except(opq, 1, EXCEPT_TRAP);
}
}
/* rec_page is a physical address */
void recompile_page(struct dyn_page *dyn)
{
unsigned int j;
struct op_queue *opq = cpu_state.opqs;
oraddr_t rec_addr = dyn->or_page;
oraddr_t rec_page = dyn->or_page;
void **loc;
 
/* FIXME: If the delay slot is cross referenced after we have stuck the jump
* instruction in the operations queue we will generate temporary->
* register code after the jump, which will be unreachable. This is no
* problem as all temporaries are stored in anticipation of a jump. */
/* FIXME: If the delay slot is cross referenced we should generate the
* conditional jump code as we do below. This will not happen if the delay
* slot is cross referenced after we generate the operations for the jump */
/* FIXME: If the instruction in the delay slot is an exception instruction
* the code that we generate below will be unreachable since the exception
* instruction jumps to the exception vector */
/* Generate code to jump out to the proper location */
if(delay_insn) {
for(j = 0; j < NUM_T_REGS; j++)
reg_t[j] = 32;
/* The start of the next page */
rec_page += PAGE_LEN;
 
if(xref || (delay_insn_type == it_branch)) {
/* If the delay-slot instruction is cross referenced, then we have to
* check env->delay_insn */
if(opq->prev && opq->prev->jump_local) {
gen_op_jmp_imm_check(opq, 1, 0);
opq->prev->jump_local_loc = &opq->ops_param[opq->num_ops_param - 1];
} else {
gen_op_do_jump_check(opq, 1);
}
} else if(delay_insn_type == it_jump) {
gen_op_clear_delay_insn(opq, 1);
if(opq->prev && opq->prev->jump_local) {
/* The 0 will get patched when the page-local jumps get patched */
gen_op_jmp_imm(opq, 1, 0);
/* FIXME: opq->ops_param is grown with realloc, so we risk a
* reallocation in which this location ends up moving in memory */
opq->prev->jump_local_loc = &opq->ops_param[opq->num_ops_param - 1];
} else {
gen_op_do_jump(opq, 1);
}
}
delay_insn = 0;
printf("Recompileing page %"PRIxADDR"\n", rec_addr);
fflush(stdout);
 
/* Mark all temporaries as not containing a register */
for(j = 0; j < NUM_T_REGS; j++)
opq->reg_t[j] = 32; /* Out-of-range registers */
 
dyn->delayr = -verify_memoryarea(rec_addr)->delayr;
 
opq->num_ops = 0;
opq->num_ops_param = 0;
 
/* Insert code to check if the first instruction is executed in a delay slot */
gen_op_check_delay_slot(opq, 1, 0);
recompile_insn(opq, rec_addr, 1);
ship_gprs_out_t(opq, 1, opq->reg_t_d);
gen_op_do_sched_delay(opq, 1);
gen_op_clear_delay_insn(opq, 1);
gen_op_do_jump_delay(opq, 1);
gen_op_mark_loc(opq, 1);
 
for(j = 0; j < NUM_T_REGS; j++)
opq->reg_t[j] = 32; /* Out-of-range registers */
 
for(; rec_addr < rec_page; rec_addr += 4, opq = opq->next) {
if(opq->prev) {
opq->num_ops = 0;
opq->num_ops_param = 0;
}
opq->jump_local = -1;
opq->not_jump_loc = -1;
 
/* Set flag for next instruction to be in a delay slot */
if((or32_opcodes[insn_index].func_unit == it_jump) ||
(or32_opcodes[insn_index].func_unit == it_branch)) {
delay_insn = 1;
delay_insn_type = or32_opcodes[insn_index].func_unit;
opq->insn_addr = rec_addr;
 
/* Check if this location is cross referenced */
if(opq->xref) {
/* If the current address is cross-referenced, the temporaries shall be
* in an undefined state, so we must assume that no registers reside in
* them */
/* Ship out the current set of registers from the temporaries */
if(opq->prev)
ship_gprs_out_t(opq->prev, 1, opq->reg_t);
 
for(j = 0; j < NUM_T_REGS; j++)
opq->reg_t[j] = 32;
}
}
 
if(delay_insn) {
dyn->carrys_delay_slot = 1;
/* Quick hack to avoid dereferencing an uninitialised pointer below with
* *opq->jump_local_loc */
if(opq->prev->jump_local == 2) {
/* FIXME: In this case the delay slot instruction won't get executed */
opq->prev->xref->next = cpu_state.inf_xrefs;
cpu_state.inf_xrefs = opq->prev->xref;
}
opq->prev->jump_local = 0;
recompile_insn(opq, rec_addr, 0);
 
/* Store the state of the temporaries */
memcpy(opq->next->reg_t, opq->reg_t_d, sizeof(opq->reg_t));
}
 
dyn->dirty = 0;
 
/* Store the state of the temporaries */
dyn->ts_bound[PAGE_LEN >> 2] = dyn->ts_during[j];
 
/* Ship temporaries out to the corresponding registers */
ship_gprs_out_t(opq->prev, 1, reg_t);
ship_gprs_out_t(opq->prev, 1, opq->reg_t);
 
opq->num_ops = 0;
opq->num_ops_param = 0;
opq->jump_local = 0;
opq->not_jump_loc = -1;
opq->jump_local = -1;
 
/* Insert code to jump to the next page */
gen_op_set_ts_current(opq, 1);
gen_op_do_jump_pc(opq, 1);
gen_op_do_jump(opq, 1);
 
/* Generate the code */
gen_code(cpu_state.opqs, dyn);
 
/* Patch the x-ref table */
for(xref = dyn->xrefs; xref; xref = xref->next)
xref->dyn_addr = dyn->host_page + (unsigned int)xref->dyn_addr;
/* Fix up the locations */
for(loc = dyn->locs; loc < &dyn->locs[PAGE_LEN / 4]; loc++)
*loc += (unsigned int)dyn->host_page;
 
cpu_state.opqs->ops_param[0] += (unsigned int)dyn->host_page;
 
/* Search for page-local jumps */
for(opq = cpu_state.opqs; opq; opq = opq->next) {
if(opq->jump_local) {
if(opq->jump_local == 2)
/* This cross reference was not patched above so patch it now */
opq->xref->dyn_addr = dyn->host_page + (unsigned int)opq->xref->dyn_addr;
for(opq = cpu_state.opqs, j = 0; j < (PAGE_LEN / 4); opq = opq->next, j++) {
if(opq->jump_local != -1)
opq->ops_param[opq->jump_local] =
(unsigned int)dyn->locs[opq->jump_local_loc >> 2];
 
*opq->jump_local_loc = (unsigned int)opq->xref->dyn_addr;
if(opq->jump_local == 2) {
/* Return the xref to the pool of infinite loop cross references */
opq->xref->next = cpu_state.inf_xrefs;
cpu_state.inf_xrefs = opq->xref;
}
}
if(opq->not_jump_loc != -1)
opq->ops_param[opq->not_jump_loc] = (unsigned int)dyn->locs[j + 1];
 
/* Store the state of the temporaries into dyn->ts_bound */
dyn->ts_bound[j] = 0;
if(opq->reg_t[0] < 32)
dyn->ts_bound[j] = opq->reg_t[0];
if(opq->reg_t[1] < 32)
dyn->ts_bound[j] |= opq->reg_t[1] << 5;
if(opq->reg_t[2] < 32)
dyn->ts_bound[j] |= opq->reg_t[2] << 10;
 
dyn->ts_during[j] = 0;
if(opq->reg_t_d[0] < 32)
dyn->ts_during[j] = opq->reg_t_d[0];
if(opq->reg_t_d[1] < 32)
dyn->ts_during[j] |= opq->reg_t_d[1] << 5;
if(opq->reg_t_d[2] < 32)
dyn->ts_during[j] |= opq->reg_t_d[2] << 10;
}
 
/* Patch the relocations */
1230,58 → 1024,12
/* Since eval_insn is called to get the instruction, runtime.sim.mem_cycles is
* updated, but the recompiler expects it to start at 0, so reset it */
runtime.sim.mem_cycles = 0;
 
#if 0
This is very useful during debugging
/* Count the number of infinite loop cross references (to make sure that we
* returned them all) */
for(j = 0, xref = cpu_state.inf_xrefs; xref; xref = xref->next) {
printf("Cross reference to %"PRIxADDR" is here\n", xref->or_addr);
j++;
}
 
if(j != (PAGE_LEN / 4)) {
fprintf(stderr, "Infinite loop cross references are leaked!\n");
fprintf(stderr, "Number in free list now: %i, meant to be: %i\n", j, PAGE_LEN / 4);
exit(1);
}
#endif
 
}
 
struct x_ref *add_to_xrefs(struct dyn_page *dp, oraddr_t addr)
{
struct x_ref *new;
struct x_ref *cur;
struct x_ref *prev;
 
new = malloc(sizeof(struct x_ref));
 
new->ref = 1;
new->or_addr = addr;
 
/* Find the location to insert the address */
for(cur = dp->xrefs, prev = NULL; cur; prev = cur, cur = cur->next) {
if(cur->or_addr > addr)
break;
}
 
if(prev)
prev->next = new;
else
dp->xrefs = new;
new->next = cur;
 
return new;
}
 
/* Returns non-zero if the jump is into this page, 0 otherwise */
static int find_jump_loc(oraddr_t j_ea, struct op_queue *opq)
{
struct dyn_page *dp;
int i;
struct x_ref *xref = NULL;
int *ops;
 
/* Mark the jump as non-page-local if the delay slot instruction is on the
* next page to the jump instruction. This should not be needed */
1294,37 → 1042,26
 
/* The jump is into the page currently undergoing dynamic recompilation */
 
/* FIXME: It would be great if we didn't have to do this (find_dynd...) (it is
* already passed to recompile_page) */
dp = find_dynd_page(j_ea);
 
/* Check if we have already x-refed this location */
if((xref = find_host_x_ref(dp->xrefs, j_ea))) {
/* If we have already x-refed this location, don't x-ref it again */
if(!find_held_x_ref(dp->held_xrefs, j_ea)) {
xref->ref++;
add_to_held_xrefs(dp, xref);
}
} else {
/* Stick this address into the page's x-ref table */
xref = add_to_xrefs(dp, j_ea);
add_to_held_xrefs(dp, xref);
}
 
opq->xref = xref;
 
/* If we haven't got to the location of the jump, everything is ok */
if(j_ea > opq->insn_addr)
if(j_ea > opq->insn_addr) {
/* Find the corresponding opq and mark it as cross referenced */
for(i = (j_ea - opq->insn_addr) / 4; i; i--)
opq = opq->next;
opq->xref = 1;
return 1;
}
 
/* Insert temporary -> register code before the jump ea and register ->
* temporary at the x-ref address */
while(opq->insn_addr > j_ea) opq = opq->prev;
for(i = (opq->insn_addr - j_ea) / 4; i; i--)
opq = opq->prev;
 
if(!opq->prev)
/* We're at the beginning of a page, no need to do anything */
return 1;
 
/* Found location, insert code */
 
ship_gprs_out_t(opq->prev, 1, opq->reg_t);
 
for(i = 0; i < NUM_T_REGS; i++) {
1334,21 → 1071,124
}
}
 
/* In the event of a page-local jump that jumps backwards (l.j -4) the cross
* reference to the target may not have existed when the jumped-to address was
* recompiled, and if the jumped-to address is in the delay slot of another
* jump instruction an op_jmp_imm_check operation must be generated and not an
* op_jmp_imm operation */
for(ops = opq->ops, i = 0; i < opq->num_ops; i++, ops++) {
if(*ops == op_jmp_imm_indx)
*ops = op_jmp_imm_check_indx;
else if(*ops == op_set_pc_preemt_indx)
*ops = op_set_pc_preemt_check_indx;
}
opq->xref = 1;
 
return 1;
}
 
static void gen_j_imm(struct op_queue *opq, oraddr_t off)
{
int jump_local;
int i;
int reg_t[NUM_T_REGS];
 
off <<= 2;
 
jump_local = find_jump_loc(opq->insn_addr + off, opq);
 
if(ADDR_PAGE(opq->insn_addr) != ADDR_PAGE(opq->insn_addr + 4)) {
gen_op_set_pc_delay_imm(opq, 1, off);
gen_op_do_sched(opq, 1);
return;
}
 
gen_op_set_delay_insn(opq, 1);
gen_op_do_sched(opq, 1);
 
/* Recompiling the delay slot instruction must see the temporaries in
* the state after the jump/branch instruction, not before */
memcpy(reg_t, opq->reg_t, sizeof(reg_t));
memcpy(opq->reg_t, opq->reg_t_d, sizeof(reg_t));
 
/* Generate the delay slot instruction */
recompile_insn(opq, opq->insn_addr + 4, 1);
 
memcpy(opq->reg_t, reg_t, sizeof(reg_t));
 
ship_gprs_out_t(opq, 1, opq->reg_t_d);
 
gen_op_add_pc(opq, 1, (orreg_t)off - 8);
gen_op_clear_delay_insn(opq, 1);
gen_op_do_sched_delay(opq, 1);
 
if(jump_local) {
gen_op_jmp_imm(opq, 1, 0);
opq->jump_local = opq->num_ops_param - 1;
opq->jump_local_loc = (opq->insn_addr + (orreg_t)off) & (PAGE_LEN - 1);
} else
gen_op_do_jump(opq, 1);
}
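
A self-contained sketch of the page-locality test that gen_j_imm and find_jump_loc rely on, assuming the 8 KiB recompiled page (PAGE_LEN 8192) used by op.c and modelling ADDR_PAGE as a plain mask of the in-page bits; the addresses are made up:

#include <stdint.h>
#include <stdio.h>

#define PAGE_LEN 8192  /* matches op.c */
typedef uint32_t oraddr_t;
#define ADDR_PAGE(addr) ((oraddr_t)(addr) & ~(oraddr_t)(PAGE_LEN - 1))

int main(void)
{
  oraddr_t insn_addr = 0x2100;     /* hypothetical l.j location */
  int32_t  off       = -4 * 4;     /* l.j -4: the signed 26-bit immediate times 4 */
  oraddr_t target    = insn_addr + (oraddr_t)off;

  if(ADDR_PAGE(target) == ADDR_PAGE(insn_addr))
    printf("page-local jump, patch slot at in-page offset 0x%x\n",
           (unsigned int)(target & (PAGE_LEN - 1)));
  else
    printf("inter-page jump, falls back to do_jump()\n");
  return 0;
}
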
 
static const generic_gen_op set_pc_delay_gpr[32] = {
NULL,
gen_op_move_gpr1_pc_delay,
gen_op_move_gpr2_pc_delay,
gen_op_move_gpr3_pc_delay,
gen_op_move_gpr4_pc_delay,
gen_op_move_gpr5_pc_delay,
gen_op_move_gpr6_pc_delay,
gen_op_move_gpr7_pc_delay,
gen_op_move_gpr8_pc_delay,
gen_op_move_gpr9_pc_delay,
gen_op_move_gpr10_pc_delay,
gen_op_move_gpr11_pc_delay,
gen_op_move_gpr12_pc_delay,
gen_op_move_gpr13_pc_delay,
gen_op_move_gpr14_pc_delay,
gen_op_move_gpr15_pc_delay,
gen_op_move_gpr16_pc_delay,
gen_op_move_gpr17_pc_delay,
gen_op_move_gpr18_pc_delay,
gen_op_move_gpr19_pc_delay,
gen_op_move_gpr20_pc_delay,
gen_op_move_gpr21_pc_delay,
gen_op_move_gpr22_pc_delay,
gen_op_move_gpr23_pc_delay,
gen_op_move_gpr24_pc_delay,
gen_op_move_gpr25_pc_delay,
gen_op_move_gpr26_pc_delay,
gen_op_move_gpr27_pc_delay,
gen_op_move_gpr28_pc_delay,
gen_op_move_gpr29_pc_delay,
gen_op_move_gpr30_pc_delay,
gen_op_move_gpr31_pc_delay };
 
static void gen_j_reg(struct op_queue *opq, unsigned int gpr, int insn_index,
uint32_t insn)
{
int i;
int reg_t[NUM_T_REGS];
 
if(do_stats)
gen_op_analysis(opq, 1, insn_index, insn);
 
if(!gpr)
gen_op_clear_pc_delay(opq, 1);
else
set_pc_delay_gpr[gpr](opq, 1);
 
gen_op_do_sched(opq, 1);
 
/* Recompiling the delay slot instruction must see the temporaries in
* the state after the jump/branch instruction, not before */
memcpy(reg_t, opq->reg_t, sizeof(reg_t));
memcpy(opq->reg_t, opq->reg_t_d, sizeof(reg_t));
 
/* Generate the delay slot instruction */
gen_op_set_delay_insn(opq, 1);
recompile_insn(opq, opq->insn_addr + 4, 1);
 
memcpy(opq->reg_t, reg_t, sizeof(reg_t));
 
ship_gprs_out_t(opq, 1, opq->reg_t_d);
 
gen_op_set_pc_pc_delay(opq, 1);
gen_op_clear_delay_insn(opq, 1);
gen_op_do_sched_delay(opq, 1);
 
gen_op_do_jump_delay(opq, 1);
}
 
/*------------------------------[ Operation generation for an instruction ]---*/
/* FIXME: Flag setting is not done in any instruction */
/* FIXME: Since r0 is not moved into a temporary, check all arguments below! */
1493,15 → 1333,64
void gen_l_bf(struct op_queue *opq, int param_t[3], orreg_t param[3],
int delay_slot)
{
opq->jump_local = find_jump_loc(opq->insn_addr + (orreg_t)(param[0] << 2), opq);
gen_op_check_flag(opq, 1, param[0] << 2);
int i;
if(do_stats)
gen_op_analysis(opq, 1, 3, 0x10000000 | (param[0] & 0x03ffffff));
 
/* The temporaries are expected to be shipped out after the execution of the
* branch instruction whether it branched or not */
if(opq->prev) {
ship_gprs_out_t(opq->prev, 1, opq->reg_t);
for(i = 0; i < NUM_T_REGS; i++) {
opq->reg_t[i] = 32;
opq->reg_t_d[i] = 32;
}
}
 
if(ADDR_PAGE(opq->insn_addr) != ADDR_PAGE(opq->insn_addr + 4)) {
gen_op_check_flag_delay(opq, 1, param[0] << 2);
gen_op_do_sched(opq, 1);
opq->not_jump_loc = -1;
return;
}
gen_op_check_flag(opq, 1, 0);
opq->not_jump_loc = opq->num_ops_param - 1;
 
gen_j_imm(opq, param[0]);
}
 
void gen_l_bnf(struct op_queue *opq, int param_t[3], orreg_t param[3],
int delay_slot)
{
opq->jump_local = find_jump_loc(opq->insn_addr + (orreg_t)(param[0] << 2), opq);
gen_op_check_not_flag(opq, 1, param[0] << 2);
int i;
if(do_stats)
gen_op_analysis(opq, 1, 2, 0x0c000000 | (param[0] & 0x03ffffff));
 
/* The temporaries are expected to be shipped out after the execution of the
* branch instruction whether it branched or not */
if(opq->prev) {
ship_gprs_out_t(opq->prev, 1, opq->reg_t);
for(i = 0; i < NUM_T_REGS; i++) {
opq->reg_t[i] = 32;
opq->reg_t_d[i] = 32;
}
}
 
if(ADDR_PAGE(opq->insn_addr) != ADDR_PAGE(opq->insn_addr + 4)) {
gen_op_check_not_flag_delay(opq, 1, param[0] << 2);
gen_op_do_sched(opq, 1);
opq->not_jump_loc = -1;
return;
}
 
gen_op_check_not_flag(opq, 1, 0);
opq->not_jump_loc = opq->num_ops_param - 1;
 
gen_j_imm(opq, param[0]);
 
/* The temporaries don't get shipped out if the branch is not taken */
memcpy(opq->next->reg_t, opq->reg_t, sizeof(opq->reg_t));
}
 
static const generic_gen_op l_cmov_t_table[NUM_T_REGS][NUM_T_REGS][NUM_T_REGS] = {
1609,8 → 1498,6
void gen_l_div(struct op_queue *opq, int param_t[3], orreg_t param[3],
int delay_slot)
{
/* Cross reference this location, since an ILLEGAL exception may happen */
find_jump_loc(opq->insn_addr, opq);
if(!param[2]) {
/* There is no option. This _will_ cause an illegal exception */
if(!delay_slot)
1654,8 → 1541,6
void gen_l_divu(struct op_queue *opq, int param_t[3], orreg_t param[3],
int delay_slot)
{
/* Cross reference this location, since an ILLEGAL exception may happen */
find_jump_loc(opq->insn_addr, opq);
if(!param[2]) {
/* There is no option. This _will_ cause an illegal exception */
if(!delay_slot)
1816,62 → 1701,37
void gen_l_j(struct op_queue *opq, int param_t[3], orreg_t param[3],
int delay_slot)
{
gen_op_set_pc_delay_imm(opq, 1, param[0] << 2);
if(do_stats)
gen_op_analysis(opq, 1, 0, param[0] & 0x03ffffff);
 
/* Don't allocate a separate x-ref structure for the infinite loop instruction
* (l.j 0) */
if(!param[0]) {
opq->jump_local = 2;
opq->xref = cpu_state.inf_xrefs;
opq->xref->or_addr = opq->insn_addr;
cpu_state.inf_xrefs = opq->xref->next;
return;
}
 
opq->jump_local = find_jump_loc(opq->insn_addr + (orreg_t)(param[0] << 2), opq);
gen_j_imm(opq, param[0]);
}
 
void gen_l_jal(struct op_queue *opq, int param_t[3], orreg_t param[3],
int delay_slot)
{
/* It is highly likely that the location that was jumped to will `return'.
* Therefore, insert a cross reference at that address */
find_jump_loc(opq->insn_addr + 8, opq);
/* Store the return address */
gen_op_store_link_addr_gpr(opq, 1);
 
gen_l_j(opq, param_t, param, delay_slot);
if(do_stats)
gen_op_analysis(opq, 1, 1, 0x04000000 | (param[0] & 0x03ffffff));
 
/* Store the return address */
gen_op_store_link_addr_gpr(opq, 1);
gen_j_imm(opq, param[0]);
}
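
/* gen_op_store_link_addr_gpr() is assumed to implement the architectural
 * link rule; sketched here, l.jal writes the address of the instruction
 * after its delay slot (the l.jal address + 8) into r9, the OpenRISC link
 * register. */
static void store_link_addr_sketch(oraddr_t jal_addr)
{
  cpu_state.reg[9] = jal_addr + 8;
}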
 
static const generic_gen_op set_pc_delay_t[NUM_T_REGS] =
{ gen_op_set_pc_delay_t0, gen_op_set_pc_delay_t1, gen_op_set_pc_delay_t2 };
 
void gen_l_jr(struct op_queue *opq, int param_t[3], orreg_t param[3],
int delay_slot)
{
/* Treat all jumps as non page-local */
opq->jump_local = 0;
 
if(!param[0]) {
gen_op_clear_pc_delay(opq, 1);
return;
}
 
set_pc_delay_t[param_t[0]](opq, 1);
gen_j_reg(opq, param[0], 104, 0x14000000 | (param[0] << 11));
}
 
void gen_l_jalr(struct op_queue *opq, int param_t[3], orreg_t param[3],
int delay_slot)
{
/* It is highly likely that the location that was jumped to will `return'.
* Therefore, insert a cross reference at that address */
find_jump_loc(opq->insn_addr + 8, opq);
 
gen_l_jr(opq, param_t, param, delay_slot);
 
/* Store the return address */
gen_op_store_link_addr_gpr(opq, 1);
 
gen_j_reg(opq, param[0], 105, 0x18000000 | (param[0] << 11));
}
 
/* FIXME: Optimise all load instructions when the disposition == 0 */
2295,7 → 2155,12
void gen_l_rfe(struct op_queue *opq, int param_t[3], orreg_t param[3],
int delay_slot)
{
if(do_stats)
gen_op_analysis(opq, 1, 12, 0x24000000);
 
gen_op_prep_rfe(opq, 1);
gen_op_do_sched(opq, 1);
gen_op_do_jump(opq, 1);
}
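
/* Rough sketch of the l.rfe semantics the micro-ops above are assumed to
 * prepare (SPR names as used elsewhere in the tree): restore SR from ESR0
 * and resume execution at EPCR0. */
static void rfe_effect_sketch(void)
{
  mtspr(SPR_SR, mfspr(SPR_ESR_BASE));
  cpu_state.pc = mfspr(SPR_EPCR_BASE);
}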
 
/* FIXME: All store instructions should be optimised when the disposition == 0 */
2991,14 → 2856,16
void gen_l_sys(struct op_queue *opq, int param_t[3], orreg_t param[3],
int delay_slot)
{
/* Since we *know* that we *will* jump to the next instruction, insert an xref
* there */
find_jump_loc(opq->insn_addr + 4, opq);
if(do_stats)
gen_op_analysis(opq, 1, 7, 0x20000000 | param[0]);
 
if(!delay_slot)
gen_op_prep_sys(opq, 1);
else
gen_op_prep_sys_delay(opq, 1);
 
gen_op_do_sched(opq, 1);
gen_op_do_jump(opq, 1);
}
 
/* FIXME: This will not work if the l.trap is in a delay slot */
3005,9 → 2872,8
void gen_l_trap(struct op_queue *opq, int param_t[3], orreg_t param[3],
int delay_slot)
{
/* Since we *know* that we *will* jump to the next instruction, insert an xref
* there */
find_jump_loc(opq->insn_addr + 4, opq);
if(do_stats)
gen_op_analysis(opq, 1, 8, 0x22000000);
 
if(!delay_slot)
gen_op_prep_trap(opq, 1);
3070,9 → 2936,6
void gen_l_invalid(struct op_queue *opq, int param_t[3], orreg_t param[3],
int delay_slot)
{
/* The program running on OpenRISC may decide to patch this location, so
* cross reference this location just in case */
find_jump_loc(opq->insn_addr, opq);
if(!delay_slot)
gen_op_illegal(opq, 1);
else
/trunk/or1ksim/cpu/or32/dyn_rec.h
21,48 → 21,38
#ifndef DYN_REC_H
#define DYN_REC_H
 
struct x_ref {
void *dyn_addr; /* Recompiled address */
oraddr_t or_addr; /* The or address of the x-ref (physical) */
unsigned int ref; /* How many times the x-ref is referenced */
struct x_ref *next;
};
 
/* Each dynamically recompiled page has one of these */
struct dyn_page {
oraddr_t or_page;
void *host_page;
unsigned int host_len;
int carrys_delay_slot; /* Is the delay-slot of the last insn on the next page? */
int dirty; /* Is recompiled page invalid? */
struct x_ref *xrefs; /* what's referenced in this page */
struct x_ref **held_xrefs; /* The xrefs that this page holds */
int delayr; /* delayr of memory backing this page */
uint16_t ts[4096]; /* What registers the temporaries back */
uint16_t ts_during[2048]; /* What registers the temporaries back (during the
* instruction) */
uint16_t ts_bound[2049]; /* What registers the temporaries back (on the
* beginning boundary of the instruction) */
void **locs; /* Openrisc locations in the recompiled code */
struct dyn_page *next;
};
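
/* Sketch of how locs is meant to be indexed: one entry per 32-bit
 * instruction slot, selected by the word offset within the page (PAGE_SIZE
 * assumed to be the usual 8 KB, a power of two).  Depending on the stage of
 * recompilation the entry holds either an offset into host_page or an
 * absolute host address. */
static void *recompiled_loc(struct dyn_page *dp, oraddr_t phys_addr)
{
  return dp->locs[(phys_addr & (PAGE_SIZE - 1)) / 4];
}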
 
struct dyn_page *find_dynd_page(oraddr_t addr);
struct x_ref *find_host_x_ref(struct x_ref *x_refs, oraddr_t addr);
struct x_ref *add_to_xrefs(struct dyn_page *dp, oraddr_t addr);
void recompile_page(struct dyn_page *dyn);
struct dyn_page *new_dp(oraddr_t page);
void add_to_held_xrefs(struct dyn_page *dp, struct x_ref *xref);
void add_to_opq(struct op_queue *opq, int end, int op);
void add_to_op_params(struct op_queue *opq, int end, unsigned long param);
void *enough_host_page(struct dyn_page *dp, void *cur, unsigned int *len,
unsigned int amount);
void dirtyfy_page(struct dyn_page *dp);
struct x_ref *find_held_x_ref(struct x_ref **held_xrefs, oraddr_t or_addr);
void init_dyn_recomp(void);
void jump_dyn_code(oraddr_t addr);
void dump_xrefs(struct dyn_page *dp, FILE *f);
void run_sched_out_of_line(int add_normal);
void recheck_immu(int got_en_dis);
void enter_dyn_code(oraddr_t addr, struct dyn_page *dp);
 
extern void *rec_stack_base;
 
#define IMMU_GOT_ENABLED 1
#define IMMU_GOT_DISABLED 2
 
#define NUM_RFE_HELD 100
 
#endif
/trunk/or1ksim/cpu/or32/op_t_reg_mov_op.h
47,3 → 47,10
{
env->reg[REG] = t2;
}
 
__or_dynop void glue(glue(op_move_gpr, REG), _pc_delay)(void)
{
env->pc_delay = env->reg[REG];
env->delay_insn = 1;
}
 
/trunk/or1ksim/cpu/or32/dyngen.c
47,6 → 47,8
static const char *gen_code_proto =
"void gen_code(struct op_queue *opq, struct dyn_page *dp);\n"
"void patch_relocs(struct op_queue *opq, void *host_page);\n"
"\n"
"#define op_mark_loc_indx 0\n"
"\n";
 
static const char *c_sw_file_head =
71,21 → 73,17
" unsigned int host_len = dp->host_len;\n"
" void *host_cur = dp->host_page;\n"
" oraddr_t pc = dp->or_page;\n"
" struct x_ref *next_x_ref = dp->xrefs;\n"
" void **loc = dp->locs;\n"
"\n"
" while(opq) {\n"
" /* For now, only store offsets in the x-ref table */\n"
" if(next_x_ref && (next_x_ref->or_addr == pc)) {\n"
" next_x_ref->dyn_addr = (void *)(host_cur - dp->host_page);\n"
" next_x_ref = next_x_ref->next;\n"
" }\n"
" if(opq->next)\n"
" *loc++ = (void *)(host_cur - dp->host_page);"
"\n"
" /* Patch the dyn_addr of the xrefs for infinite loops */\n"
" if(opq->jump_local == 2)\n"
" opq->xref->dyn_addr = (void *)(host_cur - dp->host_page);\n"
"\n"
" for(i = 0, ops = opq->ops; i < opq->num_ops; i++, ops++) {\n"
" switch(*ops) {\n";
" switch(*ops) {\n"
" case op_mark_loc_indx:\n"
" opq->ops_param[0] = host_cur - dp->host_page;\n"
" break;\n";
 
static const char *c_sw_file_tail =
" }\n"
119,8 → 117,8
"#include \"%s\"\n"
"\n"
"void do_scheduler(void); /* FIXME: Remove */\n"
"void analysis(struct iqueue_entry *current); /* FIXME: Remove */\n"
"void do_sched_wrap(void); /* FIXME: Remove */\n"
"void do_sched_wrap_delay(void); /* FIXME: Remove */\n"
"void simprintf(oraddr_t stackaddr, unsigned long regparam); /* FIXME: Remove */\n"
"\n"
"void patch_relocs(struct op_queue *opq, void *host_page)\n"
/trunk/or1ksim/cpu/or32/rec_i386.h
20,28 → 20,6
 
#include "common_i386.h"
 
extern void *rec_stack_base;
 
/* Sets the stack to a specified value */
static inline void or_longjmp(void *loc) __attribute__((noreturn));
static inline void or_longjmp(void *loc)
{
/* We push a trampoline address (dyn_ret_stack_prot) onto the stack to be able
* to detect if any ret instructions found their way into an operation. */
asm("\tmovl %0, %%eax\n"
"\tmovl %1, %%esp\n"
"\tmovl $%2, %%ebp\n"
"\tpush $dyn_ret_stack_prot\n"
"\tpush $dyn_ret_stack_prot\n"
"\tpush $dyn_ret_stack_prot\n"
"\tjmp *%%eax\n"
:
: "m" (loc),
"m" (rec_stack_base),
"m" (cpu_state));
}
 
 
/* Initialises the recompiler (architecture specific). */
static inline void init_dyn_rec(void)
{
/trunk/or1ksim/cpu/or32/op_i386.h
25,15 → 25,12
 
/* Handles the scheduler and PC updating. Yes, using MMX is a requirement. It
* just won't change. This must be as compact as possible */
static inline void handle_sched(void)
{
asm("paddd %%mm1, %%mm0\n"
"\tmovd %%mm0, %%eax\n"
"\ttestl %%eax, %%eax\n"
"\tjg .no_need_run_sched\n"
"\tcall do_sched_wrap\n"
"\t.no_need_run_sched:" : : );
}
#define HANDLE_SCHED(func, jmp) asm("paddd %%mm1, %%mm0\n" \
"\tmovd %%mm0, %%eax\n" \
"\ttestl %%eax, %%eax\n" \
"\tjg ." jmp "\n" \
"\tcall "#func"\n" \
"\t." jmp ":" : : )
 
static inline int32_t do_cycles(void)
{
55,3 → 52,23
runtime.sim.mem_cycles = 0;
}
 
static inline void or_longjmp(void *loc) __attribute__((noreturn));
static inline void or_longjmp(void *loc)
{
/* We push a trampoline address (dyn_ret_stack_prot) onto the stack to be able
* to detect if any ret instructions found their way into an operation. */
asm("\tmovl %0, %%eax\n"
"\tmovl %1, %%esp\n"
"\tmovl $%2, %%ebp\n"
"\tpush $dyn_ret_stack_prot\n"
"\tpush $dyn_ret_stack_prot\n"
"\tpush $dyn_ret_stack_prot\n"
"\tjmp *%%eax\n"
:
: "m" (loc),
"m" (rec_stack_base),
"m" (cpu_state));
}
 
 
 
/trunk/or1ksim/cpu/common/execute.h
58,14 → 58,6
/* Micro operation queue. Only used to speed up recompile_page */
struct op_queue *opqs;
 
/* Cross references that reference the jumps from the infinite loop
* instruction (l.j 0) */
struct x_ref *inf_xrefs;
 
/* rfe cross-reference cache */
struct x_ref **rfe_held_xrefs;
unsigned int rfe_held_xref_pos;
 
/* Set if all temporaries are stored */
int ts_current;
 
/trunk/or1ksim/cpu/or1k/except.c
138,9 → 138,9
#if DYNAMIC_EXECUTION
/* In immu_translate except_handle is called with except_handle(..., virtaddr) */
/* Add the immu miss delay to the cycle counter */
if(!immu_ex_from_insn)
if(!immu_ex_from_insn) {
mtspr(SPR_EPCR_BASE, get_pc() - (cpu_state.delay_insn ? 4 : 0));
else
} else
/* This exception came from an l.mtspr instruction in which case the pc
* points to the l.mtspr instruction when in actual fact it is the next
* instruction that would have faulted/missed. ea is used instead of
148,9 → 148,12
* in the delay slot of a page local jump the fault must happen on the
* instruction that was jumped to. This is handled in recheck_immu. */
mtspr(SPR_EPCR_BASE, ea);
run_sched_out_of_line(immu_ex_from_insn);
immu_ex_from_insn = 0;
break;
run_sched_out_of_line(immu_ex_from_insn);
/* Save the registers that are in the temporaries */
if(!cpu_state.ts_current)
upd_reg_from_t(cpu_state.pc, !immu_ex_from_insn);
immu_ex_from_insn = 0;
break;
#endif
/* All these exceptions happen during a simulated instruction */
case EXCEPT_BUSERR:
165,6 → 168,14
* function jumps out to the exception vector the scheduler would never have
* a chance to run, therefore run it now */
run_sched_out_of_line(1);
/* Save the registers that are in the temporaries */
if(!cpu_state.ts_current) {
if(cpu_state.delay_insn &&
(ADDR_PAGE(cpu_state.pc) == ADDR_PAGE(cpu_state.pc - 4)))
upd_reg_from_t(cpu_state.pc - 4, 0);
else
upd_reg_from_t(cpu_state.pc, 0);
}
#endif
mtspr(SPR_EPCR_BASE, cpu_state.pc - (cpu_state.delay_insn ? 4 : 0));
break;
188,6 → 199,10
* of the last instruction executed and not the next one, to which the pc
* now points to */
cpu_state.pc -= 4;
 
/* Save the registers that are in the temporaries */
if(!cpu_state.ts_current)
upd_reg_from_t(cpu_state.pc, 1);
#endif
break;
}
199,16 → 214,12
 
/* Complex/simple execution strictly don't need this because of the
* next_delay_insn thingy but in the dynamic execution model that doesn't
* exist and thus cpu_state.insn_delay would stick in the exception handler
* exist and thus cpu_state.delay_insn would stick in the exception handler
* causing grief if the first instruction of the exception handler is also in
* the delay slot of the previous instruction */
cpu_state.delay_insn = 0;
 
#if DYNAMIC_EXECUTION
/* Save the registers that are in the temporaries */
if(!cpu_state.ts_current)
upd_reg_from_t(cpu_state.pc);
 
cpu_state.pc = except_vector;
cpu_state.ts_current = 0;
jump_dyn_code(except_vector);
