URL
https://opencores.org/ocsvn/or1k/or1k/trunk
Subversion Repositories or1k
Compare Revisions
- This comparison shows the changes necessary to convert path
/
- from Rev 1480 to Rev 1481
- ↔ Reverse comparison
Rev 1480 → Rev 1481
/trunk/or1ksim/cpu/or32/dyn_rec_stubs.c
153,6 → 153,7
recompile_page(dp); |
|
fprintf(stderr, "Recompiled page length: %i\n", dp->host_len); |
fprintf(stderr, "Recompiled to: %p\n", dp->host_page); |
fprintf(stderr, "Dumping reced page to disk...\n"); |
|
f = fopen(argv[2], "w"); |
170,8 → 171,10
printf("--- Recompiled or disassembly end ---\n"); |
*/ |
|
dump_xrefs(dp, stdout); |
|
printf("--- Recompiled offsets ---\n"); |
for(i = 0; i < (PAGE_LEN / 4); i++) |
printf("%"PRIxADDR": %x\n", i * 4, dp->locs[i] - dp->host_page); |
printf("--- Recompiled offsets end ---\n"); |
destruct_automata(); |
|
return 0; |
252,9 → 255,13
return NULL; |
} |
|
static struct dev_memarea dummy_area = { |
delayr: 1, |
}; |
|
struct dev_memarea *verify_memoryarea(oraddr_t addr) |
{ |
return NULL; |
return &dummy_area; |
} |
|
void sim_done (void) |
/trunk/or1ksim/cpu/or32/dyn32_defs.h
25,11 → 25,13
unsigned int num_ops_param; |
unsigned int ops_param_len; |
unsigned int *ops_param; |
int jump_local; /* Is this instruction a page-local jump */ |
unsigned int *jump_local_loc; /* Points to the parameter that holds the location of the jump */ |
struct x_ref *xref; /* Cross-reference of the page-local jump */ |
unsigned int jump_local; /* Parameter index that holds the location of the jump */ |
oraddr_t jump_local_loc; /* Location to jump to (relative to start of page */ |
unsigned int not_jump_loc; /* Location to jump if not jumping (l.bf/l.bnf) */ |
int xref; /* Is this location cross referenced? */ |
oraddr_t insn_addr; /* Physical address of the instruction */ |
unsigned int reg_t[3]; /* Which registers are in the temporaries? */ |
unsigned int reg_t[3]; /* Which registers are in the temporaries (before the instruction)? */ |
unsigned int reg_t_d[3]; /* Which registers are in the temporaries (after the instruction? */ |
struct op_queue *prev; |
struct op_queue *next; |
}; |
/trunk/or1ksim/cpu/or32/op_support.c
45,14 → 45,19
#include "rec_i386.h" |
|
/* Stuff that is really a `micro' operation but is rather big (or for some other |
* reason (like calling exit()) */ |
* reason like calling exit()) */ |
|
void upd_reg_from_t(oraddr_t pc) |
void upd_reg_from_t(oraddr_t pc, int bound) |
{ |
int reg; |
|
reg = cpu_state.curr_page->ts[(pc & (PAGE_SIZE - 1)) / 2]; |
pc = ((pc & (PAGE_SIZE - 1)) / 4); |
|
if(bound) { |
reg = cpu_state.curr_page->ts_bound[pc + 1]; |
} else |
reg = cpu_state.curr_page->ts_during[pc]; |
|
if(reg & 0x1f) |
cpu_state.reg[reg & 0x1f] = cpu_state.t0; |
|
65,8 → 70,8
|
void op_support_nop_exit(void) |
{ |
upd_reg_from_t(get_pc()); |
PRINTF("exit(%"PRIdREG")\n", cpu_state.reg[3]); |
upd_reg_from_t(get_pc(), 0); |
PRINTF("exit(%"PRIxREG")\n", cpu_state.reg[3]); |
fprintf(stderr, "@reset : cycles %lld, insn #%lld\n", |
runtime.sim.reset_cycles, runtime.cpu.reset_instructions); |
fprintf(stderr, "@exit : cycles %lld, insn #%lld\n", runtime.sim.cycles, |
77,8 → 82,10
/* FIXME: Implement emulation of a stalled cpu |
if (config.debug.gdb_enabled) |
set_stall_state (1); |
else |
runtime.sim.cont_run = 0; |
else { |
handle_sim_command(); |
sim_done(); |
} |
*/ |
exit(0); |
} |
94,19 → 101,19
|
void op_support_nop_printf(void) |
{ |
upd_reg_from_t(get_pc()); |
upd_reg_from_t(get_pc(), 0); |
simprintf(cpu_state.reg[4], cpu_state.reg[3]); |
} |
|
void op_support_nop_report(void) |
{ |
upd_reg_from_t(get_pc()); |
upd_reg_from_t(get_pc(), 0); |
PRINTF("report(0x%"PRIxREG");\n", cpu_state.reg[3]); |
} |
|
void op_support_nop_report_imm(int imm) |
{ |
upd_reg_from_t(get_pc()); |
upd_reg_from_t(get_pc(), 0); |
PRINTF("report %i (0x%"PRIxREG");\n", imm, cpu_state.reg[3]); |
} |
|
118,9 → 125,11
void do_jump(oraddr_t addr) |
{ |
struct dyn_page *target_dp; |
struct x_ref *xref; |
oraddr_t phys_page; |
|
/* Temporaries are always shipped out */ |
cpu_state.ts_current = 1; |
|
/* The pc is set to the location of the jump in op_set_pc_preemt(_check) and |
* then it is incermented by 4 when the scheduler is run. If a scheduled job |
* so happens to raise an exception cpu_state.delay_insn will still be set and |
156,21 → 165,6
if(phys_page < 0x100) |
target_dp->dirty = 1; |
|
/* Check if this location is cross-referenced */ |
if(!(xref = find_host_x_ref(target_dp->xrefs, phys_page))) { |
target_dp->dirty = 1; |
xref = add_to_xrefs(target_dp, phys_page); |
if(cpu_state.curr_page) |
add_to_held_xrefs(cpu_state.curr_page, xref); |
} else { |
/* Only increment reference count if this page didn't already */ |
if(cpu_state.curr_page && !find_held_x_ref(cpu_state.curr_page->held_xrefs, |
phys_page)) { |
xref->ref++; |
add_to_held_xrefs(cpu_state.curr_page, xref); |
} |
} |
|
if(target_dp->dirty) |
recompile_page(target_dp); |
|
186,160 → 180,28
|
cpu_state.ts_current = 0; |
|
/* Initially this (and do_rfe/handle_except) returned the address that we |
* should jump to and then the recompiled code performed the jump. This was |
* no problem if the jump was trully an interpage jump or if the location |
* didn't need recompileation. If the jump is page local and the page needs |
* recompileation there is a very high probability that the page will move in |
* memory and then the return address that is on the stack will point to |
* memory that has already been freed, sometimes leading to crashes */ |
/* Initially this returned the address that we should jump to and then the |
* recompiled code performed the jump. This was no problem if the jump was |
* trully an interpage jump or if the location didn't need recompileation. If |
* the jump is page local and the page needs recompileation there is a very |
* high probability that the page will move in memory and then the return |
* address that is on the stack will point to memory that has already been |
* freed, sometimes leading to crashes */ |
/* This looks like it could really be simpler, but no it can't. The only |
* issue here is the stack: it has to be unwound. This function is called |
* from except_handle, which generally ends up quite high on the stack... */ |
or_longjmp(xref->dyn_addr); |
enter_dyn_code(phys_page, target_dp); |
} |
|
/* l.rfe is a hard instruction to emulate. One could just call |
* do_jump(cpu_state.sprs[SPR_EPCR_BASE]), but then the location that we jump to |
* will get cross referenced and because the page that contains the exception |
* handlers is very rearly marked as dirty it will accumulate alot of held |
* cross references over time. */ |
void do_rfe(void) |
/* Wrapper around analysis() that contains all the recompiler specific stuff */ |
void op_support_analysis(void) |
{ |
struct dyn_page *target_dp; |
struct x_ref *xref; |
oraddr_t phys_page; |
int already_held = 0; |
|
set_pc(cpu_state.sprs[SPR_EPCR_BASE]); |
|
phys_page = immu_translate(cpu_state.sprs[SPR_EPCR_BASE]); |
|
/* Same reason as in do_jump() */ |
runtime.sim.mem_cycles = 0; |
|
/* op_do_sched has run by the time this is run, which makes the pc point to |
* the instruction after l.rfe. */ |
printf("Returning from exception to %"PRIxADDR" from %"PRIxADDR"\n", |
phys_page, cpu_state.sprs[SPR_PPC]); |
|
target_dp = find_dynd_page(phys_page); |
|
if(!target_dp) |
target_dp = new_dp(phys_page); |
|
/* Since writes to the 0x0-0xff range do not dirtyfy a page recompile the 0x0 |
* page if the jump is to that location */ |
if(phys_page < 0x100) |
target_dp->dirty = 1; |
|
/* Check if this location is cross-referenced */ |
if(!(xref = find_host_x_ref(target_dp->xrefs, phys_page))) { |
xref = add_to_xrefs(target_dp, phys_page); |
/* Calling dirtyfy_page is real tempting but if we get to the situation were |
* the l.rfe instruction and the location to which it returns to are on the |
* same page then all the exception cross references will get removed and |
* this will result in excessive recompileations of this page */ |
target_dp->dirty = 1; |
|
/* There is alot of code (especially in linux) that do loops like this: |
* int a; |
* // Stuff such that b gets on another page than a |
* int b; |
* for(i = 0; i < (some big value); i++) { |
* a = b; |
* // Some more stuff |
* } |
* Here a DTLB miss will happen on every acess to a and b and l.rfe will |
* always return to the same locations but since the previous l.rfe to this |
* page was to a different location the page will get recompiled each time a |
* or b is acessed. This is why the last NUM_RFE_HELD returns are `cached'. |
*/ |
if(++cpu_state.rfe_held_xref_pos == NUM_RFE_HELD) |
cpu_state.rfe_held_xref_pos = 0; |
|
if(cpu_state.rfe_held_xrefs[cpu_state.rfe_held_xref_pos]) |
cpu_state.rfe_held_xrefs[cpu_state.rfe_held_xref_pos]->ref--; |
|
cpu_state.rfe_held_xrefs[cpu_state.rfe_held_xref_pos] = xref; |
} else { |
/* Make sure we increase this cross reference's reference count, since it is |
* decremented below. */ |
xref->ref++; |
already_held = 1; |
} |
|
if(target_dp->dirty) |
recompile_page(target_dp); |
|
if(already_held) |
xref->ref--; |
|
cpu_state.curr_page = target_dp; |
|
/* FIXME: If the page is backed by more than one type of memory, this will |
* produce wrong results */ |
if(cpu_state.sprs[SPR_SR] & SPR_SR_IME) |
/* Add the mmu hit delay to the cycle counter */ |
upd_cycles_dec(target_dp->delayr - config.immu.hitdelay); |
upd_sim_cycles(); |
if(ADDR_PAGE(cpu_state.pc) != cpu_state.pc) |
upd_reg_from_t(cpu_state.pc - (cpu_state.delay_insn ? 4 : 0), 0); |
else |
upd_cycles_dec(target_dp->delayr); |
|
cpu_state.ts_current = 0; |
|
/* See the comment at the end of do_jump */ |
or_longjmp(xref->dyn_addr); |
upd_reg_from_t(cpu_state.pc, 0); |
runtime.cpu.instructions++; |
analysis(&cpu_state.iqueue); |
} |
|
/* Handles an exception. */ |
void handle_except(oraddr_t except) |
{ |
struct dyn_page *target_dp; |
struct x_ref *xref; |
|
/* NOTE: It is known when this code will be run. It is therefore not |
* necessary to have to plough through cpu_state.curr_page->ts to store the |
* temporaries. On the other hand, except_handle is also called from the |
* scheduler, therefore we don't know when it is called and we can't move the |
* temporaries to their permanent storeage in the recompiled code. */ |
|
/* op_do_sched has run by the time we run this, which makes the pc point to |
* the next instruction. */ |
printf("Exception %"PRIxADDR" (%s) from %"PRIxADDR"\n", except, |
except_name(except), get_pc() - 4); |
|
set_pc(except); |
|
target_dp = find_dynd_page(except); |
|
if(!target_dp) |
target_dp = new_dp(except); |
|
/* Check if this location is cross-referenced */ |
if(!(xref = find_host_x_ref(target_dp->xrefs, except))) { |
/* See the comment in do_rfe for why dirtyfy page is not called */ |
target_dp->dirty = 1; |
xref = add_to_xrefs(target_dp, except); |
} else { |
/* If this cross reference is scheduled for removal increment its reference |
* count */ |
if(!xref->ref) |
xref->ref++; |
} |
|
if(target_dp->dirty) |
recompile_page(target_dp); |
|
cpu_state.curr_page = target_dp; |
|
/* FIXME: If the page is backed by more than one type of memory, this will |
* produce wrong results */ |
/* Address translation is disabled above (no need to add hitdelay) */ |
upd_cycles_dec(target_dp->delayr); |
|
cpu_state.ts_current = 0; |
|
/* See the comment at the end of do_jump */ |
or_longjmp(xref->dyn_addr); |
} |
|
/trunk/or1ksim/cpu/or32/op.c
51,9 → 51,6
#include "op_i386.h" |
|
/* FIXME: Move this */ |
void analysis (struct iqueue_entry *current); |
|
/* FIXME: Move this */ |
#define PAGE_LEN 8192 |
|
/* |
117,7 → 114,42
do_scheduler(); |
} |
|
/* Helper function. Hopefully it will get inlined */ |
/* do_scheduler wrapper for instructions that are in the delay slot */ |
void do_sched_wrap_delay(void) |
{ |
save_t_temporary(); |
upd_sim_cycles(); |
env->ts_current = 1; |
/* The PC gets set to the location of the jump, but do_sched increments that |
* so pull it back here to point to the right location again. This could be |
* done in op_add_pc/op_set_pc_pc_delay but that would enlarge the recompiled |
* code. */ |
//env->pc -= 4; |
do_scheduler(); |
env->ts_current = 0; |
} |
|
void enter_dyn_code(oraddr_t addr, struct dyn_page *dp) |
{ |
uint16_t reg; |
|
addr &= PAGE_SIZE - 1; |
addr >>= 2; |
|
reg = dp->ts_bound[addr]; |
|
if(reg & 0x1f) |
t0 = cpu_state.reg[reg & 0x1f]; |
|
if((reg >> 5) & 0x1f) |
t1 = cpu_state.reg[(reg >> 5) & 0x1f]; |
|
if((reg >> 10) & 0x1f) |
t2 = cpu_state.reg[(reg >> 10) & 0x1f]; |
|
or_longjmp(dp->locs[addr]); |
} |
|
__or_dynop void op_t0_imm(void) |
{ |
t0 = OP_PARAM1; |
178,24 → 210,14
t2 = t1; |
} |
|
__or_dynop void op_set_pc_delay_t0(void) |
__or_dynop void op_set_pc_pc_delay(void) |
{ |
env->pc_delay = t0; |
env->delay_insn = 1; |
env->sprs[SPR_PPC] = get_pc(); |
/* pc_delay is pulled back 4 since imediatly after this is run, the scheduler |
* runs which also increments it by 4 */ |
set_pc(env->pc_delay - 4); |
} |
|
__or_dynop void op_set_pc_delay_t1(void) |
{ |
env->pc_delay = t1; |
env->delay_insn = 1; |
} |
|
__or_dynop void op_set_pc_delay_t2(void) |
{ |
env->pc_delay = t2; |
env->delay_insn = 1; |
} |
|
__or_dynop void op_set_pc_delay_imm(void) |
{ |
env->pc_delay = get_pc() + (orreg_t)OP_PARAM1; |
216,46 → 238,35
|
__or_dynop void op_do_jump(void) |
{ |
do_jump(get_pc()); |
} |
|
__or_dynop void op_do_jump_delay(void) |
{ |
do_jump(env->pc_delay); |
} |
|
/* Only used to handle branch instruction ie. j.bf and j.bnf */ |
__or_dynop void op_do_jump_check(void) |
__or_dynop void op_clear_delay_insn(void) |
{ |
if(env->delay_insn) { |
env->delay_insn = 0; |
do_jump(env->pc_delay); |
} |
env->delay_insn = 0; |
} |
|
/* Only used to jump out to the next page */ |
__or_dynop void op_do_jump_pc(void) |
__or_dynop void op_set_delay_insn(void) |
{ |
do_jump(get_pc()); |
env->delay_insn = 1; |
} |
|
__or_dynop void op_clear_delay_insn(void) |
__or_dynop void op_check_delay_slot(void) |
{ |
env->delay_insn = 0; |
if(!env->delay_insn) |
OP_JUMP(OP_PARAM1); |
} |
|
__or_dynop void op_jmp_imm(void) |
{ |
env->ts_current = 0; |
set_pc(env->pc_delay); |
OP_JUMP(OP_PARAM1); |
} |
|
__or_dynop void op_jmp_imm_check(void) |
{ |
if(env->delay_insn) { |
env->ts_current = 0; |
env->delay_insn = 0; |
set_pc(env->pc_delay); |
OP_JUMP(OP_PARAM1); |
} |
} |
|
__or_dynop void op_set_flag(void) |
{ |
env->sprs[SPR_SR] |= SPR_SR_F; |
266,19 → 277,41
env->sprs[SPR_SR] &= ~SPR_SR_F; |
} |
|
/* Used for the l.bf instruction. Therefore if the flag is not set, jump over |
* all the jumping stuff */ |
__or_dynop void op_check_flag(void) |
{ |
if(!(env->sprs[SPR_SR] & SPR_SR_F)) { |
HANDLE_SCHED(do_sched_wrap, "no_sched_chk_flg"); |
OP_JUMP(OP_PARAM1); |
} |
} |
|
/* Used for l.bf if the delay slot instruction is on another page */ |
__or_dynop void op_check_flag_delay(void) |
{ |
if(env->sprs[SPR_SR] & SPR_SR_F) { |
env->delay_insn = 1; |
env->pc_delay = get_pc() + (orreg_t)OP_PARAM1; |
env->delay_insn = 1; |
} |
} |
|
/* Used for the l.bnf instruction. Therefore if the flag is set, jump over all |
* the jumping stuff */ |
__or_dynop void op_check_not_flag(void) |
{ |
if(env->sprs[SPR_SR] & SPR_SR_F) { |
HANDLE_SCHED(do_sched_wrap, "no_sched_chk_not_flg"); |
OP_JUMP(OP_PARAM1); |
} |
} |
|
/* Used for l.bnf if the delay slot instruction is on another page */ |
__or_dynop void op_check_not_flag_delay(void) |
{ |
if(!(env->sprs[SPR_SR] & SPR_SR_F)) { |
env->delay_insn = 1; |
env->pc_delay = get_pc() + (orreg_t)OP_PARAM1; |
env->delay_insn = 1; |
} |
} |
|
287,22 → 320,12
env->ts_current = 1; |
} |
|
__or_dynop void op_set_pc_preemt(void) |
__or_dynop void op_add_pc(void) |
{ |
env->ts_current = 1; |
env->sprs[SPR_PPC] = get_pc(); |
set_pc(env->pc_delay); |
/* FIXME: Optimise */ |
set_pc(get_pc() + OP_PARAM1); |
} |
|
__or_dynop void op_set_pc_preemt_check(void) |
{ |
if(env->delay_insn) { |
env->ts_current = 1; |
env->sprs[SPR_PPC] = get_pc(); |
set_pc(env->pc_delay); |
} |
} |
|
__or_dynop void op_nop_exit(void) |
{ |
upd_sim_cycles(); |
315,7 → 338,7
{ |
upd_sim_cycles(); |
op_support_nop_reset(); |
handle_except(EXCEPT_RESET); |
do_jump(EXCEPT_RESET); |
} |
|
__or_dynop void op_nop_printf(void) |
347,7 → 370,7
/* Do exception */ |
env->sprs[SPR_EEAR_BASE] = get_pc() - 4; |
env->delay_insn = 0; |
handle_except(EXCEPT_ILLEGAL); |
do_jump(EXCEPT_ILLEGAL); |
} |
} |
|
357,7 → 380,7
if(!t0) { |
/* Do exception */ |
env->sprs[SPR_EEAR_BASE] = get_pc(); |
handle_except(EXCEPT_ILLEGAL); |
do_jump(EXCEPT_ILLEGAL); |
} |
} |
|
367,7 → 390,7
/* Do exception */ |
env->sprs[SPR_EEAR_BASE] = get_pc() - 4; |
env->delay_insn = 0; |
handle_except(EXCEPT_ILLEGAL); |
do_jump(EXCEPT_ILLEGAL); |
} |
} |
|
377,7 → 400,7
if(!t1) { |
/* Do exception */ |
env->sprs[SPR_EEAR_BASE] = get_pc(); |
handle_except(EXCEPT_ILLEGAL); |
do_jump(EXCEPT_ILLEGAL); |
} |
} |
|
387,7 → 410,7
/* Do exception */ |
env->sprs[SPR_EEAR_BASE] = get_pc() - 4; |
env->delay_insn = 0; |
handle_except(EXCEPT_ILLEGAL); |
do_jump(EXCEPT_ILLEGAL); |
} |
} |
|
396,7 → 419,7
if(!t2) { |
/* Do exception */ |
env->sprs[SPR_EEAR_BASE] = get_pc(); |
handle_except(EXCEPT_ILLEGAL); |
do_jump(EXCEPT_ILLEGAL); |
} |
} |
|
405,9 → 428,8
env->iqueue.insn_index = OP_PARAM1; |
env->iqueue.insn = OP_PARAM2; |
env->iqueue.insn_addr = get_pc(); |
upd_sim_cycles(); |
runtime.cpu.instructions++; |
analysis(&env->iqueue); |
save_t_temporary(); |
op_support_analysis(); |
FORCE_RET; |
} |
|
998,23 → 1020,14
env->reg[LINK_REGNO] = get_pc() + 8; |
} |
|
__or_dynop void op_set_rfe_pc(void) |
{ |
set_pc(env->sprs[SPR_EPCR_BASE] - 4); |
} |
|
__or_dynop void op_prep_rfe(void) |
{ |
env->sprs[SPR_SR] = env->sprs[SPR_ESR_BASE] | SPR_SR_FO; |
env->sprs[SPR_PPC] = get_pc(); |
env->ts_current = 1; |
set_pc(env->sprs[SPR_EPCR_BASE] - 4); |
} |
|
__or_dynop void op_rfe(void) |
{ |
do_rfe(); |
FORCE_RET; |
} |
|
static inline void prep_except(oraddr_t epcr_base) |
{ |
env->sprs[SPR_EPCR_BASE] = epcr_base; |
1042,50 → 1055,59
__or_dynop void op_prep_sys_delay(void) |
{ |
env->delay_insn = 0; |
env->ts_current = 1; |
prep_except(get_pc() - 4); |
set_pc(EXCEPT_SYSCALL - 4); |
} |
|
__or_dynop void op_prep_sys(void) |
{ |
env->ts_current = 1; |
prep_except(get_pc() + 4); |
set_pc(EXCEPT_SYSCALL - 4); |
} |
|
__or_dynop void op_prep_trap_delay(void) |
{ |
env->ts_current = 1; |
env->delay_insn = 0; |
prep_except(get_pc() - 4); |
set_pc(EXCEPT_TRAP - 4); |
} |
|
__or_dynop void op_prep_trap(void) |
{ |
env->ts_current = 1; |
prep_except(get_pc()); |
set_pc(EXCEPT_TRAP - 4); |
} |
|
__or_dynop void op_do_except(void) |
{ |
handle_except(OP_PARAM1); |
} |
|
/* FIXME: This `instruction' should be split up like the l.trap and l.sys |
* instructions are done */ |
__or_dynop void op_illegal_delay(void) |
{ |
env->delay_insn = 0; |
env->ts_current = 1; |
env->sprs[SPR_EEAR_BASE] = get_pc() - 4; |
handle_except(EXCEPT_ILLEGAL); |
do_jump(EXCEPT_ILLEGAL - 4); |
} |
|
__or_dynop void op_illegal(void) |
{ |
env->sprs[SPR_EEAR_BASE] = get_pc(); |
handle_except(EXCEPT_ILLEGAL); |
do_jump(EXCEPT_ILLEGAL); |
} |
|
__or_dynop void op_do_sched(void) |
{ |
handle_sched(); |
HANDLE_SCHED(do_sched_wrap, "no_sched"); |
} |
|
__or_dynop void op_do_sched_delay(void) |
{ |
HANDLE_SCHED(do_sched_wrap_delay, "no_sched_delay"); |
} |
|
__or_dynop void op_macc(void) |
{ |
env->sprs[SPR_MACLO] = 0; |
/trunk/or1ksim/cpu/or32/op_support.h
22,8 → 22,7
void op_support_nop_printf(void); |
void op_support_nop_report(void); |
void op_support_nop_report_imm(int imm); |
void op_support_analysis(void); |
void do_jump(oraddr_t addr); |
void do_rfe(void); |
void handle_except(oraddr_t except); |
|
void upd_reg_from_t(oraddr_t pc); |
void upd_reg_from_t(oraddr_t pc, int bound); |
/trunk/or1ksim/cpu/or32/dyn_rec.c
288,7 → 288,6
static void *sigsegv_addr = NULL; |
|
void dyn_ret_stack_prot(void); |
void dump_held_xrefs(struct dyn_page *dp, FILE *f); |
|
void dyn_sigsegv_debug(int u, siginfo_t *siginf, void *dat) |
{ |
337,27 → 336,6
} |
sigsegv_state++; |
case 2: |
/* Dump the x-refs to disk */ |
for(dp = cpu_state.dyn_pages; dp; dp = dp->next) { |
printf("Dumping cross references of 0x%"PRIxADDR" to disk\n", dp->or_page); |
|
sprintf(filen, "or_xref.%"PRIxADDR, dp->or_page); |
if(!(f = fopen(filen, "w"))) { |
fprintf(stderr, "Unable to open %s to dump cross references to: %s\n", |
filen, strerror(errno)); |
continue; |
} |
|
fprintf(f, "Cross references in the page:\n"); |
dump_xrefs(dp, f); |
|
fprintf(f, "\nCross references held by this page:\n"); |
dump_held_xrefs(dp, f); |
|
fclose(f); |
} |
sigsegv_state++; |
case 3: |
/* Dump the contents of the stack */ |
printf("Stack dump: "); |
fflush(stdout); |
380,36 → 358,11
fflush(stdout); |
} |
sigsegv_state++; |
case 4: |
case 3: |
sim_done(); |
} |
} |
|
void dump_xrefs(struct dyn_page *dp, FILE *f) |
{ |
struct x_ref *xref; |
|
fprintf(f, "--- Cross reference dump for %"PRIxADDR" at %p ---\n", |
dp->or_page, dp->host_page); |
for(xref = dp->xrefs; xref; xref = xref->next) { |
fprintf(f, "x-refed or location: 0x%"PRIxADDR", host-location: %p, ref: %i\n", |
xref->or_addr, xref->dyn_addr, xref->ref); |
} |
fprintf(f, "--- Cross reference dump end ---\n"); |
} |
|
void dump_held_xrefs(struct dyn_page *dp, FILE *f) |
{ |
struct x_ref **xrefs; |
|
fprintf(f, "--- Held cross reference dump for %"PRIxADDR" at %p ---\n", |
dp->or_page, dp->host_page); |
for(xrefs = dp->held_xrefs; *xrefs; xrefs++) |
fprintf(f, "Holds an x-ref to 0x%"PRIxADDR", host-location: %p, ref: %i\n", |
(*xrefs)->or_addr, (*xrefs)->dyn_addr, (*xrefs)->ref); |
fprintf(f, "--- Held cross reference dump end ---\n"); |
} |
|
static void add_to_dp(struct dyn_page *new) |
{ |
struct dyn_page *cur; |
433,12 → 386,8
struct dyn_page *dp = malloc(sizeof(struct dyn_page)); |
dp->or_page = ADDR_PAGE(page); |
|
/* Allocate xref terminator */ |
dp->xrefs = NULL; |
dp->locs = malloc(sizeof(void *) * (PAGE_LEN / 4)); |
|
dp->held_xrefs = malloc(sizeof(struct x_ref *)); |
dp->held_xrefs[0] = NULL; |
|
dp->host_len = 0; |
dp->host_page = NULL; |
dp->dirty = 1; |
462,51 → 411,6
return NULL; |
} |
|
/* Finds the dynamicly recompiled location of the given or address */ |
struct x_ref *find_host_x_ref(struct x_ref *x_refs, oraddr_t addr) |
{ |
/* FIXME: Optimise this by knowing that the x_refs array is orderd */ |
while(x_refs && (x_refs->or_addr != addr)) x_refs = x_refs->next; |
|
return x_refs; |
} |
|
static void remove_xref(struct dyn_page *dp, struct x_ref *xref) |
{ |
struct x_ref *prev_xref; |
|
if(dp->xrefs == xref) { |
dp->xrefs = xref->next; |
free(xref); |
return; |
} |
|
prev_xref = dp->xrefs; |
while(prev_xref->next != xref) |
prev_xref = prev_xref->next; |
|
prev_xref->next = xref->next; |
free(xref); |
} |
|
struct x_ref *find_held_x_ref(struct x_ref **held_xrefs, oraddr_t or_addr) |
{ |
/* FIXME: Order this list in add_to_held_xrefs below and optimise this */ |
while(*held_xrefs && ((*held_xrefs)->or_addr != or_addr)) held_xrefs++; |
return *held_xrefs; |
} |
|
void add_to_held_xrefs(struct dyn_page *dp, struct x_ref *xref) |
{ |
unsigned int i; |
|
for(i = 0; dp->held_xrefs[i]; i++); |
|
dp->held_xrefs = realloc(dp->held_xrefs, sizeof(struct x_ref *) * (i + 2)); |
dp->held_xrefs[i] = xref; |
dp->held_xrefs[++i] = NULL; |
} |
|
/* This is called whenever the immu is either enabled/disabled or reconfigured |
* while enabled. This checks if an itlb miss would occour and updates the immu |
* hit delay counter */ |
563,12 → 467,13
oraddr_t pc = get_pc(); |
|
if(!cpu_state.ts_current) |
upd_reg_from_t(pc); |
upd_reg_from_t(pc, 0); |
|
if(add_normal && do_stats) { |
cpu_state.iqueue.insn_addr = pc; |
cpu_state.iqueue.insn = eval_insn_direct(pc, &brk, 1); |
cpu_state.iqueue.insn_index = insn_decode(cpu_state.iqueue.insn); |
runtime.cpu.instructions++; |
analysis(&cpu_state.iqueue); |
} |
|
585,18 → 490,10
/* Signals a page as dirty */ |
void dirtyfy_page(struct dyn_page *dp) |
{ |
struct x_ref **held_xrefs; |
struct x_ref *xref; |
oraddr_t check; |
|
printf("Dirtyfying page 0x%"PRIxADDR"\n", dp->or_page); |
|
/* decrease the reference counts of the xrefs that we hold */ |
for(held_xrefs = dp->held_xrefs; *held_xrefs; held_xrefs++) |
(*held_xrefs)->ref--; |
dp->held_xrefs = realloc(dp->held_xrefs, sizeof(struct x_ref *)); |
dp->held_xrefs[0] = NULL; |
|
dp->dirty = 1; |
|
/* If the execution is currently in the page that was touched then recompile |
604,21 → 501,12
check = cpu_state.delay_insn ? cpu_state.pc_delay : get_pc() + 4; |
if(ADDR_PAGE(check) == dp->or_page) { |
run_sched_out_of_line(1); |
if(!(xref = find_host_x_ref(dp->xrefs, check))) { |
xref = add_to_xrefs(dp, check); |
add_to_held_xrefs(dp, xref); |
} else { |
if(!find_held_x_ref(dp->held_xrefs, check)) { |
add_to_held_xrefs(dp, xref); |
xref->ref++; |
} |
} |
recompile_page(dp); |
|
cpu_state.delay_insn = 0; |
|
/* Jump out to the next instruction */ |
or_longjmp(xref->dyn_addr); |
do_jump(check); |
} |
} |
|
626,6 → 514,16
{ |
int i; |
|
/* Before takeing the temporaries out, temporarily remove the op_do_sched |
* operation such that dyn_page->ts_bound shall be correct before the |
* scheduler runs */ |
if(end && opq->num_ops && (opq->ops[opq->num_ops - 1] == op_do_sched_indx)) { |
opq->num_ops--; |
ship_gprs_out_t(opq, end, reg_t); |
gen_op_do_sched(opq, 1); |
return; |
} |
|
for(i = 0; i < NUM_T_REGS; i++) { |
if(reg_t[i] < 32) |
gen_op_move_gpr_t[i][reg_t[i]](opq, end); |
692,12 → 590,17
opq->num_ops++; |
} |
|
static void gen_op_mark_loc(struct op_queue *opq, int end) |
{ |
add_to_opq(opq, end, op_mark_loc_indx); |
} |
|
/* Adds a parameter to the opq */ |
void add_to_op_params(struct op_queue *opq, int end, unsigned long param) |
{ |
if(opq->num_ops_param == opq->ops_param_len) { |
opq->ops_param_len += OPS_ENLARGE_BY * sizeof(int); |
if(!(opq->ops_param = realloc(opq->ops_param, opq->ops_param_len))) { |
opq->ops_param_len += OPS_ENLARGE_BY; |
if(!(opq->ops_param = realloc(opq->ops_param, opq->ops_param_len * sizeof(int)))) { |
fprintf(stderr, "OOM\n"); |
exit(1); |
} |
735,7 → 638,6
{ |
struct sigaction sigact; |
struct op_queue *opq; |
struct x_ref *xref; |
unsigned int i; |
|
cpu_state.opqs = NULL; |
752,6 → 654,7
opq->ops = NULL; |
opq->ops_param_len = 0; |
opq->ops_param = NULL; |
opq->xref = 0; |
|
if(cpu_state.opqs) |
cpu_state.opqs->prev = opq; |
762,18 → 665,6
|
opq->prev = NULL; |
|
/* Allocate the x-ref structures that will be used for the infinite loop |
* instruction (l.j 0). Allocate a whole page's worth just to make sure that |
* we will have enough */ |
for(i = 0; i < (PAGE_LEN / 4); i++) { |
if(!(xref = malloc(sizeof(struct x_ref)))) { |
fprintf(stderr, "Out-of-memory while allocateing x-ref structures\n"); |
exit(1); |
} |
xref->next = cpu_state.inf_xrefs; |
cpu_state.inf_xrefs = xref; |
} |
|
/* Just some value that we'll use as the base for our stack */ |
rec_stack_base = get_sp(); |
|
787,14 → 678,7
if(sigaction(SIGSEGV, &sigact, NULL)) |
printf("WARN: Unable to install SIGSEGV handler! Don't expect to be able to debug the recompiler.\n"); |
|
/* Allocate memory for the rfe corss reference cache */ |
if(!(cpu_state.rfe_held_xrefs = malloc(sizeof(struct xref *) * NUM_RFE_HELD))) { |
printf("OOM\n"); |
exit(1); |
} |
cpu_state.rfe_held_xref_pos = 0; |
memset(cpu_state.rfe_held_xrefs, 0, sizeof(struct xref *) * NUM_RFE_HELD); |
|
/* Do architecture specific initialisation */ |
init_dyn_rec(); |
|
/* FIXME: Find a better place for this */ |
807,189 → 691,93
printf("Recompile engine up and running\n"); |
} |
|
/* rec_page is a physical address */ |
void recompile_page(struct dyn_page *dyn) |
/* Adds code to the opq for the instruction pointed to by addr */ |
static void recompile_insn(struct op_queue *opq, oraddr_t addr, int delay_insn) |
{ |
unsigned int j, k; |
unsigned int reg_t[NUM_T_REGS]; |
unsigned int insn_index; |
unsigned int pres_t[NUM_T_REGS]; /* Which temporary to preserve */ |
unsigned int insn_index; |
int delay_insn = 0; /* Is the next instruction to be decoded in a delay slot*/ |
enum insn_type delay_insn_type = 0; |
uint32_t insn; |
orreg_t param[3]; |
int i, j, k; |
int param_t[3]; /* Which temporary the parameters reside in */ |
int param_r[3]; /* is parameter a register */ |
orreg_t param[3]; |
int param_num; |
struct op_queue *opq = NULL; |
oraddr_t rec_addr = dyn->or_page; |
oraddr_t rec_page = dyn->or_page; |
struct x_ref *xref; |
uint32_t insn; |
int breakp; |
struct dyn_page *prev_dp; |
|
struct insn_op_struct *opd; |
|
/* The start of the next page */ |
rec_page += PAGE_LEN; |
breakp = 0; |
insn = eval_insn(addr, &breakp); |
|
printf("Recompileing page %"PRIxADDR"\n", rec_addr); |
fflush(stdout); |
/* FIXME: If a breakpoint is set at this location, insert exception code */ |
if(breakp) { |
fprintf(stderr, "FIXME: Insert breakpoint code\n"); |
} |
|
/* Mark all temporaries as not containing a register */ |
for(j = 0; j < NUM_T_REGS; j++) |
reg_t[j] = 32; /* Out-of-range registers */ |
insn_index = insn_decode(insn); |
|
dyn->delayr = -verify_memoryarea(rec_addr)->delayr; |
/* Copy over the state of the temporaries to the next opq */ |
memcpy(opq->reg_t_d, opq->reg_t, sizeof(opq->reg_t)); |
|
dyn->carrys_delay_slot = 0; |
/* Check if we have an illegal instruction */ |
if(insn_index == -1) { |
gen_l_invalid(opq, NULL, NULL, delay_insn); |
return; |
} |
|
/* Check if the previous page carries a delay slot over to this page */ |
if((prev_dp = find_dynd_page(rec_addr - PAGE_LEN))) |
delay_insn = prev_dp->carrys_delay_slot; |
/* If we are recompileing an instruction that has a delay slot and is in the |
* delay slot, ignore it. This is undefined behavour. */ |
if(delay_insn && ((or32_opcodes[insn_index].func_unit == it_jump) || |
(or32_opcodes[insn_index].func_unit == it_branch))) |
return; |
|
for(opq = cpu_state.opqs; rec_addr < rec_page; rec_addr += 4, opq = opq->next) { |
opq->num_ops = 0; |
opq->num_ops_param = 0; |
opq->jump_local = 0; |
/* figure out instruction operands */ |
for(i = 0; i < NUM_T_REGS; i++) |
pres_t[i] = 0; |
|
opq->insn_addr = rec_addr; |
param_t[0] = T_NONE; |
param_t[1] = T_NONE; |
param_t[2] = T_NONE; |
param_r[0] = 0; |
param_r[1] = 0; |
param_r[2] = 0; |
param_num = 0; |
|
breakp = 0; |
insn = eval_insn(rec_addr, &breakp); |
opd = op_start[insn_index]; |
while(1) { |
param[param_num] = eval_operand_val(insn, opd); |
|
/* FIXME: If a breakpoint is set at this location, insert exception code */ |
if(breakp) { |
fprintf(stderr, "FIXME: Insert breakpoint code\n"); |
} |
|
insn_index = insn_decode(insn); |
|
/* FIXME: Optimise this by knowing that dyn->x_refs is ordered (ie. Don't |
* call find_host_x_ref) */ |
/* Check if this location is cross referenced */ |
if((xref = find_host_x_ref(dyn->xrefs, rec_addr))) { |
/* If the x-refs reference count reached zero remove it */ |
if(xref->ref) { |
/* If the current address is cross-referenced, the temporaries shall be |
* in an undefined state, so we must assume that no registers reside in |
* them */ |
/* Ship out the current set of registers from the temporaries */ |
if(opq->prev) { |
ship_gprs_out_t(opq->prev, 1, reg_t); |
if(opd->type & OPTYPE_REG) { |
/* check which temporary the register is in, if any */ |
for(i = 0; i < NUM_T_REGS; i++) { |
if(opq->reg_t_d[i] == param[param_num]) { |
param_t[param_num] = i; |
pres_t[i] = 1; |
} |
for(j = 0; j < NUM_T_REGS; j++) |
reg_t[j] = 32; |
} else { |
/* Remove x-ref */ |
remove_xref(dyn, xref); |
} |
} |
|
memcpy(opq->reg_t, reg_t, sizeof(reg_t)); |
param_num++; |
while(!(opd->type & OPTYPE_OP)) opd++; |
if(opd->type & OPTYPE_LAST) |
break; |
opd++; |
} |
|
/* Check if we have an illegal instruction */ |
if(insn_index == -1) { |
gen_l_invalid(opq, param_t, param, delay_insn); |
if(delay_insn) { |
/* There is no need to do any jump handleing stuff as the instruction |
* will generate an exception */ |
if(opq->prev->jump_local == 2) { |
opq->prev->xref->next = cpu_state.inf_xrefs; |
cpu_state.inf_xrefs = opq->prev->xref; |
} |
opq->prev->jump_local = 0; |
delay_insn = 0; |
/* Jump instructions are special since they have a delay slot and thus they |
* need to control the exact operation sequence. Special case these, here to |
* avoid haveing loads of if(.func_unit != it_jump && != it_branch) below */ |
if((or32_opcodes[insn_index].func_unit == it_jump) || |
(or32_opcodes[insn_index].func_unit == it_branch)) { |
/* Ship the jump-to register out (if it exists). It requires special |
* handleing, which is done in gen_j_reg. */ |
for(i = 0; i < NUM_T_REGS; i++) { |
if(pres_t[i]) { |
gen_op_move_gpr_t[i][opq->reg_t_d[i]](opq->prev, 1); |
opq->reg_t_d[i] = 32; |
opq->reg_t[i] = 32; |
} |
continue; |
} |
|
/* figure out instruction operands */ |
for(j = 0; j < NUM_T_REGS; j++) |
pres_t[j] = 0; |
|
param_t[0] = T_NONE; |
param_t[1] = T_NONE; |
param_t[2] = T_NONE; |
param_r[0] = 0; |
param_r[1] = 0; |
param_r[2] = 0; |
param_num = 0; |
|
opd = op_start[insn_index]; |
while(1) { |
param[param_num] = eval_operand_val(insn, opd); |
|
if(opd->type & OPTYPE_REG) { |
/* check which temporary the register is in, if any */ |
for(j = 0; j < NUM_T_REGS; j++) { |
if(reg_t[j] == param[param_num]) { |
param_t[param_num] = j; |
pres_t[j] = 1; |
} |
} |
} |
|
param_num++; |
while(!(opd->type & OPTYPE_OP)) opd++; |
if(opd->type & OPTYPE_LAST) |
break; |
opd++; |
} |
|
opd = op_start[insn_index]; |
|
/* Before an exception takes place, all registers must be stored. */ |
if((or32_opcodes[insn_index].func_unit == it_exception)) { |
if(opq->prev) { |
ship_gprs_out_t(opq->prev, 1, reg_t); |
for(j = 0; j < NUM_T_REGS; j++) { |
opq->prev->reg_t[j] = 32; |
reg_t[j] = 32; |
} |
} |
} |
|
for(j = 0; j < param_num; j++, opd++) { |
while(!(opd->type & OPTYPE_OP)) opd++; |
if(!(opd->type & OPTYPE_REG)) |
continue; |
|
/* Never, ever, move r0 into a temporary */ |
if(!param[j]) |
continue; |
|
/* Check if this register has been moved into a temporary in a previous |
* operand */ |
for(k = 0; k < NUM_T_REGS; k++) { |
if(reg_t[k] == param[j]) { |
/* Yes, this register is already in a temporary */ |
pres_t[k] = 1; |
reg_t[k] = param[j]; |
param_t[j] = k; |
break; |
} |
} |
if(k != NUM_T_REGS) |
continue; |
|
if((param_t[j] != T_NONE)) |
continue; |
|
/* Search for an unused temporary */ |
k = find_unused_t(pres_t, reg_t); |
if(reg_t[k] < 32) { |
gen_op_move_gpr_t[k][reg_t[k]](opq->prev, 1); |
opq->reg_t[k] = 32; |
} |
pres_t[k] = 1; |
reg_t[k] = param[j]; |
param_t[j] = k; |
/* FIXME: Only generate code to move the register into a temporary if it |
* is used as a source operand */ |
gen_op_move_t_gpr[k][reg_t[k]](opq, 1); |
} |
|
/* FIXME: Do this in a more elegent way */ |
if(!strncmp(or32_opcodes[insn_index].name, "l.jal", 5)) { |
/* In the case of a l.jal instruction, make sure that LINK_REGNO is not in |
998,229 → 786,235
* after the delay slot instruction has executed and so it overwrittes the |
* `return address'. */ |
for(k = 0; k < NUM_T_REGS; k++) { |
if(reg_t[k] == LINK_REGNO) { |
if(opq->reg_t_d[k] == LINK_REGNO) { |
gen_op_move_gpr_t[k][LINK_REGNO](opq, 1); |
reg_t[k] = 32; |
opq->reg_t[k] = 32; |
opq->reg_t_d[k] = 32; |
break; |
} |
} |
} |
|
/* Store the state of the temporaries into dyn->ts */ |
dyn->ts[(rec_addr & (PAGE_LEN - 1)) / 2] = 0; |
if(reg_t[0] < 32) |
dyn->ts[(rec_addr & (PAGE_LEN - 1)) / 2] = reg_t[0]; |
if(reg_t[1] < 32) |
dyn->ts[(rec_addr & (PAGE_LEN - 1)) / 2] |= reg_t[1] << 5; |
if(reg_t[2] < 32) |
dyn->ts[(rec_addr & (PAGE_LEN - 1)) / 2] |= reg_t[2] << 10; |
/* Jump instructions don't have a disposition */ |
or32_opcodes[insn_index].exec(opq, param_t, param, delay_insn); |
|
/* To get the execution log correct for instructions like l.lwz r4,0(r4) the |
* effective address needs to be calculated before the instruction is |
* simulated */ |
if(do_stats) { |
/* Find any disposition in the instruction */ |
opd = op_start[insn_index]; |
for(j = 0; j < param_num; j++, opd++) { |
while(!(opd->type & OPTYPE_OP)) opd++; |
if(!(opd->type & OPTYPE_DIS)) |
continue; |
/* Analysis is done by the individual jump instructions */ |
/* Jump instructions don't touch runtime.sim.mem_cycles */ |
/* Jump instructions run their own scheduler */ |
return; |
} |
|
if(!param[j + 1]) |
gen_op_store_insn_ea(opq, 1, param[j]); |
else |
calc_insn_ea_table[param_t[j + 1]](opq, 1, param[j]); |
/* Before an exception takes place, all registers must be stored. */ |
if((or32_opcodes[insn_index].func_unit == it_exception)) { |
if(opq->prev) { |
ship_gprs_out_t(opq->prev, 1, opq->reg_t_d); |
for(i = 0; i < NUM_T_REGS; i++) { |
opq->reg_t_d[i] = 32; |
opq->reg_t[i] = 32; |
} |
} |
} |
|
or32_opcodes[insn_index].exec(opq, param_t, param, delay_insn); |
opd = op_start[insn_index]; |
|
/* If any sort of analysis is done, store all temporaries and run |
* analysis() */ |
if(do_stats) { |
ship_gprs_out_t(opq, 1, reg_t); |
for(j = 0; j < NUM_T_REGS; j++) |
reg_t[j] = 32; |
for(j = 0; j < param_num; j++, opd++) { |
while(!(opd->type & OPTYPE_OP)) opd++; |
if(!(opd->type & OPTYPE_REG)) |
continue; |
|
gen_op_analysis(opq, 1, insn_index, insn); |
} |
/* Never, ever, move r0 into a temporary */ |
if(!param[j]) |
continue; |
|
/* The call to join_mem_cycles() could be put into the individual operations |
* that emulate the load/store instructions, but then it would be added to |
* the cycle counter before analysis() is called, which not how the complex |
* execution modell does it. */ |
if((or32_opcodes[insn_index].func_unit == it_load) || |
(or32_opcodes[insn_index].func_unit == it_store)) |
gen_op_join_mem_cycles(opq, 1); |
|
/* If a delay sloted instruction is in the delay slot, avoid doing a jump on |
* the first delay sloted instruction. The problem with not doing the above |
* is that the 0x00000000 instruction is a jump instruction, which is used |
* for padding, and if there ends up being a jump instruction directly after |
* some padding and the code jumps to this location (as with the mmu test) |
* the jump instruction will set cpu_state.pc_delay but code will get |
* generated after the jump instruction and before the delay slot |
* instruciton to check cpu_state.pc_delay and jump out if it is set and so |
* we end up jumping out to the padding instruction. With some thought, the |
* 0x00000000 opcode could really have been encoded to some arithmetic |
* operation that would end up nop-ing (or better yet, to the l.nop 0 |
* instruction itself) */ |
/* If we came up to a page local jump and because it is the delay slot of |
* another delay sloted instruction the case below is skipped and |
* opq->prev->jump_local will remain set to 1, fix this by reseting it now*/ |
if(delay_insn && ((or32_opcodes[insn_index].func_unit == it_jump) || |
(or32_opcodes[insn_index].func_unit == it_branch))) { |
/* We don't generate code to do the relocation so there will be none. |
* Avoid haveing a reference to it */ |
/* Also remove the cross reference to it */ |
if(opq->prev) { |
if(opq->prev->jump_local == 2) { |
opq->prev->xref->next = cpu_state.inf_xrefs; |
cpu_state.inf_xrefs = opq->prev->xref; |
/* Check if this register has been moved into a temporary in a previous |
* operand */ |
for(k = 0; k < NUM_T_REGS; k++) { |
if(opq->reg_t_d[k] == param[j]) { |
/* Yes, this register is already in a temporary */ |
if(or32_opcodes[insn_index].func_unit != it_jump) { |
pres_t[k] = 1; |
param_t[j] = k; |
} |
opq->prev->jump_local = 0; |
break; |
} |
delay_insn = 0; |
} |
if(k != NUM_T_REGS) |
continue; |
|
/* In the case of an instruction in the delay slot the pc must be updated |
* before op_do_sched runs because if it so happens to generate an exception |
* it will think that we are still executeing the delay slot instruction |
* which infact we have just executed and then SPR_EPCR_BASE will end up |
* pointing to the delay slot instruction, which is wrong. If the delay |
* slot instruction is an exception instruction (l.trap/l.sys) the exception |
* must appear to have been generated in the delay slot */ |
if(delay_insn && (or32_opcodes[insn_index].func_unit != it_exception)) { |
if(xref || (delay_insn_type == it_branch)) |
gen_op_set_pc_preemt_check(opq, 1); |
else /* delay_insn_tyte == it_jump */ |
gen_op_set_pc_preemt(opq, 1); |
/* Move temporaries to their permanent storage */ |
ship_gprs_out_t(opq, 1, reg_t); |
if(param_t[j] != T_NONE) |
continue; |
|
/* Search for an unused temporary */ |
k = find_unused_t(pres_t, opq->reg_t_d); |
if(opq->reg_t_d[k] < 32) { |
/* FIXME: Only ship the temporary out if it has been used as a destination |
* register */ |
gen_op_move_gpr_t[k][opq->reg_t_d[k]](opq->prev, 1); |
opq->reg_t[k] = 32; |
opq->reg_t_d[k] = 32; |
} |
pres_t[k] = 1; |
opq->reg_t_d[k] = param[j]; |
param_t[j] = k; |
/* FIXME: Only generate code to move the register into a temporary if it |
* is used as a source operand */ |
gen_op_move_t_gpr[k][opq->reg_t_d[k]](opq, 0); |
} |
|
/* Same reason as for the above case */ |
if(or32_opcodes[insn_index].func_unit == it_exception) { |
/* FIXME: Do the instruction switch below in a more elegent way */ |
if(!strcmp(or32_opcodes[insn_index].name, "l.rfe")) { |
gen_op_set_rfe_pc(opq, 1); |
} else if(!strcmp(or32_opcodes[insn_index].name, "l.sys")) { |
gen_op_set_except_pc(opq, 1, EXCEPT_SYSCALL - 4); |
} else { /* or32_opcodes[insn_index].name == "l.trap" */ |
gen_op_set_except_pc(opq, 1, EXCEPT_TRAP - 4); |
} |
gen_op_set_ts_current(opq, 1); |
/* To get the execution log correct for instructions like l.lwz r4,0(r4) the |
* effective address needs to be calculated before the instruction is |
* simulated */ |
if(do_stats) { |
/* Find any disposition in the instruction */ |
opd = op_start[insn_index]; |
for(j = 0; j < param_num; j++, opd++) { |
while(!(opd->type & OPTYPE_OP)) opd++; |
if(!(opd->type & OPTYPE_DIS)) |
continue; |
|
if(!param[j + 1]) |
gen_op_store_insn_ea(opq, 1, param[j]); |
else |
calc_insn_ea_table[param_t[j + 1]](opq, 1, param[j]); |
} |
} |
|
or32_opcodes[insn_index].exec(opq, param_t, param, delay_insn); |
|
if(or32_opcodes[insn_index].func_unit != it_exception) { |
if(do_stats) |
gen_op_analysis(opq, 1, insn_index, insn); |
} |
|
/* The call to join_mem_cycles() could be put into the individual operations |
* that emulate the load/store instructions, but then it would be added to |
* the cycle counter before analysis() is called, which is not how the complex |
* execution model does it. */ |
if((or32_opcodes[insn_index].func_unit == it_load) || |
(or32_opcodes[insn_index].func_unit == it_store)) |
gen_op_join_mem_cycles(opq, 1); |
|
/* Delay slot instructions get a special scheduler, thus don't generate it |
* here */ |
if((or32_opcodes[insn_index].func_unit != it_exception) && !delay_insn) |
gen_op_do_sched(opq, 1); |
} |
|
/* If this is an exception instruction then we still need to perform the |
* exception */ |
if(or32_opcodes[insn_index].func_unit == it_exception) { |
/* FIXME: Do the instruction switch below in a more elegent way */ |
if(!strcmp(or32_opcodes[insn_index].name, "l.rfe")) { |
gen_op_rfe(opq, 1); |
} else if(!strcmp(or32_opcodes[insn_index].name, "l.sys")) { |
gen_op_do_except(opq, 1, EXCEPT_SYSCALL); |
} else { /* or32_opcodes[insn_index].name == "l.trap" */ |
gen_op_do_except(opq, 1, EXCEPT_TRAP); |
} |
} |
/* rec_page is a physical address */ |
void recompile_page(struct dyn_page *dyn) |
{ |
unsigned int j; |
struct op_queue *opq = cpu_state.opqs; |
oraddr_t rec_addr = dyn->or_page; |
oraddr_t rec_page = dyn->or_page; |
void **loc; |
|
/* FIXME: If the delay slot is cross referenced after we have stuck the jump |
* instruction in the operations queue we will genererate temporary-> |
* register code after the jump, which will be unreachable. This is no |
* problem as all temporaries are stored in anticipation for a jump. */ |
/* FIXME: If the delay slot is cross referenced we should generate the |
* conditional jump code as we do below. This will not happen if the delay |
* slot is cross referenced after we generate the operations for the jump */ |
/* FIXME: If the instruction in the delay slot is an exception instruction |
* the code that we generate below will be unreachable since the exception |
* instruction jumps to the exection vector */ |
/* Generate code to jump out to the proper location */ |
if(delay_insn) { |
for(j = 0; j < NUM_T_REGS; j++) |
reg_t[j] = 32; |
/* The start of the next page */ |
rec_page += PAGE_LEN; |
|
if(xref || (delay_insn_type == it_branch)) { |
/* If the delay-slot instruction is cross referenced, then we have to |
* check env->delay_insn */ |
if(opq->prev && opq->prev->jump_local) { |
gen_op_jmp_imm_check(opq, 1, 0); |
opq->prev->jump_local_loc = &opq->ops_param[opq->num_ops_param - 1]; |
} else { |
gen_op_do_jump_check(opq, 1); |
} |
} else if(delay_insn_type == it_jump) { |
gen_op_clear_delay_insn(opq, 1); |
if(opq->prev && opq->prev->jump_local) { |
/* The 0 will get patched when the page-local jumps get patched */ |
gen_op_jmp_imm(opq, 1, 0); |
/* FIXME: opq->ops_param is realloced with realloc and so we risk a |
* reallocation in which the location ends up moveing in memory */ |
opq->prev->jump_local_loc = &opq->ops_param[opq->num_ops_param - 1]; |
} else { |
gen_op_do_jump(opq, 1); |
} |
} |
delay_insn = 0; |
printf("Recompileing page %"PRIxADDR"\n", rec_addr); |
fflush(stdout); |
|
/* Mark all temporaries as not containing a register */ |
for(j = 0; j < NUM_T_REGS; j++) |
opq->reg_t[j] = 32; /* Out-of-range registers */ |
|
dyn->delayr = -verify_memoryarea(rec_addr)->delayr; |
|
opq->num_ops = 0; |
opq->num_ops_param = 0; |
|
/* Insert code to check if the first instruction is exeucted in a delay slot*/ |
gen_op_check_delay_slot(opq, 1, 0); |
recompile_insn(opq, rec_addr, 1); |
ship_gprs_out_t(opq, 1, opq->reg_t_d); |
gen_op_do_sched_delay(opq, 1); |
gen_op_clear_delay_insn(opq, 1); |
gen_op_do_jump_delay(opq, 1); |
gen_op_mark_loc(opq, 1); |
|
for(j = 0; j < NUM_T_REGS; j++) |
opq->reg_t[j] = 32; /* Out-of-range registers */ |
|
for(; rec_addr < rec_page; rec_addr += 4, opq = opq->next) { |
if(opq->prev) { |
opq->num_ops = 0; |
opq->num_ops_param = 0; |
} |
opq->jump_local = -1; |
opq->not_jump_loc = -1; |
|
/* Set flag for next instruction to be in a delay slot */ |
if((or32_opcodes[insn_index].func_unit == it_jump) || |
(or32_opcodes[insn_index].func_unit == it_branch)) { |
delay_insn = 1; |
delay_insn_type = or32_opcodes[insn_index].func_unit; |
opq->insn_addr = rec_addr; |
|
/* Check if this location is cross referenced */ |
if(opq->xref) { |
/* If the current address is cross-referenced, the temporaries shall be |
* in an undefined state, so we must assume that no registers reside in |
* them */ |
/* Ship out the current set of registers from the temporaries */ |
if(opq->prev) |
ship_gprs_out_t(opq->prev, 1, opq->reg_t); |
|
for(j = 0; j < NUM_T_REGS; j++) |
opq->reg_t[j] = 32; |
} |
} |
|
if(delay_insn) { |
dyn->carrys_delay_slot = 1; |
/* Quick hack to avoid dereferencing an uninitialised pointer below with |
* *opq->jump_local_loc */ |
if(opq->prev->jump_local == 2) { |
/* FIXME: In this case the delay slot instruction won't get executed */ |
opq->prev->xref->next = cpu_state.inf_xrefs; |
cpu_state.inf_xrefs = opq->prev->xref; |
} |
opq->prev->jump_local = 0; |
recompile_insn(opq, rec_addr, 0); |
|
/* Store the state of the temporaries */ |
memcpy(opq->next->reg_t, opq->reg_t_d, sizeof(opq->reg_t)); |
} |
|
dyn->dirty = 0; |
|
/* Store the state of the temporaries */ |
dyn->ts_bound[PAGE_LEN >> 2] = dyn->ts_during[j]; |
|
/* Ship temporaries out to the corrisponding registers */ |
ship_gprs_out_t(opq->prev, 1, reg_t); |
ship_gprs_out_t(opq->prev, 1, opq->reg_t); |
|
opq->num_ops = 0; |
opq->num_ops_param = 0; |
opq->jump_local = 0; |
opq->not_jump_loc = -1; |
opq->jump_local = -1; |
|
/* Insert code to jump to the next page */ |
gen_op_set_ts_current(opq, 1); |
gen_op_do_jump_pc(opq, 1); |
gen_op_do_jump(opq, 1); |
|
/* Generate the code */ |
gen_code(cpu_state.opqs, dyn); |
|
/* Patch the x-ref table */ |
for(xref = dyn->xrefs; xref; xref = xref->next) |
xref->dyn_addr = dyn->host_page + (unsigned int)xref->dyn_addr; |
/* Fix up the locations */ |
for(loc = dyn->locs; loc < &dyn->locs[PAGE_LEN / 4]; loc++) |
*loc += (unsigned int)dyn->host_page; |
|
cpu_state.opqs->ops_param[0] += (unsigned int)dyn->host_page; |
|
/* Search for page-local jumps */ |
for(opq = cpu_state.opqs; opq; opq = opq->next) { |
if(opq->jump_local) { |
if(opq->jump_local == 2) |
/* This cross reference was not patched above so patch it now */ |
opq->xref->dyn_addr = dyn->host_page + (unsigned int)opq->xref->dyn_addr; |
for(opq = cpu_state.opqs, j = 0; j < (PAGE_LEN / 4); opq = opq->next, j++) { |
if(opq->jump_local != -1) |
opq->ops_param[opq->jump_local] = |
(unsigned int)dyn->locs[opq->jump_local_loc >> 2]; |
|
*opq->jump_local_loc = (unsigned int)opq->xref->dyn_addr; |
if(opq->jump_local == 2) { |
/* Return the xref to the pool of infinite loop cross references */ |
opq->xref->next = cpu_state.inf_xrefs; |
cpu_state.inf_xrefs = opq->xref; |
} |
} |
if(opq->not_jump_loc != -1) |
opq->ops_param[opq->not_jump_loc] = (unsigned int)dyn->locs[j + 1]; |
|
/* Store the state of the temporaries into dyn->ts_bound */ |
dyn->ts_bound[j] = 0; |
if(opq->reg_t[0] < 32) |
dyn->ts_bound[j] = opq->reg_t[0]; |
if(opq->reg_t[1] < 32) |
dyn->ts_bound[j] |= opq->reg_t[1] << 5; |
if(opq->reg_t[2] < 32) |
dyn->ts_bound[j] |= opq->reg_t[2] << 10; |
|
dyn->ts_during[j] = 0; |
if(opq->reg_t_d[0] < 32) |
dyn->ts_during[j] = opq->reg_t_d[0]; |
if(opq->reg_t_d[1] < 32) |
dyn->ts_during[j] |= opq->reg_t_d[1] << 5; |
if(opq->reg_t_d[2] < 32) |
dyn->ts_during[j] |= opq->reg_t_d[2] << 10; |
} |
|
/* Patch the relocations */ |
1230,58 → 1024,12
/* Since eval_insn is called to get the instruction, runtime.sim.mem_cycles is |
* updated but the recompiler expectes it to start a 0, so reset it */ |
runtime.sim.mem_cycles = 0; |
|
#if 0 |
This is very usefull during debuging |
/* Count the number of infinite loop cross references (to make sure that we |
* returned them all) */ |
for(j = 0, xref = cpu_state.inf_xrefs; xref; xref = xref->next) { |
printf("Cross reference to %"PRIxADDR" is here\n", xref->or_addr); |
j++; |
} |
|
if(j != (PAGE_LEN / 4)) { |
fprintf(stderr, "Infinite loop cross references are leaked!\n"); |
fprintf(stderr, "Number in free list now: %i, meant to be: %i\n", j, PAGE_LEN / 4); |
exit(1); |
} |
#endif |
|
} |
|
/* Insert a new cross reference for or1k physical address addr into dp's
 * x-ref list, keeping the list sorted by or_addr in ascending order.
 *
 * dp   -- the dynamic page whose xrefs list receives the entry
 * addr -- the or1k physical address being cross referenced
 *
 * Returns the newly inserted x_ref node, initialised with a reference
 * count of 1.  On out-of-memory the simulator terminates, matching the
 * allocation-failure policy used elsewhere in the recompiler. */
struct x_ref *add_to_xrefs(struct dyn_page *dp, oraddr_t addr)
{
  struct x_ref *new;
  struct x_ref *cur;
  struct x_ref *prev;

  /* FIX: the original dereferenced the malloc result without checking for
   * failure; fail hard like the other x-ref allocations in this file do */
  if(!(new = malloc(sizeof(struct x_ref)))) {
    fprintf(stderr, "Out-of-memory while allocateing x-ref structures\n");
    exit(1);
  }

  new->ref = 1;
  new->or_addr = addr;

  /* Find the location to insert the address (first node with a larger
   * or_addr); prev trails cur so we can splice in before it */
  for(cur = dp->xrefs, prev = NULL; cur; prev = cur, cur = cur->next) {
    if(cur->or_addr > addr)
      break;
  }

  if(prev)
    prev->next = new;
  else
    dp->xrefs = new;
  new->next = cur;

  return new;
}
|
/* Returns non-zero if the jump is into this page, 0 otherwise */ |
static int find_jump_loc(oraddr_t j_ea, struct op_queue *opq) |
{ |
struct dyn_page *dp; |
int i; |
struct x_ref *xref = NULL; |
int *ops; |
|
/* Mark the jump as non page local if the delay slot instruction is on the |
* next page to the jump instruction. This should not be needed */ |
1294,37 → 1042,26
|
/* The jump is into the page currently undergoing dynamic recompilation */ |
|
/* FIXME: It would be great if we didn't have to do this (find_dynd...) (it is |
* already passed to recompile_page) */ |
dp = find_dynd_page(j_ea); |
|
/* Check if we have already x-refed this location */ |
if((xref = find_host_x_ref(dp->xrefs, j_ea))) { |
/* If we have already x-refed this location, don't x-ref it again */ |
if(!find_held_x_ref(dp->held_xrefs, j_ea)) { |
xref->ref++; |
add_to_held_xrefs(dp, xref); |
} |
} else { |
/* Stick this address into the page's x-ref table */ |
xref = add_to_xrefs(dp, j_ea); |
add_to_held_xrefs(dp, xref); |
} |
|
opq->xref = xref; |
|
/* If we haven't got to the location of the jump, everything is ok */ |
if(j_ea > opq->insn_addr) |
if(j_ea > opq->insn_addr) { |
/* Find the corissponding opq and mark it as cross referenced */ |
for(i = (j_ea - opq->insn_addr) / 4; i; i--) |
opq = opq->next; |
opq->xref = 1; |
return 1; |
} |
|
/* Insert temporary -> register code before the jump ea and register -> |
* temporary at the x-ref address */ |
while(opq->insn_addr > j_ea) opq = opq->prev; |
for(i = (opq->insn_addr - j_ea) / 4; i; i--) |
opq = opq->prev; |
|
if(!opq->prev) |
/* We're at the begining of a page, no need to do anything */ |
return 1; |
|
/* Found location, insert code */ |
|
ship_gprs_out_t(opq->prev, 1, opq->reg_t); |
|
for(i = 0; i < NUM_T_REGS; i++) { |
1334,21 → 1071,124
} |
} |
|
/* In the event of a page local jump that jumps backwards (l.j -4) the cross |
* reference to the target may not have existed when the jump-ed to adress was |
* recompiled and if the jump-ed to address is in the delay slot of another |
* jump instruction an op_jmp_imm_check operation must be generated and not an |
* op_jmp_imm operation */ |
for(ops = opq->ops, i = 0; i < opq->num_ops; i++, ops++) { |
if(*ops == op_jmp_imm_indx) |
*ops = op_jmp_imm_check_indx; |
else if(*ops == op_set_pc_preemt_indx) |
*ops = op_set_pc_preemt_check_indx; |
} |
opq->xref = 1; |
|
return 1; |
} |
|
/* Emit operations for an immediate jump/branch target (used by l.j, l.jal,
 * l.bf, l.bnf), including recompilation of the instruction in the delay
 * slot.  off is the raw signed word offset from the instruction; it is
 * converted to a byte offset below.  opq->insn_addr is the address of the
 * jump instruction itself. */
static void gen_j_imm(struct op_queue *opq, oraddr_t off)
{
  int jump_local;
  int i; /* NOTE(review): unused in this body -- possibly a leftover from an
          * earlier revision; verify against the repository head */
  int reg_t[NUM_T_REGS];

  /* Word offset -> byte offset */
  off <<= 2;

  /* Non-zero when the target lies within the page being recompiled */
  jump_local = find_jump_loc(opq->insn_addr + off, opq);

  /* If the delay slot instruction lives on the next page we cannot
   * recompile it inline here; record the target in pc_delay and let the
   * scheduler take over */
  if(ADDR_PAGE(opq->insn_addr) != ADDR_PAGE(opq->insn_addr + 4)) {
    gen_op_set_pc_delay_imm(opq, 1, off);
    gen_op_do_sched(opq, 1);
    return;
  }

  gen_op_set_delay_insn(opq, 1);
  gen_op_do_sched(opq, 1);

  /* Recompileing the delay slot instruction must see the temoraries being in
   * the state after the jump/branch instruction not before */
  memcpy(reg_t, opq->reg_t, sizeof(reg_t));
  memcpy(opq->reg_t, opq->reg_t_d, sizeof(reg_t));

  /* Generate the delay slot instruction */
  recompile_insn(opq, opq->insn_addr + 4, 1);

  /* Restore the pre-jump temporary state */
  memcpy(opq->reg_t, reg_t, sizeof(reg_t));

  /* Flush temporaries out to the architectural registers before jumping */
  ship_gprs_out_t(opq, 1, opq->reg_t_d);

  /* -8 appears to compensate for the jump and delay-slot instructions
   * already stepped over -- TODO confirm against gen_op_add_pc */
  gen_op_add_pc(opq, 1, (orreg_t)off - 8);
  gen_op_clear_delay_insn(opq, 1);
  gen_op_do_sched_delay(opq, 1);

  if(jump_local) {
    /* The 0 is a placeholder; recompile_page patches it via jump_local /
     * jump_local_loc once host addresses for the page are known */
    gen_op_jmp_imm(opq, 1, 0);
    /* Parameter index of the placeholder to patch */
    opq->jump_local = opq->num_ops_param - 1;
    /* Target location, relative to the start of the page */
    opq->jump_local_loc = (opq->insn_addr + (orreg_t)off) & (PAGE_LEN - 1);
  } else
    gen_op_do_jump(opq, 1);
}
|
/* Dispatch table: set_pc_delay_gpr[r] emits the operation that loads the
 * delayed program counter (pc_delay) from general purpose register r.
 * Index 0 is NULL because r0 is hardwired to zero -- callers handle that
 * case with gen_op_clear_pc_delay instead (see gen_j_reg). */
static const generic_gen_op set_pc_delay_gpr[32] = {
  NULL,
  gen_op_move_gpr1_pc_delay,
  gen_op_move_gpr2_pc_delay,
  gen_op_move_gpr3_pc_delay,
  gen_op_move_gpr4_pc_delay,
  gen_op_move_gpr5_pc_delay,
  gen_op_move_gpr6_pc_delay,
  gen_op_move_gpr7_pc_delay,
  gen_op_move_gpr8_pc_delay,
  gen_op_move_gpr9_pc_delay,
  gen_op_move_gpr10_pc_delay,
  gen_op_move_gpr11_pc_delay,
  gen_op_move_gpr12_pc_delay,
  gen_op_move_gpr13_pc_delay,
  gen_op_move_gpr14_pc_delay,
  gen_op_move_gpr15_pc_delay,
  gen_op_move_gpr16_pc_delay,
  gen_op_move_gpr17_pc_delay,
  gen_op_move_gpr18_pc_delay,
  gen_op_move_gpr19_pc_delay,
  gen_op_move_gpr20_pc_delay,
  gen_op_move_gpr21_pc_delay,
  gen_op_move_gpr22_pc_delay,
  gen_op_move_gpr23_pc_delay,
  gen_op_move_gpr24_pc_delay,
  gen_op_move_gpr25_pc_delay,
  gen_op_move_gpr26_pc_delay,
  gen_op_move_gpr27_pc_delay,
  gen_op_move_gpr28_pc_delay,
  gen_op_move_gpr29_pc_delay,
  gen_op_move_gpr30_pc_delay,
  gen_op_move_gpr31_pc_delay };
|
/* Emit operations for a register-indirect jump (l.jr / l.jalr), including
 * recompilation of the delay slot instruction.  gpr is the architectural
 * register holding the jump target; insn_index and insn are forwarded to
 * the analysis hook when instruction statistics are enabled. */
static void gen_j_reg(struct op_queue *opq, unsigned int gpr, int insn_index,
                      uint32_t insn)
{
  int i; /* NOTE(review): unused in this body -- possibly a leftover from an
          * earlier revision; verify against the repository head */
  int reg_t[NUM_T_REGS];

  if(do_stats)
    gen_op_analysis(opq, 1, insn_index, insn);

  /* r0 is hardwired to zero, so a jump through r0 clears pc_delay; any
   * other register goes through the per-register dispatch table */
  if(!gpr)
    gen_op_clear_pc_delay(opq, 1);
  else
    set_pc_delay_gpr[gpr](opq, 1);

  gen_op_do_sched(opq, 1);

  /* Recompileing the delay slot instruction must see the temoraries being in
   * the state after the jump/branch instruction not before */
  memcpy(reg_t, opq->reg_t, sizeof(reg_t));
  memcpy(opq->reg_t, opq->reg_t_d, sizeof(reg_t));

  /* Generate the delay slot instruction */
  gen_op_set_delay_insn(opq, 1);
  recompile_insn(opq, opq->insn_addr + 4, 1);

  /* Restore the pre-jump temporary state */
  memcpy(opq->reg_t, reg_t, sizeof(reg_t));

  /* Flush temporaries out to the architectural registers before jumping */
  ship_gprs_out_t(opq, 1, opq->reg_t_d);

  gen_op_set_pc_pc_delay(opq, 1);
  gen_op_clear_delay_insn(opq, 1);
  gen_op_do_sched_delay(opq, 1);

  /* Register targets take the indirect jump path -- no page-local
   * patching is set up here */
  gen_op_do_jump_delay(opq, 1);
}
|
/*------------------------------[ Operation generation for an instruction ]---*/ |
/* FIXME: Flag setting is not done in any instruction */ |
/* FIXME: Since r0 is not moved into a temporary, check all arguments below! */ |
1493,15 → 1333,64
/* Emit operations for l.bf (branch if flag set).  param[0] is the signed
 * word offset to the branch target. */
void gen_l_bf(struct op_queue *opq, int param_t[3], orreg_t param[3],
              int delay_slot)
{
  /* NOTE(review): the next two statements duplicate the logic below and
   * appear to be leftovers from an older revision interleaved by the diff
   * extraction -- verify against the repository head */
  opq->jump_local = find_jump_loc(opq->insn_addr + (orreg_t)(param[0] << 2), opq);
  gen_op_check_flag(opq, 1, param[0] << 2);
  int i;
  if(do_stats)
    gen_op_analysis(opq, 1, 3, 0x10000000 | (param[0] & 0x03ffffff));

  /* The temporaries are expected to be shiped out after the execution of the
   * branch instruction wether it branched or not */
  if(opq->prev) {
    ship_gprs_out_t(opq->prev, 1, opq->reg_t);
    for(i = 0; i < NUM_T_REGS; i++) {
      opq->reg_t[i] = 32;
      opq->reg_t_d[i] = 32;
    }
  }

  /* Delay slot is on the next page: defer to the delayed-check operation
   * and the scheduler instead of inlining the delay slot */
  if(ADDR_PAGE(opq->insn_addr) != ADDR_PAGE(opq->insn_addr + 4)) {
    gen_op_check_flag_delay(opq, 1, param[0] << 2);
    gen_op_do_sched(opq, 1);
    opq->not_jump_loc = -1;
    return;
  }

  /* The 0 is a placeholder for the fall-through address; recompile_page
   * patches it via not_jump_loc */
  gen_op_check_flag(opq, 1, 0);
  opq->not_jump_loc = opq->num_ops_param - 1;

  gen_j_imm(opq, param[0]);
}
|
/* Emit operations for l.bnf (branch if flag not set).  param[0] is the
 * signed word offset to the branch target. */
void gen_l_bnf(struct op_queue *opq, int param_t[3], orreg_t param[3],
               int delay_slot)
{
  /* NOTE(review): the next two statements duplicate the logic below and
   * appear to be leftovers from an older revision interleaved by the diff
   * extraction -- verify against the repository head */
  opq->jump_local = find_jump_loc(opq->insn_addr + (orreg_t)(param[0] << 2), opq);
  gen_op_check_not_flag(opq, 1, param[0] << 2);
  int i;
  if(do_stats)
    gen_op_analysis(opq, 1, 2, 0x0c000000 | (param[0] & 0x03ffffff));

  /* The temporaries are expected to be shiped out after the execution of the
   * branch instruction wether it branched or not */
  if(opq->prev) {
    ship_gprs_out_t(opq->prev, 1, opq->reg_t);
    for(i = 0; i < NUM_T_REGS; i++) {
      opq->reg_t[i] = 32;
      opq->reg_t_d[i] = 32;
    }
  }

  /* Delay slot is on the next page: defer to the delayed-check operation
   * and the scheduler instead of inlining the delay slot */
  if(ADDR_PAGE(opq->insn_addr) != ADDR_PAGE(opq->insn_addr + 4)) {
    gen_op_check_not_flag_delay(opq, 1, param[0] << 2);
    gen_op_do_sched(opq, 1);
    opq->not_jump_loc = -1;
    return;
  }

  /* The 0 is a placeholder for the fall-through address; recompile_page
   * patches it via not_jump_loc */
  gen_op_check_not_flag(opq, 1, 0);
  opq->not_jump_loc = opq->num_ops_param - 1;

  gen_j_imm(opq, param[0]);

  /* The temporaries don't get shiped out if the branch is not taken */
  memcpy(opq->next->reg_t, opq->reg_t, sizeof(opq->reg_t));
}
|
static const generic_gen_op l_cmov_t_table[NUM_T_REGS][NUM_T_REGS][NUM_T_REGS] = { |
1609,8 → 1498,6
void gen_l_div(struct op_queue *opq, int param_t[3], orreg_t param[3], |
int delay_slot) |
{ |
/* Cross reference this location, since an ILLEGAL exception may happen */ |
find_jump_loc(opq->insn_addr, opq); |
if(!param[2]) { |
/* There is no option. This _will_ cause an illeagal exception */ |
if(!delay_slot) |
1654,8 → 1541,6
void gen_l_divu(struct op_queue *opq, int param_t[3], orreg_t param[3], |
int delay_slot) |
{ |
/* Cross reference this location, since an ILLEGAL exception may happen */ |
find_jump_loc(opq->insn_addr, opq); |
if(!param[2]) { |
/* There is no option. This _will_ cause an illeagal exception */ |
if(!delay_slot) |
1816,62 → 1701,37
/* Emit operations for l.j (unconditional jump).  param[0] is the signed
 * word offset to the jump target. */
void gen_l_j(struct op_queue *opq, int param_t[3], orreg_t param[3],
             int delay_slot)
{
  /* NOTE(review): this first statement appears to be a leftover from an
   * older revision interleaved by the diff extraction -- verify against
   * the repository head */
  gen_op_set_pc_delay_imm(opq, 1, param[0] << 2);
  if(do_stats)
    gen_op_analysis(opq, 1, 0, param[0] & 0x03ffffff);

  /* Don't allocate a seporate x-ref structure for the infinite loop instruction
   * (l.j 0) */
  if(!param[0]) {
    /* jump_local == 2 marks an infinite-loop x-ref taken from the
     * preallocated pool (see recompile_page) */
    opq->jump_local = 2;
    opq->xref = cpu_state.inf_xrefs;
    opq->xref->or_addr = opq->insn_addr;
    cpu_state.inf_xrefs = opq->xref->next;
    return;
  }

  opq->jump_local = find_jump_loc(opq->insn_addr + (orreg_t)(param[0] << 2), opq);
  gen_j_imm(opq, param[0]);
}
|
/* Emit operations for l.jal (jump and link).  param[0] is the signed word
 * offset to the jump target; the return address is stored in the link
 * register before the jump is generated. */
void gen_l_jal(struct op_queue *opq, int param_t[3], orreg_t param[3],
               int delay_slot)
{
  /* NOTE(review): this body appears to interleave two revisions from the
   * diff extraction -- the x-ref insertion, first link-store and the
   * gen_l_j call look like old-revision leftovers duplicating the
   * statements that follow; verify against the repository head */
  /* It is highly likely that the location that was jumped to will `return'.
   * Therefore, insert a cross reference at that address */
  find_jump_loc(opq->insn_addr + 8, opq);
  /* Store the return address */
  gen_op_store_link_addr_gpr(opq, 1);

  gen_l_j(opq, param_t, param, delay_slot);
  if(do_stats)
    gen_op_analysis(opq, 1, 1, 0x04000000 | (param[0] & 0x03ffffff));

  /* Store the return address */
  gen_op_store_link_addr_gpr(opq, 1);
  gen_j_imm(opq, param[0]);
}
|
/* Dispatch table: set_pc_delay_t[t] emits the operation that loads the
 * delayed program counter (pc_delay) from temporary register t. */
static const generic_gen_op set_pc_delay_t[NUM_T_REGS] =
  { gen_op_set_pc_delay_t0, gen_op_set_pc_delay_t1, gen_op_set_pc_delay_t2 };
|
/* Emit micro-ops for the l.jr instruction (jump to register).
 * param[0] is the source GPR number; param_t[0] is the temporary currently
 * backing it. A register jump can land anywhere, so it is never page-local. */
void gen_l_jr(struct op_queue *opq, int param_t[3], orreg_t param[3], |
int delay_slot) |
{ |
/* Treat all jumps as non page-local */ |
opq->jump_local = 0; |
|
if(!param[0]) { |
/* Jump via r0 (always zero): just clear the delayed PC */
gen_op_clear_pc_delay(opq, 1); |
return; |
} |
|
/* Copy the temporary holding the target register into pc_delay, then
 * emit the generic register-jump sequence (0x14... is the l.jr encoding
 * with the register field filled in, used for analysis/stats) */
set_pc_delay_t[param_t[0]](opq, 1); |
gen_j_reg(opq, param[0], 104, 0x14000000 | (param[0] << 11)); |
} |
|
/* Emit micro-ops for the l.jalr instruction (jump-and-link to register).
 * NOTE(review): like gen_l_jal above, this body appears to mix two revisions
 * from the comparison view — it both delegates to gen_l_jr() and calls
 * gen_j_reg() directly; verify against the repository. */
void gen_l_jalr(struct op_queue *opq, int param_t[3], orreg_t param[3], |
int delay_slot) |
{ |
/* It is highly likely that the location that was jumped to will `return'. |
* Therefore, insert a cross reference at that address */ |
find_jump_loc(opq->insn_addr + 8, opq); |
|
gen_l_jr(opq, param_t, param, delay_slot); |
|
/* Store the return address */ |
gen_op_store_link_addr_gpr(opq, 1); |
|
/* 0x18... is the l.jalr encoding with the register field filled in */
gen_j_reg(opq, param[0], 105, 0x18000000 | (param[0] << 11)); |
} |
|
/* FIXME: Optimise all load instruction when the disposition == 0 */ |
2295,7 → 2155,12
/* Emit micro-ops for the l.rfe instruction (return from exception).
 * Sequence matters: prepare the RFE state, run the scheduler (cycle
 * accounting), then perform the actual jump out of recompiled code. */
void gen_l_rfe(struct op_queue *opq, int param_t[3], orreg_t param[3], |
int delay_slot) |
{ |
if(do_stats) |
gen_op_analysis(opq, 1, 12, 0x24000000); |
|
gen_op_prep_rfe(opq, 1); |
gen_op_do_sched(opq, 1); |
gen_op_do_jump(opq, 1); |
} |
|
/* FIXME: All store instructions should be optimised when the disposition = 0 */ |
2991,14 → 2856,16
/* Emit micro-ops for the l.sys instruction (system call exception).
 * param[0] is the immediate system-call number (folded into the analysis
 * word 0x20000000 | K). A delay-slot l.sys needs the alternate prep op so
 * the exception PC is computed correctly. */
void gen_l_sys(struct op_queue *opq, int param_t[3], orreg_t param[3], |
int delay_slot) |
{ |
/* Since we *know* that we *will* jump to the next instruction, insert an xref |
* there */ |
find_jump_loc(opq->insn_addr + 4, opq); |
if(do_stats) |
gen_op_analysis(opq, 1, 7, 0x20000000 | param[0]); |
|
if(!delay_slot) |
gen_op_prep_sys(opq, 1); |
else |
gen_op_prep_sys_delay(opq, 1); |
|
/* Run the scheduler before leaving recompiled code, then take the jump
 * into the exception handler */
gen_op_do_sched(opq, 1); |
gen_op_do_jump(opq, 1); |
} |
|
/* FIXME: This will not work if the l.trap is in a delay slot */ |
3005,9 → 2872,8
void gen_l_trap(struct op_queue *opq, int param_t[3], orreg_t param[3], |
int delay_slot) |
{ |
/* Since we *know* that we *will* jump to the next instruction, insert an xref |
* there */ |
find_jump_loc(opq->insn_addr + 4, opq); |
if(do_stats) |
gen_op_analysis(opq, 1, 8, 0x22000000); |
|
if(!delay_slot) |
gen_op_prep_trap(opq, 1); |
3070,9 → 2936,6
void gen_l_invalid(struct op_queue *opq, int param_t[3], orreg_t param[3], |
int delay_slot) |
{ |
/* The program running on openrisc may decide to patch this location, so |
* just cross reference this location just-in-case */ |
find_jump_loc(opq->insn_addr, opq); |
if(!delay_slot) |
gen_op_illegal(opq, 1); |
else |
/trunk/or1ksim/cpu/or32/dyn_rec.h
21,48 → 21,38
#ifndef DYN_REC_H |
#define DYN_REC_H |
|
/* Cross-reference: associates a physical OpenRISC address with its location
 * in the recompiled host code, so jumps into the middle of a page can be
 * resolved. Kept in singly-linked lists per dyn_page. */
struct x_ref { |
void *dyn_addr; /* Recompiled address (stored as an offset into the host page) */ |
oraddr_t or_addr; /* The or address of the x-ref (physical) */ |
unsigned int ref; /* How many times the x-ref is referenced */ |
struct x_ref *next; |
}; |
|
/* Each dynamically recompiled page has one of these */ |
/* NOTE(review): the field set below appears to combine both sides of the
 * revision comparison (xrefs/held_xrefs/ts vs. ts_during/ts_bound/locs);
 * confirm the actual struct layout against the repository. */
struct dyn_page { |
oraddr_t or_page; /* Physical OpenRISC address of the page */ |
void *host_page; /* Start of the recompiled code for this page */ |
unsigned int host_len; /* Length in bytes of the recompiled code */ |
int carrys_delay_slot; /* Is the delay-slot of the last insn on the next page?*/ |
int dirty; /* Is recompiled page invalid? */ |
struct x_ref *xrefs; /* what's referenced in this page */ |
struct x_ref **held_xrefs; /* The xrefs that this page holds */ |
int delayr; /* delayr of memory backing this page */ |
uint16_t ts[4096]; /* What registers the temporaries back */ |
uint16_t ts_during[2048]; /* What registers the temporaries back (during the |
* instruction) */ |
uint16_t ts_bound[2049]; /* What registers the temporaries back (on the |
* beginning boundary of the instruction) */ |
void **locs; /* Openrisc locations in the recompiled code */ |
struct dyn_page *next; |
}; |
|
/* --- Page lookup / creation --- */
struct dyn_page *find_dynd_page(oraddr_t addr); |
/* --- Cross-reference management --- */
struct x_ref *find_host_x_ref(struct x_ref *x_refs, oraddr_t addr); |
struct x_ref *add_to_xrefs(struct dyn_page *dp, oraddr_t addr); |
void recompile_page(struct dyn_page *dyn); |
struct dyn_page *new_dp(oraddr_t page); |
void add_to_held_xrefs(struct dyn_page *dp, struct x_ref *xref); |
/* --- Op-queue construction (used while recompiling a page) --- */
void add_to_opq(struct op_queue *opq, int end, int op); |
void add_to_op_params(struct op_queue *opq, int end, unsigned long param); |
/* Grow/commit host-page space; returns the (possibly moved) write cursor */
void *enough_host_page(struct dyn_page *dp, void *cur, unsigned int *len, |
unsigned int amount); |
/* Mark a page's recompiled code as invalid (self-modifying code support) */
void dirtyfy_page(struct dyn_page *dp); |
struct x_ref *find_held_x_ref(struct x_ref **held_xrefs, oraddr_t or_addr); |
/* --- Runtime entry points --- */
void init_dyn_recomp(void); |
void jump_dyn_code(oraddr_t addr); |
void dump_xrefs(struct dyn_page *dp, FILE *f); |
void run_sched_out_of_line(int add_normal); |
void recheck_immu(int got_en_dis); |
void enter_dyn_code(oraddr_t addr, struct dyn_page *dp); |
|
/* Host stack pointer saved before entering recompiled code (see or_longjmp) */
extern void *rec_stack_base; |
|
/* Flags passed to recheck_immu() describing how the IMMU state changed */
#define IMMU_GOT_ENABLED 1 |
#define IMMU_GOT_DISABLED 2 |
|
/* Size of the rfe cross-reference cache */
#define NUM_RFE_HELD 100 |
|
#endif |
/trunk/or1ksim/cpu/or32/op_t_reg_mov_op.h
47,3 → 47,10
{ |
env->reg[REG] = t2; |
} |
|
/* Template micro-op (instantiated once per REG by the including file):
 * loads GPR <REG> into the delayed program counter and flags that the next
 * instruction executes in a delay slot. Used by register jumps (l.jr/l.jalr). */
__or_dynop void glue(glue(op_move_gpr, REG), _pc_delay)(void) |
{ |
env->pc_delay = env->reg[REG]; |
env->delay_insn = 1; |
} |
|
/trunk/or1ksim/cpu/or32/dyngen.c
47,6 → 47,8
/* Prototype text emitted at the top of the dyngen-generated C file:
 * declares the two generated entry points and the index of the special
 * "mark location" pseudo-op handled inline by gen_code() (see
 * c_sw_file_head's op_mark_loc_indx case). */
static const char *gen_code_proto = |
"void gen_code(struct op_queue *opq, struct dyn_page *dp);\n" |
"void patch_relocs(struct op_queue *opq, void *host_page);\n" |
"\n" |
"#define op_mark_loc_indx 0\n" |
"\n"; |
|
static const char *c_sw_file_head = |
71,21 → 73,17
" unsigned int host_len = dp->host_len;\n" |
" void *host_cur = dp->host_page;\n" |
" oraddr_t pc = dp->or_page;\n" |
" struct x_ref *next_x_ref = dp->xrefs;\n" |
" void **loc = dp->locs;\n" |
"\n" |
" while(opq) {\n" |
" /* For now, only store offsets in the x-ref table */\n" |
" if(next_x_ref && (next_x_ref->or_addr == pc)) {\n" |
" next_x_ref->dyn_addr = (void *)(host_cur - dp->host_page);\n" |
" next_x_ref = next_x_ref->next;\n" |
" }\n" |
" if(opq->next)\n" |
" *loc++ = (void *)(host_cur - dp->host_page);" |
"\n" |
" /* Patch the dyn_addr of the xrefs for infinite loops */\n" |
" if(opq->jump_local == 2)\n" |
" opq->xref->dyn_addr = (void *)(host_cur - dp->host_page);\n" |
"\n" |
" for(i = 0, ops = opq->ops; i < opq->num_ops; i++, ops++) {\n" |
" switch(*ops) {\n"; |
" switch(*ops) {\n" |
" case op_mark_loc_indx:\n" |
" opq->ops_param[0] = host_cur - dp->host_page;\n" |
" break;\n"; |
|
static const char *c_sw_file_tail = |
" }\n" |
119,8 → 117,8
"#include \"%s\"\n" |
"\n" |
"void do_scheduler(void); /* FIXME: Remove */\n" |
"void analysis(struct iqueue_entry *current); /* FIXME: Remove */\n" |
"void do_sched_wrap(void); /* FIXME: Remove */\n" |
"void do_sched_wrap_delay(void); /* FIXME: Remove */\n" |
"void simprintf(oraddr_t stackaddr, unsigned long regparam); /* FIXME: Remove */\n" |
"\n" |
"void patch_relocs(struct op_queue *opq, void *host_page)\n" |
/trunk/or1ksim/cpu/or32/rec_i386.h
20,28 → 20,6
|
#include "common_i386.h" |
|
extern void *rec_stack_base; |
|
/* Sets the stack to a specified value */ |
/* Jumps into recompiled code at `loc', resetting %esp to rec_stack_base and
 * pointing %ebp at cpu_state. Never returns (x86 only). */
static inline void or_longjmp(void *loc) __attribute__((noreturn)); |
static inline void or_longjmp(void *loc) |
{ |
/* We push a trampoline address (dyn_ret_stack_prot) onto the stack to be able |
* to detect if any ret instructions found their way into an operation. */ |
asm("\tmovl %0, %%eax\n" |
"\tmovl %1, %%esp\n" |
"\tmovl $%2, %%ebp\n" |
"\tpush $dyn_ret_stack_prot\n" |
"\tpush $dyn_ret_stack_prot\n" |
"\tpush $dyn_ret_stack_prot\n" |
"\tjmp *%%eax\n" |
: |
: "m" (loc), |
"m" (rec_stack_base), |
"m" (cpu_state)); |
} |
|
|
/* Initialises the recompiler (architecture specific). */ |
static inline void init_dyn_rec(void) |
{ |
/trunk/or1ksim/cpu/or32/op_i386.h
25,15 → 25,12
|
/* Handles the scheduler and PC updating. Yes, using MMX is a requirement. It |
* just won't change. This must be as compact as possible */ |
/* Decrements the cycle counter kept in %mm0 (by the per-page value in %mm1)
 * and calls the scheduler wrapper once the count reaches zero or below.
 * MMX registers are used so recompiled code never has to spill them. */
static inline void handle_sched(void) |
{ |
asm("paddd %%mm1, %%mm0\n" |
"\tmovd %%mm0, %%eax\n" |
"\ttestl %%eax, %%eax\n" |
"\tjg .no_need_run_sched\n" |
"\tcall do_sched_wrap\n" |
"\t.no_need_run_sched:" : : ); |
} |
/* Macro form of the scheduler check: decrements the MMX cycle counter and
 * calls `func' when it drops to zero or below. `jmp' must be a unique label
 * name per expansion site (it becomes a local asm label), which is why this
 * is a macro rather than an inline function. */
#define HANDLE_SCHED(func, jmp) asm("paddd %%mm1, %%mm0\n" \ |
"\tmovd %%mm0, %%eax\n" \ |
"\ttestl %%eax, %%eax\n" \ |
"\tjg ." jmp "\n" \ |
"\tcall "#func"\n" \ |
"\t." jmp ":" : : ) |
|
static inline int32_t do_cycles(void) |
{ |
55,3 → 52,23
runtime.sim.mem_cycles = 0; |
} |
|
/* Jumps into recompiled code at `loc', resetting %esp to rec_stack_base and
 * pointing %ebp at cpu_state. Never returns (x86 only). */
static inline void or_longjmp(void *loc) __attribute__((noreturn)); |
static inline void or_longjmp(void *loc) |
{ |
/* We push a trampoline address (dyn_ret_stack_prot) onto the stack to be able |
* to detect if any ret instructions found their way into an operation. */ |
asm("\tmovl %0, %%eax\n" |
"\tmovl %1, %%esp\n" |
"\tmovl $%2, %%ebp\n" |
"\tpush $dyn_ret_stack_prot\n" |
"\tpush $dyn_ret_stack_prot\n" |
"\tpush $dyn_ret_stack_prot\n" |
"\tjmp *%%eax\n" |
: |
: "m" (loc), |
"m" (rec_stack_base), |
"m" (cpu_state)); |
} |
|
|
|
/trunk/or1ksim/cpu/common/execute.h
58,14 → 58,6
/* Micro operation queue. Only used to speed up recompile_page */ |
struct op_queue *opqs; |
|
/* Cross references that reference the jumps from the infinite loop |
* instruction (l.j 0) */ |
struct x_ref *inf_xrefs; |
|
/* rfe cross-reference cache */ |
struct x_ref **rfe_held_xrefs; |
unsigned int rfe_held_xref_pos; |
|
/* Set if all temporaries are stored */ |
int ts_current; |
|
/trunk/or1ksim/cpu/or1k/except.c
138,9 → 138,9
#if DYNAMIC_EXECUTION |
/* In immu_translate except_handle is called with except_handle(..., virtaddr) */ |
/* Add the immu miss delay to the cycle counter */ |
if(!immu_ex_from_insn) |
if(!immu_ex_from_insn) { |
mtspr(SPR_EPCR_BASE, get_pc() - (cpu_state.delay_insn ? 4 : 0)); |
else |
} else |
/* This exception came from an l.mtspr instruction in which case the pc |
* points to the l.mtspr instruction when in actual fact, it is the next |
* instruction that would have faulted/missed. ea is used instead of |
148,9 → 148,12
* in the delay slot of a page local jump the fault must happen on the |
* instruction that was jumped to. This is handled in recheck_immu. */ |
mtspr(SPR_EPCR_BASE, ea); |
run_sched_out_of_line(immu_ex_from_insn); |
immu_ex_from_insn = 0; |
break; |
run_sched_out_of_line(immu_ex_from_insn); |
/* Save the registers that are in the temporaries */ |
if(!cpu_state.ts_current) |
upd_reg_from_t(cpu_state.pc, !immu_ex_from_insn); |
immu_ex_from_insn = 0; |
break; |
#endif |
/* All these exceptions happen during a simulated instruction */ |
case EXCEPT_BUSERR: |
165,6 → 168,14
* function jumps out to the exception vector the scheduler would never have |
* a chance to run, therefore run it now */ |
run_sched_out_of_line(1); |
/* Save the registers that are in the temporaries */ |
if(!cpu_state.ts_current) { |
if(cpu_state.delay_insn && |
(ADDR_PAGE(cpu_state.pc) == ADDR_PAGE(cpu_state.pc - 4))) |
upd_reg_from_t(cpu_state.pc - 4, 0); |
else |
upd_reg_from_t(cpu_state.pc, 0); |
} |
#endif |
mtspr(SPR_EPCR_BASE, cpu_state.pc - (cpu_state.delay_insn ? 4 : 0)); |
break; |
188,6 → 199,10
* of the last instruction executed and not the next one, to which the pc |
* now points to */ |
cpu_state.pc -= 4; |
|
/* Save the registers that are in the temporaries */ |
if(!cpu_state.ts_current) |
upd_reg_from_t(cpu_state.pc, 1); |
#endif |
break; |
} |
199,16 → 214,12
|
/* Complex/simple execution strictly don't need this because of the |
* next_delay_insn thingy but in the dynamic execution modell that doesn't |
* exist and thus cpu_state.insn_delay would stick in the exception handler |
* exist and thus cpu_state.delay_insn would stick in the exception handler |
* causing grief if the first instruction of the exception handler is also in |
* the delay slot of the previous instruction */ |
cpu_state.delay_insn = 0; |
|
#if DYNAMIC_EXECUTION |
/* Save the registers that are in the temporaries */ |
if(!cpu_state.ts_current) |
upd_reg_from_t(cpu_state.pc); |
|
cpu_state.pc = except_vector; |
cpu_state.ts_current = 0; |
jump_dyn_code(except_vector); |