OpenCores
URL https://opencores.org/ocsvn/forwardcom/forwardcom/trunk

Subversion Repositories forwardcom

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /forwardcom/trunk
    from Rev 25 to Rev 26
    Reverse comparison

Rev 25 → Rev 26

/fetch.sv
0,0 → 1,600
//////////////////////////////////////////////////////////////////////////////////
// Engineer: Agner Fog
//
// Create Date: 2020-05-03
// Last modified: 2021-07-30
// Module Name: fetch
// Project Name: ForwardCom soft core
// Target Devices: Artix 7
// Tool Versions: Vivado v. 2020.1
// License: CERN-OHL-W v. 2 or later
// Description: Instruction fetch and unconditional jump, call, and return
//
//////////////////////////////////////////////////////////////////////////////////
`include "defines.vh"
 
// code address to jump to when reset button is pressed
parameter max_loader_size = (`MAX_LOADER_SIZE) << 2; // loader size in words
parameter code_memory_start = 2**`CODE_ADDR_START;
parameter code_memory_size = 2**(`CODE_ADDR_WIDTH+2);
//parameter code_memory_end = code_memory_start + code_memory_size;
parameter loader_start_address = code_memory_size - max_loader_size; // address of loader relative to code memory start, in bytes
 
// upper 7 bits of instruction word identifying unconditional jump or call
parameter instruction_jump_uncond = 7'b0111100; // next bit is 1 for call, 0 for jump. The rest is 24 bits signed offset
// upper 11 bits of instruction word identifying return instruction
parameter instruction_return = 11'b01110111110;
// upper 11 bits of instruction word identifying sys_return instruction
parameter instruction_sys_return = 11'b01111111110;
// upper 4 bits of any 1-word control transfer instruction
parameter instruction_jumpa = 4'b0111;
// upper 8 bits of any 2-word control transfer instruction
parameter instruction_jump2w = 8'b10101000;
// upper 8 bits of any 3-word control transfer instruction
parameter instruction_jump3w = 8'b11001000;
// bit OP1 for push and pop instructions (= 56,57)
parameter instruction_push_pop = 6'b111000;
// upper 11 bits of instruction word identifying read_perfs serializing instruction. Need M bit too
parameter instruction_read_perfs = 11'b01000100101;
 
 
// Fetch module: fetch instructions from memory or code cache
module fetch
( input clock, // system clock (100 MHz)
input clock_enable, // clock enable. Used when single-stepping
input reset, // system reset.
input restart, // restart running program
input valid_in, // valid data from code cache ready
input stall_in, // a later stage in pipeline is stalled
input jump_in, // a jump target is coming from the ALU. jump_pointer has been sent to the code cache
input nojump_in, // signal from ALU that the jump target is the next instruction
input [`CODE_ADDR_WIDTH-1:0] jump_pointer, // jump target from ALU
input [`CODE_DATA_WIDTH-1:0] read_data, // data from code cache
input [`CODE_ADDR_WIDTH-1:0] return_pop_data,// Return address popped here at return instruction
output reg [`CODE_ADDR_WIDTH-2:0] read_addr_out, // read address relative to code memory start
output reg read_enable_out, // code cache read enable
output reg valid_out, // An instruction is ready for output to decoder
output reg jump_out, // A jump instruction is bypassing the pipeline
output reg [`CODE_ADDR_WIDTH-1:0] instruction_pointer_out, // address of current instruction
output reg [95:0] instruction_out, // current instruction, up to 3 words long
output reg call_e_out, // Executing call instruction. push_data contains return address
output reg return_e_out, // Executing return instruction. return address is available in advance on pop_data
output reg stall_predict_out, // Predict that decoder will use multiple clock cycles
output reg [`CODE_ADDR_WIDTH-1:0] call_push_data_out, // Return address pushed here at call instruction
output reg [31:0] debug1_out // temporary debug output
);
 
// Efficient handling of jumps, calls, and returns:
// Unconditional jumps, calls, and returns are executed directly in the fetch unit rather
// than waiting for the instruction to go through the pipeline.
// Conditional and indirect jumps must go to the ALU. The jump target address is fed from the ALU
// directly to the code cache in order to save one clock cycle.
// Direct calls and returns are communicating directly with the call stack.
// Indirect calls are handled in both fetch unit and ALU. The return address is pushed on the
// call stack by the fecth module while the target address comes from the ALU.
// Return addresses are obtained from the call stack. It takes one clock to send a call or return
// request to the call stack and another clock to retrieve the return address from the stack.
// Therefore, it is not possible to execute a return in the first clock cycle after another
// call or return. The fetch module does not check for this because the second return is delayed
// for a clock cycle anyway to wait for the target to be fetched from the code cache.
 
parameter fetch_buffer_size = 8; // number of 32-bit words in instruction fetch buffer
 
// Name suffixes on local variables:
// 0: relates to the instruction that is currently in output registers
// 1: relates to the instruction that is being generated in the current clock cycle
// 2: relates to the instruction that will be generated in the next clock cycle
 
reg [0:fetch_buffer_size-1][31:0] fetch_buffer; // instruction buffer, (fetch_buffer_size) * 32-bit words
reg unsigned [3:0] valid_words0; // number of valid 32-bit words in fetch_buffer
logic unsigned [3:0] valid_words1; // number of valid words in fetch_buffer in next clock cycle
logic unsigned [1:0] instruction_length0; // length of current instruction, in 32-bit words
logic unsigned [1:0] instruction_length1; // length of next instruction, in 32-bit words
logic unsigned [1:0] instruction_length2; // length of 2. next instruction, in 32-bit words
logic instruction_ready0; // current instruction has been fetched
logic instruction_ready1; // instruction 1 will be dispatched in next clock cycle
 
logic [1:0] buffer_action; // 0: idle. nothing dispatched. buffer is full or waiting for data
// 1: fill buffer. nothing dispatched. new data arriving from code cache
// 2: dispatch. instruction 0 is dispatched to the pipeline. shift down data
// 3: dispatch and fill.
logic shift_out0; // instruction 0 is dispatched in this clock cycle and fetch_buffer is shifted to get the next instruction to position 0
logic unsigned [1:0] dispatch_length0; // length of dispatched instruction
logic send_next; // send an address to code cache. true if buffer is sure not to overflow in next two clocks
logic [3:0] fetch_buffer_pos; // position where to write to fetch_buffer from cache
 
logic early_jump; // jump instruction detected in instruction 1 or 2
logic conditional_jump; // a conditional or indirect jump or call detected in instruction 1. Wait for ALU to find target
logic [1:0] call_instruction; // 1: any kind of call or trap detected in instruction 1 or 2. Push return address on stack
// 2: return or system return instruction detected. pop return address from stack
logic unsigned [`CODE_ADDR_WIDTH-1:0] early_jump_addr; // target address for early jump
reg unsigned [`CODE_ADDR_WIDTH:0] jump_target; // save jump target address. may be calculated here for unconditional jump, or input from ALU for conditional jump
logic unsigned [`CODE_ADDR_WIDTH:0] reset_target;// Address of loader or restart code
reg restart_underway; // remember restarting is in process
 
logic unsigned [`CODE_ADDR_WIDTH-1:0] return_addr; // return address after call instruction
logic [31:0] word1; // first word of instruction 1
logic unsigned [`CODE_ADDR_WIDTH-1:0] instruction_pointer1; // address of instruction 1
 
reg [3:0] jump_case; // for debug display only. may be removed
// It takes two clock cycles to fetch data from the code cache: one clock to send an address to
// the code cache, and one clock to send the data from the code cache.
// The following three shift registers are keeping track for the data that is underway:
// next_underway is tracking sequential code, target_underway is tracking jump targets,
// and wait_for_target tells that we are waiting for a jump target to be calculated and fetched.
 
reg [1:0] next_underway; // target_underway is a shift register indicating that code words are underway from the code cache
// next_underway is shifted right with zero extension
// next_underway[0]: data arrived from code cache
// next_underway[1]: next address has been sent to code cache
 
reg [2:0] target_underway; // target_underway is a shift register indicating that a jump target is underway:
// target_underway is shifted right with zero extension
// 100: system reset
// 010: wait for target to be fetched from code cache
// 001: target code is inserted in fetch_buffer. Clear wait_for_target
 
reg wait_for_target; // wait_for_target indicates that an unconditional jump, call, or return
// is waiting for the target to be fetched from the code cache
 
reg wait_for_jump; // wait_for_jump indicates that a conditional or indirect jump or call
// has been dispatched and is waiting for the ALU to deliver the target address
 
 
// Analyze the status of fetch_buffer:
always_comb begin
 
// if (restart == 0): Start address is loader address
// if (restart == 1): Start address is restart address = loader address + 1
reset_target = {loader_start_address >> 3, (restart | restart_underway)};
// Find length and position of instruction 0
if (valid_words0 > 0) begin
instruction_length0 = fetch_buffer[0][31] ? fetch_buffer[0][31:30] : 2'b01; // the length of instruction 0
// instruction 0 is ready if all words belonging to the instruction are fetched.
instruction_ready0 = (valid_words0 >= instruction_length0) && !target_underway[0] && !wait_for_target;
shift_out0 = instruction_ready0 & !stall_in & !reset & (!wait_for_jump | nojump_in); // instruction 0 will be dispatched in this clock cycle
end else begin
// First instruction has not been fetched yet
instruction_length0 = 0;
instruction_ready0 = 0;
shift_out0 = 0;
end
// number of words dispatched
if (shift_out0)
dispatch_length0 = instruction_length0;
else
dispatch_length0 = 0;
// check if we can fill the buffer
if ((target_underway[0] | early_jump | jump_in) & valid_in) begin // overwrite buffer with new jump target
send_next = 1;
fetch_buffer_pos = 0;
end else begin
if (shift_out0) begin
fetch_buffer_pos = valid_words0 - instruction_length0;
end else begin
fetch_buffer_pos = valid_words0;
end
// determine whether we will fetch the next doubleword from the code cache.
// maybe this can be tweaked a little better, but make sure the fetch buffer cannot overflow in case of stalls
if (next_underway[0] & valid_in & next_underway[1]) begin
send_next = fetch_buffer_pos < fetch_buffer_size - 6;
end else if ((next_underway[0] & valid_in) | next_underway[1]) begin
send_next = fetch_buffer_pos < fetch_buffer_size - 4;
end else begin
send_next = fetch_buffer_pos < fetch_buffer_size - 2;
end
end
 
buffer_action[0] = (next_underway[0] | target_underway[0]) & valid_in; // fill buffer
buffer_action[1] = shift_out0; // instruction 0 dispatched. shift down buffer
 
// predict if the next instruction, i.e. instruction 1, will be ready in next clock cycle
if (target_underway[0] & valid_in) begin
if (jump_target[0])
valid_words1 = 1; // jumping to an odd address. use only the upper half of read_data
else
valid_words1 = 2; // jumping to even address. use 64 bits read_data
end else if (wait_for_target) begin
valid_words1 = 0;
end else begin
if (next_underway[0] & valid_in)
valid_words1 = valid_words0 - dispatch_length0 + 2;
else
valid_words1 = valid_words0 - dispatch_length0;
end
// Find first word of instruction 1 for the sake of early jump detection and predecoding.
// (Here, I am shortening the critical path
// valid_words0 -> instruction_length0 -> instruction_ready0 -> shift_out0 -> dispatch_length0
// -> valid_words1 -> word1 -> instruction_length1 -> early_jump_addr -> instruction_pointer_out
// by postponing "if (valid_words1 != 0)")
if (target_underway[0] && valid_in) begin // get instruction1 from jump target
if (jump_target[0]) begin
word1 = read_data[63:32]; // jumping to odd address
end else begin
word1 = read_data[31:0];
end
instruction_pointer1 = jump_target;
instruction_length1 = word1[31] ? word1[31:30] : 2'b01; // length of second instruction
end else if (valid_words0 > instruction_length0) begin // instruction 1 is already in buffer
word1 = fetch_buffer[instruction_length0];
instruction_pointer1 = instruction_pointer_out + instruction_length0;
instruction_length1 = word1[31] ? word1[31:30] : 2'b01; // length of second instruction
end else if (valid_words0 == instruction_length0) begin // instruction 1 is going into buffer in this clock cycle
word1 = read_data[31:0];
instruction_pointer1 = instruction_pointer_out + instruction_length0;
instruction_length1 = word1[31] ? word1[31:30] : 2'b01; // length of second instruction
 
end else if (valid_words0 > 0) begin // instruction 1 is partially in buffer
word1 = fetch_buffer[0];
instruction_pointer1 = instruction_pointer_out;
instruction_length1 = word1[31] ? word1[31:30] : 2'b01; // length of second instruction
end else begin
word1 = 0;
instruction_pointer1 = 0; //64'HXXXXXXXXXXXXXXXX;
instruction_length1 = 3; // indicate not ready
end
 
 
// Look for jump, call, and return instructions in instruction 1
// in order to fetch target as early as possible.
// This is done in the following way:
// Unconditional jumps, calls, and returns are handled as early as possible in order
// to fetch early from the target address and thereby save time. However,
// we have to check if there is a preceding jump or call in a preceding position in
// fetch_buffer before we execute a jump, call, or return in position 2.
// Conditional and indirect jumps are detected when they are in position 0 in fetch_buffer,
// and we have to wait for the ALU to find the target address.
// Indirect calls are are also detected when they are in position 0 in fetch_buffer:
// the return address is pushed on the call stack while we wait for the ALU to find the target address.
// The following variables tell what we have found here:
// early_jump: An unconditional jump, call, or return detected in position 1 or 2.
// conditional_jump: A conditional or indirect jump or call is detected. Wait for ALU to find target
// call_instruction: 1: any kind of call detected. Push return address on stack
// 2: a return or sys_return instruction detected. Pop return address from stack
conditional_jump = 0;
early_jump = 0;
early_jump_addr = 0;
call_instruction = 0;
return_addr = 0;
instruction_ready1 = (valid_words1 >= instruction_length1) & !reset && (!wait_for_jump | nojump_in); // instruction 1 will be dispatched in next clock cycle
//valid_out <= valid_words1 >= instruction_length1 & !reset && !early_jump & target_underway[2:1] == 0 & (!wait_for_jump | nojump_in);
 
if (valid_words1 != 0 && word1[31:28] == instruction_jumpa) begin
// Any single-word control transfer instruction is underway
if ((word1[31:25] == instruction_jump_uncond) & !stall_in & (!wait_for_jump | nojump_in)) begin
// unconditional jump or call instruction found in instruction 1
early_jump = 1;
early_jump_addr = $signed(word1[23:0]) + instruction_pointer1 + 1; // add 24-bit signed offset to address of end of instruction
call_instruction = word1[24]; // 0: unconditional jump, 1: direct call
return_addr = instruction_pointer1 + instruction_length1; // return address for call instruction
end else if ((word1[31:21] == instruction_return || word1[31:21] == instruction_sys_return) & !stall_in & (!wait_for_jump | nojump_in)) begin
// a return instruction is found in the first instruction
early_jump = 1;
early_jump_addr = return_pop_data; // get return address from call stack
call_instruction = 2; // 2 means return instruction
return_addr = 0;
end else if ((word1[`OP1] == `IJ_JUMP_INDIRECT_MEM+1 || word1[`OP1] == `IJ_JUMP_RELATIVE+1 || word1[`OP1] == `IJ_SYSCALL) & !stall_in & (!wait_for_jump | nojump_in)) begin
// an indirect call or system call instruction is found in the first instruction
early_jump = 0;
early_jump_addr = 0;
return_addr = instruction_pointer1 + instruction_length1; // return address to push on call stack
conditional_jump = 1; // this instruction must go the the ALU
if (word1[`OP1] == `IJ_TRAP && word1[`MODE] == 7) begin
// Trap or breakpoint in format 1.7C (IJ_TRAP == IJ_SYSCALL)
// The breakpoint instruction should not push a return address on the call stack as long
// as it only activates single step mode without calling any interrupt service routine.
// Note: this code must be changed if any traps or trap instructions go to an interrupt
// service routine that ends with a return or a system return.
// Setting call_instruction to 1 here will make the next return instruction fail if the
// trap does not end with a return.
call_instruction = 0;
end else begin
// All other indirect call and system call instructions
call_instruction = 1;
end
end else begin
// other conditional or indirect jump instruction found in instruction 1
early_jump = 0;
early_jump_addr = 0;
call_instruction = 0;
conditional_jump = 1; // this instruction must go the the ALU
return_addr = 0;
end
 
end else if (valid_words1 > 1 && word1[31:24] == instruction_jump2w) begin
// any double-word jump or call instruction found in the instruction 1
early_jump = 0;
early_jump_addr = 0;
conditional_jump = 1; // this instruction must go the the ALU
if (word1[5:0] == `IJ_JUMP_INDIRECT_MEM + 1 // indirect call
|| word1[5:0] == `IJ_JUMP_RELATIVE + 1 // call with relative pointer
|| word1[5:0] == `IJ_SYSCALL // system call
|| word1[`OP1] == 7 // system call
) begin
call_instruction = !stall_in & (!wait_for_jump | nojump_in); // push return address on stack
return_addr = instruction_pointer1 + instruction_length1;
end else begin
call_instruction = 0;
return_addr = 0;
end
end else if (valid_words1 > 2 && word1[31:24] == instruction_jump3w) begin
// any triple-word jump or call instruction found in first instruction
early_jump = 0;
early_jump_addr = 0;
conditional_jump = 1; // this instruction must go the the ALU
if (word1[5:0] == `IJ_JUMP_INDIRECT_MEM+1 // 64-bit call
|| word1[5:0] == `IJ_SYSCALL // system call
) begin
call_instruction = !stall_in & (!wait_for_jump | nojump_in); // push return address on stack
return_addr = instruction_pointer1 + instruction_length1;
end else begin
call_instruction = 0;
return_addr = 0;
end
end else if (valid_words1 != 0 && word1[31:21] == instruction_read_perfs && word1[`M]) begin
// the serializing instruction read_perfs must flush the pipeline.
// Use the conditional jump mechanism for this, and give a nojump_in when ready to resume feeding the pipeline
conditional_jump = 1; // serializing instruction read_perfs
end
end
 
 
// Generate code for all possible inputs to each word in fetch_buffer.
// The current instruction is removed, and the rest of fetch_buffer is shifted down to make space for next 2 words of code
// Data from the code cache are inserted into the first vacant space of fetch_buffer
genvar i;
generate
// generation loop for each word in fetch_buffer
for (i = 0; i < fetch_buffer_size; i++) begin
always_ff @(posedge clock) if (clock_enable) begin
 
if (i < fetch_buffer_pos && buffer_action[1]) begin
// instruction 0 is being dispatched. shift down
fetch_buffer[i][31:0] <= fetch_buffer[i+instruction_length0][31:0];
end else if (i == fetch_buffer_pos && buffer_action[0]) begin
// load first word
if (target_underway[0] & jump_target[0]) begin
// jumping to an odd address. use only upper half of read_data
fetch_buffer[i][31:0] <= read_data[63:32];
end else begin
// load first word
fetch_buffer[i][31:0] <= read_data[31:0];
end
end else if (i == fetch_buffer_pos + 1 && buffer_action[0]) begin
// load second word
fetch_buffer[i][31:0] <= read_data[63:32];
end
end
end
endgenerate
 
 
// Calculate read_addr and instruction_pointer in next clock cycle
// The shift registers named target_underway and wait_for_target indicate if we are waiting for a jump target
always_ff @(posedge clock) if (clock_enable) begin
 
valid_words0 <= valid_words1;
read_enable_out <= send_next;
if (!stall_in) begin
// send instruction to the decoder
valid_out <= instruction_ready1 && !early_jump;
 
// Unconditional jumps are bypassing the pipeline
jump_out <= early_jump;
end else if (instruction_ready1 && !early_jump) begin
// Turn valid_out on, but not off, when there is stall_in.
// This is necessary if there is a stall one instruction before a fast jump,
// causing the jump bubble to be filled. Otherwise, it skips the first instruction after the jump
valid_out <= 1;
end
jump_case <= 0;
if (reset) begin
// reset button pressed
if (restart) restart_underway <= 1;
next_underway <= 2'b00;
target_underway <= 3'b100;
wait_for_target <= 1;
wait_for_jump <= 0;
jump_target <= reset_target;
read_addr_out <= reset_target >> 1;
instruction_pointer_out <= reset_target;
valid_words0 <= 0;
read_enable_out <= 0;
valid_out <= 0;
jump_out <= 0;
end else if (target_underway[2]) begin
// first clock after reset
jump_case <= 1;
next_underway <= 2'b00;
target_underway <= {1'b0,target_underway[2:1]}; // shift right to indicate when jump target arrives
wait_for_target <= 1; // skip all instructions until jump target arrives
instruction_pointer_out <= reset_target;
jump_target <= reset_target;
read_addr_out <= reset_target >> 1;
end else if (early_jump) begin
// unconditional jump detected in instruction 1
jump_case <= 2;
next_underway <= 2'b00;
target_underway <= 3'b010; // wait 2 clock cycles for target
read_addr_out <= early_jump_addr >> 1;
jump_target <= early_jump_addr;
restart_underway <= 0;
if (!stall_in) begin
wait_for_target <= 1; // skip all instructions until jump target arrives
wait_for_jump <= 0;
instruction_pointer_out <= early_jump_addr;
end
 
end else if (conditional_jump && (instruction_ready1 & !stall_in || shift_out0)) begin
// conditional jump detected in instruction 1
jump_case <= 3;
next_underway <= {send_next,next_underway[1]}; // shift right to indicate when data arrives
target_underway <= 3'b000; // wait 2 clock cycles for target
// read address is two words ahead because reading takes 2 clock cycles
if (send_next) begin
read_addr_out <= read_addr_out + 1;
end
wait_for_jump <= 1; // wait for jump target address from ALU
jump_target <= 0;
wait_for_target <= 0;
if (shift_out0) begin
// point to next instruction
instruction_pointer_out <= instruction_pointer_out + instruction_length0;
end
/*if (!stall_in) begin
jump_target <= 0;
wait_for_target <= 0;
end*/
end else if (target_underway[0] & valid_in) begin
// a jump target has arrived from code cache. (ignore any subsequent jump instructions)
restart_underway <= 0;
jump_case <= 4;
next_underway <= {send_next, next_underway[1]}; // shift right to indicate when data arrives
wait_for_target <= 0; // stop waiting for jump target
target_underway <= 3'b000;
read_addr_out <= read_addr_out + 1;
if (!stall_in) begin
instruction_pointer_out <= jump_target; // set address of current instruction
end
end else if (jump_in & wait_for_jump & valid_words1 >= instruction_length1) begin
// a conditional or indirect jump instruction has been executed in ALU
// the ALU has sent the target address directly to the code cache to save one clock cycle
//next_underway <= 2'b00;
restart_underway <= 0;
jump_case <= 5;
next_underway <= {send_next, next_underway[1]}; // shift right to indicate when data arrives
target_underway <= 3'b001; // wait one clock cycle for target
if (!stall_in) begin
wait_for_jump <= 0;
read_addr_out <= (jump_pointer >> 1) + 1;
wait_for_target <= 1;
jump_target <= jump_pointer;
instruction_pointer_out <= jump_pointer;
end
 
end else if (nojump_in & wait_for_jump) begin
// a conditional or indirect jump instruction has been executed in ALU
// and the target is the next instruction
//next_underway <= {send_next,next_underway[1]}; // shift right to indicate when data arrives
restart_underway <= 0;
jump_case <= 6;
next_underway <= {send_next, next_underway[1]}; // shift right to indicate when data arrives
target_underway <= 3'b000; // wait two clock cycles for target
wait_for_target <= 0;
wait_for_jump <= 0;
if (send_next) begin
read_addr_out <= read_addr_out + 1;
end
// if (!stall_in) begin
if (shift_out0) begin
instruction_pointer_out <= instruction_pointer_out + instruction_length0;
end
end else begin
// no new jump instruction
restart_underway <= 0;
jump_case <= 7;
next_underway <= {send_next,next_underway[1]}; // shift right to indicate when data arrives
target_underway <= {1'b0,target_underway[2:1]}; // shift right to indicate when jump target arrives
// make ready for next read. Least significant address bit ignored because data bus is double size
// read address is two words ahead because reading takes 2 clock cycles
if (send_next) begin
read_addr_out <= read_addr_out + 1;
end
if (shift_out0) begin
// point to next instruction
instruction_pointer_out <= instruction_pointer_out + instruction_length0;
end
end
// communicate with call stack as soon as a call or return instruction is detected.
// checking !target_underway[0] && !wait_for_target[0] to avoid seding the call_e_out
// or return_e_out multiple times
if (reset || target_underway[2:1] != 0) begin
call_e_out <= 0;
return_e_out <= 0;
call_push_data_out <= 0;
end else if (call_instruction == 1) begin
call_e_out <= 1;
return_e_out <= 0;
call_push_data_out <= return_addr;
end else if (call_instruction == 2) begin
return_e_out <= 1;
call_e_out <= 0;
call_push_data_out <= 0;
end else begin
call_e_out <= 0;
call_push_data_out <= 0;
return_e_out <= 0;
end
// predict that decoder will use multiple clock cycles for push and pop instructions
if (valid_words1 != 0 && word1[`IL] == 2'b01 && (word1[`MODE] == 3'b011 || (word1[`MODE] == 3'b00 && word1[`M]))
&& word1[`OP1] >> 1 == instruction_push_pop >> 1 && shift_out0) begin
stall_predict_out <= 1; // mode = 1.3 or 1.8, op1 = 56 or 57
end else begin
stall_predict_out <= 0;
end
// collect various signals for debugging purpose
debug1_out[0] <= early_jump;
debug1_out[1] <= conditional_jump;
debug1_out[3] <= stall_in;
debug1_out[6:4] <= valid_words1[2:0];
debug1_out[7] <= instruction_ready1;
debug1_out[8] <= buffer_action[0]; // fill buffer
debug1_out[9] <= buffer_action[1]; // shift_out0;
debug1_out[11:10]<= dispatch_length0;
debug1_out[15:12]<= fetch_buffer_pos;
debug1_out[16] <= send_next;
debug1_out[17] <= instruction_ready0;
debug1_out[18] <= nojump_in;
debug1_out[19] <= jump_in;
end
// register variables are assigned to avoid an extra clock delay:
assign debug1_out[21:20] = next_underway;
assign debug1_out[23:22] = target_underway[1:0];
assign debug1_out[27:24] = jump_case; // jump handling case
assign debug1_out[28] = wait_for_target;
assign debug1_out[29] = wait_for_jump;
assign debug1_out[31] = valid_out;
 
 
// output instruction, 1-3 words
assign instruction_out[31:0] = fetch_buffer[0][31:0];
assign instruction_out[63:32] = fetch_buffer[1][31:0];
assign instruction_out[95:64] = fetch_buffer[2][31:0];
 
endmodule

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.