OpenCores
URL https://opencores.org/ocsvn/oops/oops/trunk

Subversion Repositories oops

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /oops
    from Rev 1 to Rev 2
    Reverse comparison

Rev 1 → Rev 2

/trunk/rtl/free_list.v
0,0 → 1,128
//////////////////////////////////////////////////////////////////
// //
// OoOPs Core Register Free List module //
// //
// This file is part of the OoOPs project //
// http://www.opencores.org/project,oops //
// //
// Description: //
// The free list is a circular FIFO used to keep track of free //
// physical registers that can be allocated to new instructions//
// New tags are allocated from the head and freed tags are //
// written to the tail of the FIFO. //
// The head pointer+1 is passed along with branches so that //
// The FIFO state can be recovered after a misprediction. //
// //
// Note: MULT/DIV instructions will require two tags since //
// they update both HI and LO registers. //
// //
// Author(s): //
// - Joshua Smith, smjoshua@umich.edu //
// //
//////////////////////////////////////////////////////////////////
// //
// Copyright (C) 2012 Authors and OPENCORES.ORG //
// //
// This source file may be used and distributed without //
// restriction provided that this copyright statement is not //
// removed from the file and that any derivative work contains //
// the original copyright notice and the associated disclaimer. //
// //
// This source file is free software; you can redistribute it //
// and/or modify it under the terms of the GNU Lesser General //
// Public License as published by the Free Software Foundation; //
// either version 2.1 of the License, or (at your option) any //
// later version. //
// //
// This source is distributed in the hope that it will be //
// useful, but WITHOUT ANY WARRANTY; without even the implied //
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR //
// PURPOSE. See the GNU Lesser General Public License for more //
// details. //
// //
// You should have received a copy of the GNU Lesser General //
// Public License along with this source; if not, download it //
// from http://www.opencores.org/lgpl.shtml //
// //
//////////////////////////////////////////////////////////////////
`include "ooops_defs.v"
 
// TODO: consider making this a bit-vector-based free list to save on
// flop usage. With tag FIFO, we have (`ARCH_REGS+`ROB_SZ)*`TAG_SZ flops
// just for the storage.
// free_list: circular FIFO of free physical-register tags.
//
// Ports (behavior as implemented below):
//   clk, rst               - forwarded to every MDFFLR instance.
//   ds1_dest_wr            - pop request; advances the head pointer.
//   rob_pipe_flush         - reloads the head pointer from rob_ds_fl_head_ptr.
//   rob_ds_fl_head_ptr     - head pointer value restored on rob_pipe_flush.
//   rob_ds_ret_valid,
//   rob_ds_ret_dest_write  - ANDed together to form the push request.
//   rob_ds_ret_tag_old     - tag written into the entry at the tail on a push.
//   ds1_dest_tag           - registered copy of the tag at the head entry.
//   ds1_fl_head_ptr        - registered copy of (head pointer + 1), wrapped.
//
// NOTE(review): MDFFLR is defined outside this file; from its use here the
// positional ports appear to be (clk, rst, load-enable, reset value, D, Q).
// Confirm against the flop library.
module free_list (
  input  wire                   clk,
  input  wire                   rst,
  input  wire                   ds1_dest_wr,

  input  wire                   rob_pipe_flush,
  input  wire [`FL_PTR_SZ-1:0]  rob_ds_fl_head_ptr,
  input  wire                   rob_ds_ret_valid,
  input  wire                   rob_ds_ret_dest_write,
  input  wire [`TAG_SZ-1:0]     rob_ds_ret_tag_old,

  output wire [`TAG_SZ-1:0]     ds1_dest_tag,
  output wire [`FL_PTR_SZ-1:0]  ds1_fl_head_ptr
);

  // Internal wires/regs
  wire [`TAG_SZ-1:0]     tag_list    [`FL_SZ-1:0];
  wire [`TAG_SZ-1:0]     tag_list_in [`FL_SZ-1:0];
  wire [`FL_SZ-1:0]      tag_list_ld;
  wire [`FL_PTR_SZ-1:0]  head_ptr;
  wire [`FL_PTR_SZ-1:0]  head_ptr_p1;
  wire [`FL_PTR_SZ-1:0]  head_ptr_p2;
  wire [`FL_PTR_SZ-1:0]  head_ptr_in;
  wire [`FL_PTR_SZ-1:0]  tail_ptr;
  wire [`FL_PTR_SZ-1:0]  tail_ptr_p1;
  wire [`FL_PTR_SZ-1:0]  tail_ptr_in;
  wire                   pop;
  wire                   push;
  wire [`FL_PTR_SZ-1:0]  rob_ds_fl_head_ptr_p1;
  wire [`TAG_SZ-1:0]     ds1_dest_tag_in;
  wire [`FL_PTR_SZ-1:0]  ds1_fl_head_ptr_in;

  // Handle output generation
  // For timing, make dest_tag and fl_head_ptr available from a flop.
  // TODO: verify corner cases such as free list becomes empty (so next
  // head_ptr is tail_ptr) and instruction retiring.
  // Disabled bypass kept from the original for reference:
  //   (head_ptr_p1 == tail_ptr) & push ? rob_ds_ret_tag_old :
  assign ds1_dest_tag_in = rob_pipe_flush ? tag_list[rob_ds_fl_head_ptr] :
                                            tag_list[head_ptr_p1]; // ds1_dest_wr case

  assign rob_ds_fl_head_ptr_p1 = (rob_ds_fl_head_ptr == `FL_SZ-1) ? {`FL_PTR_SZ{1'b0}}
                                                                   : rob_ds_fl_head_ptr + `FL_PTR_SZ'h1;
  assign ds1_fl_head_ptr_in    = rob_pipe_flush ? rob_ds_fl_head_ptr_p1 : head_ptr_p2;

  MDFFLR #(`TAG_SZ)    ds1_dest_tag_ff    (clk, rst, pop | rob_pipe_flush, `ARCH_REGS,   ds1_dest_tag_in,    ds1_dest_tag);
  MDFFLR #(`FL_PTR_SZ) ds1_fl_head_ptr_ff (clk, rst, pop | rob_pipe_flush, `ARCH_REGS+1, ds1_fl_head_ptr_in, ds1_fl_head_ptr);

  // Handle updating head/tail pointers
  assign pop         = ds1_dest_wr;
  assign push        = rob_ds_ret_valid & rob_ds_ret_dest_write;
  assign head_ptr_p1 = (head_ptr == `FL_SZ-1) ? {`FL_PTR_SZ{1'b0}} : head_ptr + `FL_PTR_SZ'h1;
  // head_ptr + 2 with wrap-around. Derived from head_ptr_p1 so that both
  // wrap cases are covered: head_ptr == FL_SZ-2 gives 0 and
  // head_ptr == FL_SZ-1 gives 1. (The previous form only special-cased
  // FL_SZ-2 and produced FL_SZ+1 when head_ptr was FL_SZ-1.)
  assign head_ptr_p2 = (head_ptr_p1 == `FL_SZ-1) ? {`FL_PTR_SZ{1'b0}} : head_ptr_p1 + `FL_PTR_SZ'h1;
  assign tail_ptr_p1 = (tail_ptr == `FL_SZ-1) ? {`FL_PTR_SZ{1'b0}} : tail_ptr + `FL_PTR_SZ'h1;
  assign head_ptr_in = (rob_pipe_flush) ? rob_ds_fl_head_ptr : head_ptr_p1;
  assign tail_ptr_in = tail_ptr_p1;

  // Initialize head pointer to `ARCH_REGS because architected registers will
  // be allocated out of reset.
  MDFFLR #(`FL_PTR_SZ) head_ptr_ff (clk, rst, pop | rob_pipe_flush, `ARCH_REGS,         head_ptr_in, head_ptr);
  MDFFLR #(`FL_PTR_SZ) tail_ptr_ff (clk, rst, push,                 {`FL_PTR_SZ{1'b0}}, tail_ptr_in, tail_ptr);

  // Handle updating list
  // Reset list so that physical registers beyond 33 are initialized into free list.
  // One-hot load enable: only the entry at tail_ptr is written on a push.
  assign tag_list_ld = (push << tail_ptr);

  genvar g;
  generate
    for (g=0; g<`FL_SZ; g=g+1) begin: fl_gen
      // Every entry sees the same write data; tag_list_ld picks the entry.
      assign tag_list_in[g] = rob_ds_ret_tag_old;
      if (g < `ARCH_REGS)
        // Entries below `ARCH_REGS reset to tag 0.
        MDFFLR #(`TAG_SZ) tag_list_ff (clk, rst, tag_list_ld[g], `TAG_SZ'h0,     tag_list_in[g], tag_list[g]);
      else
        // Remaining entries reset to their own index as the tag.
        MDFFLR #(`TAG_SZ) tag_list_ff (clk, rst, tag_list_ld[g], g[`TAG_SZ-1:0], tag_list_in[g], tag_list[g]);
    end
  endgenerate

endmodule
trunk/rtl/free_list.v Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: trunk/rtl/icache_ctl.v =================================================================== --- trunk/rtl/icache_ctl.v (nonexistent) +++ trunk/rtl/icache_ctl.v (revision 2) @@ -0,0 +1,220 @@ +////////////////////////////////////////////////////////////////// +// // +// OoOPs Core Instruction Cache Control module // +// // +// This file is part of the OoOPs project // +// http://www.opencores.org/project,oops // +// // +// Description: // +// Controller for Instruction Cache. Block takes requests from// +// the IF stage, handles the inputs to the cache RAMs, detects // +// cache hits, and generates bus requests if the cache misses. // +// The controller is only capable of handling one outstanding // +// miss and does no prefetching. // +// // +// Author(s): // +// - Joshua Smith, smjoshua@umich.edu // +// // +////////////////////////////////////////////////////////////////// +// // +// Copyright (C) 2012 Authors and OPENCORES.ORG // +// // +// This source file may be used and distributed without // +// restriction provided that this copyright statement is not // +// removed from the file and that any derivative work contains // +// the original copyright notice and the associated disclaimer. // +// // +// This source file is free software; you can redistribute it // +// and/or modify it under the terms of the GNU Lesser General // +// Public License as published by the Free Software Foundation; // +// either version 2.1 of the License, or (at your option) any // +// later version. // +// // +// This source is distributed in the hope that it will be // +// useful, but WITHOUT ANY WARRANTY; without even the implied // +// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // +// PURPOSE. See the GNU Lesser General Public License for more // +// details. 
//
// //
// You should have received a copy of the GNU Lesser General //
// Public License along with this source; if not, download it //
// from http://www.opencores.org/lgpl.shtml //
// //
//////////////////////////////////////////////////////////////////
`include "ooops_defs.v"

// icache_ctl: instruction cache controller.
//
// Drives the address/write ports of the tag and data RAMs, compares the tag
// read back against r_if_ic_fpc, and walks a small state machine
// (INIT -> IDLE -> REQ -> WAIT -> IDLE) that issues one bus request per miss.
// When USE_IC is not defined, the RAM ports are tied off and every fetch is
// served from bus2ic_data.
//
// NOTE(review): MDFFR / MDFFLR are defined outside this file; from their use
// here the positional ports appear to be (clk, rst, reset value, D, Q) and
// (clk, rst, load-enable, reset value, D, Q). Confirm against the flop library.
module icache_ctl (
  input  wire                      clk,
  input  wire                      rst,
  input  wire                      rob_pipe_flush,

  // Coprocessor interface (for IC enable)
  input  wire                      cp0_ic_enable,

  // IF interface
  input  wire                      if_ic_req,
  input  wire [`ADDR_SZ-1:0]       if_ic_fpc,
  input  wire [`ADDR_SZ-1:0]       r_if_ic_fpc,
  output wire [`INSTR_SZ-1:0]      ic_if_data,
  output wire                      ic_if_data_valid,
  output wire                      ic_if_ready,

  // Interface to cache memories
  input  wire [`IC_TAGRAM_SZ-1:0]  ic_tagram_data,
  input  wire [`IC_LINE_SZ-1:0]    ic_dataram_data,
  output wire [`IC_LINE_SZ-1:0]    ic_dataram_wr_data,
  output wire [`IC_SI_SZ-1:0]      ic_dataram_addr,
  output wire                      ic_dataram_wren,
  output wire [`IC_TAGRAM_SZ-1:0]  ic_tagram_wr_data,
  output wire [`IC_SI_SZ-1:0]      ic_tagram_addr,
  output wire                      ic_tagram_wren,

  // Memory interface
  output wire                      ic2bus_req,
  output wire [`ADDR_SZ-1:0]       ic2bus_fpc,
  input  wire                      bus2ic_valid,
  input  wire [`SYS_BUS_SZ-1:0]    bus2ic_data
  );

  // State encoding. IC_STATE_WR_RAM is declared but never entered by the
  // state machine below; it is kept so the parameter list is unchanged.
  parameter IC_STATE_IDLE   = 3'h0,
            IC_STATE_REQ    = 3'h1,
            IC_STATE_WAIT   = 3'h2,
            IC_STATE_WR_RAM = 3'h3,
            IC_STATE_INIT   = 3'h4;

  // Internal wires/regs
  wire                   ic_cache_hit;
  wire                   ic_tag_match;
  wire                   ic_tag_valid;
  wire [`IC_TAG_SZ-1:0]  ic_tag;
  wire                   r_if_ic_req;

  wire [2:0]             ic_state;
  reg  [2:0]             ic_nstate;
  wire [`IC_SI]          ic_init_ctr;
  wire [`IC_SI]          ic_init_nctr;
  wire                   ic_do_init;
  wire                   ic_init_done;
  wire                   ic_initialized;
  wire                   ic_initialized_ld;

  // Latch req signal so we can correctly assert "hit" and also request to memory
  // on a cache miss.
  MDFFR #(1) r_if_ic_req_ff (clk, rst, 1'b0, if_ic_req, r_if_ic_req);

  // If we get a cache miss and then a flush happens, need to make sure that instruction
  // coming back isn't sent down the pipe.
  // Set by a flush while the state machine is not IDLE; held until bus2ic_valid.
  wire rob_pipe_flush_seen, rob_pipe_flush_seen_in;
  assign rob_pipe_flush_seen_in = rob_pipe_flush_seen ? ~bus2ic_valid
                                                      : rob_pipe_flush & (ic_state != IC_STATE_IDLE);
  MDFFR #(1) rob_pipe_flush_seen_ff (clk, rst, 1'b0, rob_pipe_flush_seen_in, rob_pipe_flush_seen);

  // Handle interface to Data and Tag SRAMs
  `ifdef USE_IC
  assign ic_do_init         = (ic_state == IC_STATE_INIT);
  assign ic_dataram_wren    = ic_do_init || (bus2ic_valid & !rob_pipe_flush_seen);
  assign ic_dataram_wr_data = ic_do_init ? {`IC_LINE_SZ{1'b0}} : bus2ic_data;
  assign ic_dataram_addr    = (ic_state == IC_STATE_INIT) ? ic_init_ctr[`IC_SI] :
                              (ic_state == IC_STATE_WAIT) ? ic2bus_fpc[`IC_SI]  :
                                                            if_ic_fpc[`IC_SI];

  assign ic_tagram_wren     = ic_do_init || (bus2ic_valid & !rob_pipe_flush_seen);
  // Fill data is {valid=1, dirty=0, tag}; init writes all zeros.
  assign ic_tagram_wr_data  = ic_do_init ? {`IC_TAGRAM_SZ{1'b0}} : {1'b1, 1'b0, ic2bus_fpc[`IC_TAG]};
  assign ic_tagram_addr     = (ic_state == IC_STATE_INIT) ? ic_init_ctr[`IC_SI] :
                              (ic_state == IC_STATE_WAIT) ? ic2bus_fpc[`IC_SI]  :
                                                            if_ic_fpc[`IC_SI];
  `else
  // If not including Icache, then just zero cache inputs out
  assign ic_do_init         = 1'b0;
  assign ic_dataram_wren    = 1'b0;
  assign ic_dataram_wr_data = {`IC_LINE_SZ{1'b0}};
  assign ic_dataram_addr    = {`IC_SI_SZ{1'b0}};

  assign ic_tagram_wren     = 1'b0;
  // Width matches the ic_tagram_wr_data port (was sized with IC_LINE_SZ).
  assign ic_tagram_wr_data  = {`IC_TAGRAM_SZ{1'b0}};
  assign ic_tagram_addr     = {`IC_SI_SZ{1'b0}};
  `endif

  // Handle tag comparison and IF interface
  // Note: ic_if_ready just means we've initialized SRAMs. This will block
  // IF requests and stall the pipeline on startup.
  `ifdef USE_IC
  assign ic_tag           = ic_tagram_data[`IC_TAGRAM_TAG];
  assign ic_tag_valid     = ic_tagram_data[`IC_TAGRAM_VLD];
  assign ic_tag_match     = (ic_tag == r_if_ic_fpc[`IC_TAG]);
  assign ic_cache_hit     = ic_tag_match & (r_if_ic_req & ic_tag_valid & cp0_ic_enable & ic_initialized & !rob_pipe_flush_seen);
  assign ic_if_ready      = ic_initialized & (ic_nstate == IC_STATE_IDLE);
  // Bit 2 of the fetch PC selects which 32-bit half of the line is returned.
  assign ic_if_data       = r_if_ic_fpc[2] ? ic_dataram_data[63:32] : ic_dataram_data[31:0];
  assign ic_if_data_valid = ic_cache_hit;
  `else
  // If not including Icache, then need to force everything as a cache miss and only return bus2ic_data
  assign ic_tag           = {`IC_TAG_SZ{1'b0}};
  assign ic_tag_valid     = 1'b0;
  assign ic_tag_match     = 1'b0;
  assign ic_cache_hit     = bus2ic_valid & !rob_pipe_flush_seen;
  assign ic_if_ready      = 1'b1; // No need to initialize cache
  assign ic_if_data       = r_if_ic_fpc[2] ? bus2ic_data[63:32] : bus2ic_data[31:0];
  assign ic_if_data_valid = ic_cache_hit;
  `endif

  // Icache state machine
  always @*
    case (ic_state)
      // From the IDLE state
      //   move to req if we detect a miss
      IC_STATE_IDLE: begin
        if (r_if_ic_req & !ic_cache_hit)
          ic_nstate = IC_STATE_REQ;
        else
          ic_nstate = IC_STATE_IDLE;
      end

      // In the REQ state we send the request to memory for the needed data.
      // Then transition to WAIT state to wait for memory response
      // TODO: Need to stall here if arbiter doesn't accept our request
      IC_STATE_REQ: begin
        ic_nstate = IC_STATE_WAIT;
      end

      // In the WAIT state we wait for memory response. The RAM write enables
      // are asserted directly from bus2ic_valid, so we return straight to IDLE.
      IC_STATE_WAIT: begin
        if (bus2ic_valid) ic_nstate = IC_STATE_IDLE;
        else              ic_nstate = IC_STATE_WAIT;
      end

      // From the INIT state, we initialize each line of the cache to invalid
      // and transition to IDLE after the last line has been written.
      // ic_init_done is used (rather than comparing the counter here) so that
      // a build without USE_IC, where the counter never advances, still leaves
      // INIT; with USE_IC the condition is the same as the counter compare.
      IC_STATE_INIT: begin
        if (ic_init_done)
          ic_nstate = IC_STATE_IDLE;
        else
          ic_nstate = IC_STATE_INIT;
      end

      default: ic_nstate = IC_STATE_IDLE;
    endcase

  // For the initialization, just loop through every set in cache and write it as invalid. When done
  // set ic_initialized.
  `ifdef USE_IC
  assign ic_init_done      = (ic_state == IC_STATE_INIT) & (ic_init_ctr == `IC_NUM_LINES-1);
  assign ic_initialized_ld = ic_init_done;
  assign ic_init_nctr      = ic_init_ctr + `IC_SI_SZ'h1;
  `else
  assign ic_init_done      = 1'b1;
  assign ic_initialized_ld = 1'b0;
  assign ic_init_nctr      = {`IC_SI_SZ{1'b0}};
  `endif

  MDFFR  #(3)         ic_state_ff       (clk, rst, IC_STATE_INIT, ic_nstate, ic_state);
  MDFFLR #(`IC_SI_SZ) ic_init_ctr_ff    (clk, rst, (ic_state == IC_STATE_INIT), `IC_SI_SZ'h0, ic_init_nctr, ic_init_ctr);
  MDFFLR #(1)         ic_initialized_ff (clk, rst, ic_initialized_ld, 1'b0, 1'b1, ic_initialized);

  // Handle memory request outputs
  // Note that we request from the bus the cycle after detecting a miss, so
  // if_ic_fpc should already have been corrected back to the missing address.
  assign ic2bus_fpc = if_ic_fpc;
  assign ic2bus_req = (ic_state == IC_STATE_REQ);

endmodule
trunk/rtl/icache_ctl.v Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: trunk/rtl/icache_top.v =================================================================== --- trunk/rtl/icache_top.v (nonexistent) +++ trunk/rtl/icache_top.v (revision 2) @@ -0,0 +1,127 @@ +////////////////////////////////////////////////////////////////// +// // +// OoOPs Core Instruction Cache module // +// // +// This file is part of the OoOPs project // +// http://www.opencores.org/project,oops // +// // +// Description: // +// Top-level module for Instruction Cache block. This includes// +// the instantiation of the data and tag RAMs as well as the // +// cache controller logic. // +// // +// Author(s): // +// - Joshua Smith, smjoshua@umich.edu // +// // +////////////////////////////////////////////////////////////////// +// // +// Copyright (C) 2012 Authors and OPENCORES.ORG // +// // +// This source file may be used and distributed without // +// restriction provided that this copyright statement is not // +// removed from the file and that any derivative work contains // +// the original copyright notice and the associated disclaimer. // +// // +// This source file is free software; you can redistribute it // +// and/or modify it under the terms of the GNU Lesser General // +// Public License as published by the Free Software Foundation; // +// either version 2.1 of the License, or (at your option) any // +// later version. // +// // +// This source is distributed in the hope that it will be // +// useful, but WITHOUT ANY WARRANTY; without even the implied // +// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // +// PURPOSE. See the GNU Lesser General Public License for more // +// details. 
//
// //
// You should have received a copy of the GNU Lesser General //
// Public License along with this source; if not, download it //
// from http://www.opencores.org/lgpl.shtml //
// //
//////////////////////////////////////////////////////////////////
`include "ooops_defs.v"

// icache_top: structural wrapper for the instruction cache.
//
// Instantiates the controller (icache_ctl0) and, when USE_IC is defined, the
// data RAM (d0) and tag RAM (t0). Without USE_IC the RAM read buses are tied
// to zero.
//
// NOTE(review): the outputs ic_if_cache_hit and ic_if_cache_miss are declared
// in the port list but nothing in this module drives them.
module icache_top (
  input  wire                   clk,
  input  wire                   rst,
  input  wire                   rob_pipe_flush,

  // Coprocessor interface
  input  wire                   cp0_ic_enable,

  // IF interface
  input  wire                   if_ic_req,
  input  wire [`ADDR_SZ-1:0]    if_ic_fpc,
  input  wire [`ADDR_SZ-1:0]    r_if_ic_fpc,
  output wire [`INSTR_SZ-1:0]   ic_if_data,
  output wire                   ic_if_data_valid,
  output wire                   ic_if_cache_hit,
  output wire                   ic_if_cache_miss,
  output wire                   ic_if_ready,

  // Memory interface
  output wire                   ic2bus_req,
  output wire [`ADDR_SZ-1:0]    ic2bus_fpc,
  input  wire                   bus2ic_valid,
  input  wire [`SYS_BUS_SZ-1:0] bus2ic_data
  );

  // Tag RAM port wires
  wire [`IC_SI_SZ-1:0]     ic_tagram_addr;
  wire                     ic_tagram_wren;
  wire [`IC_TAGRAM_SZ-1:0] ic_tagram_wr_data;
  wire [`IC_TAGRAM_SZ-1:0] ic_tagram_data;

  // Data RAM port wires
  wire [`IC_SI_SZ-1:0]     ic_dataram_addr;
  wire                     ic_dataram_wren;
  wire [`IC_LINE_SZ-1:0]   ic_dataram_wr_data;
  wire [`IC_LINE_SZ-1:0]   ic_dataram_data;

  // Cache controller
  icache_ctl icache_ctl0 (
    // Clock, reset, flush, enable
    .clk                (clk),
    .rst                (rst),
    .rob_pipe_flush     (rob_pipe_flush),
    .cp0_ic_enable      (cp0_ic_enable),

    // IF stage
    .if_ic_req          (if_ic_req),
    .if_ic_fpc          (if_ic_fpc),
    .r_if_ic_fpc        (r_if_ic_fpc),
    .ic_if_data         (ic_if_data),
    .ic_if_data_valid   (ic_if_data_valid),
    .ic_if_ready        (ic_if_ready),

    // Tag RAM
    .ic_tagram_addr     (ic_tagram_addr),
    .ic_tagram_wren     (ic_tagram_wren),
    .ic_tagram_wr_data  (ic_tagram_wr_data),
    .ic_tagram_data     (ic_tagram_data),

    // Data RAM
    .ic_dataram_addr    (ic_dataram_addr),
    .ic_dataram_wren    (ic_dataram_wren),
    .ic_dataram_wr_data (ic_dataram_wr_data),
    .ic_dataram_data    (ic_dataram_data),

    // System bus
    .ic2bus_req         (ic2bus_req),
    .ic2bus_fpc         (ic2bus_fpc),
    .bus2ic_valid       (bus2ic_valid),
    .bus2ic_data        (bus2ic_data)
  );

  // Cache storage: real RAMs when USE_IC is defined, constant zero otherwise.
  `ifdef USE_IC
  sp_sram #(.DW(`IC_TAGRAM_SZ), .IW(`IC_SI_SZ)) t0 (
    .clk  (clk),
    .addr (ic_tagram_addr),
    .wren (ic_tagram_wren),
    .din  (ic_tagram_wr_data),
    .dout (ic_tagram_data)
  );
  sp_sram #(.DW(`IC_LINE_SZ), .IW(`IC_SI_SZ)) d0 (
    .clk  (clk),
    .addr (ic_dataram_addr),
    .wren (ic_dataram_wren),
    .din  (ic_dataram_wr_data),
    .dout (ic_dataram_data)
  );
  `else
  assign ic_tagram_data  = {`IC_TAGRAM_SZ{1'b0}};
  assign ic_dataram_data = {`IC_LINE_SZ{1'b0}};
  `endif

endmodule
trunk/rtl/icache_top.v Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: trunk/rtl/id_stage.v =================================================================== --- trunk/rtl/id_stage.v (nonexistent) +++ trunk/rtl/id_stage.v (revision 2) @@ -0,0 +1,190 @@ +////////////////////////////////////////////////////////////////// +// // +// OoOPs Core Instruction Decode module // +// // +// This file is part of the OoOPs project // +// http://www.opencores.org/project,oops // +// // +// Description: // +// Handles basic decoding of instruction type and register // +// sources and destinations for Dispatch stages. // +// We could do full instruction decoding in this stage, but // +// to save on pipeline flops we will only decode what is needed// +// for dispatch. We can use the issue stage to do necessary // +// decoding for each functional unit. // +// // +// Author(s): // +// - Joshua Smith, smjoshua@umich.edu // +// // +////////////////////////////////////////////////////////////////// +// // +// Copyright (C) 2012 Authors and OPENCORES.ORG // +// // +// This source file may be used and distributed without // +// restriction provided that this copyright statement is not // +// removed from the file and that any derivative work contains // +// the original copyright notice and the associated disclaimer. // +// // +// This source file is free software; you can redistribute it // +// and/or modify it under the terms of the GNU Lesser General // +// Public License as published by the Free Software Foundation; // +// either version 2.1 of the License, or (at your option) any // +// later version. // +// // +// This source is distributed in the hope that it will be // +// useful, but WITHOUT ANY WARRANTY; without even the implied // +// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // +// PURPOSE. See the GNU Lesser General Public License for more // +// details. 
//
// //
// You should have received a copy of the GNU Lesser General //
// Public License along with this source; if not, download it //
// from http://www.opencores.org/lgpl.shtml //
// //
//////////////////////////////////////////////////////////////////
`include "ooops_defs.v"

// id_stage: partial instruction decode for the Dispatch stages.
//
// Classifies the fetched instruction (branch / load-store / mult-div / ALU /
// coprocessor move), works out which register operands are needed and whether
// a destination is written, tracks the branch delay slot, and registers the
// result toward DS1.
//
// NOTE(review): MDFFR / MDFFLR / MDFFL are defined outside this file; from
// their use here the positional ports appear to be (clk, rst, reset value,
// D, Q), (clk, rst, load-enable, reset value, D, Q) and (clk, load-enable,
// D, Q). Confirm against the flop library.
module id_stage (
  input  wire                   clk,
  input  wire                   rst,

  // Flush/stall interface
  input  wire                   rob_pipe_flush,
  input  wire                   ds_stall,

  // Interface to IF stage
  input  wire                   if_id_valid,
  input  wire [`INSTR_SZ-1:0]   if_id_instr,
  input  wire [`ADDR_SZ-1:0]    if_id_fpc,
  input  wire [`BP_SZ-1:0]      if_id_bprd_info,

  // Interface to Dispatch stage
  output wire                   id_ds1_valid,
  output wire [`ADDR_SZ-1:0]    id_ds1_fpc,
  output wire                   id_ds1_in_dly_slot,
  output wire [`DEC_BUS_SZ-1:0] id_ds1_dec_bus,
  output wire [`BP_SZ-1:0]      id_ds1_bprd_info
  );

  // Internal wires
  wire                    id_type_br;
  wire                    id_type_ldst;
  wire                    id_type_multdiv;
  wire                    id_type_alu;
  wire [`DEC_BUS_SZ-1:0]  id_dec_bus;
  wire                    id_rs_need;
  wire                    id_rt_need;
  wire                    id_rd_wr;     // was declared as id_rd_write but used as id_rd_wr
  wire [`REG_IDX_SZ-1:0]  id_rs_idx;
  wire [`REG_IDX_SZ-1:0]  id_rt_idx;
  wire [`REG_IDX_SZ-1:0]  id_rd_idx;

  // Handle stalling indications
  wire id_stall = ds_stall;

  // Determine basic instruction type
  // SPECIAL opcode (bits 31:26 all zero): operation is selected by bits 5:0.
  wire id_instr_special = ~(|if_id_instr[31:26]);

  // MULT/DIV
  wire id_mult = (id_instr_special & if_id_instr[5:1]==5'b01100);
  wire id_div  = (id_instr_special & if_id_instr[5:1]==5'b01101);
  assign id_type_multdiv = id_mult | id_div;

  // LDST
  wire id_mem_ld = (if_id_instr[31:29]==3'b100) && (~if_id_instr[27] | (if_id_instr[28:26]==3'b011));
  wire id_mem_st = (if_id_instr[31:28]==4'b1010) && (if_id_instr[27:26]!=2'b10);
  assign id_type_ldst = id_mem_ld || id_mem_st;

  // Branch
  wire id_br_beq = (if_id_instr[31:27]==5'b00010);
  // REGIMM: rt field (20:16) is 00000 BLTZ, 00001 BGEZ, 10000 BLTZAL,
  // 10001 BGEZAL. Bits 19:17 must be zero; bit 16 selects LTZ vs GEZ and
  // bit 20 selects the AND-LINK form. (The previous decode required bits
  // 20:17 to be zero and then tested bits 17 and 20, which could never match.)
  wire id_br_bge = (if_id_instr[31:26]==6'b000001) && (if_id_instr[19:17]==3'b000);
  wire id_br_bgt = (if_id_instr[31:27]==5'b00011) && (if_id_instr[20:16]==5'b00000);
  wire id_br_neg = (id_br_beq &&  if_id_instr[26]) ||  // BNE
                   (id_br_bge && !if_id_instr[16]) ||  // BLTZ
                   (id_br_bgt && !if_id_instr[26]);    // BLEZ
  wire id_br_j   = (if_id_instr[31:27]==5'b00001);
  wire id_br_jr  = id_instr_special && (if_id_instr[5:1]==5'b00100);
  // J and JAL differ only in opcode bit 26 (bit 16 is part of the jump target).
  wire id_br_link     = (id_br_bge && if_id_instr[20]) ||  // BGEZAL, BLTZAL
                        (id_br_j   && if_id_instr[26]);    // JAL
  wire id_br_link_reg = (id_br_jr && if_id_instr[0]);      // JALR
  wire id_except  = id_instr_special && (if_id_instr[5:1]==5'b00110);
  //wire id_break   = id_except && if_id_instr[0];
  wire id_syscall = id_except && !if_id_instr[0];
  assign id_type_br = (id_br_beq | id_br_bge | id_br_bgt | id_br_j | id_br_jr | id_except);

  // ALU
  wire id_alu_shift     = id_instr_special && (if_id_instr[5:3]==3'b000);
  wire id_alu_shift_imm = id_alu_shift & !if_id_instr[2];
  wire id_alu_cmp       = (id_instr_special && if_id_instr[5:1]==5'b10101) ||  // SLT/SLTU
                          (if_id_instr[31:27]==5'b00101);                      // SLTI/SLTIU
  wire id_alu_log_reg   = id_instr_special && (if_id_instr[5:2]==4'b1001);
  wire id_alu_log_imm   = (if_id_instr[31:28]==4'b0011) && !(&if_id_instr[27:26]);
  wire id_hilo_mov      = id_instr_special & (if_id_instr[5:2]==4'b0100);
  wire id_mfhi          = id_hilo_mov & (if_id_instr[1:0]==2'b00);
  wire id_mflo          = id_hilo_mov & (if_id_instr[1:0]==2'b10);
  wire id_mthi          = id_hilo_mov & (if_id_instr[1:0]==2'b01);
  wire id_mtlo          = id_hilo_mov & (if_id_instr[1:0]==2'b11);
  wire id_alu_add_sub   = (id_instr_special && (if_id_instr[5:2]==4'b1000)) ||  // ADD/SUB Reg
                          (if_id_instr[31:27]==5'b00100);                       // ADDI
  wire id_alu_lui       = (if_id_instr[31:26]==6'b001111);
  // Immediate-form ALU op: opcode 001xxx (ADDI..LUI).
  // NOTE(review): this signal was referenced but never defined in the
  // original; the definition is taken from the MIPS opcode map - confirm it
  // matches the intended meaning.
  wire id_alu_imm       = (if_id_instr[31:29]==3'b001);

  // Coprocessor ops included
  wire id_cp_op = (if_id_instr[31:26]==6'b010000) && !(|if_id_instr[25:24]) && !(|if_id_instr[22:21]);
  wire id_cp_to = id_cp_op && if_id_instr[23];
  //wire id_cp_num = if_id_instr[27:26];

  assign id_type_alu = id_alu_shift | id_alu_cmp | id_alu_log_reg | id_alu_log_imm |
                       id_type_br | id_hilo_mov | id_alu_add_sub | id_alu_lui | id_cp_op;

  // Raw register fields, zero-extended to `REG_IDX_SZ (indices 32/33 are HI/LO).
  // NOTE(review): id_reg_{s,t,d}_idx_pre were referenced but never defined in
  // the original. rs/rt use the standard field positions. The destination
  // pre-select (r31 for linking branches/jumps, rd field for SPECIAL
  // encodings, rt field otherwise) is a reconstruction - confirm against the
  // rename/dispatch logic that consumes DEC_REG_D_IDX.
  wire [`REG_IDX_SZ-1:0] id_reg_s_idx_pre  = {{(`REG_IDX_SZ-5){1'b0}}, if_id_instr[25:21]};
  wire [`REG_IDX_SZ-1:0] id_reg_t_idx_pre  = {{(`REG_IDX_SZ-5){1'b0}}, if_id_instr[20:16]};
  wire [`REG_IDX_SZ-1:0] id_reg_rd_fld     = {{(`REG_IDX_SZ-5){1'b0}}, if_id_instr[15:11]};
  wire [`REG_IDX_SZ-1:0] id_reg_d_idx_pre  = id_br_link       ? `REG_IDX_SZ'd31 :
                                             id_instr_special ? id_reg_rd_fld   :
                                                                id_reg_t_idx_pre;

  // Determine register indices
  // Figure out whether operands require register values.
  // This is so we know whether to stall for forwarded data for an operand
  // For ALU operations, all but LUI need reg_s. Immediate instructions don't need reg_t
  assign id_rs_need = (id_type_alu & !id_alu_lui & !id_br_j) |
                      (id_alu_shift & !id_alu_shift_imm) |
                      (id_div | id_mult) |
                      (id_type_ldst);
  assign id_rt_need = (id_type_alu & !id_alu_imm) | id_type_multdiv | id_br_beq | id_cp_to | id_mem_st;
  // Stores, non-linking branches/jumps, moves to coprocessor and SYSCALL do
  // not write a destination. JALR (id_br_link_reg) links into a register, so
  // it counts as a linking branch here; it was previously computed but unused.
  assign id_rd_wr   = !(id_mem_st | (id_type_br & !(id_br_link | id_br_link_reg)) | id_cp_to | id_syscall);

  // Handle moves to/from HI and LO
  assign id_rs_idx = id_mfhi ? `REG_IDX_SZ'd32 :
                     id_mflo ? `REG_IDX_SZ'd33 : id_reg_s_idx_pre;
  assign id_rt_idx = id_reg_t_idx_pre;
  assign id_rd_idx = id_mthi ? `REG_IDX_SZ'd32 :
                     id_mtlo ? `REG_IDX_SZ'd33 : id_reg_d_idx_pre;

  // Determine if instructions are in a delay slot
  // This is needed by the ROB in case a branch is mispredicted so we know not to flush the delay instruction.
  // id_in_dly_slot_r is the registered "previous accepted instruction was a
  // branch" state; id_in_dly_slot qualifies it with if_id_valid. The original
  // used one name for both, which made the wire drive itself.
  wire id_in_dly_slot_r;
  wire id_in_dly_slot     = if_id_valid & id_in_dly_slot_r;
  wire id_in_dly_slot_set = !id_stall & (if_id_valid & id_type_br);
  wire id_in_dly_slot_rst = !id_stall & id_in_dly_slot;
  wire id_in_dly_slot_in  = (id_in_dly_slot_set | id_in_dly_slot_r) & !id_in_dly_slot_rst;
  MDFFR #(1) id_in_dly_slot_ff (clk, rst, 1'b0, id_in_dly_slot_in, id_in_dly_slot_r);

  // Put together decode bus
  assign id_dec_bus[`DEC_REG_D_IDX]    = id_rd_idx;
  assign id_dec_bus[`DEC_REG_T_IDX]    = id_rt_idx;
  assign id_dec_bus[`DEC_REG_S_IDX]    = id_rs_idx;
  assign id_dec_bus[`DEC_REG_D_WR]     = id_rd_wr;
  assign id_dec_bus[`DEC_REG_T_NEED]   = id_rt_need;
  assign id_dec_bus[`DEC_REG_S_NEED]   = id_rs_need;
  assign id_dec_bus[`DEC_TYPE_CP]      = id_cp_op;
  assign id_dec_bus[`DEC_TYPE_BR]      = id_type_br;
  assign id_dec_bus[`DEC_TYPE_LDST]    = id_type_ldst;
  assign id_dec_bus[`DEC_TYPE_MULTDIV] = id_type_multdiv;
  assign id_dec_bus[`DEC_TYPE_ALU]     = id_type_alu;

  // A flush squashes the instruction currently being decoded.
  wire id_valid = if_id_valid & !rob_pipe_flush;

  // Flop outputs to DS stage
  // NOTE(review): only the valid flop is gated by !id_stall; the payload
  // flops load whenever if_id_valid is set - confirm IF holds its outputs
  // while ds_stall is asserted.
  MDFFLR #(1)           id_ds1_valid_ff       (clk, rst, !id_stall, 1'b0, id_valid, id_ds1_valid);
  MDFFL  #(`ADDR_SZ)    id_ds1_fpc_ff         (clk, if_id_valid, if_id_fpc, id_ds1_fpc);
  MDFFLR #(1)           id_ds1_in_dly_slot_ff (clk, rst, if_id_valid, 1'b0, id_in_dly_slot, id_ds1_in_dly_slot);
  MDFFL  #(`DEC_BUS_SZ) id_ds1_dec_bus_ff     (clk, if_id_valid, id_dec_bus, id_ds1_dec_bus);
  MDFFL  #(`BP_SZ)      id_ds1_bprd_info_ff   (clk, if_id_valid, if_id_bprd_info, id_ds1_bprd_info);

endmodule
trunk/rtl/id_stage.v Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: trunk/rtl/ooops_defs.v =================================================================== --- trunk/rtl/ooops_defs.v (nonexistent) +++ trunk/rtl/ooops_defs.v (revision 2) @@ -0,0 +1,244 @@ +/* + Josh Smith + + File: oops_defs.v + Description: File for the global defines +*/ +`timescale 1ns/10ps +`define SD #1 + +// Common field widths +`define ADDR_SZ 32 // Address width/size +`define INSTR_SZ 32 // Instruction width/size +`define DATA_SZ 32 // Data width/size +`define IMM_SZ 16 // Immediate width/size + +// ROB defines +`define ROB_ENTRIES 8 // Size of ReorderBuffer +`define ROB_PTR_SZ 4 // Size of ROB ptr (1 extra bit for full/empty detection) + +// Register file and map table/free list defines +`define ARCH_REGS 34 // GPR 0-31, HI/LO +`define REG_IDX_SZ 6 // Architected register index size (6 bits to include HI/LO) +`define TAG_SZ 6 // Register tag size +`define TAGS (`ARCH_REGS+`ROB_ENTRIES) // 32 GPRs, HI/LO, and ROB size +`define FL_SZ (`TAGS) +`define FL_PTR_SZ `TAG_SZ +`define LO_REG `TAG_SZ'd33 +`define ZERO_REG `TAG_SZ'd0 + +`define CHKPT_NUM 4 // Number of RAT checkpoints +`define CHKPT_PTR_SZ 2 + +// CDB defines +`define NUM_CDB 4 // 2 ALU, 1 LD/ST, 1 MULT/DIV +`define CDB_SZ (1+`TAG_SZ+`REG_IDX_SZ+`ROB_PTR_SZ) // 1 valid bit, 1 tag, 1 architectural reg index, 1 ROB index +`define CDB_VLD `TAG_SZ+`REG_IDX_SZ+`ROB_PTR_SZ // Valid field of CDB +`define CDB_ROB_IDX `TAG_SZ+`REG_IDX_SZ+`ROB_PTR_SZ-1:`TAG_SZ+`REG_IDX_SZ +`define CDB_TAG `REG_IDX_SZ+`TAG_SZ-1:`REG_IDX_SZ // Tag field of CDB +`define CDB_REG_IDX `REG_IDX_SZ-1:0 // Arch. 
reg index field of CDB +`define CDB_BUS_SZ (`NUM_CDB*`CDB_SZ) // `NUM_CDB valid bits and tags +`define CDB_DATA_SZ (`NUM_CDB*`DATA_SZ) + +// Branch prediction defines +`define BP_IDX_SZ 4 // Size of Index into branch predictor +`define BP_ENTRIES (1 << `BP_IDX_SZ) // Number of branch predictor entries + +// System Bus defines +`define SYS_BUS_SZ 64 +`define SYS_BUS_BE_SZ 8 + +// Instruction Cache defines +`define IC_LINE_SZ (2*`INSTR_SZ) // Size of instruction cache line +`define IC_BO_SZ 3 // Block-offset size +`define IC_SI_SZ 8 // Set index size +`define IC_TAG_SZ (`ADDR_SZ-`IC_SI_SZ-`IC_BO_SZ) // Tag size +`define IC_TAG `ADDR_SZ-1 -: `IC_TAG_SZ // Tag field of PC +`define IC_SI `IC_SI_SZ+`IC_BO_SZ-1:`IC_BO_SZ // Set index field of PC +`define IC_NUM_LINES (1<<`IC_SI_SZ) // Number of instruction cache lines +`define IC_TAGRAM_SZ (1+1+`IC_TAG_SZ) // +2 bits for valid/dirty (dirty not used) +`define IC_TAGRAM_VLD `IC_TAG_SZ+1 // Valid field +`define IC_TAGRAM_DRT `IC_TAG_SZ // Dirty field +`define IC_TAGRAM_TAG `IC_TAG_SZ-1:0 // tag field + +// Data Cache defines +`define DC_LINE_SZ (2*`DATA_SZ) // Size of data cache line +`define DC_BO_SZ 2 // Block-offset size +`define DC_SI_SZ 8 // Set index size +`define DC_TAG_SZ (`ADDR_SZ-`DC_SI_SZ-`DC_BO_SZ) // Tag size +`define DC_TAG `ADDR_SZ-1 -: `DC_TAG_SZ // Tag field of PC +`define DC_SI `ADDR_SZ-1-`DC_TAG_SZ -: `DC_SI_SZ // Set index field of PC +`define DC_TAGRAM_SZ (1+1+`DC_TAG_SZ) // +2 bits for valid/dirty +`define DC_NUM_LINES (1<<`DC_SI_SZ) // Number of data cache lines +`define DC_TAGRAM_VLD `DC_TAG_SZ+1 // Valid field +`define DC_TAGRAM_DRT `DC_TAG_SZ // Dirty field +`define DC_TAGRAM_TAG `DC_TAG_SZ-1:0 // tag field + +`define RESET_ADDR 32'h0 // FPC reset address + +// Fields of branch prediction bus +`define BP_SZ 34 +`define BP_TRGT 33:2 +`define BP_TKN 1 +`define BP_VLD 0 + +// Fields of Decode bus +/* +`define DEC_BUS_SZ 84 +`define DEC_IMM_DATA 83:68 // Immediate data for ALU and MEM +`define 
DEC_TYPE_INFO 67:65 // Instruction type info group +`define DEC_TYPE_ALU 67 // ALU/Branch instruction type +`define DEC_TYPE_MULT_DIV 66 // MULT/DIV instruction type +`define DEC_TYPE_MEM 65 // Load/Store instruction type +`define DEC_REG_INFO 64:44 // Register info group +`define DEC_REG_D_WR 64 // Writes to dest register +`define DEC_REG_T_NEED 63 // Need register T operand +`define DEC_REG_S_NEED 62 // Need register S operand +`define DEC_REG_D_INDX 61:56 // Destination register index +`define DEC_REG_T_INDX 55:50 // Operand register T index +`define DEC_REG_S_INDX 49:44 // Operand register S index +`define DEC_MULTDIV_SZ 8 // MULT/DIV info group +`define DEC_MULTDIV_INFO 43:36 // MULT/DIV info group +`define DEC_MTLO 43 // Move to LO +`define DEC_MTHI 42 // Move to HI +`define DEC_MFLO 41 // Move from LO +`define DEC_MFHI 40 // Move from HI +`define DEC_MD_SIGNED 39 // Mult/Div signed +`define DEC_DIV 38 // Divide +`define DEC_MULT 37 // Multiply +`define DEC_WR_HILO 36 // Write to HI and LO registers +`define DEC_MEM_SZ 6 +`define DEC_MEM_INFO 35:30 // Load/Store info group +`define DEC_MEM_W 35 // Word load/store +`define DEC_MEM_HW 34 // Halfword load/store +`define DEC_MEM_B 33 // Byte load/store +`define DEC_MEM_ST 32 // Memory store +`define DEC_MEM_SIGNED 31 // Load Signed +`define DEC_MEM_LD 30 // Memory load +`define DEC_CP_SZ 7 +`define DEC_CP_INFO 29:23 // Coprocessor info group +`define DEC_CP_SEL 29:27 // Coprocessor Sel index +`define DEC_CP_NUM 26:25 // Coprocessor number +`define DEC_CP_TO 24 // Move To coprocessor (from if 0) +`define DEC_CP_OP 23 // Coprocessor Operation +`define DEC_BR_SZ 10 +`define DEC_BR_INFO 22:13 // Branch info group +`define DEC_BR_SYS 22 // SYSCALL +`define DEC_BR_BRK 21 // BREAK +`define DEC_BR_LINK 20 // Branch/Jump and link +`define DEC_BR_JR 19 // JR/JALR +`define DEC_BR_J 18 // J/JAL +`define DEC_BR_NEG 17 // Negate condition (to get the rest of the conditions) +`define DEC_BR_BGT 16 // BGTZ condition +`define 
DEC_BR_BGE 15 // BGEZ condition +`define DEC_BR_BEQ 14 // BEQ condition +`define DEC_BR_INST 13 // Branch instruction +`define DEC_ALU_SZ 13 +`define DEC_ALU_INFO 12:0 // ALU info group +`define DEC_ALU_SIGNED 12 // Signed operation +`define DEC_ALU_IMM 11 // Use immediate instead of register +`define DEC_ALU_LUI 10 // LUI (will treat as shift operation with immediate inputs) +`define DEC_ALU_S_A 9 // Shift arithmetic (if 1, logical if 0) +`define DEC_ALU_SR 8 // Shift right +`define DEC_ALU_SL 7 // Shift left +`define DEC_ALU_CMP 6 // Compare (SLT) +`define DEC_ALU_OR 5 // Logical OR +`define DEC_ALU_NOR 4 // Logical NOR +`define DEC_ALU_XOR 3 // Logical XOR +`define DEC_ALU_AND 2 // Logical AND +`define DEC_ALU_SUB 1 // Subtraction +`define DEC_ALU_ADD 0 // Addition +*/ + +// Fields of instruction decode bus from ID stage. +// Note: to save on flops, ID stage will only determine basic instruction type +// and register operand/destination information. Complete instruction decoding +// will happen during last Dispatch cycle into Reservation Station. +`define DEC_BUS_SZ 26 +`define DEC_REG_D_IDX 25:20 // Rd index +`define DEC_REG_T_IDX 19:14 // Rt index +`define DEC_REG_S_IDX 13:8 // Rs index +`define DEC_REG_D_WR 7 // Writes to Rd +`define DEC_REG_T_NEED 6 // Needs Rt operand +`define DEC_REG_S_NEED 5 // Needs Rs operand +`define DEC_TYPE_CP 4 // CP move instruction +`define DEC_TYPE_BR 3 // Branch instruction +`define DEC_TYPE_LDST 2 // Instruction handled by LDST unit +`define DEC_TYPE_MULTDIV 1 // Instruction handled by MULT/DIV unit +`define DEC_TYPE_ALU 0 // Instruction handled by ALU unit + +// ALU control bus for ALU operation. 
+`define ALU_CTL_SZ 1 + +// Fields of Branch/Jump operation bus +`define BR_INFO_SZ 10 +`define BR_SYS 9 // SYSCALL +`define BR_BRK 8 // BREAK +`define BR_LINK 7 // Branch/Jump and link +`define BR_JR 6 // JR/JALR +`define BR_J 5 // J/JAL +`define BR_NEG 4 // Negate condition (to get the rest of the conditions) +`define BR_BGT 3 // BGTZ condition +`define BR_BGE 2 // BGEZ condition +`define BR_BEQ 1 // BEQ condition +`define BR_INST 0 // Branch instruction + +// Fields of ALU information bus +`define ALU_INFO_SZ 13 +`define ALU_SIGNED 12 // Signed operation +`define ALU_IMM 11 // Use immediate instead of register +`define ALU_LUI 10 // LUI (treated as shift op) +`define ALU_S_A 9 // Shift arithmetic (if 1, logical if 0) +`define ALU_SR 8 // Shift right +`define ALU_SL 7 // Shift left +`define ALU_CMP 6 // Compare (SLT) +`define ALU_OR 5 // Logical OR +`define ALU_NOR 4 // Logical NOR +`define ALU_XOR 3 // Logical XOR +`define ALU_AND 2 // Logical AND +`define ALU_SUB 1 // Subtraction +`define ALU_ADD 0 // Addition + +// Fields of rename information +`define REN_BUS_SZ 35 +`define REN_DEST_IDX 34:29 // Destination (reg_d) index +`define REN_DEST_VLD 28 // Writes to destination +`define REN_DEST_TAG_OLD 27:22 // Destination (reg_d) old tag +`define REN_DEST_TAG 21:16 // Destination (reg_d) tag +`define REN_SRC2_VLD 15 // Source 2 data valid in register file +`define REN_SRC2_NEED 14 // Need source 2 register data +`define REN_SRC2_TAG 13:8 // Source 2 (reg_s) tag +`define REN_SRC1_VLD 7 // Source 1 data valid in register file +`define REN_SRC1_NEED 6 // Need source 1 register data +`define REN_SRC1_TAG 5:0 // Source 1 (reg_s) tag + +// Reservation Station defines +`define ALU_RS_ENTRIES 4 // Size of Reservation Station for ALU and branch +`define ALU_RS_CNT_SZ 3 // Size of occupancy counter +//`define ALU_RS_CNTL_SZ (`DEC_ALU_SZ+`DEC_BR_SZ+`DEC_CP_SZ+`ADDR_SZ+`IMM_SZ) +`define MULTDIV_RS_ENTRIES 2 // Size of Reservation Station for MULT/DIV +`define MULTDIV_RS_CNT_SZ 
2 // Size of occupancy counter +//`define MULTDIV_RS_CNTL_SZ (`DEC_MULTDIV_SZ) +`define LDST_RS_ENTRIES 2 // Size of Reservation Station for Load/Store +`define LDST_RS_CNT_SZ 2 // Size of occupancy counter +//`define LDST_RS_CNTL_SZ (`DEC_MEM_SZ+`IMM_SZ) + +// CP0 Register fields +`define CP0_STATUS_EXL 1 + +// Feature ifdefs +// Comment out define to remove feature from compilation +//`define USE_PLL // Include PLL (exclude for simulation) +`define USE_IC // include Instruction cache +`define USE_DC // include Data cache +//`define DYN_BPRD // TODO: Add back in later +`define USE_IFB // Include instruction buffer between IF and ID stages + +`ifdef USE_IFB + `define IFB_ENTRIES 4 // Number of fetch buffer entries + `define IFB_ENTRY_SZ (`INSTR_SZ+`ADDR_SZ+`BP_SZ+1) + `define IFB_PTR_SZ 2 // Fetch buffer pointer width +`endif + +//`define TIMING_OPT // Use timing-optimized RTL in some portions (area affected) +//`define ALTERA // Used to instantiate ALTERA megafunctions over generic logic
trunk/rtl/ooops_defs.v Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: trunk/rtl/if_stage.v =================================================================== --- trunk/rtl/if_stage.v (nonexistent) +++ trunk/rtl/if_stage.v (revision 2) @@ -0,0 +1,152 @@ +////////////////////////////////////////////////////////////////// +// // +// OoOPs Core Instruction Fetch module // +// // +// This file is part of the OoOPs project // +// http://www.opencores.org/project,oops // +// // +// Description: // +// Handles updating Program Counter and fetching instructions // +// from the Instruction Cache. // +// // +// Author(s): // +// - Joshua Smith, smjoshua@umich.edu // +// // +////////////////////////////////////////////////////////////////// +// // +// Copyright (C) 2012 Authors and OPENCORES.ORG // +// // +// This source file may be used and distributed without // +// restriction provided that this copyright statement is not // +// removed from the file and that any derivative work contains // +// the original copyright notice and the associated disclaimer. // +// // +// This source file is free software; you can redistribute it // +// and/or modify it under the terms of the GNU Lesser General // +// Public License as published by the Free Software Foundation; // +// either version 2.1 of the License, or (at your option) any // +// later version. // +// // +// This source is distributed in the hope that it will be // +// useful, but WITHOUT ANY WARRANTY; without even the implied // +// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // +// PURPOSE. See the GNU Lesser General Public License for more // +// details. 
//
//                                                              //
// You should have received a copy of the GNU Lesser General    //
// Public License along with this source; if not, download it   //
// from http://www.opencores.org/lgpl.shtml                     //
//                                                              //
//////////////////////////////////////////////////////////////////
`include "ooops_defs.v"

// if_stage
//   Generates the fetch PC (FPC), issues requests to the instruction cache
//   and hands fetched instructions to the ID stage, either through the
//   fetch buffer (USE_IFB defined) or through a single pipeline register.
//
//   Inputs:
//     rob_pipe_flush / rob_flush_target : redirect the FPC (highest priority)
//     ds_stall                          : back-pressure from dispatch
//     ic_if_data / ic_if_data_valid     : instruction returned by the I-cache
//     ic_if_ready                       : when low, the previous FPC is re-issued
//   Outputs:
//     if_ic_req / if_ic_fpc / r_if_ic_fpc : I-cache request, current and
//                                           one-cycle-delayed fetch address
//     if_id_*                             : instruction, PC and branch
//                                           prediction info for the ID stage
module if_stage (
  input  wire                 clk,
  input  wire                 rst,

  // Flush/stall interfaces
  input  wire                 rob_pipe_flush,
  input  wire [`ADDR_SZ-1:0]  rob_flush_target,
  input  wire                 ds_stall,

  // Instruction cache interface
  output wire                 if_ic_req,
  output wire [`ADDR_SZ-1:0]  if_ic_fpc,
  output wire [`ADDR_SZ-1:0]  r_if_ic_fpc,
  input  wire [`INSTR_SZ-1:0] ic_if_data,
  input  wire                 ic_if_data_valid,
  input  wire                 ic_if_ready,

  // Interface to ID stage
  output wire                 if_id_valid,
  output wire [`INSTR_SZ-1:0] if_id_instr,
  output wire [`ADDR_SZ-1:0]  if_id_fpc,
  output wire [`BP_SZ-1:0]    if_id_bprd_info
  );

  // Internal wires/regs
  wire                  if_stall;
  wire                  if_valid;
  wire [`ADDR_SZ-1:0]   if_fpc;               // Current fetch pc
  wire [`ADDR_SZ-1:0]   r_if_fpc;             // flopped FPC
  wire [`INSTR_SZ-1:0]  if_instr;
  wire [`BP_SZ-1:0]     if_bprd_info;
  wire                  if_br_predict_valid;
  wire                  if_br_predict_taken;
  wire [`ADDR_SZ-1:0]   if_br_predict_target;

  // Note that Icache will have 1 cycle latency, so we won't know if it's a miss until
  // one cycle later.  Since we don't want to have to wait to figure out if it's a hit
  // before we increment the FPC (want to be optimistic), we'll have to be able to reset
  // the FPC if it's a miss.
  assign if_ic_req   = !if_stall;
  assign if_ic_fpc   = if_fpc;
  assign r_if_ic_fpc = r_if_fpc;
  assign if_valid    = ic_if_data_valid & !rob_pipe_flush;
  assign if_instr    = ic_if_data;

  // Handle the FPC generation
  wire [`ADDR_SZ-1:0] if_fpc_p4 = if_fpc + `ADDR_SZ'h4;
  reg  [`ADDR_SZ-1:0] if_fpc_in;
  always @* begin
    casez({rob_pipe_flush, if_br_predict_taken, !ic_if_ready})
      3'b1??:  if_fpc_in = rob_flush_target;      // Flush target
      3'b01?:  if_fpc_in = if_br_predict_target;  // Taken branch target
      3'b001:  if_fpc_in = r_if_fpc;              // Previous FPC
      default: if_fpc_in = if_fpc_p4;             // Next incremented FPC
    endcase
  end

  // The FPC must also load on a flush: otherwise a flush that arrives while
  // the stage is stalled (e.g. fetch buffer full) would drop rob_flush_target
  // and fetch would resume down the old path once the stall clears.
  wire if_fpc_ld = if_ic_req | rob_pipe_flush;
  MDFFLR #(`ADDR_SZ) if_fpc_ff (clk, rst, if_fpc_ld, `RESET_ADDR, if_fpc_in, if_fpc);

  // Flop Icache request address so we can re-request if it ends up being a miss.
  // (The previously computed-but-unused r_if_fpc_in mux has been removed; the
  // flop has always been fed directly from if_fpc.)
  MDFFR #(`ADDR_SZ) r_if_fpc_ff (clk, rst, `RESET_ADDR, if_fpc, r_if_fpc);

  // Handle branch prediction
  // TODO: throw in branch prediction
  // Note: Try to identify jumps and other unconditional branches here, for quick recovery
  `ifdef DYN_BPRD
  assign if_br_predict_valid  = 1'b0;
  assign if_br_predict_taken  = 1'b0;
  assign if_br_predict_target = {`ADDR_SZ{1'b0}};
  assign if_bprd_info         = {if_br_predict_target, if_br_predict_taken, if_br_predict_valid};
  `else
  // tie-offs should optimize logic away
  assign if_br_predict_valid  = 1'b0;
  assign if_br_predict_taken  = 1'b0;
  assign if_br_predict_target = {`ADDR_SZ{1'b0}};
  assign if_bprd_info         = {if_br_predict_target, if_br_predict_taken, if_br_predict_valid};
  `endif

  `ifdef USE_IFB
  wire ifb_full;
  // NOTE(review): the buffer is written with the *current* FPC while the
  // instruction data comes back from the I-cache one cycle after the request;
  // r_if_fpc may be the matching address -- confirm against the I-cache timing.
  if_buffer ifb (
    .clk(clk),
    .rst(rst),
    .flush(rob_pipe_flush),
    .if_valid(if_valid),
    .if_instr(if_instr),
    .if_fpc(if_fpc),
    .if_bprd_info(if_bprd_info),
    .if_ifb_pop_en(!ds_stall),
    .ifb_full(ifb_full),
    .if_id_valid(if_id_valid),
    .if_id_instr(if_id_instr),
    .if_id_fpc(if_id_fpc),
    .if_id_bprd_info(if_id_bprd_info)
  );

  assign if_stall = ifb_full;   // Only stall if IFB is full
  `else

  MDFFLR #(1)         if_id_valid_ff     (clk, rst, !if_stall, 1'b0, if_valid, if_id_valid);
  MDFFL  #(`INSTR_SZ) if_id_instr_ff     (clk, !if_stall, if_instr, if_id_instr);
  // Was "if_ic_fpc_q", an identifier declared nowhere in this module (it
  // became an implicit, undriven 1-bit net).  The flopped fetch address that
  // exists here is r_if_fpc, so use it.
  MDFFL  #(`ADDR_SZ)  if_id_fpc_ff       (clk, !if_stall, r_if_fpc, if_id_fpc);
  MDFFL  #(`BP_SZ)    if_id_bprd_info_ff (clk, if_valid, if_bprd_info, if_id_bprd_info);

  assign if_stall = if_id_valid & ds_stall;   // Stall if we have a valid instruction going to ID and DS stalling

  `endif // USE_IFB
endmodule
trunk/rtl/if_stage.v Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property
Index: trunk/rtl/psel.v
===================================================================
--- trunk/rtl/psel.v (nonexistent)
+++ trunk/rtl/psel.v (revision 2)
@@ -0,0 +1,19 @@
// Generic priority selector module
//   req : request vector
//   gnt : one-hot grant; the lowest-numbered asserted request wins
module psel (req, gnt);
  //synopsys template
  parameter WIDTH=8;
  input  wire [WIDTH-1:0] req;
  output wire [WIDTH-1:0] gnt;

  // Bit 0 has the highest priority and is granted whenever it requests
  assign gnt[0] = req[0];

  // Every other bit is granted only when no lower-numbered bit requests
  genvar b;
  generate
    for (b = 1; b < WIDTH; b = b + 1)
    begin: sel
      assign gnt[b] = req[b] & ~|req[b-1:0];
    end
  endgenerate

endmodule
Index: trunk/rtl/map_table.v
===================================================================
--- trunk/rtl/map_table.v (nonexistent)
+++ trunk/rtl/map_table.v (revision 2)
@@ -0,0 +1,350 @@
//////////////////////////////////////////////////////////////////
//                                                              //
// OoOPs Core Register Map Table module                         //
//                                                              //
// This file is part of the OoOPs project                       //
// http://www.opencores.org/project,oops                        //
//                                                              //
// Description:                                                 //
// The Map Table is responsible for maintaining the mapping     //
// from architectural->physical registers. This block           //
// consists of a free list for allocating new physical          //
// registers and also the tables for mapping source operands.   //
//                                                              //
// To avoid excessive flop usage for the map tables, block rams //
// will be used instead.                                        //
//                                                              //
// Author(s):                                                   //
// - Joshua Smith, smjoshua@umich.edu                           //
//                                                              //
//////////////////////////////////////////////////////////////////
//                                                              //
// Copyright (C) 2012 Authors and OPENCORES.ORG                 //
//                                                              //
// This source file may be used and distributed without         //
// restriction provided that this copyright statement is not    //
// removed from the file and that any derivative work contains  //
// the original copyright notice and the associated disclaimer.
//
//                                                              //
// This source file is free software; you can redistribute it   //
// and/or modify it under the terms of the GNU Lesser General   //
// Public License as published by the Free Software Foundation; //
// either version 2.1 of the License, or (at your option) any   //
// later version.                                               //
//                                                              //
// This source is distributed in the hope that it will be       //
// useful, but WITHOUT ANY WARRANTY; without even the implied   //
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
// PURPOSE. See the GNU Lesser General Public License for more  //
// details.                                                     //
//                                                              //
// You should have received a copy of the GNU Lesser General    //
// Public License along with this source; if not, download it   //
// from http://www.opencores.org/lgpl.shtml                     //
//                                                              //
//////////////////////////////////////////////////////////////////
`include "ooops_defs.v"

// map_table
//   Renames architectural registers to physical tags.  Contains the free
//   list, a checkpointed speculative RAT (three SRAM copies, one per read
//   port), a committed RAT (RRAT, also three copies) and the Dirty Flag Array
//   (DFA) that records which checkpoint holds the newest speculative mapping
//   of each architectural register.
//
//   DS1 inputs are looked up/allocated in one cycle; results appear on the
//   ds2_* outputs one cycle later.
//
//   NOTE(review): outputs ds2_src1_valid and ds2_src2_valid are declared but
//   never driven anywhere in this module (the writeback/CDB port is commented
//   out).  They are left undriven here because the intended source is not
//   visible from this file.
module map_table (
  input  wire                     clk,
  input  wire                     rst,
  output wire                     map_table_init,

  // Rename port
  input  wire                     ds1_valid,
  input  wire [`REG_IDX_SZ-1:0]   ds1_src1_idx,
  input  wire [`REG_IDX_SZ-1:0]   ds1_src2_idx,
  input  wire [`REG_IDX_SZ-1:0]   ds1_dest_idx,
  input  wire                     ds1_dest_wr,
  input  wire                     ds1_type_br,
  output wire [`TAG_SZ-1:0]       ds2_src1_tag,
  output wire [`TAG_SZ-1:0]       ds2_src2_tag,
  output wire                     ds2_src1_valid,
  output wire                     ds2_src2_valid,
  output wire [`TAG_SZ-1:0]       ds2_dest_tag,
  output wire [`TAG_SZ-1:0]       ds2_dest_tag_old,
  output wire [`FL_PTR_SZ-1:0]    ds2_fl_head_ptr,
  output wire [`CHKPT_PTR_SZ-1:0] ds2_chkpt_ptr,

  // Writeback port
  //input wire [`CDB_BUS_SZ-1:0] ex_cdb_bus,

  // Retire and flush port
  input  wire                     rob_pipe_flush,
  input  wire                     rob_ds_ret_valid,
  input  wire                     rob_ds_ret_dest_write,
  input  wire [`CHKPT_PTR_SZ-1:0] rob_ds_chkpt_ptr,
  input  wire [`FL_PTR_SZ-1:0]    rob_ds_fl_head_ptr,
  input  wire                     rob_ds_ret_chkpt_free,
  input  wire [`REG_IDX_SZ-1:0]   rob_ds_ret_idx,
  input  wire [`TAG_SZ-1:0]       rob_ds_ret_tag,
  input  wire [`TAG_SZ-1:0]       rob_ds_ret_tag_old
  );

  // Internal wires and regs
  wire [`TAG_SZ-1:0]        ds1_dest_tag;
  wire [`FL_PTR_SZ-1:0]     ds1_fl_head_ptr;
  wire [`ARCH_REGS-1:0]     dfa_dirty_bit     [`CHKPT_NUM-1:0];
  wire [`ARCH_REGS-1:0]     dfa_dirty_bit_in  [`CHKPT_NUM-1:0];
  wire [`CHKPT_NUM-1:0]     dfa_dirty_bit_ld;   // load is per checkpoint/column
  wire [`CHKPT_NUM-1:0]     dfa_dirty_bit_row [`ARCH_REGS-1:0];
  wire [`CHKPT_PTR_SZ-1:0]  ds1_src1_chkpt, ds1_src2_chkpt, ds1_dest_chkpt;

  wire [`CHKPT_PTR_SZ-1:0]  chkpt_head_ptr;
  wire [`CHKPT_PTR_SZ-1:0]  chkpt_head_ptr_p1;
  wire [`CHKPT_PTR_SZ-1:0]  chkpt_tail_ptr;
  wire [`CHKPT_PTR_SZ-1:0]  chkpt_tail_ptr_p1;
  wire [`CHKPT_NUM-1:0]     chkpt_valid_mask;
  wire [`CHKPT_NUM-1:0]     chkpt_valid_mask_in;
  wire                      chkpt_allocate;

  genvar g,k;

  // Instantiate free list
  free_list fl (
    .clk(clk),
    .rst(rst),
    .ds1_dest_wr(ds1_dest_wr),
    .rob_pipe_flush(rob_pipe_flush),
    .rob_ds_fl_head_ptr(rob_ds_fl_head_ptr),
    .rob_ds_ret_valid(rob_ds_ret_valid),
    .rob_ds_ret_dest_write(rob_ds_ret_dest_write),
    .rob_ds_ret_tag_old(rob_ds_ret_tag_old),
    .ds1_dest_tag(ds1_dest_tag),
    .ds1_fl_head_ptr(ds1_fl_head_ptr)
  );

  // Maintain the checkpoint head/tail pointers
  // Operation:
  // - Upon a pipe flush, restore both head and tail pointers to same pointer value from the ROB.
  // - When a new checkpoint is allocated, advance head pointer by 1
  // - When an instruction which allocated a checkpoint retires, advance tail pointer by 1.
  assign chkpt_allocate = ds1_type_br & ~rob_pipe_flush;
  wire                     chkpt_retire      = rob_ds_ret_valid & rob_ds_ret_chkpt_free;
  wire [`CHKPT_PTR_SZ-1:0] chkpt_head_ptr_in = rob_pipe_flush ? rob_ds_chkpt_ptr : chkpt_head_ptr_p1;
  wire [`CHKPT_PTR_SZ-1:0] chkpt_tail_ptr_in = rob_pipe_flush ? rob_ds_chkpt_ptr : chkpt_tail_ptr_p1;
  wire                     chkpt_head_ptr_ld = rob_pipe_flush | chkpt_allocate;
  wire                     chkpt_tail_ptr_ld = rob_pipe_flush | chkpt_retire;

  MDFFLR #(`CHKPT_PTR_SZ) chkpt_head_ptr_ff (clk, rst, chkpt_head_ptr_ld, `CHKPT_PTR_SZ'h0, chkpt_head_ptr_in, chkpt_head_ptr);
  MDFFLR #(`CHKPT_PTR_SZ) chkpt_tail_ptr_ff (clk, rst, chkpt_tail_ptr_ld, `CHKPT_PTR_SZ'h0, chkpt_tail_ptr_in, chkpt_tail_ptr);

  assign chkpt_head_ptr_p1 = (chkpt_head_ptr == `CHKPT_NUM-1) ? `CHKPT_PTR_SZ'h0 : chkpt_head_ptr + `CHKPT_PTR_SZ'h1;
  assign chkpt_tail_ptr_p1 = (chkpt_tail_ptr == `CHKPT_NUM-1) ? `CHKPT_PTR_SZ'h0 : chkpt_tail_ptr + `CHKPT_PTR_SZ'h1;

  // Keep a bit-vector mask of valid (allocated) checkpoints for the DFA search
  // Initialize checkpoint 0 to valid, this will be the checkpoint used out of reset.
  wire [`CHKPT_NUM-1:0] allocated_chkpt   = (`CHKPT_NUM'h1 << chkpt_head_ptr_p1) & {`CHKPT_NUM{chkpt_allocate}};
  // A checkpoint is only freed when the retire is valid, matching the
  // condition that advances the tail pointer (previously rob_ds_ret_valid was
  // not part of this term, so the mask and the tail pointer could disagree).
  wire [`CHKPT_NUM-1:0] freed_chkpt       = (`CHKPT_NUM'h1 << chkpt_tail_ptr) & {`CHKPT_NUM{chkpt_retire}};
  wire [`CHKPT_NUM-1:0] rob_ds_chkpt_vec  = (`CHKPT_NUM'h1 << rob_ds_chkpt_ptr);
  assign chkpt_valid_mask_in = rob_pipe_flush ? rob_ds_chkpt_vec : ((chkpt_valid_mask | allocated_chkpt) & ~freed_chkpt);
  MDFFLR #(`CHKPT_NUM) chkpt_valid_mask_ff (clk, rst, chkpt_head_ptr_ld | chkpt_tail_ptr_ld, `CHKPT_NUM'h1, chkpt_valid_mask_in, chkpt_valid_mask);

  /*
    Handle the DFA (Dirty Flag Array) for determining which checkpoint contains the
    most recent mapping for an architectural register.  This is needed to setup the
    SRAM address input for the RAT lookup.

    Structure:
    - Maintain a grid of bits (one row for each arch. reg, one column for each checkpoint).
    - Head/Tail pointer keep track of most recently/least recently allocated valid checkpoints.

    Operation:
    - When a new checkpoint is allocated for a branch or speculation point, we advance
      the head pointer and clear the entire DFA column for that checkpoint.  For branches which
      write a register, the write should update the old checkpoint, not the newly allocated one.
    - When a register write operation comes through, update the row of the head checkpoint
      corresponding to the destination architectural register index.

  */
  wire [`CHKPT_NUM-1:0] dfa_column_clear = allocated_chkpt;
  wire [`CHKPT_NUM-1:0] ds1_active_chkpt = (`CHKPT_NUM'h1 << chkpt_head_ptr);
  assign dfa_dirty_bit_ld = dfa_column_clear |                                // Clear newly allocated checkpoint
                            (ds1_active_chkpt & {`CHKPT_NUM{ds1_dest_wr}});   // Update current checkpoint

  wire [`ARCH_REGS-1:0] ds1_dest_idx_vec = (1 << ds1_dest_idx);
  generate
    for (g=0; g<`CHKPT_NUM; g=g+1) begin : dfa_gen
      for (k=0; k<`ARCH_REGS; k=k+1) begin : dfa_dirty_bit_gen
        assign dfa_dirty_bit_in[g][k] = ~dfa_column_clear[g] & ((ds1_dest_idx_vec[k] & ds1_dest_wr) ? 1'b1 : dfa_dirty_bit[g][k]);

        MDFFLR #(1) dfa_dirty_bit_ff (clk, rst, dfa_dirty_bit_ld[g], 1'b0, dfa_dirty_bit_in[g][k], dfa_dirty_bit[g][k]);

        // generate a "row" version as well
        assign dfa_dirty_bit_row[k][g] = dfa_dirty_bit[g][k] & chkpt_valid_mask[g];
      end
    end
  endgenerate

  // Return the index of the most recently allocated checkpoint whose dirty bit
  // is set in dirty_row.  The head pointer advances by +1 on every allocation,
  // so checkpoints get older in the order head, head-1, head-2, ... (with
  // wrap-around).  The loop walks from the oldest candidate to the head so
  // that a younger dirty checkpoint overrides an older one.
  //
  // The previous hand-written mux searched head, head+1, head+2, ... which
  // returns the *oldest* dirty checkpoint (e.g. head=2 with checkpoints 0 and
  // 1 dirty selected 0 instead of 1).  It also assumed exactly 4 checkpoints;
  // this version works for any `CHKPT_NUM.
  //
  // When no bit is set the function returns head; the caller ignores the RAT
  // data in that case (see ds1_*_use_rrat below).
  function [`CHKPT_PTR_SZ-1:0] newest_dirty_chkpt;
    input [`CHKPT_NUM-1:0]    dirty_row;
    input [`CHKPT_PTR_SZ-1:0] head;
    integer age;   // 0 = head, 1 = allocated just before head, ...
    integer idx;   // checkpoint index that is 'age' allocations behind head
    begin
      newest_dirty_chkpt = head;
      for (age = `CHKPT_NUM-1; age >= 0; age = age - 1) begin
        idx = head;
        idx = idx - age;
        if (idx < 0)
          idx = idx + `CHKPT_NUM;
        if (dirty_row[idx])
          newest_dirty_chkpt = idx;
      end
    end
  endfunction

  // Determine which checkpoint contains the most recent mapping for each source and the destination
  assign ds1_src1_chkpt = newest_dirty_chkpt(dfa_dirty_bit_row[ds1_src1_idx], chkpt_head_ptr);
  assign ds1_src2_chkpt = newest_dirty_chkpt(dfa_dirty_bit_row[ds1_src2_idx], chkpt_head_ptr);
  assign ds1_dest_chkpt = newest_dirty_chkpt(dfa_dirty_bit_row[ds1_dest_idx], chkpt_head_ptr);

  // If no dirty bit set for any of the valid checkpoints, then committed copy must have latest mapping
  wire ds1_src1_use_rrat = ~(|dfa_dirty_bit_row[ds1_src1_idx]);
  wire ds1_src2_use_rrat = ~(|dfa_dirty_bit_row[ds1_src2_idx]);
  wire ds1_dest_use_rrat = ~(|dfa_dirty_bit_row[ds1_dest_idx]);

  // Generate the RAT SRAM read/write addresses and controls
  // Note: since tables are SRAM-based, we need to initialize the RRAT so that
  // registers are mapped correctly out of reset
  wire [`REG_IDX_SZ-1:0] map_table_init_ctr, map_table_init_ctr_in;
  wire                   map_table_init_in = map_table_init & (map_table_init_ctr != `ARCH_REGS);
  MDFFR #(1) map_table_init_ff (clk, rst, 1'b1, map_table_init_in, map_table_init);

  assign map_table_init_ctr_in = map_table_init_ctr + `REG_IDX_SZ'h1;
  MDFFLR #(`REG_IDX_SZ) map_table_init_ctr_ff (clk, rst, map_table_init, `REG_IDX_SZ'h0, map_table_init_ctr_in, map_table_init_ctr);

  wire [`REG_IDX_SZ+`CHKPT_PTR_SZ-1:0] ds1_rat_src1_rd_addr = {ds1_src1_idx,ds1_src1_chkpt};
  wire [`REG_IDX_SZ+`CHKPT_PTR_SZ-1:0] ds1_rat_src2_rd_addr = {ds1_src2_idx,ds1_src2_chkpt};
  wire [`REG_IDX_SZ+`CHKPT_PTR_SZ-1:0] ds1_rat_dest_rd_addr = {ds1_dest_idx,ds1_dest_chkpt};

  // Writes need to come from DS2 stage in case we read and write the same arch. register
  wire                                  ds2_rat_wren, ds2_rat_wren_in;
  wire [`REG_IDX_SZ+`CHKPT_PTR_SZ-1:0]  ds2_rat_wr_addr, ds2_rat_wr_addr_in;
  wire [`TAG_SZ-1:0]                    ds2_rat_wr_data;

  assign ds2_rat_wren_in    = ds1_dest_wr;
  assign ds2_rat_wr_addr_in = {ds1_dest_idx,chkpt_head_ptr};
  MDFFR #(1)                         ds2_rat_wren_ff    (clk, rst, 1'b0, ds2_rat_wren_in, ds2_rat_wren);
  MDFFR #(`REG_IDX_SZ+`CHKPT_PTR_SZ) ds2_rat_wr_addr_ff (clk, rst, 1'b0, ds2_rat_wr_addr_in, ds2_rat_wr_addr);
  assign ds2_rat_wr_data    = ds2_dest_tag;

  wire [`TAG_SZ-1:0] ds2_rat_src1_rd_data, ds2_rrat_src1_rd_data;
  wire [`TAG_SZ-1:0] ds2_rat_src2_rd_data, ds2_rrat_src2_rd_data;
  wire [`TAG_SZ-1:0] ds2_rat_dest_rd_data, ds2_rrat_dest_rd_data;

  // During initialisation the RRAT is written with an identity mapping
  // (address == data == init counter); afterwards it is written by retire.
  wire [`REG_IDX_SZ-1:0] ds_rrat_wr_addr = map_table_init ? map_table_init_ctr : rob_ds_ret_idx;
  wire [`TAG_SZ-1:0]     ds_rrat_wr_data = map_table_init ? map_table_init_ctr : rob_ds_ret_tag;
  wire                   ds_rrat_wren    = map_table_init | rob_ds_ret_valid & rob_ds_ret_dest_write;

  // Instantiate RAT SRAM blocks
  // Note that we need 3 copies for the required 3 read ports (2 source operand tag reads, 1 previous dest tag read)
  // Read copy 1
  dp_sram #(.DW(`TAG_SZ), .IW(`REG_IDX_SZ+`CHKPT_PTR_SZ)) rat0 (
    .clk(clk),
    .a_addr(ds1_rat_src1_rd_addr),  // Read port
    .a_dout(ds2_rat_src1_rd_data),

    .b_addr(ds2_rat_wr_addr),       // Write port
    .b_wren(ds2_rat_wren),
    .b_din(ds2_rat_wr_data)
  );

  // Read copy 2
  dp_sram #(.DW(`TAG_SZ), .IW(`REG_IDX_SZ+`CHKPT_PTR_SZ)) rat1 (
    .clk(clk),
    .a_addr(ds1_rat_src2_rd_addr),  // Read port
    .a_dout(ds2_rat_src2_rd_data),

    .b_addr(ds2_rat_wr_addr),       // Write port
    .b_wren(ds2_rat_wren),
    .b_din(ds2_rat_wr_data)
  );

  // Write copy 1
  dp_sram #(.DW(`TAG_SZ), .IW(`REG_IDX_SZ+`CHKPT_PTR_SZ)) rat2 (
    .clk(clk),
    .a_addr(ds1_rat_dest_rd_addr),  // Read port
    .a_dout(ds2_rat_dest_rd_data),

    .b_addr(ds2_rat_wr_addr),       // Write port
    .b_wren(ds2_rat_wren),
    .b_din(ds2_rat_wr_data)
  );

  // Instantiate tables for the committed RAT copies
  dp_sram #(.DW(`TAG_SZ), .IW(`REG_IDX_SZ)) rrat0 (
    .clk(clk),
    .a_addr(ds1_src1_idx),          // Read port
    .a_dout(ds2_rrat_src1_rd_data),

    .b_addr(ds_rrat_wr_addr),       // Write port (controlled by retire)
    .b_wren(ds_rrat_wren),
    .b_din(ds_rrat_wr_data)
  );
  dp_sram #(.DW(`TAG_SZ), .IW(`REG_IDX_SZ)) rrat1 (
    .clk(clk),
    .a_addr(ds1_src2_idx),          // Read port
    .a_dout(ds2_rrat_src2_rd_data),

    .b_addr(ds_rrat_wr_addr),       // Write port (controlled by retire)
    .b_wren(ds_rrat_wren),
    .b_din(ds_rrat_wr_data)
  );
  dp_sram #(.DW(`TAG_SZ), .IW(`REG_IDX_SZ)) rrat2 (
    .clk(clk),
    .a_addr(ds1_dest_idx),          // Read port
    .a_dout(ds2_rrat_dest_rd_data),

    .b_addr(ds_rrat_wr_addr),       // Write port (controlled by retire)
    .b_wren(ds_rrat_wren),
    .b_din(ds_rrat_wr_data)
  );

  // Since writes to map tables occur in DS2 stage, need to detect forwarding from previous instructions
  wire ds1_src1_wr_fwd = (ds1_src1_idx == ds2_rat_wr_addr[`REG_IDX_SZ+`CHKPT_PTR_SZ-1:`CHKPT_PTR_SZ]) & ds2_rat_wren;
  wire ds1_src2_wr_fwd = (ds1_src2_idx == ds2_rat_wr_addr[`REG_IDX_SZ+`CHKPT_PTR_SZ-1:`CHKPT_PTR_SZ]) & ds2_rat_wren;
  wire ds1_dest_wr_fwd = (ds1_dest_idx == ds2_rat_wr_addr[`REG_IDX_SZ+`CHKPT_PTR_SZ-1:`CHKPT_PTR_SZ]) & ds2_rat_wren;
  wire ds2_src1_wr_fwd, ds2_src2_wr_fwd, ds2_dest_wr_fwd;
  MDFFR #(1) ds2_src1_wr_fwd_ff (clk, rst, 1'b0, ds1_src1_wr_fwd, ds2_src1_wr_fwd);
  MDFFR #(1) ds2_src2_wr_fwd_ff (clk, rst, 1'b0, ds1_src2_wr_fwd, ds2_src2_wr_fwd);
  MDFFR #(1) ds2_dest_wr_fwd_ff (clk, rst, 1'b0, ds1_dest_wr_fwd, ds2_dest_wr_fwd);

  // Hold the tag being written this cycle so it can be forwarded next cycle
  wire [`TAG_SZ-1:0] r_ds2_rat_wr_data;
  wire               r_ds2_wr_data_ld = ds2_rat_wren & (ds1_src1_wr_fwd | ds1_src2_wr_fwd | ds1_dest_wr_fwd);
  MDFFL #(`TAG_SZ) r_ds2_rat_wr_data_ff (clk, r_ds2_wr_data_ld, ds2_rat_wr_data, r_ds2_rat_wr_data);

  // Generate DS2 stage outputs
  // Mux between RRAT and RAT outputs
  MDFFL #(`CHKPT_PTR_SZ) ds2_chkpt_ptr_ff   (clk, ds1_valid, chkpt_head_ptr, ds2_chkpt_ptr);
  MDFFL #(`FL_PTR_SZ)    ds2_fl_head_ptr_ff (clk, ds1_valid, ds1_fl_head_ptr, ds2_fl_head_ptr);

  wire ds2_src1_use_rrat, ds2_src2_use_rrat, ds2_dest_use_rrat;
  MDFFLR #(1) ds2_src1_use_rrat_ff (clk, rst, ds1_valid, 1'b0, ds1_src1_use_rrat, ds2_src1_use_rrat);
  MDFFLR #(1) ds2_src2_use_rrat_ff (clk, rst, ds1_valid, 1'b0, ds1_src2_use_rrat, ds2_src2_use_rrat);
  MDFFLR #(1) ds2_dest_use_rrat_ff (clk, rst, ds1_valid, 1'b0, ds1_dest_use_rrat, ds2_dest_use_rrat);
  MDFFL  #(`TAG_SZ) ds2_dest_tag_ff (clk, ds1_valid, ds1_dest_tag, ds2_dest_tag);

  // Priority: forwarded write data, then committed RAT, then speculative RAT
  assign ds2_src1_tag     = ds2_src1_wr_fwd ? r_ds2_rat_wr_data : ds2_src1_use_rrat ? ds2_rrat_src1_rd_data : ds2_rat_src1_rd_data;
  assign ds2_src2_tag     = ds2_src2_wr_fwd ? r_ds2_rat_wr_data : ds2_src2_use_rrat ? ds2_rrat_src2_rd_data : ds2_rat_src2_rd_data;
  assign ds2_dest_tag_old = ds2_dest_wr_fwd ? r_ds2_rat_wr_data : ds2_dest_use_rrat ? ds2_rrat_dest_rd_data : ds2_rat_dest_rd_data;

endmodule
Index: trunk/rtl/dp_sram.v
===================================================================
--- trunk/rtl/dp_sram.v (nonexistent)
+++ trunk/rtl/dp_sram.v (revision 2)
@@ -0,0 +1,47 @@
/*
  Josh Smith

  File: dp_sram.v
  Description: Module for SRAM slice.  This is written
  as a generic dual-port SRAM, so should be inferred as SRAM by
  tool.

  Port A is read-only: the address is registered, so a_dout reflects the
  entry addressed on the previous clock edge.
  Port B is write-only: b_din is written to b_addr on the clock edge when
  b_wren is high.
*/

`include "ooops_defs.v"

module dp_sram
  #(parameter DW      = `DATA_SZ,
    parameter IW      = `TAG_SZ,
    parameter ENTRIES = (1 << IW)
   )
  (
  input  wire          clk,

  // Port A
  input  wire [IW-1:0] a_addr,
  output wire [DW-1:0] a_dout,

  // Port B
  input  wire [IW-1:0] b_addr,
  input  wire          b_wren,
  input  wire [DW-1:0] b_din
  );

  reg [DW-1:0] rf_data [ENTRIES-1:0];
  reg [IW-1:0] a_addr_q;
  reg [IW-1:0] b_addr_q;   // declared but not used in this module

  // Port A
  always @(posedge clk) begin
    a_addr_q <= `SD a_addr;
  end
  assign a_dout = rf_data[a_addr_q];

  // Port B
  always @(posedge clk) begin
    if (b_wren) begin
      rf_data[b_addr] <= `SD b_din;
    end
  end
endmodule

trunk/rtl/dp_sram.v Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: trunk/rtl/ooops_lib.v =================================================================== --- trunk/rtl/ooops_lib.v (nonexistent) +++ trunk/rtl/ooops_lib.v (revision 2) @@ -0,0 +1,98 @@ + +////////////////////////////////////////////////////////////////// +// // +// OoOPs common module library // +// // +// This file is part of the OoOPs project // +// http://www.opencores.org/project,oops // +// // +// Description: // +// Basic library of common blocks such as different types of // +// flops, etc... // +// // +// Author(s): // +// - Joshua Smith, smjoshua@umich.edu // +// // +////////////////////////////////////////////////////////////////// +// // +// Copyright (C) 2012 Authors and OPENCORES.ORG // +// // +// This source file may be used and distributed without // +// restriction provided that this copyright statement is not // +// removed from the file and that any derivative work contains // +// the original copyright notice and the associated disclaimer. // +// // +// This source file is free software; you can redistribute it // +// and/or modify it under the terms of the GNU Lesser General // +// Public License as published by the Free Software Foundation; // +// either version 2.1 of the License, or (at your option) any // +// later version. // +// // +// This source is distributed in the hope that it will be // +// useful, but WITHOUT ANY WARRANTY; without even the implied // +// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // +// PURPOSE. See the GNU Lesser General Public License for more // +// details. 
//
//                                                              //
// You should have received a copy of the GNU Lesser General    //
// Public License along with this source; if not, download it   //
// from http://www.opencores.org/lgpl.shtml                     //
//                                                              //
//////////////////////////////////////////////////////////////////
`include "ooops_defs.v"

// Regular DFF
//   dout takes the value of din on every rising clock edge.
module MDFF #(parameter DW = 1) (
  input  wire          clk,
  input  wire [DW-1:0] din,
  output reg  [DW-1:0] dout
  );

  always @(posedge clk) begin
    dout <= `SD din;
  end

endmodule

// Loadable DFF
//   dout takes din on a rising clock edge only while ld is high;
//   otherwise it keeps its value.
module MDFFL #(parameter DW = 1) (
  input  wire          clk,
  input  wire          ld,
  input  wire [DW-1:0] din,
  output reg  [DW-1:0] dout
  );

  always @(posedge clk) begin
    if (ld) begin
      dout <= `SD din;
    end
  end

endmodule

// Resetable DFF
//   Synchronous reset: while rst is high dout takes rst_din, otherwise din.
module MDFFR #(parameter DW = 1) (
  input  wire          clk,
  input  wire          rst,
  input  wire [DW-1:0] rst_din,
  input  wire [DW-1:0] din,
  output reg  [DW-1:0] dout
  );

  always @(posedge clk) begin
    if (rst) begin
      dout <= `SD rst_din;
    end
    else begin
      dout <= `SD din;
    end
  end

endmodule

// Loadable, resetable DFF
//   Synchronous reset has priority over load; with neither asserted dout
//   keeps its value.
module MDFFLR #(parameter DW = 1) (
  input  wire          clk,
  input  wire          rst,
  input  wire          ld,
  input  wire [DW-1:0] rst_din,
  input  wire [DW-1:0] din,
  output reg  [DW-1:0] dout
  );

  always @(posedge clk) begin
    if (rst) begin
      dout <= `SD rst_din;
    end
    else begin
      if (ld) begin
        dout <= `SD din;
      end
    end
  end

endmodule
trunk/rtl/ooops_lib.v Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: trunk/rtl/if_buffer.v =================================================================== --- trunk/rtl/if_buffer.v (nonexistent) +++ trunk/rtl/if_buffer.v (revision 2) @@ -0,0 +1,119 @@ +////////////////////////////////////////////////////////////////// +// // +// OoOPs Core Instruction Fetch Buffer module // +// // +// This file is part of the OoOPs project // +// http://www.opencores.org/project,oops // +// // +// Description: // +// Buffer for fetched instructions to help reduce penalty of // +// cache misses during stall cycles. // +// // +// Author(s): // +// - Joshua Smith, smjoshua@umich.edu // +// // +////////////////////////////////////////////////////////////////// +// // +// Copyright (C) 2012 Authors and OPENCORES.ORG // +// // +// This source file may be used and distributed without // +// restriction provided that this copyright statement is not // +// removed from the file and that any derivative work contains // +// the original copyright notice and the associated disclaimer. // +// // +// This source file is free software; you can redistribute it // +// and/or modify it under the terms of the GNU Lesser General // +// Public License as published by the Free Software Foundation; // +// either version 2.1 of the License, or (at your option) any // +// later version. // +// // +// This source is distributed in the hope that it will be // +// useful, but WITHOUT ANY WARRANTY; without even the implied // +// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // +// PURPOSE. See the GNU Lesser General Public License for more // +// details. 
//                                                              //
// You should have received a copy of the GNU Lesser General    //
// Public License along with this source; if not, download it   //
// from http://www.opencores.org/lgpl.shtml                     //
//                                                              //
//////////////////////////////////////////////////////////////////
`include "ooops_defs.v"

// Instruction fetch buffer: a synchronous FIFO of `IFB_ENTRIES entries.
//
// Write side: an entry {if_valid, if_instr, if_fpc, if_bprd_info} is pushed
//             when if_valid is set and the buffer is not full.
// Read side : the entry at the read pointer is always presented on
//             if_id_instr/if_id_fpc/if_id_bprd_info; if_id_valid is set
//             while the buffer is not empty, and if_ifb_pop_en advances
//             the read pointer.
// flush     : returns both pointers to zero, which empties the buffer.
//
// The pointers are `IFB_PTR_SZ+1 bits wide: the low `IFB_PTR_SZ bits index
// the entry and the top bit is a wrap flag used to tell full from empty.
// NOTE(review): this scheme assumes `IFB_ENTRIES == 2**`IFB_PTR_SZ --
// confirm against ooops_defs.v.
module if_buffer (
  input  wire                   clk,
  input  wire                   rst,
  input  wire                   flush,

  // Write interface
  input  wire                   if_valid,
  input  wire [`INSTR_SZ-1:0]   if_instr,
  input  wire [`ADDR_SZ-1:0]    if_fpc,
  input  wire [`BP_SZ-1:0]      if_bprd_info,

  // Read interface
  input  wire                   if_ifb_pop_en,
  output wire                   ifb_full,
  output wire                   if_id_valid,
  output wire [`INSTR_SZ-1:0]   if_id_instr,
  output wire [`ADDR_SZ-1:0]    if_id_fpc,
  output wire [`BP_SZ-1:0]      if_id_bprd_info
  );

  // Local wires
  wire [`IFB_PTR_SZ:0]      ifb_rd_ptr, ifb_rd_ptr_in;
  wire [`IFB_PTR_SZ:0]      ifb_wr_ptr, ifb_wr_ptr_in;
  wire [`IFB_ENTRIES-1:0]   ifb_rd_ptr_vec;   // 1-hot vector for reading
  wire                      ifb_empty;
  wire                      ifb_push;
  wire                      ifb_pop;

  wire [`IFB_ENTRY_SZ-1:0]  ifb_entry [`IFB_ENTRIES-1:0];
  wire [`IFB_ENTRY_SZ-1:0]  ifb_entry_in;
  wire [`IFB_ENTRIES-1:0]   ifb_entry_ld;
  reg  [`IFB_ENTRY_SZ-1:0]  ifb_rd_entry;

  // Handle muxing outputs: AND-OR mux selecting the entry flagged by the
  // one-hot read vector.
  integer i;
  always @* begin
    ifb_rd_entry = {`IFB_ENTRY_SZ{1'b0}};
    for (i=0; i<`IFB_ENTRIES; i=i+1) begin
      ifb_rd_entry = ifb_rd_entry | ({`IFB_ENTRY_SZ{ifb_rd_ptr_vec[i]}} & ifb_entry[i]);
    end
  end
  assign if_id_valid = !ifb_empty;
  assign {if_id_instr,if_id_fpc,if_id_bprd_info} = ifb_rd_entry;

  // Handle updating the read and write pointers
  assign ifb_push = if_valid & !ifb_full;
  assign ifb_pop  = if_ifb_pop_en & !ifb_empty;

  wire ifb_wr_ptr_ld = ifb_push | flush;
  wire ifb_rd_ptr_ld = ifb_pop  | flush;

  // Value loaded when the corresponding *_ld is set. The counters wrap
  // naturally over 2*`IFB_ENTRIES so that the top bit toggles on each pass
  // through the buffer; resetting them at `IFB_ENTRIES (as before) broke the
  // wrap-bit based full/empty comparison below.
  assign ifb_wr_ptr_in = flush ? {(`IFB_PTR_SZ+1){1'b0}} : (ifb_wr_ptr + 1'b1);
  assign ifb_rd_ptr_in = flush ? {(`IFB_PTR_SZ+1){1'b0}} : (ifb_rd_ptr + 1'b1);

  // True next-cycle pointer values (held when there is no push/pop/flush).
  wire [`IFB_PTR_SZ:0] ifb_wr_ptr_nxt = ifb_wr_ptr_ld ? ifb_wr_ptr_in : ifb_wr_ptr;
  wire [`IFB_PTR_SZ:0] ifb_rd_ptr_nxt = ifb_rd_ptr_ld ? ifb_rd_ptr_in : ifb_rd_ptr;

  // One-hot read select is built from the index bits only (wrap bit excluded)
  // so the shift never runs past the last entry.
  wire [`IFB_ENTRIES-1:0] ifb_rd_ptr_vec_in = (`IFB_ENTRIES'h1 << ifb_rd_ptr_in[`IFB_PTR_SZ-1:0]);

  MDFFLR #(`IFB_PTR_SZ+1) ifb_wr_ptr_ff     (clk, rst, ifb_wr_ptr_ld, {(`IFB_PTR_SZ+1){1'b0}}, ifb_wr_ptr_in, ifb_wr_ptr);
  MDFFLR #(`IFB_PTR_SZ+1) ifb_rd_ptr_ff     (clk, rst, ifb_rd_ptr_ld, {(`IFB_PTR_SZ+1){1'b0}}, ifb_rd_ptr_in, ifb_rd_ptr);
  MDFFLR #(`IFB_ENTRIES)  ifb_rd_ptr_vec_ff (clk, rst, ifb_rd_ptr_ld, `IFB_ENTRIES'h1, ifb_rd_ptr_vec_in, ifb_rd_ptr_vec);

  // Handle occupancy detection.
  // Equal index bits with different wrap bits -> full; with equal wrap
  // bits -> empty. Computed from the real next-state pointers so the
  // registered flags always match the registered pointers.
  wire ifb_ptr_idx_match = (ifb_wr_ptr_nxt[`IFB_PTR_SZ-1:0] == ifb_rd_ptr_nxt[`IFB_PTR_SZ-1:0]);
  wire ifb_full_in  = ifb_ptr_idx_match & (ifb_wr_ptr_nxt[`IFB_PTR_SZ] != ifb_rd_ptr_nxt[`IFB_PTR_SZ]);
  wire ifb_empty_in = ifb_ptr_idx_match & (ifb_wr_ptr_nxt[`IFB_PTR_SZ] == ifb_rd_ptr_nxt[`IFB_PTR_SZ]);
  MDFFR #(1) ifb_full_ff  (clk, rst, 1'b0, ifb_full_in,  ifb_full);
  MDFFR #(1) ifb_empty_ff (clk, rst, 1'b1, ifb_empty_in, ifb_empty);

  // Instantiate flops for entries.
  // A push writes the entry addressed by the write pointer's index bits
  // (ifb_entry_ld was previously left undriven, so nothing was ever stored).
  assign ifb_entry_in = {if_valid,if_instr, if_fpc, if_bprd_info};
  assign ifb_entry_ld = {`IFB_ENTRIES{ifb_push}} & (`IFB_ENTRIES'h1 << ifb_wr_ptr[`IFB_PTR_SZ-1:0]);

  genvar g;
  generate
    for (g=0; g<`IFB_ENTRIES; g=g+1)
    begin : ifb_entry_gen
      MDFFL #(`IFB_ENTRY_SZ) entry_ff (clk, ifb_entry_ld[g], ifb_entry_in, ifb_entry[g]);
    end
  endgenerate

endmodule
trunk/rtl/if_buffer.v Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: trunk/rtl/ds_stage.v =================================================================== --- trunk/rtl/ds_stage.v (nonexistent) +++ trunk/rtl/ds_stage.v (revision 2) @@ -0,0 +1,223 @@ +////////////////////////////////////////////////////////////////// +// // +// OoOPs Core Instruction Dispatch module // +// // +// This file is part of the OoOPs project // +// http://www.opencores.org/project,oops // +// // +// Description: // +// Instruction dispatch block handles instruction register // +// renaming, and dependency checking, and dispatching // +// instructions to the ROB and appropriate Reservation Station.// +// Due to the structure of the map table, Dispatch is pipelined// +// into 2 stages: DS1 and DS2. // +// // +// DS1 stage will be for determining which checkpoint has the // +// latest valid mapping for a register, and for allocating the // +// destination physical register. // +// // +// DS2 stage will be for reading the map tables and dispatching// +// to the Reservation Stations. // +// // +// Author(s): // +// - Joshua Smith, smjoshua@umich.edu // +// // +////////////////////////////////////////////////////////////////// +// // +// Copyright (C) 2012 Authors and OPENCORES.ORG // +// // +// This source file may be used and distributed without // +// restriction provided that this copyright statement is not // +// removed from the file and that any derivative work contains // +// the original copyright notice and the associated disclaimer. // +// // +// This source file is free software; you can redistribute it // +// and/or modify it under the terms of the GNU Lesser General // +// Public License as published by the Free Software Foundation; // +// either version 2.1 of the License, or (at your option) any // +// later version. 
//                                                              //
// This source is distributed in the hope that it will be       //
// useful, but WITHOUT ANY WARRANTY; without even the implied   //
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
// PURPOSE. See the GNU Lesser General Public License for more  //
// details.                                                     //
//                                                              //
// You should have received a copy of the GNU Lesser General    //
// Public License along with this source; if not, download it   //
// from http://www.opencores.org/lgpl.shtml                     //
//                                                              //
//////////////////////////////////////////////////////////////////
`include "ooops_defs.v"

// Dispatch stage (DS1/DS2).
//
// DS1: extracts the source/destination register indices from the decode
//      bus and presents them to the map table.
// DS2: registers the instruction (fpc, delay-slot flag, decode bus, branch
//      prediction info), combines it with the map table's rename results
//      into ds2_ren_info and forwards it to the ROB and the ALU
//      reservation station.
//
// ds_stall is raised while the map table is initialising, or when the
// instruction held in DS2 cannot be accepted by the ROB or by the
// reservation station selected by its decode type.
module ds_stage (
  input  wire                       clk,
  input  wire                       rst,

  // Flush/stall interface
  input  wire                       rob_pipe_flush,
  output wire                       ds_stall,

  // Interface to ID stage
  input  wire                       id_ds1_valid,
  input  wire [`ADDR_SZ-1:0]        id_ds1_fpc,
  input  wire                       id_ds1_in_dly_slot,
  input  wire [`DEC_BUS_SZ-1:0]     id_ds1_dec_bus,
  input  wire [`BP_SZ-1:0]          id_ds1_bprd_info,

  // Interface to CDB (for tag monitoring)
  input  wire                       ex_cdb_valid,
  input  wire [`TAG_SZ-1:0]         ex_cdb_tag,
  input  wire [`REG_IDX_SZ-1:0]     ex_cdb_dest_idx,

  // Interface to ROB
  input  wire                       rob_ds_full,
  input  wire [`ROB_PTR_SZ-1:0]     rob_ds_tail_ptr,
  input  wire [`CHKPT_PTR_SZ-1:0]   rob_ds_chkpt_ptr,
  input  wire [`FL_PTR_SZ-1:0]      rob_ds_fl_head_ptr,
  input  wire                       rob_ds_ret_valid,
  input  wire                       rob_ds_ret_dest_write,
  input  wire                       rob_ds_ret_chkpt_free,
  input  wire [`REG_IDX_SZ-1:0]     rob_ds_ret_idx,
  input  wire [`TAG_SZ-1:0]         rob_ds_ret_tag,
  input  wire [`TAG_SZ-1:0]         rob_ds_ret_tag_old,
  output wire                       ds2_rob_valid,
  output wire [`ADDR_SZ-1:0]        ds2_rob_fpc,
  output wire                       ds2_rob_in_dly_slot,
  output wire [`DEC_BUS_SZ-1:0]     ds2_rob_dec_bus,
  output wire [`REN_BUS_SZ-1:0]     ds2_rob_ren_info,
  output wire [`BP_SZ-1:0]          ds2_rob_bprd_info,
  output wire [`CHKPT_PTR_SZ-1:0]   ds2_rob_chkpt_ptr,
  output wire [`FL_PTR_SZ-1:0]      ds2_rob_fl_head_ptr,

  // Interface to ALU RS
  input  wire                       rs_ds_alu_full,
  output wire                       ds2_rs_alu_valid,
  output wire [`ADDR_SZ-1:0]        ds2_rs_alu_fpc,
  output wire [`REN_BUS_SZ-1:0]     ds2_rs_alu_ren_info,
  output wire [`ALU_CTL_SZ-1:0]     ds2_rs_alu_ctl,
  output wire [`ROB_PTR_SZ-1:0]     ds2_rs_alu_rob_ptr

  // Interface to MULT/DIV RS
  //input  wire                       rs_ds_mult_div_full,
  //output wire                       ds2_rs_mult_div_valid,
  //output wire [`REN_BUS_SZ-1:0]     ds2_rs_mult_div_ren_info,
  //output wire [`MULTDIV_CTL_SZ-1:0] ds2_rs_mult_div_ctl,
  //output wire [`ROB_PTR_SZ-1:0]     ds2_rs_mult_div_rob_ptr,

  //// Interface to LDST RS
  //input  wire                       rs_ds_ldst_full,
  //output wire                       ds2_rs_ldst_valid,
  //output wire [`REN_BUS_SZ-1:0]     ds2_rs_ldst_ren_info,
  //output wire [`LDST_CTL_SZ-1:0]    ds2_rs_ldst_ctl,
  //output wire [`ROB_PTR_SZ-1:0]     ds2_rs_ldst_rob_ptr
  );

  // Internal wires/regs
  // DS1 stage signals
  wire [`REG_IDX_SZ-1:0]  ds1_src1_idx, ds1_src2_idx, ds1_dest_idx;
  wire                    ds1_dest_wr;
  wire                    ds1_type_br;

  // Driven by the map table; declared explicitly because it is used in
  // ds_stall before the instance that produces it.
  wire                    map_table_init;

  // The MULT/DIV and LDST reservation-station ports are still commented
  // out above. Tie their "full" indications low so the stall equation keeps
  // its final shape and compiles; delete these two wires when the ports are
  // enabled.
  wire                    rs_ds_mult_div_full = 1'b0;
  wire                    rs_ds_ldst_full     = 1'b0;

  // DS2 stage signals
  wire                    ds2_valid;
  wire [`ADDR_SZ-1:0]     ds2_fpc;
  wire                    ds2_in_dly_slot;
  wire [`DEC_BUS_SZ-1:0]  ds2_dec_bus;
  wire [`BP_SZ-1:0]       ds2_bprd_info;
  wire [`TAG_SZ-1:0]      ds2_src1_tag, ds2_src2_tag;
  wire                    ds2_src1_valid, ds2_src2_valid;
  wire [`TAG_SZ-1:0]      ds2_dest_tag;
  wire [`TAG_SZ-1:0]      ds2_dest_tag_old;
  wire [`REN_BUS_SZ-1:0]  ds2_ren_info;
  wire [`FL_PTR_SZ-1:0]   ds2_fl_head_ptr;

  // Handle stalling pipe for full ROB/RS
  // Since the stall has to propagate back to IF/ID stages, we may need this
  // to be an early signal (from a flop ideally).  In this case we may need ID stage
  // signals to determine the stall.
  assign ds_stall = map_table_init |
                    (ds2_valid & (
                      rob_ds_full |
                      (rs_ds_alu_full      & ds2_dec_bus[`DEC_TYPE_ALU])     |
                      (rs_ds_mult_div_full & ds2_dec_bus[`DEC_TYPE_MULTDIV]) |
                      (rs_ds_ldst_full     & ds2_dec_bus[`DEC_TYPE_LDST])));


  // Instantiate Map table for register renaming
  // Note: for MULT/DIV we will use both rename ports for the single instruction
  // because they write to both HI and LO.
  assign ds1_src1_idx = id_ds1_dec_bus[`DEC_REG_S_IDX];
  assign ds1_src2_idx = id_ds1_dec_bus[`DEC_REG_T_IDX];
  assign ds1_dest_idx = id_ds1_dec_bus[`DEC_REG_D_IDX];
  // Destination allocation and branch checkpointing only happen when the
  // instruction actually advances (valid and not stalled).
  assign ds1_dest_wr  = id_ds1_valid & id_ds1_dec_bus[`DEC_REG_D_WR] & !ds_stall;
  assign ds1_type_br  = id_ds1_valid & id_ds1_dec_bus[`DEC_TYPE_BR]  & !ds_stall;

  map_table mt0 (
    .clk(clk),
    .rst(rst),
    .map_table_init(map_table_init),

    .ds1_valid(id_ds1_valid),
    .ds1_src1_idx(ds1_src1_idx),
    .ds1_src2_idx(ds1_src2_idx),
    .ds1_dest_idx(ds1_dest_idx),
    .ds1_dest_wr(ds1_dest_wr),
    .ds1_type_br(ds1_type_br),
    .ds2_src1_tag(ds2_src1_tag),
    .ds2_src2_tag(ds2_src2_tag),
    // Connect to the declared ds2_* wires that feed ds2_ren_info; the
    // previous ds_src*_valid names created unused implicit nets.
    .ds2_src1_valid(ds2_src1_valid),
    .ds2_src2_valid(ds2_src2_valid),
    .ds2_dest_tag(ds2_dest_tag),
    .ds2_dest_tag_old(ds2_dest_tag_old),
    .ds2_fl_head_ptr(ds2_fl_head_ptr),
    .ds2_chkpt_ptr(ds2_rob_chkpt_ptr),    // drives the ROB output port directly

    //.ex_cdb_bus(ex_cdb_bus),
    .rob_pipe_flush(rob_pipe_flush),
    .rob_ds_ret_valid(rob_ds_ret_valid),
    .rob_ds_ret_dest_write(rob_ds_ret_dest_write),
    .rob_ds_chkpt_ptr(rob_ds_chkpt_ptr),
    .rob_ds_fl_head_ptr(rob_ds_fl_head_ptr),
    .rob_ds_ret_chkpt_free(rob_ds_ret_chkpt_free),
    .rob_ds_ret_idx(rob_ds_ret_idx),
    .rob_ds_ret_tag(rob_ds_ret_tag),
    .rob_ds_ret_tag_old(rob_ds_ret_tag_old)
  );

  // Flop info into DS2 stage.
  // The payload registers capture a new instruction only when DS2 is free to
  // advance. Loading them on id_ds1_valid alone overwrote the instruction
  // held in DS2 while ds_stall kept ds2_valid asserted.
  wire ds2_ld = id_ds1_valid & !ds_stall;

  MDFFLR #(1)             ds2_valid_ff       (clk, rst, !ds_stall, 1'b0, id_ds1_valid, ds2_valid);
  MDFFL  #(`ADDR_SZ)      ds2_fpc_ff         (clk, ds2_ld, id_ds1_fpc, ds2_fpc);
  MDFFLR #(1)             ds2_in_dly_slot_ff (clk, rst, ds2_ld, 1'b0, id_ds1_in_dly_slot, ds2_in_dly_slot);
  MDFFL  #(`DEC_BUS_SZ)   ds2_dec_bus_ff     (clk, ds2_ld, id_ds1_dec_bus, ds2_dec_bus);
  MDFFL  #(`BP_SZ)        ds2_bprd_info_ff   (clk, ds2_ld, id_ds1_bprd_info, ds2_bprd_info);


  // Construct dispatch packets to the different Reservation Stations
  assign ds2_ren_info = { ds2_dec_bus[`DEC_REG_D_IDX],     // DEST_IDX
                          ds2_dec_bus[`DEC_REG_D_WR],      // DEST_VLD
                          ds2_dest_tag_old,                // DEST_TAG_OLD
                          ds2_dest_tag,                    // DEST_TAG
                          ds2_src2_valid,                  // SRC2_VLD
                          ds2_dec_bus[`DEC_REG_T_NEED],    // SRC2_NEED
                          ds2_src2_tag,                    // SRC2_TAG
                          ds2_src1_valid,                  // SRC1_VLD
                          ds2_dec_bus[`DEC_REG_S_NEED],    // SRC1_NEED
                          ds2_src1_tag                     // SRC1_TAG
                        };

  // Handle outputs to ROB.
  // These drive the declared ds2_rob_* ports; the previous ds_rob_* targets
  // were implicit nets, which left the real outputs floating.
  // ds2_rob_chkpt_ptr is driven by the map table instance above.
  assign ds2_rob_valid        = ds2_valid;
  assign ds2_rob_fpc          = ds2_fpc;
  assign ds2_rob_in_dly_slot  = ds2_in_dly_slot;
  assign ds2_rob_dec_bus      = ds2_dec_bus;
  assign ds2_rob_ren_info     = ds2_ren_info;
  assign ds2_rob_bprd_info    = ds2_bprd_info;
  assign ds2_rob_fl_head_ptr  = ds2_fl_head_ptr;

  // Handle outputs to ALU RS
  assign ds2_rs_alu_valid     = ds2_valid & ds2_dec_bus[`DEC_TYPE_ALU] & !ds_stall;
  assign ds2_rs_alu_fpc       = ds2_fpc;
  assign ds2_rs_alu_ren_info  = ds2_ren_info;
  assign ds2_rs_alu_rob_ptr   = rob_ds_tail_ptr;
  // NOTE(review): ds2_rs_alu_ctl has no driver in this module. The decode-bus
  // field that should feed it is not visible from this file -- confirm
  // against ooops_defs.v and add the assignment.

endmodule
trunk/rtl/ds_stage.v Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: trunk/rtl/sources.list =================================================================== --- trunk/rtl/sources.list (nonexistent) +++ trunk/rtl/sources.list (revision 2) @@ -0,0 +1,19 @@ +rtl/ooops_defs.v +rtl/ooops_lib.v +rtl/core.v +rtl/sp_sram.v +rtl/if_stage.v +rtl/if_buffer.v +rtl/icache_top.v +rtl/icache_ctl.v +rtl/id_stage.v +rtl/ds_stage.v +rtl/map_table.v +rtl/free_list.v +rtl/rs_top.v +rtl/rs.v +rtl/rs_entry.v +rtl/regfile.v +rtl/psel.v +rtl/dp_sram.v +rtl/wb_master.v Index: trunk/tb/test_map_table.v =================================================================== --- trunk/tb/test_map_table.v (nonexistent) +++ trunk/tb/test_map_table.v (revision 2) @@ -0,0 +1,289 @@ +////////////////////////////////////////////////////////////////// +// // +// OoOPs Core Register Map Table testbench // +// // +// This file is part of the OoOPs project // +// http://www.opencores.org/project,oops // +// // +// Description: // +// Small, self-contained testbench for basic functionality of // +// the Map Table. // +// // +// Author(s): // +// - Joshua Smith, smjoshua@umich.edu // +// // +////////////////////////////////////////////////////////////////// +// // +// Copyright (C) 2012 Authors and OPENCORES.ORG // +// // +// This source file may be used and distributed without // +// restriction provided that this copyright statement is not // +// removed from the file and that any derivative work contains // +// the original copyright notice and the associated disclaimer. // +// // +// This source file is free software; you can redistribute it // +// and/or modify it under the terms of the GNU Lesser General // +// Public License as published by the Free Software Foundation; // +// either version 2.1 of the License, or (at your option) any // +// later version. 
//                                                              //
// This source is distributed in the hope that it will be       //
// useful, but WITHOUT ANY WARRANTY; without even the implied   //
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
// PURPOSE. See the GNU Lesser General Public License for more  //
// details.                                                     //
//                                                              //
// You should have received a copy of the GNU Lesser General    //
// Public License along with this source; if not, download it   //
// from http://www.opencores.org/lgpl.shtml                     //
//                                                              //
//////////////////////////////////////////////////////////////////
`include "ooops_defs.v"

// Directed testbench for the map table.
//
// Sequence: reset -> wait for map_table_init to drop -> rename two dependent
// instructions -> rename a branch (allocates a checkpoint) -> rename two
// more instructions -> retire and signal a branch flush -> rename again and
// check that the pre-branch mappings are visible.
// Inputs are changed on the falling clock edge; each check compares the DS2
// outputs of the map table against hard-coded expected tags and calls
// fail(<test number>) on a mismatch.
module test_map_table;

  // Upper bound (in clock cycles) on how long we wait for the DUT to finish
  // its initialisation before giving up.
  parameter INIT_TIMEOUT_CYCLES = 10000;

  // I/O to Map Table DUT
  reg                       clk;
  reg                       rst;
  reg                       ds1_valid;
  reg  [`REG_IDX_SZ-1:0]    ds1_src1_idx;
  reg  [`REG_IDX_SZ-1:0]    ds1_src2_idx;
  reg  [`REG_IDX_SZ-1:0]    ds1_dest_idx;
  reg                       ds1_dest_wr;
  reg                       ds1_type_br;
  reg                       rob_pipe_flush;
  reg                       rob_ds_ret_valid;
  reg                       rob_ds_ret_dest_write;
  reg  [`CHKPT_PTR_SZ-1:0]  rob_ds_chkpt_ptr;
  reg  [`FL_PTR_SZ-1:0]     rob_ds_fl_head_ptr;
  reg                       rob_ds_ret_chkpt_free;
  reg  [`REG_IDX_SZ-1:0]    rob_ds_ret_idx;
  reg  [`TAG_SZ-1:0]        rob_ds_ret_tag;
  reg  [`TAG_SZ-1:0]        rob_ds_ret_tag_old;

  wire                      map_table_init;
  wire [`TAG_SZ-1:0]        ds2_src1_tag;
  wire [`TAG_SZ-1:0]        ds2_src2_tag;
  wire                      ds2_src1_valid;
  wire                      ds2_src2_valid;
  wire [`TAG_SZ-1:0]        ds2_dest_tag;
  wire [`TAG_SZ-1:0]        ds2_dest_tag_old;
  wire [`FL_PTR_SZ-1:0]     ds2_fl_head_ptr;
  wire [`CHKPT_PTR_SZ-1:0]  ds2_chkpt_ptr;

  // Counts cycles spent waiting for map_table_init to deassert.
  integer                   init_wait_cycles;


  // Instantiate DUT
  map_table m0 (
    .clk(clk),
    .rst(rst),
    .map_table_init(map_table_init),

    .ds1_valid(ds1_valid),
    .ds1_src1_idx(ds1_src1_idx),
    .ds1_src2_idx(ds1_src2_idx),
    .ds1_dest_idx(ds1_dest_idx),
    .ds1_dest_wr(ds1_dest_wr),
    .ds1_type_br(ds1_type_br),
    .ds2_src1_tag(ds2_src1_tag),
    .ds2_src2_tag(ds2_src2_tag),
    .ds2_src1_valid(ds2_src1_valid),
    .ds2_src2_valid(ds2_src2_valid),
    .ds2_dest_tag(ds2_dest_tag),
    .ds2_dest_tag_old(ds2_dest_tag_old),
    .ds2_fl_head_ptr(ds2_fl_head_ptr),
    .ds2_chkpt_ptr(ds2_chkpt_ptr),

    .rob_pipe_flush(rob_pipe_flush),
    .rob_ds_ret_valid(rob_ds_ret_valid),
    .rob_ds_ret_dest_write(rob_ds_ret_dest_write),
    .rob_ds_chkpt_ptr(rob_ds_chkpt_ptr),
    .rob_ds_fl_head_ptr(rob_ds_fl_head_ptr),
    .rob_ds_ret_chkpt_free(rob_ds_ret_chkpt_free),
    .rob_ds_ret_idx(rob_ds_ret_idx),
    .rob_ds_ret_tag(rob_ds_ret_tag),
    .rob_ds_ret_tag_old(rob_ds_ret_tag_old)
  );


  // generate clk (toggles every 5 time units)
  always begin
    #5;
    clk = ~clk;
  end

  initial begin
    // Initialize clk and inputs
    clk = 1'b0;
    rst = 1'b1;

    ds1_valid             = 0;
    ds1_src1_idx          = 0;
    ds1_src2_idx          = 0;
    ds1_dest_idx          = 0;
    ds1_dest_wr           = 1'b0;
    ds1_type_br           = 1'b0;
    rob_pipe_flush        = 1'b0;
    rob_ds_ret_valid      = 1'b0;
    rob_ds_ret_dest_write = 1'b0;
    rob_ds_chkpt_ptr      = 0;
    rob_ds_fl_head_ptr    = 0;
    rob_ds_ret_chkpt_free = 0;
    rob_ds_ret_idx        = 0;
    rob_ds_ret_tag        = 0;
    rob_ds_ret_tag_old    = 0;

    // Set up waveform dump
`ifdef WAVE_DUMP
    $dumpfile("wave.vcd");
    $dumpvars(0,test_map_table);
`endif

    // Assert reset for a couple clks
    $display("Asserting reset...");
    repeat (3) @(negedge clk);
    rst = 1'b0;
    $display("Reset done.");

    // Wait for initialization to be done. The wait is bounded so that a DUT
    // whose init flag never drops ends the run with an error instead of
    // hanging the simulator.
    init_wait_cycles = 0;
    while (map_table_init && (init_wait_cycles < INIT_TIMEOUT_CYCLES)) begin
      @(negedge clk);
      init_wait_cycles = init_wait_cycles + 1;
    end
    if (map_table_init) begin
      $display("ERROR: map_table_init still asserted after %0d cycles", INIT_TIMEOUT_CYCLES);
      $finish;
    end

    // Rename one instruction
    set_rename_inputs(1, 2, 3, 1'b1, 1'b0);  // Read r1, r2; write r3; not branch
    @(negedge clk);
    clear_rename_inputs;

    // Check output src and dest tags
    if ((ds2_src1_tag != 'd1) || (ds2_src2_tag != 'd2) || (ds2_dest_tag != 'd34))
      fail('d1);

    // Rename a second dependent instruction
    set_rename_inputs(1, 3, 4, 1'b1, 1'b0);  // Read r1, r3; write r4; not branch
    @(negedge clk);
    clear_rename_inputs;

    // Check output src and dest tags
    if ((ds2_src1_tag != 'd1) || (ds2_src2_tag != 'd34) || (ds2_dest_tag != 'd35))
      fail('d2);


    // Rename a branch which does not write a register to allocate new checkpoint
    set_rename_inputs(3, 4, 4, 1'b0, 1'b1);  // Read r3, r4; no write; is branch
    @(negedge clk);
    clear_rename_inputs;

    // Check output src tags and checkpoint ptr
    if ((ds2_src1_tag != 'd34) || (ds2_src2_tag != 'd35) || (ds2_dest_tag != 'd36) ||
        (ds2_chkpt_ptr != 'd0))
      fail('d3);


    // Rename two more instructions to overwrite r3 and r4, then recover from checkpoint
    set_rename_inputs(1, 2, 3, 1'b1, 1'b0);  // Read r1, r2; write r3; not branch
    @(negedge clk);
    // Check tag and chkpt_ptr outputs
    // (inputs are deliberately not cleared here: the next rename follows
    // back-to-back)
    if ((ds2_src1_tag != 'd1) || (ds2_src2_tag != 'd2) || (ds2_dest_tag != 'd36) || (ds2_dest_tag_old != 'd34) || (ds2_chkpt_ptr != 'd1))
      fail('d4);

    set_rename_inputs(1, 3, 4, 1'b1, 1'b0);  // Read r1, r3; write r4; not branch
    @(negedge clk);
    clear_rename_inputs;
    // Check tag and chkpt_ptr outputs
    if ((ds2_src1_tag != 'd1) || (ds2_src2_tag != 'd36) || (ds2_dest_tag_old != 'd35) || (ds2_chkpt_ptr != 'd1))
      fail('d5);

    // Retire in-flight instructions, then recover checkpoint from branch misprediction
    set_retire_inputs(1'b0, 1'b1, 1'b0, 0, 'd35, 'd3, 'd34, 'd3);
    @(negedge clk);
    set_retire_inputs(1'b0, 1'b1, 1'b0, 0, 'd36, 'd4, 'd35, 'd4);
    @(negedge clk);
    set_retire_inputs(1'b1, 1'b0, 1'b0, 0, 'd37, 'd4, 'd36, 'd35);  // Branch flush, don't free checkpoint
    @(negedge clk);
    clear_retire_inputs;

    // Now rename instruction that reads r3 and r4
    set_rename_inputs(3, 4, 4, 1'b1, 1'b0);  // Read r3, r4; write r4; not branch
    @(negedge clk);
    clear_rename_inputs;
    // Check output tags and chkpt_ptr
    if ((ds2_src1_tag != 'd34) || (ds2_src2_tag != 'd35) || (ds2_dest_tag != 'd37) || (ds2_dest_tag_old != 'd35) || (ds2_chkpt_ptr != 'd0))
      fail('d6);



    // Let clock run for a few cycles before finishing
    repeat (5) @(negedge clk);
    $display("Finished!");
    $finish;
  end


  // Task to easily set all rename inputs
  task set_rename_inputs;
    input [`REG_IDX_SZ-1:0] src1_idx, src2_idx, dest_idx;
    input                   dest_wr;
    input                   type_br;

    begin
      ds1_valid     = 1'b1;
      ds1_src1_idx  = src1_idx;
      ds1_src2_idx  = src2_idx;
      ds1_dest_idx  = dest_idx;
      ds1_dest_wr   = dest_wr;
      ds1_type_br   = type_br;
    end
  endtask

  // Return every rename input to its idle value.
  task clear_rename_inputs;
    begin
      ds1_valid     = 1'b0;
      ds1_src1_idx  = 0;
      ds1_src2_idx  = 0;
      ds1_dest_idx  = 0;   // previously left at its last value
      ds1_dest_wr   = 1'b0;
      ds1_type_br   = 1'b0;
    end
  endtask

  // Drive the ROB retire/flush interface for one instruction.
  // Argument order: flush, dest write, checkpoint free, checkpoint pointer,
  // free-list head pointer, destination index, destination tag, old tag.
  task set_retire_inputs;
    input                     pipe_flush, dest_write, chkpt_free;
    input [`CHKPT_PTR_SZ-1:0] chkpt_ptr;
    input [`FL_PTR_SZ-1:0]    fl_head_ptr;
    input [`REG_IDX_SZ-1:0]   dest_idx;
    input [`TAG_SZ-1:0]       dest_tag, dest_tag_old;

    begin
      rob_ds_ret_valid      = 1'b1;
      rob_pipe_flush        = pipe_flush;
      rob_ds_ret_dest_write = dest_write;
      rob_ds_ret_chkpt_free = chkpt_free;
      rob_ds_chkpt_ptr      = chkpt_ptr;
      rob_ds_fl_head_ptr    = fl_head_ptr;
      rob_ds_ret_idx        = dest_idx;
      rob_ds_ret_tag        = dest_tag;
      rob_ds_ret_tag_old    = dest_tag_old;
    end
  endtask

  // Return every retire/flush input to its idle value.
  task clear_retire_inputs;
    begin
      rob_ds_ret_valid      = 1'b0;
      rob_pipe_flush        = 1'b0;
      rob_ds_ret_dest_write = 1'b0;
      rob_ds_ret_chkpt_free = 1'b0;
      rob_ds_chkpt_ptr      = 0;
      rob_ds_fl_head_ptr    = 0;
      rob_ds_ret_idx        = 0;
      rob_ds_ret_tag        = 0;
      rob_ds_ret_tag_old    = 0;
    end
  endtask

  // Report a failed check, let the clock run three more cycles (so the
  // failure is visible in a waveform dump) and end the simulation.
  task fail;
    input integer test_num;
    begin
      $display("ERROR: Failed on test %0d at time %0d", test_num, $time);
      repeat(3) @(negedge clk);
      $finish;
    end
  endtask
endmodule
Index: trunk/Makefile
===================================================================
--- trunk/Makefile (nonexistent)
+++ trunk/Makefile (revision 2)
@@ -0,0 +1,28 @@
# Some useful constants
SRC_LIST = ./rtl/sources.list
TB = tb/test_map_table.v

# Simulator used for testing is Icarus Verilog
# To dump waves, add -DWAVE_DUMP to ICARUS_OPTS
INCLUDE_CMD = -I ./rtl
ICARUS_OPTS = -DWAVE_DUMP
ICARUS_CMD = iverilog

MAP_TABLE_SRC = rtl/map_table.v \
                rtl/free_list.v \
                rtl/dp_sram.v \
                rtl/ooops_defs.v \
                rtl/ooops_lib.v
MAP_TABLE_TB = tb/test_map_table.v

# None of these targets produces a file of the same name
.PHONY: all sim map_table clean

all: sim

# Main command to compile simulation model
sim:
	$(ICARUS_CMD) $(ICARUS_OPTS) $(INCLUDE_CMD) -f $(SRC_LIST) $(TB) -o sim.exe

map_table: $(MAP_TABLE_SRC) $(MAP_TABLE_TB)
	$(ICARUS_CMD) $(ICARUS_OPTS) $(INCLUDE_CMD) $(MAP_TABLE_SRC) $(MAP_TABLE_TB) -o sim.exe

# -f: do not fail when there is nothing to remove
clean:
	rm -f ./*.exe

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.