URL
https://opencores.org/ocsvn/oops/oops/trunk
Subversion Repositories oops
Compare Revisions
- This comparison shows the changes necessary to convert path
/oops
- from Rev 1 to Rev 2
- ↔ Reverse comparison
Rev 1 → Rev 2
/trunk/rtl/free_list.v
0,0 → 1,128
////////////////////////////////////////////////////////////////// |
// // |
// OoOPs Core Register Free List module // |
// // |
// This file is part of the OoOPs project // |
// http://www.opencores.org/project,oops // |
// // |
// Description: // |
// The free list is a circular FIFO used to keep track of free // |
// physical registers that can be allocated to new instructions// |
// New tags are allocated from the head and freed tags are // |
// written to the tail of the FIFO. // |
// The head pointer+1 is passed along with branches so that // |
// the FIFO state can be recovered after a misprediction. // |
// // |
// Note: MULT/DIV instructions will require two tags since // |
// they update both HI and LO registers. // |
// // |
// Author(s): // |
// - Joshua Smith, smjoshua@umich.edu // |
// // |
////////////////////////////////////////////////////////////////// |
// // |
// Copyright (C) 2012 Authors and OPENCORES.ORG // |
// // |
// This source file may be used and distributed without // |
// restriction provided that this copyright statement is not // |
// removed from the file and that any derivative work contains // |
// the original copyright notice and the associated disclaimer. // |
// // |
// This source file is free software; you can redistribute it // |
// and/or modify it under the terms of the GNU Lesser General // |
// Public License as published by the Free Software Foundation; // |
// either version 2.1 of the License, or (at your option) any // |
// later version. // |
// // |
// This source is distributed in the hope that it will be // |
// useful, but WITHOUT ANY WARRANTY; without even the implied // |
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // |
// PURPOSE. See the GNU Lesser General Public License for more // |
// details. // |
// // |
// You should have received a copy of the GNU Lesser General // |
// Public License along with this source; if not, download it // |
// from http://www.opencores.org/lgpl.shtml // |
// // |
////////////////////////////////////////////////////////////////// |
`include "ooops_defs.v" |
|
// TODO: consider making this a bit-vector-based free list to save on |
// flop usage. With tag FIFO, we have (`ARCH_REGS+`ROB_ENTRIES)*`TAG_SZ flops |
// just for the storage. |
// Register free list: a circular FIFO of physical register tags. |
// - Pop (ds1_dest_wr): the head advances; ds1_dest_tag and ds1_fl_head_ptr are |
// registered copies of the entry at the head and of the head pointer + 1. |
// - Push (rob_ds_ret_valid & rob_ds_ret_dest_write): rob_ds_ret_tag_old is |
// written at the tail and the tail advances. |
// - Flush (rob_pipe_flush): the head is reloaded from rob_ds_fl_head_ptr. |
// All pointers wrap at `FL_SZ, which is not required to be a power of two. |
module free_list ( |
input wire clk, |
input wire rst, |
input wire ds1_dest_wr, // Dispatch allocates (pops) the tag at the head |
|
input wire rob_pipe_flush, // Restore head pointer from rob_ds_fl_head_ptr |
input wire [`FL_PTR_SZ-1:0] rob_ds_fl_head_ptr, // Head pointer value to restore on a flush |
input wire rob_ds_ret_valid, // An instruction is retiring |
input wire rob_ds_ret_dest_write, // ...and it wrote a destination, so a tag is freed |
input wire [`TAG_SZ-1:0] rob_ds_ret_tag_old, // Tag being returned to the free list |
|
output wire [`TAG_SZ-1:0] ds1_dest_tag, // Registered tag at the head of the list |
output wire [`FL_PTR_SZ-1:0] ds1_fl_head_ptr // Registered head pointer + 1 (wrapped) |
); |
|
// Internal wires/regs |
wire [`TAG_SZ-1:0] tag_list [`FL_SZ-1:0]; |
wire [`TAG_SZ-1:0] tag_list_in [`FL_SZ-1:0]; |
wire [`FL_SZ-1:0] tag_list_ld; |
wire [`FL_PTR_SZ-1:0] head_ptr; |
wire [`FL_PTR_SZ-1:0] head_ptr_p1; |
wire [`FL_PTR_SZ-1:0] head_ptr_p2; |
wire [`FL_PTR_SZ-1:0] head_ptr_in; |
wire [`FL_PTR_SZ-1:0] tail_ptr; |
wire [`FL_PTR_SZ-1:0] tail_ptr_p1; |
wire [`FL_PTR_SZ-1:0] tail_ptr_in; |
wire pop; |
wire push; |
wire [`FL_PTR_SZ-1:0] rob_ds_fl_head_ptr_p1; |
wire [`TAG_SZ-1:0] ds1_dest_tag_in; |
wire [`FL_PTR_SZ-1:0] ds1_fl_head_ptr_in; |
|
// Handle output generation |
// For timing, make dest_tag and fl_head_ptr available from a flop. |
// On a pop the outputs move on to the entry after the current head; on a |
// flush they are reloaded from the pointer supplied by the ROB. |
// TODO: verify corner cases such as free list becomes empty (so next head_ptr is tail_ptr) and instruction retiring. |
assign ds1_dest_tag_in = rob_pipe_flush ? tag_list[rob_ds_fl_head_ptr] : |
//(head_ptr_p1 == tail_ptr) & push ? rob_ds_ret_tag_old : |
tag_list[head_ptr_p1]; // ds1_dest_wr case |
|
assign rob_ds_fl_head_ptr_p1 = (rob_ds_fl_head_ptr == `FL_SZ-1) ? {`FL_PTR_SZ{1'b0}} : rob_ds_fl_head_ptr + `FL_PTR_SZ'h1; |
assign ds1_fl_head_ptr_in = rob_pipe_flush ? rob_ds_fl_head_ptr_p1 : head_ptr_p2; |
|
MDFFLR #(`TAG_SZ) ds1_dest_tag_ff (clk, rst, pop | rob_pipe_flush, `ARCH_REGS, ds1_dest_tag_in, ds1_dest_tag); |
MDFFLR #(`FL_PTR_SZ) ds1_fl_head_ptr_ff (clk, rst, pop | rob_pipe_flush, `ARCH_REGS+1, ds1_fl_head_ptr_in, ds1_fl_head_ptr); |
|
|
// Handle updating head/tail pointers |
assign pop = ds1_dest_wr; |
assign push = rob_ds_ret_valid & rob_ds_ret_dest_write; |
assign head_ptr_p1 = (head_ptr == `FL_SZ-1) ? {`FL_PTR_SZ{1'b0}} : head_ptr + `FL_PTR_SZ'h1; |
// head_ptr + 2 with wrap-around, built as a wrapped increment of head_ptr_p1 so |
// that both head_ptr == `FL_SZ-2 (-> 0) and head_ptr == `FL_SZ-1 (-> 1) wrap |
// correctly. Adding 2 directly would produce `FL_SZ+1 in the latter case, which |
// is not a valid index because `FL_SZ is not a power of two. |
assign head_ptr_p2 = (head_ptr_p1 == `FL_SZ-1) ? {`FL_PTR_SZ{1'b0}} : head_ptr_p1 + `FL_PTR_SZ'h1; |
assign tail_ptr_p1 = (tail_ptr == `FL_SZ-1) ? {`FL_PTR_SZ{1'b0}} : tail_ptr + `FL_PTR_SZ'h1; |
assign head_ptr_in = (rob_pipe_flush) ? rob_ds_fl_head_ptr : head_ptr_p1; |
assign tail_ptr_in = tail_ptr_p1; |
|
// Initialize head pointer to `ARCH_REGS because architected registers will |
// be allocated out of reset. |
MDFFLR #(`FL_PTR_SZ) head_ptr_ff (clk, rst, pop | rob_pipe_flush, `ARCH_REGS, head_ptr_in, head_ptr); |
MDFFLR #(`FL_PTR_SZ) tail_ptr_ff (clk, rst, push, {`FL_PTR_SZ{1'b0}}, tail_ptr_in, tail_ptr); |
|
// Handle updating list |
// Only the entry addressed by tail_ptr is loaded on a push (one-hot load vector). |
// Reset list so that physical registers beyond 33 are initialized into free list: |
// entries below `ARCH_REGS reset to tag 0, entries at or above it reset to their own index. |
assign tag_list_ld = (push << tail_ptr); |
genvar g; |
generate |
for (g=0; g<`FL_SZ; g=g+1) begin: fl_gen |
assign tag_list_in[g] = rob_ds_ret_tag_old; |
if (g < `ARCH_REGS) |
MDFFLR #(`TAG_SZ) tag_list_ff (clk, rst, tag_list_ld[g], `TAG_SZ'h0, tag_list_in[g], tag_list[g]); |
else |
MDFFLR #(`TAG_SZ) tag_list_ff (clk, rst, tag_list_ld[g], g[`TAG_SZ-1:0], tag_list_in[g], tag_list[g]); |
end |
endgenerate |
|
endmodule |
trunk/rtl/free_list.v
Property changes :
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Index: trunk/rtl/icache_ctl.v
===================================================================
--- trunk/rtl/icache_ctl.v (nonexistent)
+++ trunk/rtl/icache_ctl.v (revision 2)
@@ -0,0 +1,220 @@
+//////////////////////////////////////////////////////////////////
+// //
+// OoOPs Core Instruction Cache Control module //
+// //
+// This file is part of the OoOPs project //
+// http://www.opencores.org/project,oops //
+// //
+// Description: //
+// Controller for Instruction Cache. Block takes requests from//
+// the IF stage, handles the inputs to the cache RAMs, detects //
+// cache hits, and generates bus requests if the cache misses. //
+// The controller is only capable of handling one outstanding //
+// miss and does no prefetching. //
+// //
+// Author(s): //
+// - Joshua Smith, smjoshua@umich.edu //
+// //
+//////////////////////////////////////////////////////////////////
+// //
+// Copyright (C) 2012 Authors and OPENCORES.ORG //
+// //
+// This source file may be used and distributed without //
+// restriction provided that this copyright statement is not //
+// removed from the file and that any derivative work contains //
+// the original copyright notice and the associated disclaimer. //
+// //
+// This source file is free software; you can redistribute it //
+// and/or modify it under the terms of the GNU Lesser General //
+// Public License as published by the Free Software Foundation; //
+// either version 2.1 of the License, or (at your option) any //
+// later version. //
+// //
+// This source is distributed in the hope that it will be //
+// useful, but WITHOUT ANY WARRANTY; without even the implied //
+// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR //
+// PURPOSE. See the GNU Lesser General Public License for more //
+// details. //
+// //
+// You should have received a copy of the GNU Lesser General //
+// Public License along with this source; if not, download it //
+// from http://www.opencores.org/lgpl.shtml //
+// //
+//////////////////////////////////////////////////////////////////
+`include "ooops_defs.v"
+
+// Instruction cache controller.
+// A small state machine (INIT -> IDLE -> REQ -> WAIT -> IDLE) tracks a single
+// outstanding miss. The module drives the tag/data RAM ports, raises the bus
+// request, and reports hits and instruction data to the IF stage. When USE_IC
+// is not defined the RAM ports are tied off and data is returned straight from
+// bus2ic_data.
+module icache_ctl (
+ input wire clk,
+ input wire rst,
+ input wire rob_pipe_flush,
+
+ // Coprocessor interface (for IC enable)
+ input wire cp0_ic_enable,
+
+ // IF interface
+ input wire if_ic_req,
+ input wire [`ADDR_SZ-1:0] if_ic_fpc,
+ input wire [`ADDR_SZ-1:0] r_if_ic_fpc,
+ output wire [`INSTR_SZ-1:0] ic_if_data,
+ output wire ic_if_data_valid,
+ output wire ic_if_ready,
+
+ // Interface to cache memories
+ input wire [`IC_TAGRAM_SZ-1:0] ic_tagram_data,
+ input wire [`IC_LINE_SZ-1:0] ic_dataram_data,
+ output wire [`IC_LINE_SZ-1:0] ic_dataram_wr_data,
+ output wire [`IC_SI_SZ-1:0] ic_dataram_addr,
+ output wire ic_dataram_wren,
+ output wire [`IC_TAGRAM_SZ-1:0] ic_tagram_wr_data,
+ output wire [`IC_SI_SZ-1:0] ic_tagram_addr,
+ output wire ic_tagram_wren,
+
+ // Memory interface
+ output wire ic2bus_req,
+ output wire [`ADDR_SZ-1:0] ic2bus_fpc,
+ input wire bus2ic_valid,
+ input wire [`SYS_BUS_SZ-1:0] bus2ic_data
+ );
+
+ // State encodings. IC_STATE_WR_RAM is declared but no transition enters it.
+ parameter IC_STATE_IDLE = 3'h0,
+ IC_STATE_REQ = 3'h1,
+ IC_STATE_WAIT = 3'h2,
+ IC_STATE_WR_RAM = 3'h3,
+ IC_STATE_INIT = 3'h4;
+
+ // Internal wires/regs
+ wire ic_cache_hit;
+ wire ic_tag_match;
+ wire ic_tag_valid;
+ wire [`IC_TAG_SZ-1:0] ic_tag;
+ wire r_if_ic_req;
+
+ wire [2:0] ic_state;
+ reg [2:0] ic_nstate;
+ wire [`IC_SI] ic_init_ctr;
+ wire [`IC_SI] ic_init_nctr;
+ wire ic_do_init;
+ wire ic_init_done;
+ wire ic_initialized;
+ wire ic_initialized_ld;
+
+ // Latch req signal so we can correctly assert "hit" and also request to memory
+ // on a cache miss.
+ MDFFR #(1) r_if_ic_req_ff (clk, rst, 1'b0, if_ic_req, r_if_ic_req);
+
+ // If we get a cache miss and then a flush happens, need to make sure that instruction
+ // coming back isn't sent down the pipe. The flag is set by a flush while the state
+ // machine is not idle and is held until bus2ic_valid is seen.
+ wire rob_pipe_flush_seen, rob_pipe_flush_seen_in;
+ assign rob_pipe_flush_seen_in = rob_pipe_flush_seen ? ~bus2ic_valid : rob_pipe_flush & (ic_state != IC_STATE_IDLE);
+ MDFFR #(1) rob_pipe_flush_seen_ff (clk, rst, 1'b0, rob_pipe_flush_seen_in, rob_pipe_flush_seen);
+
+ // Handle interface to Data and Tag SRAMs
+ // Address priority: init counter while initializing, the miss address while
+ // waiting for the bus, otherwise the fetch address from IF.
+ `ifdef USE_IC
+ assign ic_do_init = (ic_state == IC_STATE_INIT);
+ assign ic_dataram_wren = ic_do_init || (bus2ic_valid & !rob_pipe_flush_seen);
+ assign ic_dataram_wr_data = ic_do_init ? {`IC_LINE_SZ{1'b0}} : bus2ic_data;
+ assign ic_dataram_addr = (ic_state == IC_STATE_INIT) ? ic_init_ctr[`IC_SI] :
+ (ic_state == IC_STATE_WAIT) ? ic2bus_fpc[`IC_SI] :
+ if_ic_fpc[`IC_SI];
+
+ assign ic_tagram_wren = ic_do_init || (bus2ic_valid & !rob_pipe_flush_seen);
+ assign ic_tagram_wr_data = ic_do_init ? {`IC_TAGRAM_SZ{1'b0}} : {1'b1, 1'b0, ic2bus_fpc[`IC_TAG]};
+ assign ic_tagram_addr = (ic_state == IC_STATE_INIT) ? ic_init_ctr[`IC_SI] :
+ (ic_state == IC_STATE_WAIT) ? ic2bus_fpc[`IC_SI] :
+ if_ic_fpc[`IC_SI];
+ `else
+ // If not including Icache, then just zero cache inputs out
+ assign ic_do_init = 1'b0;
+ assign ic_dataram_wren = 1'b0;
+ assign ic_dataram_wr_data = {`IC_LINE_SZ{1'b0}};
+ assign ic_dataram_addr = {`IC_SI_SZ{1'b0}};
+
+ assign ic_tagram_wren = 1'b0;
+ // Sized with `IC_TAGRAM_SZ to match the port width (was `IC_LINE_SZ).
+ assign ic_tagram_wr_data = {`IC_TAGRAM_SZ{1'b0}};
+ assign ic_tagram_addr = {`IC_SI_SZ{1'b0}};
+ `endif
+
+ // Handle tag comparison and IF interface
+ // Note: ic_if_ready just means we've initialized SRAMs. This will block
+ // IF requests and stall the pipeline on startup.
+ `ifdef USE_IC
+ assign ic_tag = ic_tagram_data[`IC_TAGRAM_TAG];
+ assign ic_tag_valid = ic_tagram_data[`IC_TAGRAM_VLD];
+ assign ic_tag_match = (ic_tag == r_if_ic_fpc[`IC_TAG]);
+ assign ic_cache_hit = ic_tag_match & (r_if_ic_req & ic_tag_valid & cp0_ic_enable & ic_initialized & !rob_pipe_flush_seen);
+ assign ic_if_ready = ic_initialized & (ic_nstate == IC_STATE_IDLE);
+ // Bit 2 of the registered fetch address selects the word within the line
+ assign ic_if_data = r_if_ic_fpc[2] ? ic_dataram_data[63:32] : ic_dataram_data[31:0];
+ assign ic_if_data_valid = ic_cache_hit;
+ `else
+ // If not including Icache, then need to force everything as a cache miss and only return bus2ic_data
+ assign ic_tag = {`IC_TAG_SZ{1'b0}};
+ assign ic_tag_valid = 1'b0;
+ assign ic_tag_match = 1'b0;
+ assign ic_cache_hit = bus2ic_valid & !rob_pipe_flush_seen;
+ assign ic_if_ready = 1'b1; // No need to initialize cache
+ assign ic_if_data = r_if_ic_fpc[2] ? bus2ic_data[63:32] : bus2ic_data[31:0];
+ assign ic_if_data_valid = ic_cache_hit;
+
+ `endif
+
+
+ // Icache state machine
+ always @*
+ case (ic_state)
+ // From the IDLE state
+ // + move to req if we detect a miss
+ IC_STATE_IDLE: begin
+ if (r_if_ic_req & !ic_cache_hit)
+ ic_nstate = IC_STATE_REQ;
+ else
+ ic_nstate = IC_STATE_IDLE;
+ end
+
+ // In the REQ state we send the request to memory for the needed data.
+ // Then transition to WAIT state to wait for memory response
+ // TODO: Need to stall here if arbiter doesn't accept our request
+ IC_STATE_REQ: begin
+ ic_nstate = IC_STATE_WAIT;
+ end
+
+ // In the WAIT state we wait for the memory response and then return to
+ // IDLE. The RAM write enables are driven directly from bus2ic_valid, so
+ // no separate write state is visited.
+ IC_STATE_WAIT: begin
+ if (bus2ic_valid) ic_nstate = IC_STATE_IDLE;
+ else ic_nstate = IC_STATE_WAIT;
+ end
+
+ // From the INIT state, we initialize each line of the cache to invalid
+ // and transition to IDLE once ic_init_done is asserted. With USE_IC this
+ // is the cycle the counter reaches the last line; without USE_IC
+ // ic_init_done is constant 1 so the reset state is left immediately
+ // (the counter never advances in that configuration).
+ IC_STATE_INIT: begin
+ if (ic_init_done)
+ ic_nstate = IC_STATE_IDLE;
+ else
+ ic_nstate = IC_STATE_INIT;
+ end
+
+ default: ic_nstate = IC_STATE_IDLE;
+ endcase
+
+ // For the initialization, just loop through every set in cache and write it as invalid. When done
+ // set ic_initialized.
+ `ifdef USE_IC
+ assign ic_init_done = (ic_state == IC_STATE_INIT) & (ic_init_ctr == `IC_NUM_LINES-1);
+ assign ic_initialized_ld = ic_init_done;
+ assign ic_init_nctr = ic_init_ctr + `IC_SI_SZ'h1;
+ `else
+ assign ic_init_done = 1'b1;
+ assign ic_initialized_ld = 1'b0;
+ assign ic_init_nctr = {`IC_SI_SZ{1'b0}};
+ `endif
+
+ // State resets to INIT; the init counter only advances while in INIT.
+ MDFFR #(3) ic_state_ff (clk, rst, IC_STATE_INIT, ic_nstate, ic_state);
+ MDFFLR #(`IC_SI_SZ) ic_init_ctr_ff (clk, rst, (ic_state == IC_STATE_INIT), `IC_SI_SZ'h0, ic_init_nctr, ic_init_ctr);
+ MDFFLR #(1) ic_initialized_ff (clk, rst, ic_initialized_ld, 1'b0, 1'b1, ic_initialized);
+
+ // Handle memory request outputs
+ // Note that we request from the bus the cycle after detecting a miss, so if_ic_fpc should be corrected from miss already.
+ assign ic2bus_fpc = if_ic_fpc;
+ assign ic2bus_req = (ic_state == IC_STATE_REQ);
+
+endmodule
trunk/rtl/icache_ctl.v
Property changes :
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Index: trunk/rtl/icache_top.v
===================================================================
--- trunk/rtl/icache_top.v (nonexistent)
+++ trunk/rtl/icache_top.v (revision 2)
@@ -0,0 +1,127 @@
+//////////////////////////////////////////////////////////////////
+// //
+// OoOPs Core Instruction Cache module //
+// //
+// This file is part of the OoOPs project //
+// http://www.opencores.org/project,oops //
+// //
+// Description: //
+// Top-level module for Instruction Cache block. This includes//
+// the instantiation of the data and tag RAMs as well as the //
+// cache controller logic. //
+// //
+// Author(s): //
+// - Joshua Smith, smjoshua@umich.edu //
+// //
+//////////////////////////////////////////////////////////////////
+// //
+// Copyright (C) 2012 Authors and OPENCORES.ORG //
+// //
+// This source file may be used and distributed without //
+// restriction provided that this copyright statement is not //
+// removed from the file and that any derivative work contains //
+// the original copyright notice and the associated disclaimer. //
+// //
+// This source file is free software; you can redistribute it //
+// and/or modify it under the terms of the GNU Lesser General //
+// Public License as published by the Free Software Foundation; //
+// either version 2.1 of the License, or (at your option) any //
+// later version. //
+// //
+// This source is distributed in the hope that it will be //
+// useful, but WITHOUT ANY WARRANTY; without even the implied //
+// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR //
+// PURPOSE. See the GNU Lesser General Public License for more //
+// details. //
+// //
+// You should have received a copy of the GNU Lesser General //
+// Public License along with this source; if not, download it //
+// from http://www.opencores.org/lgpl.shtml //
+// //
+//////////////////////////////////////////////////////////////////
+`include "ooops_defs.v"
+
+// Top level of the instruction cache: wires the cache controller (icache_ctl)
+// to the data and tag RAMs. When USE_IC is not defined the RAMs are omitted
+// and their read-data buses are tied to zero.
+module icache_top (
+ input wire clk,
+ input wire rst,
+ input wire rob_pipe_flush,
+
+ // Coprocessor interface
+ input wire cp0_ic_enable,
+
+ // IF interface
+ // NOTE(review): ic_if_cache_hit and ic_if_cache_miss are declared as outputs
+ // but nothing in this module drives them, and icache_ctl0 has no ports
+ // connected to them - confirm the intended source before relying on them.
+ input wire if_ic_req,
+ input wire [`ADDR_SZ-1:0] if_ic_fpc,
+ input wire [`ADDR_SZ-1:0] r_if_ic_fpc,
+ output wire [`INSTR_SZ-1:0] ic_if_data,
+ output wire ic_if_data_valid,
+ output wire ic_if_cache_hit,
+ output wire ic_if_cache_miss,
+ output wire ic_if_ready,
+
+ // Memory interface
+ output wire ic2bus_req,
+ output wire [`ADDR_SZ-1:0] ic2bus_fpc,
+ input wire bus2ic_valid,
+ input wire [`SYS_BUS_SZ-1:0] bus2ic_data
+ );
+
+ // Internal wires
+ // Data RAM port: address, read data, write data, write enable
+ wire [`IC_SI_SZ-1:0] ic_dataram_addr;
+ wire [`IC_LINE_SZ-1:0] ic_dataram_data;
+ wire [`IC_LINE_SZ-1:0] ic_dataram_wr_data;
+ wire ic_dataram_wren;
+
+ // Tag RAM port: address, read data, write data, write enable
+ wire [`IC_SI_SZ-1:0] ic_tagram_addr;
+ wire [`IC_TAGRAM_SZ-1:0] ic_tagram_data;
+ wire [`IC_TAGRAM_SZ-1:0] ic_tagram_wr_data;
+ wire ic_tagram_wren;
+
+ // Instantiate IC controller
+ // All controller ports are connected by name to the identically named
+ // top-level port or internal wire.
+ icache_ctl icache_ctl0(
+ .clk(clk),
+ .rst(rst),
+ .rob_pipe_flush(rob_pipe_flush),
+ .cp0_ic_enable(cp0_ic_enable),
+ .if_ic_req(if_ic_req),
+ .if_ic_fpc(if_ic_fpc),
+ .r_if_ic_fpc(r_if_ic_fpc),
+ .ic_if_data(ic_if_data),
+ .ic_if_data_valid(ic_if_data_valid),
+ .ic_if_ready(ic_if_ready),
+ .ic_tagram_data(ic_tagram_data),
+ .ic_dataram_data(ic_dataram_data),
+ .ic_dataram_wr_data(ic_dataram_wr_data),
+ .ic_dataram_addr(ic_dataram_addr),
+ .ic_dataram_wren(ic_dataram_wren),
+ .ic_tagram_wr_data(ic_tagram_wr_data),
+ .ic_tagram_addr(ic_tagram_addr),
+ .ic_tagram_wren(ic_tagram_wren),
+ .ic2bus_req(ic2bus_req),
+ .ic2bus_fpc(ic2bus_fpc),
+ .bus2ic_valid(bus2ic_valid),
+ .bus2ic_data(bus2ic_data)
+ );
+
+ // Instantiate IC data and tag RAMs
+ // Both are sp_sram instances addressed with an `IC_SI_SZ-bit index; the data
+ // RAM is `IC_LINE_SZ bits wide and the tag RAM is `IC_TAGRAM_SZ bits wide.
+ `ifdef USE_IC
+ sp_sram #(.DW(`IC_LINE_SZ), .IW(`IC_SI_SZ)) d0 (
+ .clk(clk),
+ .addr(ic_dataram_addr),
+ .wren(ic_dataram_wren),
+ .din(ic_dataram_wr_data),
+ .dout(ic_dataram_data)
+ );
+ sp_sram #(.DW(`IC_TAGRAM_SZ), .IW(`IC_SI_SZ)) t0 (
+ .clk(clk),
+ .addr(ic_tagram_addr),
+ .wren(ic_tagram_wren),
+ .din(ic_tagram_wr_data),
+ .dout(ic_tagram_data)
+ );
+ `else
+ // No RAMs instantiated: present all-zero read data to the controller
+ assign ic_dataram_data = {`IC_LINE_SZ{1'b0}};
+ assign ic_tagram_data = {`IC_TAGRAM_SZ{1'b0}};
+ `endif
+
+endmodule
trunk/rtl/icache_top.v
Property changes :
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Index: trunk/rtl/id_stage.v
===================================================================
--- trunk/rtl/id_stage.v (nonexistent)
+++ trunk/rtl/id_stage.v (revision 2)
@@ -0,0 +1,190 @@
+//////////////////////////////////////////////////////////////////
+// //
+// OoOPs Core Instruction Decode module //
+// //
+// This file is part of the OoOPs project //
+// http://www.opencores.org/project,oops //
+// //
+// Description: //
+// Handles basic decoding of instruction type and register //
+// sources and destinations for Dispatch stages. //
+// We could do full instruction decoding in this stage, but //
+// to save on pipeline flops we will only decode what is needed//
+// for dispatch. We can use the issue stage to do necessary //
+// decoding for each functional unit. //
+// //
+// Author(s): //
+// - Joshua Smith, smjoshua@umich.edu //
+// //
+//////////////////////////////////////////////////////////////////
+// //
+// Copyright (C) 2012 Authors and OPENCORES.ORG //
+// //
+// This source file may be used and distributed without //
+// restriction provided that this copyright statement is not //
+// removed from the file and that any derivative work contains //
+// the original copyright notice and the associated disclaimer. //
+// //
+// This source file is free software; you can redistribute it //
+// and/or modify it under the terms of the GNU Lesser General //
+// Public License as published by the Free Software Foundation; //
+// either version 2.1 of the License, or (at your option) any //
+// later version. //
+// //
+// This source is distributed in the hope that it will be //
+// useful, but WITHOUT ANY WARRANTY; without even the implied //
+// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR //
+// PURPOSE. See the GNU Lesser General Public License for more //
+// details. //
+// //
+// You should have received a copy of the GNU Lesser General //
+// Public License along with this source; if not, download it //
+// from http://www.opencores.org/lgpl.shtml //
+// //
+//////////////////////////////////////////////////////////////////
+`include "ooops_defs.v"
+
+// Instruction decode stage.
+// Classifies the instruction presented by IF (branch, load/store, mult/div,
+// ALU, coprocessor), works out which register operands it reads and writes,
+// tracks whether it sits in a branch delay slot, and registers the result
+// for the dispatch stage.
+module id_stage (
+ input wire clk,
+ input wire rst,
+
+ // Flush/stall interface
+ input wire rob_pipe_flush,
+ input wire ds_stall,
+
+ // Interface to IF stage
+ input wire if_id_valid,
+ input wire [`INSTR_SZ-1:0] if_id_instr,
+ input wire [`ADDR_SZ-1:0] if_id_fpc,
+ input wire [`BP_SZ-1:0] if_id_bprd_info,
+
+ // Interface to Dispatch stage
+ output wire id_ds1_valid,
+ output wire [`ADDR_SZ-1:0] id_ds1_fpc,
+ output wire id_ds1_in_dly_slot,
+ output wire [`DEC_BUS_SZ-1:0] id_ds1_dec_bus,
+ output wire [`BP_SZ-1:0] id_ds1_bprd_info
+ );
+
+ // Internal wires
+ wire id_type_br;
+ wire id_type_ldst;
+ wire id_type_multdiv;
+ wire id_type_alu;
+ wire [`DEC_BUS_SZ-1:0] id_dec_bus;
+ wire id_rs_need;
+ wire id_rt_need;
+ wire id_rd_wr;
+ wire id_alu_imm;
+ wire [`REG_IDX_SZ-1:0] id_rs_idx;
+ wire [`REG_IDX_SZ-1:0] id_rt_idx;
+ wire [`REG_IDX_SZ-1:0] id_rd_idx;
+ wire [`REG_IDX_SZ-1:0] id_reg_s_idx_pre;
+ wire [`REG_IDX_SZ-1:0] id_reg_t_idx_pre;
+ wire [`REG_IDX_SZ-1:0] id_reg_d_idx_pre;
+ wire r_id_in_dly_slot;
+ wire id_in_dly_slot;
+
+ // Handle stalling indications
+ wire id_stall = ds_stall;
+
+ // Determine basic instruction type
+ // SPECIAL: opcode field (bits 31:26) is all zero, operation is in the function field (bits 5:0)
+ wire id_instr_special = ~(|if_id_instr[31:26]);
+
+ // MULT/DIV
+ wire id_mult = (id_instr_special & if_id_instr[5:1]==5'b01100);
+ wire id_div = (id_instr_special & if_id_instr[5:1]==5'b01101);
+ assign id_type_multdiv = id_mult | id_div;
+
+ // LDST
+ wire id_mem_ld = (if_id_instr[31:29]==3'b100) && (~if_id_instr[27] | (if_id_instr[28:26]==3'b011));
+ wire id_mem_st = (if_id_instr[31:28]==4'b1010) && (if_id_instr[27:26]!=2'b10);
+ assign id_type_ldst = id_mem_ld || id_mem_st;
+
+ // Branch
+ wire id_br_beq = (if_id_instr[31:27]==5'b00010);
+ // REGIMM branches: rt field is 00000 (BLTZ), 00001 (BGEZ), 10000 (BLTZAL) or
+ // 10001 (BGEZAL). Only bits 19:17 are required to be zero so that the
+ // and-link forms (bit 20 set) are still recognised as branches.
+ wire id_br_bge = (if_id_instr[31:26]==6'b000001) && (if_id_instr[19:17]==3'b000);
+ wire id_br_bgt = (if_id_instr[31:27]==5'b00011) && (if_id_instr[20:16]==5'b00000);
+ wire id_br_neg = (id_br_beq && if_id_instr[26]) || // BNE
+ (id_br_bge && !if_id_instr[16]) || // BLTZ (rt bit 16 is 0; it is 1 for BGEZ)
+ (id_br_bgt && !if_id_instr[26]); // BLEZ
+ wire id_br_j = (if_id_instr[31:27]==5'b00001);
+ wire id_br_jr = id_instr_special && (if_id_instr[5:1]==5'b00100);
+ wire id_br_link = (id_br_bge && if_id_instr[20]) || // BGEZAL, BLTZAL
+ (id_br_j && if_id_instr[26]); // JAL (opcode 000011; J is 000010)
+ wire id_br_link_reg = (id_br_jr && if_id_instr[0]); // JALR
+ wire id_except = id_instr_special && (if_id_instr[5:1]==5'b00110);
+ //wire id_break = id_except && if_id_instr[0];
+ wire id_syscall = id_except && !if_id_instr[0];
+ assign id_type_br = (id_br_beq | id_br_bge | id_br_bgt | id_br_j | id_br_jr | id_except);
+
+ // ALU
+ wire id_alu_shift = id_instr_special && (if_id_instr[5:3]==3'b000);
+ wire id_alu_shift_imm = id_alu_shift & !if_id_instr[2];
+ wire id_alu_cmp = (id_instr_special && if_id_instr[5:1]==5'b10101) || // SLT/SLTU
+ (if_id_instr[31:27]==5'b00101); // SLTI/SLTIU
+ wire id_alu_log_reg = id_instr_special && (if_id_instr[5:2]==4'b1001);
+ wire id_alu_log_imm = (if_id_instr[31:28]==4'b0011) && !(&if_id_instr[27:26]);
+ wire id_hilo_mov = id_instr_special & (if_id_instr[5:2]==4'b0100);
+ wire id_mfhi = id_hilo_mov & (if_id_instr[1:0]==2'b00);
+ wire id_mflo = id_hilo_mov & (if_id_instr[1:0]==2'b10);
+ wire id_mthi = id_hilo_mov & (if_id_instr[1:0]==2'b01);
+ wire id_mtlo = id_hilo_mov & (if_id_instr[1:0]==2'b11);
+ wire id_alu_add_sub = (id_instr_special && (if_id_instr[5:2]==4'b1000)) || // ADD/SUB Reg
+ (if_id_instr[31:27]==5'b00100); // ADDI
+ wire id_alu_lui = (if_id_instr[31:26]==6'b001111);
+ // Immediate-form ALU instructions: opcodes 001000-001111
+ // (ADDI/ADDIU/SLTI/SLTIU/ANDI/ORI/XORI/LUI). For these the rt field names
+ // the destination rather than a source operand.
+ assign id_alu_imm = (if_id_instr[31:29]==3'b001);
+
+ // Coprocessor ops included
+ wire id_cp_op = (if_id_instr[31:26]==6'b010000) && !(|if_id_instr[25:24]) && !(|if_id_instr[22:21]);
+ wire id_cp_to = id_cp_op && if_id_instr[23];
+ //wire id_cp_num = if_id_instr[27:26];
+
+ assign id_type_alu = id_alu_shift | id_alu_cmp | id_alu_log_reg | id_alu_log_imm |
+ id_type_br | id_hilo_mov | id_alu_add_sub | id_alu_lui | id_cp_op;
+
+
+
+ // Determine register indices
+ // Figure out whether operands require register values.
+ // This is so we know whether to stall for forwarded data for an operand
+ // For ALU operations, all but LUI need reg_s. Immediate instructions don't need reg_t
+ assign id_rs_need = (id_type_alu & !id_alu_lui & !id_br_j) |
+ (id_alu_shift & !id_alu_shift_imm) |
+ (id_div | id_mult) |
+ (id_type_ldst);
+ assign id_rt_need = (id_type_alu & !id_alu_imm) | id_type_multdiv | id_br_beq | id_cp_to | id_mem_st;
+ // No destination write for stores, non-linking branches/jumps, moves to the
+ // coprocessor and syscall. JALR (id_br_link_reg) links through rd, so it
+ // counts as a linking branch here as well.
+ assign id_rd_wr = !(id_mem_st | (id_type_br & !(id_br_link | id_br_link_reg)) | id_cp_to | id_syscall);
+
+ // Raw register fields from the instruction word, zero-extended to
+ // `REG_IDX_SZ bits so that the HI/LO indices (32/33) share the same index space.
+ assign id_reg_s_idx_pre = {{(`REG_IDX_SZ-5){1'b0}}, if_id_instr[25:21]};
+ assign id_reg_t_idx_pre = {{(`REG_IDX_SZ-5){1'b0}}, if_id_instr[20:16]};
+ // Destination field selection:
+ // JAL/BGEZAL/BLTZAL link into register 31;
+ // immediate ALU ops, loads and moves from the coprocessor write the rt field;
+ // everything else (including JALR) uses the rd field.
+ // NOTE(review): MULT/DIV carry no rd field and target HI/LO; dispatch is
+ // presumed to handle them via DEC_TYPE_MULTDIV - confirm.
+ wire id_dest_is_rt = id_alu_imm | id_mem_ld | (id_cp_op & !id_cp_to);
+ assign id_reg_d_idx_pre = id_br_link ? `REG_IDX_SZ'd31 :
+ id_dest_is_rt ? id_reg_t_idx_pre :
+ {{(`REG_IDX_SZ-5){1'b0}}, if_id_instr[15:11]};
+
+ // Handle moves to/from HI and LO
+ assign id_rs_idx = id_mfhi ? `REG_IDX_SZ'd32 :
+ id_mflo ? `REG_IDX_SZ'd33 : id_reg_s_idx_pre;
+ assign id_rt_idx = id_reg_t_idx_pre;
+ assign id_rd_idx = id_mthi ? `REG_IDX_SZ'd32 :
+ id_mtlo ? `REG_IDX_SZ'd33 : id_reg_d_idx_pre;
+
+ // Determine if instructions are in a delay slot
+ // This is needed by the ROB in case a branch is mispredicted so we know not to flush the delay instruction.
+ // r_id_in_dly_slot is the registered state: set when a valid branch moves
+ // through decode, cleared when the next valid instruction (the delay slot)
+ // moves through. The flop output has its own name so that id_in_dly_slot
+ // has a single driver and no combinational feedback.
+ wire id_in_dly_slot_set = !id_stall & (if_id_valid & id_type_br);
+ wire id_in_dly_slot_rst = !id_stall & (if_id_valid & r_id_in_dly_slot);
+ wire id_in_dly_slot_in = (id_in_dly_slot_set | r_id_in_dly_slot) & !id_in_dly_slot_rst;
+ MDFFR #(1) id_in_dly_slot_ff (clk, rst, 1'b0, id_in_dly_slot_in, r_id_in_dly_slot);
+
+ // The instruction currently in decode is a delay slot only if it is valid
+ assign id_in_dly_slot = if_id_valid & r_id_in_dly_slot;
+
+ // Put together decode bus
+ assign id_dec_bus[`DEC_REG_D_IDX] = id_rd_idx;
+ assign id_dec_bus[`DEC_REG_T_IDX] = id_rt_idx;
+ assign id_dec_bus[`DEC_REG_S_IDX] = id_rs_idx;
+ assign id_dec_bus[`DEC_REG_D_WR] = id_rd_wr;
+ assign id_dec_bus[`DEC_REG_T_NEED] = id_rt_need;
+ assign id_dec_bus[`DEC_REG_S_NEED] = id_rs_need;
+ assign id_dec_bus[`DEC_TYPE_CP] = id_cp_op;
+ assign id_dec_bus[`DEC_TYPE_BR] = id_type_br;
+ assign id_dec_bus[`DEC_TYPE_LDST] = id_type_ldst;
+ assign id_dec_bus[`DEC_TYPE_MULTDIV] = id_type_multdiv;
+ assign id_dec_bus[`DEC_TYPE_ALU] = id_type_alu;
+
+ // A flush squashes the instruction currently in decode
+ wire id_valid = if_id_valid & !rob_pipe_flush;
+
+ // Flop outputs to DS stage
+ // The valid bit loads whenever decode is not stalled; the payload flops load
+ // whenever IF presents a valid instruction.
+ MDFFLR #(1) id_ds1_valid_ff (clk, rst, !id_stall, 1'b0, id_valid, id_ds1_valid);
+ MDFFL #(`ADDR_SZ) id_ds1_fpc_ff (clk, if_id_valid, if_id_fpc, id_ds1_fpc);
+ MDFFLR #(1) id_ds1_in_dly_slot_ff (clk, rst, if_id_valid, 1'b0, id_in_dly_slot, id_ds1_in_dly_slot);
+ MDFFL #(`DEC_BUS_SZ) id_ds1_dec_bus_ff (clk, if_id_valid, id_dec_bus, id_ds1_dec_bus);
+ MDFFL #(`BP_SZ) id_ds1_bprd_info_ff (clk, if_id_valid, if_id_bprd_info, id_ds1_bprd_info);
+
+endmodule
trunk/rtl/id_stage.v
Property changes :
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Index: trunk/rtl/ooops_defs.v
===================================================================
--- trunk/rtl/ooops_defs.v (nonexistent)
+++ trunk/rtl/ooops_defs.v (revision 2)
@@ -0,0 +1,244 @@
+/*
+ Josh Smith
+
+ File: ooops_defs.v
+ Description: File for the global defines
+*/
+// Simulation time unit is 1ns with 10ps precision.
+`timescale 1ns/10ps
+// Delay inserted after the "<=" of non-blocking assignments (written as
+// "q <= `SD d;"). With the timescale above this is a 1ns delay.
+`define SD #1
+
+// Common field widths
+`define ADDR_SZ 32 // Address width/size
+`define INSTR_SZ 32 // Instruction width/size
+`define DATA_SZ 32 // Data width/size
+`define IMM_SZ 16 // Immediate width/size
+
+// ROB defines
+`define ROB_ENTRIES 8 // Size of ReorderBuffer
+`define ROB_PTR_SZ 4 // Size of ROB ptr (1 extra bit for full/empty detection)
+
+// Register file and map table/free list defines
+`define ARCH_REGS 34 // GPR 0-31, HI/LO
+`define REG_IDX_SZ 6 // Architected register index size (6 bits to include HI/LO)
+`define TAG_SZ 6 // Register tag size
+// Total number of tags: 34 + 8 = 42 with the values above.
+`define TAGS (`ARCH_REGS+`ROB_ENTRIES) // 32 GPRs, HI/LO, and ROB size
+// Free list size: one entry per tag.
+`define FL_SZ (`TAGS)
+// Free list pointer width, same as the tag width.
+`define FL_PTR_SZ `TAG_SZ
+// Constant 33, sized to the tag width.
+// NOTE(review): presumably the index of the LO register; no matching HI_REG define exists here - confirm.
+`define LO_REG `TAG_SZ'd33
+// Constant 0, sized to the tag width.
+`define ZERO_REG `TAG_SZ'd0
+
+`define CHKPT_NUM 4 // Number of RAT checkpoints
+// Checkpoint pointer width: 2 bits address the 4 checkpoints.
+`define CHKPT_PTR_SZ 2
+
+// CDB (common data bus) defines
+// One CDB entry is packed, MSB first, as:
+//   { valid, ROB index, tag, architectural register index }
+// With TAG_SZ=6, REG_IDX_SZ=6 and ROB_PTR_SZ=4 that is bit 16, [15:12], [11:6], [5:0].
+`define NUM_CDB 4 // 2 ALU, 1 LD/ST, 1 MULT/DIV
+`define CDB_SZ (1+`TAG_SZ+`REG_IDX_SZ+`ROB_PTR_SZ) // 1 valid bit, 1 tag, 1 architectural reg index, 1 ROB index
+// Single-bit position macro: parenthesized (like CDB_SZ) so it also expands
+// correctly inside arithmetic such as an offset multiplied or subtracted.
+`define CDB_VLD (`TAG_SZ+`REG_IDX_SZ+`ROB_PTR_SZ) // Valid field of CDB
+// The macros below are "msb:lsb" ranges meant for use inside [ ]; they cannot be parenthesized.
+`define CDB_ROB_IDX `TAG_SZ+`REG_IDX_SZ+`ROB_PTR_SZ-1:`TAG_SZ+`REG_IDX_SZ
+`define CDB_TAG `REG_IDX_SZ+`TAG_SZ-1:`REG_IDX_SZ // Tag field of CDB
+`define CDB_REG_IDX `REG_IDX_SZ-1:0 // Arch. reg index field of CDB
+`define CDB_BUS_SZ (`NUM_CDB*`CDB_SZ) // All CDB entries concatenated (NUM_CDB entries of CDB_SZ bits)
+`define CDB_DATA_SZ (`NUM_CDB*`DATA_SZ) // One data word per CDB entry
+
+// Branch prediction defines
+`define BP_IDX_SZ 4 // Size of Index into branch predictor
+`define BP_ENTRIES (1 << `BP_IDX_SZ) // Number of branch predictor entries
+
+// System Bus defines
+// NOTE(review): presumably data width in bits and number of byte enables (64/8 = 8) - confirm against the bus interface.
+`define SYS_BUS_SZ 64
+`define SYS_BUS_BE_SZ 8
+
+// Instruction Cache defines
+// With ADDR_SZ=32 the fields below evaluate to: tag = [31:11] (21 bits),
+// set index = [10:3] (8 bits), block offset = [2:0].
+`define IC_LINE_SZ (2*`INSTR_SZ) // Size of instruction cache line
+`define IC_BO_SZ 3 // Block-offset size
+`define IC_SI_SZ 8 // Set index size
+`define IC_TAG_SZ (`ADDR_SZ-`IC_SI_SZ-`IC_BO_SZ) // Tag size
+`define IC_TAG `ADDR_SZ-1 -: `IC_TAG_SZ // Tag field of PC
+`define IC_SI `IC_SI_SZ+`IC_BO_SZ-1:`IC_BO_SZ // Set index field of PC
+`define IC_NUM_LINES (1<<`IC_SI_SZ) // Number of instruction cache lines
+// Tag RAM word layout, MSB first: { valid, dirty, tag }
+`define IC_TAGRAM_SZ (1+1+`IC_TAG_SZ) // +2 bits for valid/dirty (dirty not used)
+`define IC_TAGRAM_VLD `IC_TAG_SZ+1 // Valid field
+`define IC_TAGRAM_DRT `IC_TAG_SZ // Dirty field
+`define IC_TAGRAM_TAG `IC_TAG_SZ-1:0 // tag field
+
+// Data Cache defines
+// With ADDR_SZ=32 the fields below evaluate to: tag = [31:10] (22 bits),
+// set index = [9:2] (8 bits), block offset = [1:0].
+// NOTE(review): DC_LINE_SZ equals IC_LINE_SZ (64 bits) but DC_BO_SZ is 2 where
+// IC_BO_SZ is 3 - confirm the data cache offset width is intentional.
+`define DC_LINE_SZ (2*`DATA_SZ) // Size of data cache line
+`define DC_BO_SZ 2 // Block-offset size
+`define DC_SI_SZ 8 // Set index size
+`define DC_TAG_SZ (`ADDR_SZ-`DC_SI_SZ-`DC_BO_SZ) // Tag size
+`define DC_TAG `ADDR_SZ-1 -: `DC_TAG_SZ // Tag field of PC
+`define DC_SI `ADDR_SZ-1-`DC_TAG_SZ -: `DC_SI_SZ // Set index field of PC
+// Tag RAM word layout, MSB first: { valid, dirty, tag }
+`define DC_TAGRAM_SZ (1+1+`DC_TAG_SZ) // +2 bits for valid/dirty
+`define DC_NUM_LINES (1<<`DC_SI_SZ) // Number of data cache lines
+`define DC_TAGRAM_VLD `DC_TAG_SZ+1 // Valid field
+`define DC_TAGRAM_DRT `DC_TAG_SZ // Dirty field
+`define DC_TAGRAM_TAG `DC_TAG_SZ-1:0 // tag field
+
+`define RESET_ADDR 32'h0 // FPC reset address
+
+// Fields of branch prediction bus
+// Layout, MSB first: { 32-bit target, taken, valid } = 34 bits.
+`define BP_SZ 34
+`define BP_TRGT 33:2 // Predicted target address
+`define BP_TKN 1 // Predicted taken
+`define BP_VLD 0 // Prediction valid
+
+// Fields of Decode bus
+/*
+`define DEC_BUS_SZ 84
+`define DEC_IMM_DATA 83:68 // Immediate data for ALU and MEM
+`define DEC_TYPE_INFO 67:65 // Instruction type info group
+`define DEC_TYPE_ALU 67 // ALU/Branch instruction type
+`define DEC_TYPE_MULT_DIV 66 // MULT/DIV instruction type
+`define DEC_TYPE_MEM 65 // Load/Store instruction type
+`define DEC_REG_INFO 64:44 // Register info group
+`define DEC_REG_D_WR 64 // Writes to dest register
+`define DEC_REG_T_NEED 63 // Need register T operand
+`define DEC_REG_S_NEED 62 // Need register S operand
+`define DEC_REG_D_INDX 61:56 // Destination register index
+`define DEC_REG_T_INDX 55:50 // Operand register T index
+`define DEC_REG_S_INDX 49:44 // Operand register S index
+`define DEC_MULTDIV_SZ 8 // MULT/DIV info group
+`define DEC_MULTDIV_INFO 43:36 // MULT/DIV info group
+`define DEC_MTLO 43 // Move to LO
+`define DEC_MTHI 42 // Move to HI
+`define DEC_MFLO 41 // Move from LO
+`define DEC_MFHI 40 // Move from HI
+`define DEC_MD_SIGNED 39 // Mult/Div signed
+`define DEC_DIV 38 // Divide
+`define DEC_MULT 37 // Multiply
+`define DEC_WR_HILO 36 // Write to HI and LO registers
+`define DEC_MEM_SZ 6
+`define DEC_MEM_INFO 35:30 // Load/Store info group
+`define DEC_MEM_W 35 // Word load/store
+`define DEC_MEM_HW 34 // Halfword load/store
+`define DEC_MEM_B 33 // Byte load/store
+`define DEC_MEM_ST 32 // Memory store
+`define DEC_MEM_SIGNED 31 // Load Signed
+`define DEC_MEM_LD 30 // Memory load
+`define DEC_CP_SZ 7
+`define DEC_CP_INFO 29:23 // Coprocessor info group
+`define DEC_CP_SEL 29:27 // Coprocessor Sel index
+`define DEC_CP_NUM 26:25 // Coprocessor number
+`define DEC_CP_TO 24 // Move To coprocessor (from if 0)
+`define DEC_CP_OP 23 // Coprocessor Operation
+`define DEC_BR_SZ 10
+`define DEC_BR_INFO 22:13 // Branch info group
+`define DEC_BR_SYS 22 // SYSCALL
+`define DEC_BR_BRK 21 // BREAK
+`define DEC_BR_LINK 20 // Branch/Jump and link
+`define DEC_BR_JR 19 // JR/JALR
+`define DEC_BR_J 18 // J/JAL
+`define DEC_BR_NEG 17 // Negate condition (to get the rest of the conditions)
+`define DEC_BR_BGT 16 // BGTZ condition
+`define DEC_BR_BGE 15 // BGEZ condition
+`define DEC_BR_BEQ 14 // BEQ condition
+`define DEC_BR_INST 13 // Branch instruction
+`define DEC_ALU_SZ 13
+`define DEC_ALU_INFO 12:0 // ALU info group
+`define DEC_ALU_SIGNED 12 // Signed operation
+`define DEC_ALU_IMM 11 // Use immediate instead of register
+`define DEC_ALU_LUI 10 // LUI (will treat as shift operation with immediate inputs)
+`define DEC_ALU_S_A 9 // Shift arithmetic (if 1, logical if 0)
+`define DEC_ALU_SR 8 // Shift right
+`define DEC_ALU_SL 7 // Shift left
+`define DEC_ALU_CMP 6 // Compare (SLT)
+`define DEC_ALU_OR 5 // Logical OR
+`define DEC_ALU_NOR 4 // Logical NOR
+`define DEC_ALU_XOR 3 // Logical XOR
+`define DEC_ALU_AND 2 // Logical AND
+`define DEC_ALU_SUB 1 // Subtraction
+`define DEC_ALU_ADD 0 // Addition
+*/
+
+// Fields of instruction decode bus from ID stage.
+// Note: to save on flops, ID stage will only determine basic instruction type
+// and register operand/destination information. Complete instruction decoding
+// will happen during last Dispatch cycle into Reservation Station.
+// Layout: three 6-bit register indices in [25:8], three register-usage flags
+// in [7:5] and five one-bit instruction-type flags in [4:0].
+`define DEC_BUS_SZ 26
+`define DEC_REG_D_IDX 25:20 // Rd index
+`define DEC_REG_T_IDX 19:14 // Rt index
+`define DEC_REG_S_IDX 13:8 // Rs index
+`define DEC_REG_D_WR 7 // Writes to Rd
+`define DEC_REG_T_NEED 6 // Needs Rt operand
+`define DEC_REG_S_NEED 5 // Needs Rs operand
+`define DEC_TYPE_CP 4 // CP move instruction
+`define DEC_TYPE_BR 3 // Branch instruction
+`define DEC_TYPE_LDST 2 // Instruction handled by LDST unit
+`define DEC_TYPE_MULTDIV 1 // Instruction handled by MULT/DIV unit
+`define DEC_TYPE_ALU 0 // Instruction handled by ALU unit
+
+// ALU control bus for ALU operation.
+`define ALU_CTL_SZ 1
+
+// Fields of Branch/Jump operation bus
+// Ten one-bit flags occupying bits [9:0].
+`define BR_INFO_SZ 10
+`define BR_SYS 9 // SYSCALL
+`define BR_BRK 8 // BREAK
+`define BR_LINK 7 // Branch/Jump and link
+`define BR_JR 6 // JR/JALR
+`define BR_J 5 // J/JAL
+`define BR_NEG 4 // Negate condition (to get the rest of the conditions)
+`define BR_BGT 3 // BGTZ condition
+`define BR_BGE 2 // BGEZ condition
+`define BR_BEQ 1 // BEQ condition
+`define BR_INST 0 // Branch instruction
+
+// Fields of ALU information bus
+// Thirteen one-bit flags occupying bits [12:0].
+`define ALU_INFO_SZ 13
+`define ALU_SIGNED 12 // Signed operation
+`define ALU_IMM 11 // Use immediate instead of register
+`define ALU_LUI 10 // LUI (treated as shift op)
+`define ALU_S_A 9 // Shift arithmetic (if 1, logical if 0)
+`define ALU_SR 8 // Shift right
+`define ALU_SL 7 // Shift left
+`define ALU_CMP 6 // Compare (SLT)
+`define ALU_OR 5 // Logical OR
+`define ALU_NOR 4 // Logical NOR
+`define ALU_XOR 3 // Logical XOR
+`define ALU_AND 2 // Logical AND
+`define ALU_SUB 1 // Subtraction
+`define ALU_ADD 0 // Addition
+
+// Fields of rename information
+// Layout, MSB first: destination group [34:16], source 2 group [15:8],
+// source 1 group [7:0]. All index/tag fields are 6 bits wide.
+// NOTE(review): both source tag comments below say "reg_s"; one of them is
+// presumably reg_t - confirm which source maps to which operand.
+`define REN_BUS_SZ 35
+`define REN_DEST_IDX 34:29 // Destination (reg_d) index
+`define REN_DEST_VLD 28 // Writes to destination
+`define REN_DEST_TAG_OLD 27:22 // Destination (reg_d) old tag
+`define REN_DEST_TAG 21:16 // Destination (reg_d) tag
+`define REN_SRC2_VLD 15 // Source 2 data valid in register file
+`define REN_SRC2_NEED 14 // Need source 2 register data
+`define REN_SRC2_TAG 13:8 // Source 2 (reg_s) tag
+`define REN_SRC1_VLD 7 // Source 1 data valid in register file
+`define REN_SRC1_NEED 6 // Need source 1 register data
+`define REN_SRC1_TAG 5:0 // Source 1 (reg_s) tag
+
+// Reservation Station defines
+// Each *_CNT_SZ is wide enough to hold every count from 0 up to the matching
+// *_ENTRIES value (3 bits for 0..4, 2 bits for 0..2).
+// The commented-out *_CNTL_SZ defines refer to DEC_*_SZ macros that only
+// exist in the commented-out decode bus layout earlier in this file.
+`define ALU_RS_ENTRIES 4 // Size of Reservation Station for ALU and branch
+`define ALU_RS_CNT_SZ 3 // Size of occupancy counter
+//`define ALU_RS_CNTL_SZ (`DEC_ALU_SZ+`DEC_BR_SZ+`DEC_CP_SZ+`ADDR_SZ+`IMM_SZ)
+`define MULTDIV_RS_ENTRIES 2 // Size of Reservation Station for MULT/DIV
+`define MULTDIV_RS_CNT_SZ 2 // Size of occupancy counter
+//`define MULTDIV_RS_CNTL_SZ (`DEC_MULTDIV_SZ)
+`define LDST_RS_ENTRIES 2 // Size of Reservation Station for Load/Store
+`define LDST_RS_CNT_SZ 2 // Size of occupancy counter
+//`define LDST_RS_CNTL_SZ (`DEC_MEM_SZ+`IMM_SZ)
+
+// CP0 Register fields
+// NOTE(review): presumably the bit position of EXL within the CP0 Status register - confirm against the CP0 implementation.
+`define CP0_STATUS_EXL 1
+
+// Feature ifdefs
+// Comment out define to remove feature from compilation
+//`define USE_PLL // Include PLL (exclude for simulation)
+`define USE_IC // include Instruction cache
+`define USE_DC // include Data cache
+//`define DYN_BPRD // TODO: Add back in later
+`define USE_IFB // Include instruction buffer between IF and ID stages
+
+// Fetch buffer geometry, only defined when the fetch buffer is compiled in.
+`ifdef USE_IFB
+ `define IFB_ENTRIES 4 // Number of fetch buffer entries
+ // Entry width: instruction + address + branch prediction bus + 1 extra bit
+ // (32 + 32 + 34 + 1 = 99). NOTE(review): the extra bit is presumably a valid flag - confirm in if_buffer.
+ `define IFB_ENTRY_SZ (`INSTR_SZ+`ADDR_SZ+`BP_SZ+1)
+ `define IFB_PTR_SZ 2 // Fetch buffer pointer width
+`endif
+
+//`define TIMING_OPT // Use timing-optimized RTL in some portions (area affected)
+//`define ALTERA // Used to instantiate ALTERA megafunctions over generic logic
trunk/rtl/ooops_defs.v
Property changes :
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Index: trunk/rtl/if_stage.v
===================================================================
--- trunk/rtl/if_stage.v (nonexistent)
+++ trunk/rtl/if_stage.v (revision 2)
@@ -0,0 +1,152 @@
+//////////////////////////////////////////////////////////////////
+// //
+// OoOPs Core Instruction Fetch module //
+// //
+// This file is part of the OoOPs project //
+// http://www.opencores.org/project,oops //
+// //
+// Description: //
+// Handles updating Program Counter and fetching instructions //
+// from the Instruction Cache. //
+// //
+// Author(s): //
+// - Joshua Smith, smjoshua@umich.edu //
+// //
+//////////////////////////////////////////////////////////////////
+// //
+// Copyright (C) 2012 Authors and OPENCORES.ORG //
+// //
+// This source file may be used and distributed without //
+// restriction provided that this copyright statement is not //
+// removed from the file and that any derivative work contains //
+// the original copyright notice and the associated disclaimer. //
+// //
+// This source file is free software; you can redistribute it //
+// and/or modify it under the terms of the GNU Lesser General //
+// Public License as published by the Free Software Foundation; //
+// either version 2.1 of the License, or (at your option) any //
+// later version. //
+// //
+// This source is distributed in the hope that it will be //
+// useful, but WITHOUT ANY WARRANTY; without even the implied //
+// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR //
+// PURPOSE. See the GNU Lesser General Public License for more //
+// details. //
+// //
+// You should have received a copy of the GNU Lesser General //
+// Public License along with this source; if not, download it //
+// from http://www.opencores.org/lgpl.shtml //
+// //
+//////////////////////////////////////////////////////////////////
+`include "ooops_defs.v"
+
+// Instruction Fetch stage.
+//
+// Maintains the fetch PC (FPC), issues requests to the instruction cache and
+// passes fetched instructions to the ID stage, either through the if_buffer
+// FIFO (USE_IFB defined) or through one rank of pipeline flops (USE_IFB not
+// defined).
+//
+// NOTE(review): the MDFF* cells come from the project library, which is not
+// visible here. The port order below is inferred from how they are used in
+// this module - confirm against ooops_lib.v:
+//   MDFFR  (clk, rst, reset_value, d, q)
+//   MDFFL  (clk, load_en, d, q)
+//   MDFFLR (clk, rst, load_en, reset_value, d, q)
+module if_stage (
+  input  wire                  clk,
+  input  wire                  rst,
+
+  // Flush/stall interfaces
+  input  wire                  rob_pipe_flush,    // Pipeline flush; redirects the FPC and kills if_valid
+  input  wire [`ADDR_SZ-1:0]   rob_flush_target,  // FPC to fetch from after a flush
+  input  wire                  ds_stall,          // Stall from the downstream (DS) stage
+
+  // Instruction cache interface
+  output wire                  if_ic_req,         // Fetch request (asserted whenever IF is not stalled)
+  output wire [`ADDR_SZ-1:0]   if_ic_fpc,         // Current fetch address
+  output wire [`ADDR_SZ-1:0]   r_if_ic_fpc,       // Fetch address flopped by one cycle
+  input  wire [`INSTR_SZ-1:0]  ic_if_data,        // Instruction returned by the cache
+  input  wire                  ic_if_data_valid,  // ic_if_data is valid
+  input  wire                  ic_if_ready,       // When low, the FPC falls back to the flopped address
+
+  // Interface to ID stage
+  output wire                  if_id_valid,
+  output wire [`INSTR_SZ-1:0]  if_id_instr,
+  output wire [`ADDR_SZ-1:0]   if_id_fpc,
+  output wire [`BP_SZ-1:0]     if_id_bprd_info
+  );
+
+  // Internal wires/regs
+  wire                  if_stall;
+  wire                  if_valid;
+  wire [`ADDR_SZ-1:0]   if_fpc;        // Current fetch pc
+  wire [`ADDR_SZ-1:0]   r_if_fpc;      // flopped FPC
+  wire [`ADDR_SZ-1:0]   r_if_fpc_in;
+  wire [`INSTR_SZ-1:0]  if_instr;
+  wire [`BP_SZ-1:0]     if_bprd_info;
+  wire                  if_br_predict_valid;
+  wire                  if_br_predict_taken;
+  wire [`ADDR_SZ-1:0]   if_br_predict_target;
+
+  // Note that Icache will have 1 cycle latency, so we won't know if it's a miss until
+  // one cycle later. Since we don't want to have to wait to figure out if it's a hit
+  // before we increment the FPC (want to be optimistic), we'll have to be able to reset
+  // the FPC if it's a miss.
+  assign if_ic_req   = !if_stall;
+  assign if_ic_fpc   = if_fpc;
+  assign r_if_ic_fpc = r_if_fpc;
+  assign if_valid    = ic_if_data_valid & !rob_pipe_flush;
+  assign if_instr    = ic_if_data;
+
+  // Handle the FPC generation
+  // Priority, highest first: flush target, predicted-taken branch target,
+  // previous FPC (cache not ready), sequential FPC + 4.
+  wire [`ADDR_SZ-1:0] if_fpc_p4 = if_fpc + `ADDR_SZ'h4;
+  reg  [`ADDR_SZ-1:0] if_fpc_in;
+  always @* begin
+    casez({rob_pipe_flush, if_br_predict_taken, !ic_if_ready})
+      3'b1??:  if_fpc_in = rob_flush_target;      // Flush target
+      3'b01?:  if_fpc_in = if_br_predict_target;  // Taken branch target
+      3'b001:  if_fpc_in = r_if_fpc;              // Previous FPC
+      default: if_fpc_in = if_fpc_p4;             // Next incremented FPC
+    endcase
+  end
+
+  // NOTE(review): the FPC only loads while if_ic_req (= !if_stall) is high, so a
+  // flush arriving while IF is stalled would not capture rob_flush_target here -
+  // confirm the stall is guaranteed to drop in the flush cycle.
+  MDFFLR #(`ADDR_SZ) if_fpc_ff (clk, rst, if_ic_req, `RESET_ADDR, if_fpc_in, if_fpc);
+
+  // Flop Icache request signals so we can re-request if it ends up being a miss
+  // NOTE(review): r_if_fpc_in is computed but never used; the flop below samples
+  // if_fpc directly. Left as in the original - confirm which input was intended.
+  assign r_if_fpc_in = rob_pipe_flush ? rob_flush_target : if_ic_fpc;
+  MDFFR #(`ADDR_SZ) r_if_fpc_ff (clk, rst, `RESET_ADDR, if_fpc, r_if_fpc);
+
+  // Handle branch prediction
+  // TODO: throw in branch prediction
+  // Note: Try to identify jumps and other unconditional branches here, for quick recovery
+  // Both branches currently tie the prediction off to "not valid, not taken".
+  // The prediction bus is packed as { target, taken, valid }.
+  `ifdef DYN_BPRD
+  assign if_br_predict_valid  = 1'b0;
+  assign if_br_predict_taken  = 1'b0;
+  assign if_br_predict_target = {`ADDR_SZ{1'b0}};
+  assign if_bprd_info = {if_br_predict_target, if_br_predict_taken, if_br_predict_valid};
+  `else
+  // tie-offs should optimize logic away
+  assign if_br_predict_valid  = 1'b0;
+  assign if_br_predict_taken  = 1'b0;
+  assign if_br_predict_target = {`ADDR_SZ{1'b0}};
+  assign if_bprd_info = {if_br_predict_target, if_br_predict_taken, if_br_predict_valid};
+  `endif
+
+  `ifdef USE_IFB
+  // Fetch buffer between IF and ID: pushed with if_valid, popped while DS is not stalled.
+  wire ifb_full;
+  if_buffer ifb (
+    .clk(clk),
+    .rst(rst),
+    .flush(rob_pipe_flush),
+    .if_valid(if_valid),
+    .if_instr(if_instr),
+    .if_fpc(if_fpc),
+    .if_bprd_info(if_bprd_info),
+    .if_ifb_pop_en(!ds_stall),
+    .ifb_full(ifb_full),
+    .if_id_valid(if_id_valid),
+    .if_id_instr(if_id_instr),
+    .if_id_fpc(if_id_fpc),
+    .if_id_bprd_info(if_id_bprd_info)
+  );
+
+  assign if_stall = ifb_full; // Only stall if IFB is full
+  `else
+
+  // No fetch buffer: a single rank of flops feeds the ID stage.
+  MDFFLR #(1)         if_id_valid_ff (clk, rst, !if_stall, 1'b0, if_valid, if_id_valid);
+  MDFFL  #(`INSTR_SZ) if_id_instr_ff (clk, !if_stall, if_instr, if_id_instr);
+  // The original connected "if_ic_fpc_q" here, a name that is never declared.
+  // Verilog would silently create a 1-bit implicit net for it, leaving the
+  // 32-bit if_id_fpc without a real source. r_if_fpc is the flopped copy of the
+  // request address, which lines up with the one-cycle Icache latency noted above.
+  MDFFL  #(`ADDR_SZ)  if_id_fpc_ff   (clk, !if_stall, r_if_fpc, if_id_fpc);
+  // NOTE(review): this flop loads on if_valid while its siblings load on !if_stall - confirm.
+  MDFFL  #(`BP_SZ)    if_id_bprd_info_ff (clk, if_valid, if_bprd_info, if_id_bprd_info);
+
+  assign if_stall = if_id_valid & ds_stall; // Stall if we have a valid instruction going to ID and DS stalling
+
+  `endif // USE_IFB
+endmodule
trunk/rtl/if_stage.v
Property changes :
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Index: trunk/rtl/psel.v
===================================================================
--- trunk/rtl/psel.v (nonexistent)
+++ trunk/rtl/psel.v (revision 2)
@@ -0,0 +1,19 @@
+// Fixed-priority selector.
+// Bit 0 of req has the highest priority. gnt has at most one bit set: the
+// position of the lowest-numbered asserted request. gnt is all zeros when no
+// request is asserted.
+module psel (req, gnt);
+  //synopsys template
+  parameter WIDTH=8;
+  input  wire [WIDTH-1:0] req;
+  output wire [WIDTH-1:0] gnt;
+
+  // The lowest request always wins when asserted.
+  assign gnt[0] = req[0];
+
+  // Any other request wins only when every lower-numbered request is idle.
+  genvar bit_idx;
+  generate
+    for (bit_idx = 1; bit_idx < WIDTH; bit_idx = bit_idx + 1)
+    begin: sel
+      assign gnt[bit_idx] = (~|req[bit_idx-1:0]) & req[bit_idx];
+    end
+  endgenerate
+
+endmodule
Index: trunk/rtl/map_table.v
===================================================================
--- trunk/rtl/map_table.v (nonexistent)
+++ trunk/rtl/map_table.v (revision 2)
@@ -0,0 +1,350 @@
+//////////////////////////////////////////////////////////////////
+// //
+// OoOPs Core Register Map Table module //
+// //
+// This file is part of the OoOPs project //
+// http://www.opencores.org/project,oops //
+// //
+// Description: //
+// The Map Table is responsible for maintaining the mapping //
+// from architectural->physical registers. This block //
+// consists of a free list for allocating new physical //
+// registers and also the tables for mapping source operands. //
+// //
+// To avoid excessive flop usage for the map tables, block rams//
+// will be used instead. //
+// //
+// Author(s): //
+// - Joshua Smith, smjoshua@umich.edu //
+// //
+//////////////////////////////////////////////////////////////////
+// //
+// Copyright (C) 2012 Authors and OPENCORES.ORG //
+// //
+// This source file may be used and distributed without //
+// restriction provided that this copyright statement is not //
+// removed from the file and that any derivative work contains //
+// the original copyright notice and the associated disclaimer. //
+// //
+// This source file is free software; you can redistribute it //
+// and/or modify it under the terms of the GNU Lesser General //
+// Public License as published by the Free Software Foundation; //
+// either version 2.1 of the License, or (at your option) any //
+// later version. //
+// //
+// This source is distributed in the hope that it will be //
+// useful, but WITHOUT ANY WARRANTY; without even the implied //
+// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR //
+// PURPOSE. See the GNU Lesser General Public License for more //
+// details. //
+// //
+// You should have received a copy of the GNU Lesser General //
+// Public License along with this source; if not, download it //
+// from http://www.opencores.org/lgpl.shtml //
+// //
+//////////////////////////////////////////////////////////////////
+`include "ooops_defs.v"
+
+// Register map table (rename).
+//
+// Maps architectural register indices presented in DS1 to physical tags that
+// are available in DS2 (one cycle later). It contains:
+//   - the free list, which supplies a new destination tag,
+//   - a checkpointed speculative RAT (three identical SRAM copies, one per read port),
+//   - a committed RAT ("RRAT", also three copies) written at retire,
+//   - the Dirty Flag Array (DFA), which records per checkpoint which
+//     architectural registers were renamed while that checkpoint was the head.
+// For each lookup the DFA selects the newest valid checkpoint holding a mapping;
+// when no valid checkpoint has one, the RRAT value is used instead.
+//
+// NOTE(review): the MDFF* cells and free_list are defined outside this file
+// section. The port order used here is inferred from usage - confirm:
+//   MDFFR (clk, rst, reset_value, d, q), MDFFL (clk, load_en, d, q),
+//   MDFFLR (clk, rst, load_en, reset_value, d, q)
+// NOTE(review): ds2_src1_valid and ds2_src2_valid are declared as outputs but
+// are not driven anywhere in this module.
+module map_table (
+  input  wire                       clk,
+  input  wire                       rst,
+  output wire                       map_table_init,      // High while the RRAT is being initialised after reset
+
+  // Rename port
+  input  wire                       ds1_valid,           // Load enable for the DS2 output flops
+  input  wire [`REG_IDX_SZ-1:0]     ds1_src1_idx,
+  input  wire [`REG_IDX_SZ-1:0]     ds1_src2_idx,
+  input  wire [`REG_IDX_SZ-1:0]     ds1_dest_idx,
+  input  wire                       ds1_dest_wr,         // Instruction writes ds1_dest_idx
+  input  wire                       ds1_type_br,         // Branch: allocates a new checkpoint
+  output wire [`TAG_SZ-1:0]         ds2_src1_tag,
+  output wire [`TAG_SZ-1:0]         ds2_src2_tag,
+  output wire                       ds2_src1_valid,
+  output wire                       ds2_src2_valid,
+  output wire [`TAG_SZ-1:0]         ds2_dest_tag,        // Newly allocated destination tag
+  output wire [`TAG_SZ-1:0]         ds2_dest_tag_old,    // Previous mapping of the destination register
+  output wire [`FL_PTR_SZ-1:0]      ds2_fl_head_ptr,     // Free list head pointer, flopped into DS2
+  output wire [`CHKPT_PTR_SZ-1:0]   ds2_chkpt_ptr,       // Checkpoint head pointer, flopped into DS2
+
+  // Writeback port
+  //input wire [`CDB_BUS_SZ-1:0] ex_cdb_bus,
+
+  // Retire and flush port
+  input  wire                       rob_pipe_flush,
+  input  wire                       rob_ds_ret_valid,
+  input  wire                       rob_ds_ret_dest_write,
+  input  wire [`CHKPT_PTR_SZ-1:0]   rob_ds_chkpt_ptr,    // Checkpoint pointer restored on a flush
+  input  wire [`FL_PTR_SZ-1:0]      rob_ds_fl_head_ptr,  // Free list head pointer restored on a flush
+  input  wire                       rob_ds_ret_chkpt_free,
+  input  wire [`REG_IDX_SZ-1:0]     rob_ds_ret_idx,
+  input  wire [`TAG_SZ-1:0]         rob_ds_ret_tag,
+  input  wire [`TAG_SZ-1:0]         rob_ds_ret_tag_old
+  );
+
+  // Internal wires and regs
+  wire [`TAG_SZ-1:0]        ds1_dest_tag;
+  wire [`FL_PTR_SZ-1:0]     ds1_fl_head_ptr;
+  wire [`ARCH_REGS-1:0]     dfa_dirty_bit    [`CHKPT_NUM-1:0];
+  wire [`ARCH_REGS-1:0]     dfa_dirty_bit_in [`CHKPT_NUM-1:0];
+  wire [`CHKPT_NUM-1:0]     dfa_dirty_bit_ld;  // load is per checkpoint/column
+  wire [`CHKPT_NUM-1:0]     dfa_dirty_bit_row [`ARCH_REGS-1:0];
+  wire [`CHKPT_PTR_SZ-1:0]  ds1_src1_chkpt, ds1_src2_chkpt, ds1_dest_chkpt;
+
+  wire [`CHKPT_PTR_SZ-1:0]  chkpt_head_ptr;
+  wire [`CHKPT_PTR_SZ-1:0]  chkpt_head_ptr_p1;
+  wire [`CHKPT_PTR_SZ-1:0]  chkpt_tail_ptr;
+  wire [`CHKPT_PTR_SZ-1:0]  chkpt_tail_ptr_p1;
+  wire [`CHKPT_NUM-1:0]     chkpt_valid_mask;
+  wire [`CHKPT_NUM-1:0]     chkpt_valid_mask_in;
+  wire                      chkpt_allocate;
+
+  genvar g,k;
+
+  // Returns the newest checkpoint whose (valid-masked) dirty bit is set.
+  //   head      - current checkpoint head pointer (the newest checkpoint)
+  //   dirty_row - one masked DFA row, bit g = checkpoint g holds a mapping
+  // The head pointer advances by +1 on every allocation, so progressively
+  // older checkpoints sit at head-1, head-2, ... (modulo the checkpoint count).
+  // The search therefore walks DOWNWARD from the head. The original expressions
+  // walked upward (head, head+1, head+2), which visits the oldest checkpoint
+  // before newer ones and can return a stale mapping when three or more
+  // checkpoints are live.
+  // When no bit is set the result is a don't-care: callers select the RRAT
+  // through the *_use_rrat signals in that case.
+  // TODO: For now, assume 4 checkpoints. Find a nice way to make this general.
+  function [`CHKPT_PTR_SZ-1:0] newest_dirty_chkpt;
+    input [`CHKPT_PTR_SZ-1:0] head;
+    input [`CHKPT_NUM-1:0]    dirty_row;
+    begin
+      case (head)
+        2'h0:    newest_dirty_chkpt = dirty_row[0] ? 2'h0 :
+                                      dirty_row[3] ? 2'h3 :
+                                      dirty_row[2] ? 2'h2 : 2'h1;
+        2'h1:    newest_dirty_chkpt = dirty_row[1] ? 2'h1 :
+                                      dirty_row[0] ? 2'h0 :
+                                      dirty_row[3] ? 2'h3 : 2'h2;
+        2'h2:    newest_dirty_chkpt = dirty_row[2] ? 2'h2 :
+                                      dirty_row[1] ? 2'h1 :
+                                      dirty_row[0] ? 2'h0 : 2'h3;
+        default: newest_dirty_chkpt = dirty_row[3] ? 2'h3 :
+                                      dirty_row[2] ? 2'h2 :
+                                      dirty_row[1] ? 2'h1 : 2'h0;
+      endcase
+    end
+  endfunction
+
+  // Instantiate free list
+  free_list fl (
+    .clk(clk),
+    .rst(rst),
+    .ds1_dest_wr(ds1_dest_wr),
+    .rob_pipe_flush(rob_pipe_flush),
+    .rob_ds_fl_head_ptr(rob_ds_fl_head_ptr),
+    .rob_ds_ret_valid(rob_ds_ret_valid),
+    .rob_ds_ret_dest_write(rob_ds_ret_dest_write),
+    .rob_ds_ret_tag_old(rob_ds_ret_tag_old),
+    .ds1_dest_tag(ds1_dest_tag),
+    .ds1_fl_head_ptr(ds1_fl_head_ptr)
+  );
+
+  // Maintain the checkpoint head/tail pointers
+  // Operation:
+  // - Upon a pipe flush, restore both head and tail pointers to same pointer value from the ROB.
+  // - When a new checkpoint is allocated, advance head pointer by 1
+  // - When an instruction which allocated a checkpoint retires, advance tail pointer by 1.
+  assign chkpt_allocate = ds1_type_br & ~rob_pipe_flush;
+  wire [`CHKPT_PTR_SZ-1:0] chkpt_head_ptr_in = rob_pipe_flush ? rob_ds_chkpt_ptr : chkpt_head_ptr_p1;
+  wire [`CHKPT_PTR_SZ-1:0] chkpt_tail_ptr_in = rob_pipe_flush ? rob_ds_chkpt_ptr : chkpt_tail_ptr_p1;
+  wire chkpt_head_ptr_ld = rob_pipe_flush | chkpt_allocate;
+  wire chkpt_tail_ptr_ld = rob_pipe_flush | (rob_ds_ret_valid & rob_ds_ret_chkpt_free);
+
+  MDFFLR #(`CHKPT_PTR_SZ) chkpt_head_ptr_ff (clk, rst, chkpt_head_ptr_ld, `CHKPT_PTR_SZ'h0, chkpt_head_ptr_in, chkpt_head_ptr);
+  MDFFLR #(`CHKPT_PTR_SZ) chkpt_tail_ptr_ff (clk, rst, chkpt_tail_ptr_ld, `CHKPT_PTR_SZ'h0, chkpt_tail_ptr_in, chkpt_tail_ptr);
+
+  // Pointer increment with explicit wrap at the last checkpoint
+  assign chkpt_head_ptr_p1 = (chkpt_head_ptr == `CHKPT_NUM-1) ? `CHKPT_PTR_SZ'h0 : chkpt_head_ptr + `CHKPT_PTR_SZ'h1;
+  assign chkpt_tail_ptr_p1 = (chkpt_tail_ptr == `CHKPT_NUM-1) ? `CHKPT_PTR_SZ'h0 : chkpt_tail_ptr + `CHKPT_PTR_SZ'h1;
+
+  // Keep a bit-vector mask of valid (allocated) checkpoints for the DFA search
+  // Initialize checkpoint 0 to valid, this will be the checkpoint used out of reset.
+  wire [`CHKPT_NUM-1:0] allocated_chkpt = (`CHKPT_NUM'h1 << chkpt_head_ptr_p1) & {`CHKPT_NUM{chkpt_allocate}};
+  // A checkpoint is only freed by a VALID retire, matching chkpt_tail_ptr_ld.
+  // Without the rob_ds_ret_valid qualifier an allocation cycle (which also loads
+  // the mask) could clear the tail checkpoint's valid bit without the tail
+  // pointer moving.
+  wire [`CHKPT_NUM-1:0] freed_chkpt = (`CHKPT_NUM'h1 << chkpt_tail_ptr) & {`CHKPT_NUM{rob_ds_ret_valid & rob_ds_ret_chkpt_free}};
+  wire [`CHKPT_NUM-1:0] rob_ds_chkpt_vec = (`CHKPT_NUM'h1 << rob_ds_chkpt_ptr);
+  // On a flush only the checkpoint restored from the ROB remains valid.
+  assign chkpt_valid_mask_in = rob_pipe_flush ? rob_ds_chkpt_vec : ((chkpt_valid_mask | allocated_chkpt) & ~freed_chkpt);
+  MDFFLR #(`CHKPT_NUM) chkpt_valid_mask_ff (clk, rst, chkpt_head_ptr_ld | chkpt_tail_ptr_ld, `CHKPT_NUM'h1, chkpt_valid_mask_in, chkpt_valid_mask);
+
+  /*
+    Handle the DFA (Dirty Flag Array) for determining which checkpoint contains the
+    most recent mapping for an architectural register. This is needed to setup the
+    SRAM address input for the RAT lookup.
+
+    Structure:
+    - Maintain a grid of bits (one row for each arch. reg, one column for each checkpoint).
+    - Head/Tail pointer keep track of most recently/least recently allocated valid checkpoints.
+
+    Operation:
+    - When a new checkpoint is allocated for a branch or speculation point, we advance
+      the head pointer and clear the entire DFA column for that checkpoint. For branches which
+      write a register, the write should update the old checkpoint, not the newly allocated one.
+    - When a register write operation comes through, update the row of the head checkpoint
+      corresponding to the destination architectural register index.
+
+  */
+  wire [`CHKPT_NUM-1:0] dfa_column_clear = allocated_chkpt;
+  wire [`CHKPT_NUM-1:0] ds1_active_chkpt = (`CHKPT_NUM'h1 << chkpt_head_ptr);
+  assign dfa_dirty_bit_ld = dfa_column_clear |                               // Clear newly allocated checkpoint
+                            (ds1_active_chkpt & {`CHKPT_NUM{ds1_dest_wr}});  // Update current checkpoint
+
+  // One-hot decode of the destination register index
+  wire [`ARCH_REGS-1:0] ds1_dest_idx_vec = (1 << ds1_dest_idx);
+  generate
+    for (g=0; g<`CHKPT_NUM; g=g+1) begin : dfa_gen
+      for (k=0; k<`ARCH_REGS; k=k+1) begin : dfa_dirty_bit_gen
+        // Column clear wins; otherwise set the written register's bit and hold the rest.
+        assign dfa_dirty_bit_in[g][k] = ~dfa_column_clear[g] & ((ds1_dest_idx_vec[k] & ds1_dest_wr) ? 1'b1 : dfa_dirty_bit[g][k]);
+
+        MDFFLR #(1) dfa_dirty_bit_ff (clk, rst, dfa_dirty_bit_ld[g], 1'b0, dfa_dirty_bit_in[g][k], dfa_dirty_bit[g][k]);
+
+        // generate a "row" version as well (masked with the valid checkpoints)
+        assign dfa_dirty_bit_row[k][g] = dfa_dirty_bit[g][k] & chkpt_valid_mask[g];
+      end
+    end
+  endgenerate
+
+  // Determine which checkpoint contains the most recent mapping for each source and the destination
+  assign ds1_src1_chkpt = newest_dirty_chkpt(chkpt_head_ptr, dfa_dirty_bit_row[ds1_src1_idx]);
+  assign ds1_src2_chkpt = newest_dirty_chkpt(chkpt_head_ptr, dfa_dirty_bit_row[ds1_src2_idx]);
+  assign ds1_dest_chkpt = newest_dirty_chkpt(chkpt_head_ptr, dfa_dirty_bit_row[ds1_dest_idx]);
+
+  // If no dirty bit set for any of the valid checkpoints, then committed copy must have latest mapping
+  wire ds1_src1_use_rrat = ~(|dfa_dirty_bit_row[ds1_src1_idx]);
+  wire ds1_src2_use_rrat = ~(|dfa_dirty_bit_row[ds1_src2_idx]);
+  wire ds1_dest_use_rrat = ~(|dfa_dirty_bit_row[ds1_dest_idx]);
+
+  // Generate the RAT SRAM read/write addresses and controls
+  // Note: since tables are SRAM-based, we need to initialize the RRAT so that
+  // registers are mapped correctly out of reset
+  // While map_table_init is high the counter steps through the RRAT entries and
+  // each entry is written with its own index (identity mapping).
+  wire [`REG_IDX_SZ-1:0] map_table_init_ctr, map_table_init_ctr_in;
+  wire map_table_init_in = map_table_init & (map_table_init_ctr != `ARCH_REGS);
+  MDFFR #(1) map_table_init_ff (clk, rst, 1'b1, map_table_init_in, map_table_init);
+
+  assign map_table_init_ctr_in = map_table_init_ctr + `REG_IDX_SZ'h1;
+  MDFFLR #(`REG_IDX_SZ) map_table_init_ctr_ff (clk, rst, map_table_init, `REG_IDX_SZ'h0, map_table_init_ctr_in, map_table_init_ctr);
+
+  // RAT address = { architectural register index, checkpoint }
+  wire [`REG_IDX_SZ+`CHKPT_PTR_SZ-1:0] ds1_rat_src1_rd_addr = {ds1_src1_idx,ds1_src1_chkpt};
+  wire [`REG_IDX_SZ+`CHKPT_PTR_SZ-1:0] ds1_rat_src2_rd_addr = {ds1_src2_idx,ds1_src2_chkpt};
+  wire [`REG_IDX_SZ+`CHKPT_PTR_SZ-1:0] ds1_rat_dest_rd_addr = {ds1_dest_idx,ds1_dest_chkpt};
+
+  // Writes need to come from DS2 stage in case we read and write the same arch. register
+  wire ds2_rat_wren, ds2_rat_wren_in;
+  wire [`REG_IDX_SZ+`CHKPT_PTR_SZ-1:0] ds2_rat_wr_addr, ds2_rat_wr_addr_in;
+  wire [`TAG_SZ-1:0] ds2_rat_wr_data;
+
+  assign ds2_rat_wren_in    = ds1_dest_wr;
+  assign ds2_rat_wr_addr_in = {ds1_dest_idx,chkpt_head_ptr};  // New mappings always go to the head checkpoint
+  MDFFR #(1) ds2_rat_wren_ff (clk, rst, 1'b0, ds2_rat_wren_in, ds2_rat_wren);
+  MDFFR #(`REG_IDX_SZ+`CHKPT_PTR_SZ) ds2_rat_wr_addr_ff (clk, rst, 1'b0, ds2_rat_wr_addr_in, ds2_rat_wr_addr);
+  assign ds2_rat_wr_data = ds2_dest_tag;
+
+  wire [`TAG_SZ-1:0] ds2_rat_src1_rd_data, ds2_rrat_src1_rd_data;
+  wire [`TAG_SZ-1:0] ds2_rat_src2_rd_data, ds2_rrat_src2_rd_data;
+  wire [`TAG_SZ-1:0] ds2_rat_dest_rd_data, ds2_rrat_dest_rd_data;
+
+  // RRAT write port: initialisation sequence after reset, retire updates afterwards
+  wire [`REG_IDX_SZ-1:0] ds_rrat_wr_addr = map_table_init ? map_table_init_ctr : rob_ds_ret_idx;
+  wire [`TAG_SZ-1:0]     ds_rrat_wr_data = map_table_init ? map_table_init_ctr : rob_ds_ret_tag;
+  wire                   ds_rrat_wren    = map_table_init | rob_ds_ret_valid & rob_ds_ret_dest_write;
+
+  // Instantiate RAT SRAM blocks
+  // Note that we need 3 copies for the required 3 read ports (2 source operand tag reads, 1 previous dest tag read)
+  // All three copies share the same write port so their contents stay identical.
+  // Read copy 1
+  dp_sram #(.DW(`TAG_SZ), .IW(`REG_IDX_SZ+`CHKPT_PTR_SZ)) rat0 (
+    .clk(clk),
+    .a_addr(ds1_rat_src1_rd_addr), // Read port
+    .a_dout(ds2_rat_src1_rd_data),
+
+    .b_addr(ds2_rat_wr_addr), // Write port
+    .b_wren(ds2_rat_wren),
+    .b_din(ds2_rat_wr_data)
+  );
+
+  // Read copy 2
+  dp_sram #(.DW(`TAG_SZ), .IW(`REG_IDX_SZ+`CHKPT_PTR_SZ)) rat1 (
+    .clk(clk),
+    .a_addr(ds1_rat_src2_rd_addr), // Read port
+    .a_dout(ds2_rat_src2_rd_data),
+
+    .b_addr(ds2_rat_wr_addr), // Write port
+    .b_wren(ds2_rat_wren),
+    .b_din(ds2_rat_wr_data)
+  );
+
+  // Read copy 3 (previous destination tag)
+  dp_sram #(.DW(`TAG_SZ), .IW(`REG_IDX_SZ+`CHKPT_PTR_SZ)) rat2 (
+    .clk(clk),
+    .a_addr(ds1_rat_dest_rd_addr), // Read port
+    .a_dout(ds2_rat_dest_rd_data),
+
+    .b_addr(ds2_rat_wr_addr), // Write port
+    .b_wren(ds2_rat_wren),
+    .b_din(ds2_rat_wr_data)
+  );
+
+  // Instantiate tables for the committed RAT copies
+  dp_sram #(.DW(`TAG_SZ), .IW(`REG_IDX_SZ)) rrat0 (
+    .clk(clk),
+    .a_addr(ds1_src1_idx), // Read port
+    .a_dout(ds2_rrat_src1_rd_data),
+
+    .b_addr(ds_rrat_wr_addr), // Write port (controlled by retire)
+    .b_wren(ds_rrat_wren),
+    .b_din(ds_rrat_wr_data)
+  );
+  dp_sram #(.DW(`TAG_SZ), .IW(`REG_IDX_SZ)) rrat1 (
+    .clk(clk),
+    .a_addr(ds1_src2_idx), // Read port
+    .a_dout(ds2_rrat_src2_rd_data),
+
+    .b_addr(ds_rrat_wr_addr), // Write port (controlled by retire)
+    .b_wren(ds_rrat_wren),
+    .b_din(ds_rrat_wr_data)
+  );
+  dp_sram #(.DW(`TAG_SZ), .IW(`REG_IDX_SZ)) rrat2 (
+    .clk(clk),
+    .a_addr(ds1_dest_idx), // Read port
+    .a_dout(ds2_rrat_dest_rd_data),
+
+    .b_addr(ds_rrat_wr_addr), // Write port (controlled by retire)
+    .b_wren(ds_rrat_wren),
+    .b_din(ds_rrat_wr_data)
+  );
+
+  // Since writes to map tables occur in DS2 stage, need to detect forwarding from previous instructions
+  // A DS1 lookup hits the pending DS2 write when the register index part of the write address matches.
+  wire ds1_src1_wr_fwd = (ds1_src1_idx == ds2_rat_wr_addr[`REG_IDX_SZ+`CHKPT_PTR_SZ-1:`CHKPT_PTR_SZ]) & ds2_rat_wren;
+  wire ds1_src2_wr_fwd = (ds1_src2_idx == ds2_rat_wr_addr[`REG_IDX_SZ+`CHKPT_PTR_SZ-1:`CHKPT_PTR_SZ]) & ds2_rat_wren;
+  wire ds1_dest_wr_fwd = (ds1_dest_idx == ds2_rat_wr_addr[`REG_IDX_SZ+`CHKPT_PTR_SZ-1:`CHKPT_PTR_SZ]) & ds2_rat_wren;
+  wire ds2_src1_wr_fwd, ds2_src2_wr_fwd, ds2_dest_wr_fwd;
+  MDFFR #(1) ds2_src1_wr_fwd_ff (clk, rst, 1'b0, ds1_src1_wr_fwd, ds2_src1_wr_fwd);
+  MDFFR #(1) ds2_src2_wr_fwd_ff (clk, rst, 1'b0, ds1_src2_wr_fwd, ds2_src2_wr_fwd);
+  MDFFR #(1) ds2_dest_wr_fwd_ff (clk, rst, 1'b0, ds1_dest_wr_fwd, ds2_dest_wr_fwd);
+
+  // Capture the tag being written so it can be forwarded in the following cycle
+  wire [`TAG_SZ-1:0] r_ds2_rat_wr_data;
+  wire r_ds2_wr_data_ld = ds2_rat_wren & (ds1_src1_wr_fwd | ds1_src2_wr_fwd | ds1_dest_wr_fwd);
+  MDFFL #(`TAG_SZ) r_ds2_rat_wr_data_ff (clk, r_ds2_wr_data_ld, ds2_rat_wr_data, r_ds2_rat_wr_data);
+
+  // Generate DS2 stage outputs
+  // Mux between RRAT and RAT outputs
+  MDFFL #(`CHKPT_PTR_SZ) ds2_chkpt_ptr_ff   (clk, ds1_valid, chkpt_head_ptr, ds2_chkpt_ptr);
+  MDFFL #(`FL_PTR_SZ)    ds2_fl_head_ptr_ff (clk, ds1_valid, ds1_fl_head_ptr, ds2_fl_head_ptr);
+
+  wire ds2_src1_use_rrat, ds2_src2_use_rrat, ds2_dest_use_rrat;
+  MDFFLR #(1) ds2_src1_use_rrat_ff (clk, rst, ds1_valid, 1'b0, ds1_src1_use_rrat, ds2_src1_use_rrat);
+  MDFFLR #(1) ds2_src2_use_rrat_ff (clk, rst, ds1_valid, 1'b0, ds1_src2_use_rrat, ds2_src2_use_rrat);
+  MDFFLR #(1) ds2_dest_use_rrat_ff (clk, rst, ds1_valid, 1'b0, ds1_dest_use_rrat, ds2_dest_use_rrat);
+  MDFFL #(`TAG_SZ) ds2_dest_tag_ff (clk, ds1_valid, ds1_dest_tag, ds2_dest_tag);
+
+  // Priority: forwarded tag from the previous instruction, then RRAT, then checkpointed RAT
+  assign ds2_src1_tag     = ds2_src1_wr_fwd ? r_ds2_rat_wr_data : ds2_src1_use_rrat ? ds2_rrat_src1_rd_data : ds2_rat_src1_rd_data;
+  assign ds2_src2_tag     = ds2_src2_wr_fwd ? r_ds2_rat_wr_data : ds2_src2_use_rrat ? ds2_rrat_src2_rd_data : ds2_rat_src2_rd_data;
+  assign ds2_dest_tag_old = ds2_dest_wr_fwd ? r_ds2_rat_wr_data : ds2_dest_use_rrat ? ds2_rrat_dest_rd_data : ds2_rat_dest_rd_data;
+
+endmodule
Index: trunk/rtl/dp_sram.v
===================================================================
--- trunk/rtl/dp_sram.v (nonexistent)
+++ trunk/rtl/dp_sram.v (revision 2)
@@ -0,0 +1,47 @@
+/*
+ Josh Smith
+
+ File: dp_sram.v
+ Description: Module for SRAM slice. This is written
+ as a generic dual-port SRAM, so should be inferred as SRAM by
+ tool.
+*/
+
+`include "ooops_defs.v"
+
+// dp_sram: generic two-port memory (one read port, one write port).
+//   Port A registers its address on the rising edge of clk and presents
+//   rf_data[registered address] combinationally on a_dout.
+//   Port B writes b_din into rf_data[b_addr] on the rising edge of clk
+//   when b_wren is high.
+module dp_sram
+  #(parameter DW      = `DATA_SZ,    // width of one entry, in bits
+    parameter IW      = `TAG_SZ,     // width of the address ports, in bits
+    parameter ENTRIES = (1 << IW)    // number of entries in the array
+   )
+  (
+  input  wire          clk,
+
+  // Port A (read)
+  input  wire [IW-1:0] a_addr,
+  output wire [DW-1:0] a_dout,
+
+  // Port B (write)
+  input  wire [IW-1:0] b_addr,
+  input  wire          b_wren,
+  input  wire [DW-1:0] b_din
+  );
+
+  reg [DW-1:0] rf_data [ENTRIES-1:0];
+  reg [IW-1:0] a_addr_q;             // port A address captured at the clock edge
+
+  // Port A: capture the read address, then index the array with it.
+  // Because the array is read through the registered address, a_dout follows
+  // any later write to that same entry.
+  always @(posedge clk) begin
+    a_addr_q <= `SD a_addr;
+  end
+  assign a_dout = rf_data[a_addr_q];
+
+  // Port B: synchronous write, qualified by b_wren.
+  always @(posedge clk) begin
+    if (b_wren) begin
+      rf_data[b_addr] <= `SD b_din;
+    end
+  end
+endmodule
+
trunk/rtl/dp_sram.v
Property changes :
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Index: trunk/rtl/ooops_lib.v
===================================================================
--- trunk/rtl/ooops_lib.v (nonexistent)
+++ trunk/rtl/ooops_lib.v (revision 2)
@@ -0,0 +1,98 @@
+
+//////////////////////////////////////////////////////////////////
+// //
+// OoOPs common module library //
+// //
+// This file is part of the OoOPs project //
+// http://www.opencores.org/project,oops //
+// //
+// Description: //
+// Basic library of common blocks such as different types of //
+// flops, etc... //
+// //
+// Author(s): //
+// - Joshua Smith, smjoshua@umich.edu //
+// //
+//////////////////////////////////////////////////////////////////
+// //
+// Copyright (C) 2012 Authors and OPENCORES.ORG //
+// //
+// This source file may be used and distributed without //
+// restriction provided that this copyright statement is not //
+// removed from the file and that any derivative work contains //
+// the original copyright notice and the associated disclaimer. //
+// //
+// This source file is free software; you can redistribute it //
+// and/or modify it under the terms of the GNU Lesser General //
+// Public License as published by the Free Software Foundation; //
+// either version 2.1 of the License, or (at your option) any //
+// later version. //
+// //
+// This source is distributed in the hope that it will be //
+// useful, but WITHOUT ANY WARRANTY; without even the implied //
+// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR //
+// PURPOSE. See the GNU Lesser General Public License for more //
+// details. //
+// //
+// You should have received a copy of the GNU Lesser General //
+// Public License along with this source; if not, download it //
+// from http://www.opencores.org/lgpl.shtml //
+// //
+//////////////////////////////////////////////////////////////////
+`include "ooops_defs.v"
+
+// MDFF: plain D flip-flop bank, DW bits wide.
+//   dout takes the value of din on every rising edge of clk.
+//   There is no reset and no load enable.
+module MDFF
+  #(parameter DW = 1)
+  (
+  input  wire          clk,
+  input  wire [DW-1:0] din,
+  output reg  [DW-1:0] dout
+  );
+
+  always @(posedge clk) begin
+    dout <= `SD din;
+  end
+
+endmodule
+
+// MDFFL: D flip-flop bank with load enable, DW bits wide.
+//   On a rising edge of clk, dout takes din only when ld is high;
+//   otherwise dout keeps its previous value. There is no reset.
+module MDFFL
+  #(parameter DW = 1)
+  (
+  input  wire          clk,
+  input  wire          ld,
+  input  wire [DW-1:0] din,
+  output reg  [DW-1:0] dout
+  );
+
+  always @(posedge clk) begin
+    if (ld) begin
+      dout <= `SD din;
+    end
+  end
+
+endmodule
+
+// MDFFR: D flip-flop bank with synchronous reset, DW bits wide.
+//   On a rising edge of clk, dout takes rst_din when rst is high,
+//   and din otherwise.
+module MDFFR
+  #(parameter DW = 1)
+  (
+  input  wire          clk,
+  input  wire          rst,
+  input  wire [DW-1:0] rst_din,
+  input  wire [DW-1:0] din,
+  output reg  [DW-1:0] dout
+  );
+
+  always @(posedge clk) begin
+    if (rst) begin
+      dout <= `SD rst_din;
+    end
+    else begin
+      dout <= `SD din;
+    end
+  end
+
+endmodule
+
+// MDFFLR: D flip-flop bank with synchronous reset and load enable.
+//   On a rising edge of clk:
+//     rst high          -> dout takes rst_din (reset wins over ld)
+//     rst low, ld high  -> dout takes din
+//     rst low, ld low   -> dout keeps its previous value
+module MDFFLR
+  #(parameter DW = 1)
+  (
+  input  wire          clk,
+  input  wire          rst,
+  input  wire          ld,
+  input  wire [DW-1:0] rst_din,
+  input  wire [DW-1:0] din,
+  output reg  [DW-1:0] dout
+  );
+
+  always @(posedge clk) begin
+    if (rst) begin
+      dout <= `SD rst_din;
+    end
+    else begin
+      if (ld) begin
+        dout <= `SD din;
+      end
+    end
+  end
+
+endmodule
trunk/rtl/ooops_lib.v
Property changes :
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Index: trunk/rtl/if_buffer.v
===================================================================
--- trunk/rtl/if_buffer.v (nonexistent)
+++ trunk/rtl/if_buffer.v (revision 2)
@@ -0,0 +1,119 @@
+//////////////////////////////////////////////////////////////////
+// //
+// OoOPs Core Instruction Fetch Buffer module //
+// //
+// This file is part of the OoOPs project //
+// http://www.opencores.org/project,oops //
+// //
+// Description: //
+// Buffer for fetched instructions to help reduce penalty of //
+// cache misses during stall cycles. //
+// //
+// Author(s): //
+// - Joshua Smith, smjoshua@umich.edu //
+// //
+//////////////////////////////////////////////////////////////////
+// //
+// Copyright (C) 2012 Authors and OPENCORES.ORG //
+// //
+// This source file may be used and distributed without //
+// restriction provided that this copyright statement is not //
+// removed from the file and that any derivative work contains //
+// the original copyright notice and the associated disclaimer. //
+// //
+// This source file is free software; you can redistribute it //
+// and/or modify it under the terms of the GNU Lesser General //
+// Public License as published by the Free Software Foundation; //
+// either version 2.1 of the License, or (at your option) any //
+// later version. //
+// //
+// This source is distributed in the hope that it will be //
+// useful, but WITHOUT ANY WARRANTY; without even the implied //
+// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR //
+// PURPOSE. See the GNU Lesser General Public License for more //
+// details. //
+// //
+// You should have received a copy of the GNU Lesser General //
+// Public License along with this source; if not, download it //
+// from http://www.opencores.org/lgpl.shtml //
+// //
+//////////////////////////////////////////////////////////////////
+`include "ooops_defs.v"
+
+// if_buffer: small flop-based FIFO between instruction fetch and decode.
+//   Write side : an entry is pushed when if_valid is high and the buffer is
+//                not full.
+//   Read side  : the entry at the read pointer is always presented on the
+//                if_id_* outputs; it is popped when if_ifb_pop_en is high and
+//                the buffer is not empty.
+//   flush      : returns both pointers to zero, which empties the buffer.
+//   Pointers carry one extra MSB (wrap bit) so that full and empty can be
+//   told apart: equal index + equal wrap bit = empty, equal index +
+//   different wrap bit = full.
+module if_buffer (
+  input  wire                  clk,
+  input  wire                  rst,
+  input  wire                  flush,
+
+  // Write interface
+  input  wire                  if_valid,
+  input  wire [`INSTR_SZ-1:0]  if_instr,
+  input  wire [`ADDR_SZ-1:0]   if_fpc,
+  input  wire [`BP_SZ-1:0]     if_bprd_info,
+
+  // Read interface
+  input  wire                  if_ifb_pop_en,
+  output wire                  ifb_full,
+  output wire                  if_id_valid,
+  output wire [`INSTR_SZ-1:0]  if_id_instr,
+  output wire [`ADDR_SZ-1:0]   if_id_fpc,
+  output wire [`BP_SZ-1:0]     if_id_bprd_info
+  );
+
+  // Local wires
+  wire [`IFB_PTR_SZ:0]      ifb_rd_ptr, ifb_rd_ptr_in, ifb_rd_ptr_inc;
+  wire [`IFB_PTR_SZ:0]      ifb_wr_ptr, ifb_wr_ptr_in, ifb_wr_ptr_inc;
+  wire [`IFB_ENTRIES-1:0]   ifb_rd_ptr_vec;   // 1-hot vector for reading
+  wire                      ifb_empty;
+  wire                      ifb_push;
+  wire                      ifb_pop;
+
+  wire [`IFB_ENTRY_SZ-1:0]  ifb_entry [`IFB_ENTRIES-1:0];
+  wire [`IFB_ENTRY_SZ-1:0]  ifb_entry_in;
+  wire [`IFB_ENTRIES-1:0]   ifb_entry_ld;
+  reg  [`IFB_ENTRY_SZ-1:0]  ifb_rd_entry;
+
+  // Handle muxing outputs: AND-OR mux selected by the 1-hot read vector
+  integer i;
+  always @* begin
+    ifb_rd_entry = {`IFB_ENTRY_SZ{1'b0}};
+    for (i=0; i<`IFB_ENTRIES; i=i+1) begin
+      ifb_rd_entry = ifb_rd_entry | ({`IFB_ENTRY_SZ{ifb_rd_ptr_vec[i]}} & ifb_entry[i]);
+    end
+  end
+  assign if_id_valid = !ifb_empty;
+  assign {if_id_instr,if_id_fpc,if_id_bprd_info} = ifb_rd_entry;
+
+  // Handle updating the read and write pointers
+  assign ifb_push = if_valid & !ifb_full;
+  assign ifb_pop  = if_ifb_pop_en & !ifb_empty;
+
+  // Incremented pointers: when the index part reaches the last entry, go
+  // back to index 0 and toggle the wrap bit. This keeps the wrap-bit
+  // full/empty compare valid whether or not IFB_ENTRIES is a power of two.
+  assign ifb_wr_ptr_inc = (ifb_wr_ptr[`IFB_PTR_SZ-1:0] == (`IFB_ENTRIES-1)) ?
+                          {~ifb_wr_ptr[`IFB_PTR_SZ], {`IFB_PTR_SZ{1'b0}}} :
+                          ifb_wr_ptr + 1'b1;
+  assign ifb_rd_ptr_inc = (ifb_rd_ptr[`IFB_PTR_SZ-1:0] == (`IFB_ENTRIES-1)) ?
+                          {~ifb_rd_ptr[`IFB_PTR_SZ], {`IFB_PTR_SZ{1'b0}}} :
+                          ifb_rd_ptr + 1'b1;
+
+  // True next-state pointers: a pointer only moves on an actual push/pop,
+  // and flush overrides both. The occupancy flags below are derived from
+  // these, so they must describe the state that will really be flopped.
+  assign ifb_wr_ptr_in = flush    ? {(`IFB_PTR_SZ+1){1'b0}} :
+                         ifb_push ? ifb_wr_ptr_inc          : ifb_wr_ptr;
+  assign ifb_rd_ptr_in = flush    ? {(`IFB_PTR_SZ+1){1'b0}} :
+                         ifb_pop  ? ifb_rd_ptr_inc          : ifb_rd_ptr;
+
+  // 1-hot read select uses only the index bits (the wrap bit is not an index)
+  wire [`IFB_ENTRIES-1:0] ifb_rd_ptr_vec_in = (`IFB_ENTRIES'h1 << ifb_rd_ptr_in[`IFB_PTR_SZ-1:0]);
+
+  wire ifb_wr_ptr_ld = ifb_push | flush;
+  wire ifb_rd_ptr_ld = ifb_pop  | flush;
+  MDFFLR #(`IFB_PTR_SZ+1) ifb_wr_ptr_ff (clk, rst, ifb_wr_ptr_ld, {(`IFB_PTR_SZ+1){1'b0}}, ifb_wr_ptr_in, ifb_wr_ptr);
+  MDFFLR #(`IFB_PTR_SZ+1) ifb_rd_ptr_ff (clk, rst, ifb_rd_ptr_ld, {(`IFB_PTR_SZ+1){1'b0}}, ifb_rd_ptr_in, ifb_rd_ptr);
+  MDFFLR #(`IFB_ENTRIES) ifb_rd_ptr_vec_ff (clk, rst, ifb_rd_ptr_ld, `IFB_ENTRIES'h1, ifb_rd_ptr_vec_in, ifb_rd_ptr_vec);
+
+  // Handle occupancy detection (registered, computed from next-state pointers)
+  wire ifb_full_in  = (ifb_wr_ptr_in[`IFB_PTR_SZ] ^  ifb_rd_ptr_in[`IFB_PTR_SZ]) & (ifb_wr_ptr_in[`IFB_PTR_SZ-1:0]==ifb_rd_ptr_in[`IFB_PTR_SZ-1:0]);
+  wire ifb_empty_in = (ifb_wr_ptr_in[`IFB_PTR_SZ] ~^ ifb_rd_ptr_in[`IFB_PTR_SZ]) & (ifb_wr_ptr_in[`IFB_PTR_SZ-1:0]==ifb_rd_ptr_in[`IFB_PTR_SZ-1:0]);
+  MDFFR #(1) ifb_full_ff  (clk, rst, 1'b0, ifb_full_in,  ifb_full);
+  MDFFR #(1) ifb_empty_ff (clk, rst, 1'b1, ifb_empty_in, ifb_empty);
+
+  // Instantiate flops for entries
+  // NOTE(review): the write packing carries if_valid as its MSB while the
+  // read unpacking above does not; whether `IFB_ENTRY_SZ includes that bit
+  // is defined in ooops_defs.v and should be confirmed.
+  assign ifb_entry_in = {if_valid,if_instr, if_fpc, if_bprd_info};
+
+  // On a push, load only the entry addressed by the write pointer's index bits
+  assign ifb_entry_ld = {`IFB_ENTRIES{ifb_push}} & (`IFB_ENTRIES'h1 << ifb_wr_ptr[`IFB_PTR_SZ-1:0]);
+
+  genvar g;
+  generate
+    for (g=0; g<`IFB_ENTRIES; g=g+1)
+    begin : ifb_entry_gen
+      MDFFL #(`IFB_ENTRY_SZ) entry_ff (clk, ifb_entry_ld[g], ifb_entry_in, ifb_entry[g]);
+    end
+  endgenerate
+
+endmodule
trunk/rtl/if_buffer.v
Property changes :
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Index: trunk/rtl/ds_stage.v
===================================================================
--- trunk/rtl/ds_stage.v (nonexistent)
+++ trunk/rtl/ds_stage.v (revision 2)
@@ -0,0 +1,223 @@
+//////////////////////////////////////////////////////////////////
+// //
+// OoOPs Core Instruction Dispatch module //
+// //
+// This file is part of the OoOPs project //
+// http://www.opencores.org/project,oops //
+// //
+// Description: //
+// Instruction dispatch block handles instruction register //
+// renaming, and dependency checking, and dispatching //
+// instructions to the ROB and appropriate Reservation Station.//
+// Due to the structure of the map table, Dispatch is pipelined//
+// into 2 stages: DS1 and DS2. //
+// //
+// DS1 stage will be for determining which checkpoint has the //
+// latest valid mapping for a register, and for allocating the //
+// destination physical register. //
+// //
+// DS2 stage will be for reading the map tables and dispatching//
+// to the Reservation Stations. //
+// //
+// Author(s): //
+// - Joshua Smith, smjoshua@umich.edu //
+// //
+//////////////////////////////////////////////////////////////////
+// //
+// Copyright (C) 2012 Authors and OPENCORES.ORG //
+// //
+// This source file may be used and distributed without //
+// restriction provided that this copyright statement is not //
+// removed from the file and that any derivative work contains //
+// the original copyright notice and the associated disclaimer. //
+// //
+// This source file is free software; you can redistribute it //
+// and/or modify it under the terms of the GNU Lesser General //
+// Public License as published by the Free Software Foundation; //
+// either version 2.1 of the License, or (at your option) any //
+// later version. //
+// //
+// This source is distributed in the hope that it will be //
+// useful, but WITHOUT ANY WARRANTY; without even the implied //
+// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR //
+// PURPOSE. See the GNU Lesser General Public License for more //
+// details. //
+// //
+// You should have received a copy of the GNU Lesser General //
+// Public License along with this source; if not, download it //
+// from http://www.opencores.org/lgpl.shtml //
+// //
+//////////////////////////////////////////////////////////////////
+`include "ooops_defs.v"
+
+// ds_stage: two-stage (DS1/DS2) instruction dispatch.
+//   DS1 extracts the source/destination register indices from the decode bus
+//   and presents them to the map table. DS2 holds the flopped instruction
+//   information, combines it with the tags returned by the map table into
+//   the rename bus, and drives the ROB and ALU reservation station.
+//   ds_stall is raised while the map table is initialising, or when the DS2
+//   instruction cannot be accepted by the ROB or by its reservation station.
+module ds_stage (
+  input  wire                       clk,
+  input  wire                       rst,
+
+  // Flush/stall interface
+  input  wire                       rob_pipe_flush,
+  output wire                       ds_stall,
+
+  // Interface to ID stage
+  input  wire                       id_ds1_valid,
+  input  wire [`ADDR_SZ-1:0]        id_ds1_fpc,
+  input  wire                       id_ds1_in_dly_slot,
+  input  wire [`DEC_BUS_SZ-1:0]     id_ds1_dec_bus,
+  input  wire [`BP_SZ-1:0]          id_ds1_bprd_info,
+
+  // Interface to CDB (for tag monitoring)
+  input  wire                       ex_cdb_valid,
+  input  wire [`TAG_SZ-1:0]         ex_cdb_tag,
+  input  wire [`REG_IDX_SZ-1:0]     ex_cdb_dest_idx,
+
+  // Interface to ROB
+  input  wire                       rob_ds_full,
+  input  wire [`ROB_PTR_SZ-1:0]     rob_ds_tail_ptr,
+  input  wire [`CHKPT_PTR_SZ-1:0]   rob_ds_chkpt_ptr,
+  input  wire [`FL_PTR_SZ-1:0]      rob_ds_fl_head_ptr,
+  input  wire                       rob_ds_ret_valid,
+  input  wire                       rob_ds_ret_dest_write,
+  input  wire                       rob_ds_ret_chkpt_free,
+  input  wire [`REG_IDX_SZ-1:0]     rob_ds_ret_idx,
+  input  wire [`TAG_SZ-1:0]         rob_ds_ret_tag,
+  input  wire [`TAG_SZ-1:0]         rob_ds_ret_tag_old,
+  output wire                       ds2_rob_valid,
+  output wire [`ADDR_SZ-1:0]        ds2_rob_fpc,
+  output wire                       ds2_rob_in_dly_slot,
+  output wire [`DEC_BUS_SZ-1:0]     ds2_rob_dec_bus,
+  output wire [`REN_BUS_SZ-1:0]     ds2_rob_ren_info,
+  output wire [`BP_SZ-1:0]          ds2_rob_bprd_info,
+  output wire [`CHKPT_PTR_SZ-1:0]   ds2_rob_chkpt_ptr,
+  output wire [`FL_PTR_SZ-1:0]      ds2_rob_fl_head_ptr,
+
+  // Interface to ALU RS
+  input  wire                       rs_ds_alu_full,
+  output wire                       ds2_rs_alu_valid,
+  output wire [`ADDR_SZ-1:0]        ds2_rs_alu_fpc,
+  output wire [`REN_BUS_SZ-1:0]     ds2_rs_alu_ren_info,
+  output wire [`ALU_CTL_SZ-1:0]     ds2_rs_alu_ctl,
+  output wire [`ROB_PTR_SZ-1:0]     ds2_rs_alu_rob_ptr
+
+  // Interface to MULT/DIV RS
+  //input wire rs_ds_mult_div_full,
+  //output wire ds2_rs_mult_div_valid,
+  //output wire [`REN_BUS_SZ-1:0] ds2_rs_mult_div_ren_info,
+  //output wire [`MULTDIV_CTL_SZ-1:0] ds2_rs_mult_div_ctl,
+  //output wire [`ROB_PTR_SZ-1:0] ds2_rs_mult_div_rob_ptr,
+
+  //// Interface to LDST RS
+  //input wire rs_ds_ldst_full,
+  //output wire ds2_rs_ldst_valid,
+  //output wire [`REN_BUS_SZ-1:0] ds2_rs_ldst_ren_info,
+  //output wire [`LDST_CTL_SZ-1:0] ds2_rs_ldst_ctl,
+  //output wire [`ROB_PTR_SZ-1:0] ds2_rs_ldst_rob_ptr
+  );
+
+  // Internal wires/regs
+  // DS1 stage signals
+  wire [`REG_IDX_SZ-1:0]  ds1_src1_idx, ds1_src2_idx, ds1_dest_idx;
+  wire                    ds1_dest_wr;
+  wire                    ds1_type_br;
+
+  // Driven by the map table while it is initialising; declared explicitly
+  // because it is read by ds_stall before the instance that drives it.
+  wire                    map_table_init;
+
+  // The MULT/DIV and LDST reservation station ports are still commented out
+  // of the port list, so their "full" indications are tied off here.
+  // TODO: delete these two tie-offs when the ports above are enabled.
+  wire                    rs_ds_mult_div_full = 1'b0;
+  wire                    rs_ds_ldst_full     = 1'b0;
+
+  // DS2 stage signals
+  wire                    ds2_valid;
+  wire [`ADDR_SZ-1:0]     ds2_fpc;
+  wire                    ds2_in_dly_slot;
+  wire [`DEC_BUS_SZ-1:0]  ds2_dec_bus;
+  wire [`BP_SZ-1:0]       ds2_bprd_info;
+  wire [`TAG_SZ-1:0]      ds2_src1_tag, ds2_src2_tag;
+  wire                    ds2_src1_valid, ds2_src2_valid;
+  wire [`TAG_SZ-1:0]      ds2_dest_tag;
+  wire [`TAG_SZ-1:0]      ds2_dest_tag_old;
+  wire [`REN_BUS_SZ-1:0]  ds2_ren_info;
+  wire [`FL_PTR_SZ-1:0]   ds2_fl_head_ptr;
+
+  // Handle stalling pipe for full ROB/RS
+  // Since the stall has to propagate back to IF/ID stages, we may need this
+  // to be an early signal (from a flop ideally). In this case we may need ID stage
+  // signals to determine the stall.
+  assign ds_stall = map_table_init | (ds2_valid & (
+                      rob_ds_full |
+                      (rs_ds_alu_full      & ds2_dec_bus[`DEC_TYPE_ALU]) |
+                      (rs_ds_mult_div_full & ds2_dec_bus[`DEC_TYPE_MULTDIV]) |
+                      (rs_ds_ldst_full     & ds2_dec_bus[`DEC_TYPE_LDST])));
+
+
+  // Instantiate Map table for register renaming
+  // Note: for MULT/DIV we will use both rename ports for the single instruction
+  // because they write to both HI and LO.
+  assign ds1_src1_idx = id_ds1_dec_bus[`DEC_REG_S_IDX];
+  assign ds1_src2_idx = id_ds1_dec_bus[`DEC_REG_T_IDX];
+  assign ds1_dest_idx = id_ds1_dec_bus[`DEC_REG_D_IDX];
+  assign ds1_dest_wr  = id_ds1_valid & id_ds1_dec_bus[`DEC_REG_D_WR] & !ds_stall;
+  assign ds1_type_br  = id_ds1_valid & id_ds1_dec_bus[`DEC_TYPE_BR] & !ds_stall;
+
+  map_table mt0 (
+    .clk(clk),
+    .rst(rst),
+    .map_table_init(map_table_init),
+
+    .ds1_valid(id_ds1_valid),
+    .ds1_src1_idx(ds1_src1_idx),
+    .ds1_src2_idx(ds1_src2_idx),
+    .ds1_dest_idx(ds1_dest_idx),
+    .ds1_dest_wr(ds1_dest_wr),
+    .ds1_type_br(ds1_type_br),
+    .ds2_src1_tag(ds2_src1_tag),
+    .ds2_src2_tag(ds2_src2_tag),
+    .ds2_src1_valid(ds2_src1_valid),
+    .ds2_src2_valid(ds2_src2_valid),
+    .ds2_dest_tag(ds2_dest_tag),
+    .ds2_dest_tag_old(ds2_dest_tag_old),
+    .ds2_fl_head_ptr(ds2_fl_head_ptr),
+    .ds2_chkpt_ptr(ds2_rob_chkpt_ptr),   // checkpoint pointer goes straight to the ROB port
+
+    //.ex_cdb_bus(ex_cdb_bus),
+    .rob_pipe_flush(rob_pipe_flush),
+    .rob_ds_ret_valid(rob_ds_ret_valid),
+    .rob_ds_ret_dest_write(rob_ds_ret_dest_write),
+    .rob_ds_chkpt_ptr(rob_ds_chkpt_ptr),
+    .rob_ds_fl_head_ptr(rob_ds_fl_head_ptr),
+    .rob_ds_ret_chkpt_free(rob_ds_ret_chkpt_free),
+    .rob_ds_ret_idx(rob_ds_ret_idx),
+    .rob_ds_ret_tag(rob_ds_ret_tag),
+    .rob_ds_ret_tag_old(rob_ds_ret_tag_old)
+  );
+
+  // Flop info into DS2 stage
+  // NOTE(review): ds2_valid is held during ds_stall, but the payload flops
+  // below (and the map table's ds1_valid) load on id_ds1_valid alone, so a
+  // stalled DS2 instruction may be overwritten -- confirm the intended
+  // ID-stage behaviour during a stall.
+  MDFFLR #(1)           ds2_valid_ff       (clk, rst, !ds_stall, 1'b0, id_ds1_valid, ds2_valid);
+  MDFFL  #(`ADDR_SZ)    ds2_fpc_ff         (clk, id_ds1_valid, id_ds1_fpc, ds2_fpc);
+  MDFFLR #(1)           ds2_in_dly_slot_ff (clk, rst, id_ds1_valid, 1'b0, id_ds1_in_dly_slot, ds2_in_dly_slot);
+  MDFFL  #(`DEC_BUS_SZ) ds2_dec_bus_ff     (clk, id_ds1_valid, id_ds1_dec_bus, ds2_dec_bus);
+  MDFFL  #(`BP_SZ)      ds2_bprd_info_ff   (clk, id_ds1_valid, id_ds1_bprd_info, ds2_bprd_info);
+
+
+  // Construct dispatch packets to the different Reservation Stations
+  assign ds2_ren_info = { ds2_dec_bus[`DEC_REG_D_IDX],   // DEST_IDX
+                          ds2_dec_bus[`DEC_REG_D_WR],    // DEST_VLD
+                          ds2_dest_tag_old,              // DEST_TAG_OLD
+                          ds2_dest_tag,                  // DEST_TAG
+                          ds2_src2_valid,                // SRC2_VLD
+                          ds2_dec_bus[`DEC_REG_T_NEED],  // SRC2_NEED
+                          ds2_src2_tag,                  // SRC2_TAG
+                          ds2_src1_valid,                // SRC1_VLD
+                          ds2_dec_bus[`DEC_REG_S_NEED],  // SRC1_NEED
+                          ds2_src1_tag                   // SRC1_TAG
+                        };
+
+  // Handle outputs to ROB (ds2_rob_chkpt_ptr is driven by the map table above)
+  assign ds2_rob_valid       = ds2_valid;
+  assign ds2_rob_fpc         = ds2_fpc;
+  assign ds2_rob_in_dly_slot = ds2_in_dly_slot;
+  assign ds2_rob_dec_bus     = ds2_dec_bus;
+  assign ds2_rob_ren_info    = ds2_ren_info;
+  assign ds2_rob_bprd_info   = ds2_bprd_info;
+  assign ds2_rob_fl_head_ptr = ds2_fl_head_ptr;
+
+  // Handle outputs to ALU RS
+  // NOTE(review): ds2_rs_alu_ctl is not driven anywhere in this module; the
+  // decode-bus field that should feed it is not visible here.
+  assign ds2_rs_alu_valid    = ds2_valid & ds2_dec_bus[`DEC_TYPE_ALU] & !ds_stall;
+  assign ds2_rs_alu_fpc      = ds2_fpc;
+  assign ds2_rs_alu_ren_info = ds2_ren_info;
+  assign ds2_rs_alu_rob_ptr  = rob_ds_tail_ptr;
+
+endmodule
trunk/rtl/ds_stage.v
Property changes :
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Index: trunk/rtl/sources.list
===================================================================
--- trunk/rtl/sources.list (nonexistent)
+++ trunk/rtl/sources.list (revision 2)
@@ -0,0 +1,19 @@
+rtl/ooops_defs.v
+rtl/ooops_lib.v
+rtl/core.v
+rtl/sp_sram.v
+rtl/if_stage.v
+rtl/if_buffer.v
+rtl/icache_top.v
+rtl/icache_ctl.v
+rtl/id_stage.v
+rtl/ds_stage.v
+rtl/map_table.v
+rtl/free_list.v
+rtl/rs_top.v
+rtl/rs.v
+rtl/rs_entry.v
+rtl/regfile.v
+rtl/psel.v
+rtl/dp_sram.v
+rtl/wb_master.v
Index: trunk/tb/test_map_table.v
===================================================================
--- trunk/tb/test_map_table.v (nonexistent)
+++ trunk/tb/test_map_table.v (revision 2)
@@ -0,0 +1,289 @@
+//////////////////////////////////////////////////////////////////
+// //
+// OoOPs Core Register Map Table testbench //
+// //
+// This file is part of the OoOPs project //
+// http://www.opencores.org/project,oops //
+// //
+// Description: //
+// Small, self-contained testbench for basic functionality of //
+// the Map Table. //
+// //
+// Author(s): //
+// - Joshua Smith, smjoshua@umich.edu //
+// //
+//////////////////////////////////////////////////////////////////
+// //
+// Copyright (C) 2012 Authors and OPENCORES.ORG //
+// //
+// This source file may be used and distributed without //
+// restriction provided that this copyright statement is not //
+// removed from the file and that any derivative work contains //
+// the original copyright notice and the associated disclaimer. //
+// //
+// This source file is free software; you can redistribute it //
+// and/or modify it under the terms of the GNU Lesser General //
+// Public License as published by the Free Software Foundation; //
+// either version 2.1 of the License, or (at your option) any //
+// later version. //
+// //
+// This source is distributed in the hope that it will be //
+// useful, but WITHOUT ANY WARRANTY; without even the implied //
+// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR //
+// PURPOSE. See the GNU Lesser General Public License for more //
+// details. //
+// //
+// You should have received a copy of the GNU Lesser General //
+// Public License along with this source; if not, download it //
+// from http://www.opencores.org/lgpl.shtml //
+// //
+//////////////////////////////////////////////////////////////////
+`include "ooops_defs.v"
+
+// Self-checking testbench for map_table.
+// Drives the rename (ds1_*) and retire (rob_*) inputs of the DUT through a
+// fixed sequence, compares the DS2 outputs against hard-coded expected tags
+// and checkpoint pointers, and calls fail(<test number>) on the first mismatch.
+module test_map_table;
+
+ // I/O to Map Table DUT
+ // Inputs to the DUT are regs driven by the initial block and the tasks below.
+ reg clk;
+ reg rst;
+ reg ds1_valid;
+ reg [`REG_IDX_SZ-1:0] ds1_src1_idx;
+ reg [`REG_IDX_SZ-1:0] ds1_src2_idx;
+ reg [`REG_IDX_SZ-1:0] ds1_dest_idx;
+ reg ds1_dest_wr;
+ reg ds1_type_br;
+ reg rob_pipe_flush;
+ reg rob_ds_ret_valid;
+ reg rob_ds_ret_dest_write;
+ reg [`CHKPT_PTR_SZ-1:0] rob_ds_chkpt_ptr;
+ reg [`FL_PTR_SZ-1:0] rob_ds_fl_head_ptr;
+ reg rob_ds_ret_chkpt_free;
+ reg [`REG_IDX_SZ-1:0] rob_ds_ret_idx;
+ reg [`TAG_SZ-1:0] rob_ds_ret_tag;
+ reg [`TAG_SZ-1:0] rob_ds_ret_tag_old;
+
+ // Outputs of the DUT, observed by the checks in the initial block.
+ wire map_table_init;
+ wire [`TAG_SZ-1:0] ds2_src1_tag;
+ wire [`TAG_SZ-1:0] ds2_src2_tag;
+ wire ds2_src1_valid;
+ wire ds2_src2_valid;
+ wire [`TAG_SZ-1:0] ds2_dest_tag;
+ wire [`TAG_SZ-1:0] ds2_dest_tag_old;
+ wire [`FL_PTR_SZ-1:0] ds2_fl_head_ptr;
+ wire [`CHKPT_PTR_SZ-1:0] ds2_chkpt_ptr;
+
+
+ // Instantiate DUT
+ map_table m0 (
+ .clk(clk),
+ .rst(rst),
+ .map_table_init(map_table_init),
+
+ .ds1_valid(ds1_valid),
+ .ds1_src1_idx(ds1_src1_idx),
+ .ds1_src2_idx(ds1_src2_idx),
+ .ds1_dest_idx(ds1_dest_idx),
+ .ds1_dest_wr(ds1_dest_wr),
+ .ds1_type_br(ds1_type_br),
+ .ds2_src1_tag(ds2_src1_tag),
+ .ds2_src2_tag(ds2_src2_tag),
+ .ds2_src1_valid(ds2_src1_valid),
+ .ds2_src2_valid(ds2_src2_valid),
+ .ds2_dest_tag(ds2_dest_tag),
+ .ds2_dest_tag_old(ds2_dest_tag_old),
+ .ds2_fl_head_ptr(ds2_fl_head_ptr),
+ .ds2_chkpt_ptr(ds2_chkpt_ptr),
+
+ .rob_pipe_flush(rob_pipe_flush),
+ .rob_ds_ret_valid(rob_ds_ret_valid),
+ .rob_ds_ret_dest_write(rob_ds_ret_dest_write),
+ .rob_ds_chkpt_ptr(rob_ds_chkpt_ptr),
+ .rob_ds_fl_head_ptr(rob_ds_fl_head_ptr),
+ .rob_ds_ret_chkpt_free(rob_ds_ret_chkpt_free),
+ .rob_ds_ret_idx(rob_ds_ret_idx),
+ .rob_ds_ret_tag(rob_ds_ret_tag),
+ .rob_ds_ret_tag_old(rob_ds_ret_tag_old)
+ );
+
+
+ // generate clk
+ // Toggles every 5 time units; the initial value is set in the initial block.
+ always begin
+ #5;
+ clk = ~clk;
+ end
+
+ // Main stimulus/check sequence. Inputs are changed on negative clock edges.
+ initial begin
+ // Initialize clk and inputs
+ clk = 1'b0;
+ rst = 1'b1;
+
+ ds1_valid = 0;
+ ds1_src1_idx = 0;
+ ds1_src2_idx = 0;
+ ds1_dest_idx = 0;
+ ds1_dest_wr = 1'b0;
+ ds1_type_br = 1'b0;
+ rob_pipe_flush = 1'b0;
+ rob_ds_ret_valid = 1'b0;
+ rob_ds_ret_dest_write = 1'b0;
+ rob_ds_chkpt_ptr = 0;
+ rob_ds_fl_head_ptr = 0;
+ rob_ds_ret_chkpt_free = 0;
+ rob_ds_ret_idx = 0;
+ rob_ds_ret_tag = 0;
+ rob_ds_ret_tag_old = 0;
+
+ // Set up waveform dump
+ // Only compiled in when WAVE_DUMP is defined on the simulator command line.
+ `ifdef WAVE_DUMP
+ $dumpfile("wave.vcd");
+ $dumpvars(0,test_map_table);
+ `endif
+
+ // Assert reset for a couple clks
+ $display("Asserting reset...");
+ repeat (3) @(negedge clk);
+ rst = 1'b0;
+ $display("Reset done.");
+
+ // Wait for initialization to be done
+ while (map_table_init)
+ @(negedge clk);
+
+ // Rename one instruction
+ set_rename_inputs(1, 2, 3, 1'b1, 1'b0); // Read r1, r2; write r3; not branch
+ @(negedge clk);
+ clear_rename_inputs;
+
+ // Check output src and dest tags
+ if ((ds2_src1_tag != 'd1) || (ds2_src2_tag != 'd2) || (ds2_dest_tag != 'd34))
+ fail('d1);
+
+ // Rename a second dependent instruction
+ set_rename_inputs(1, 3, 4, 1'b1, 1'b0); // Read r1, r3; write r4; not branch
+ @(negedge clk);
+ clear_rename_inputs;
+
+ // Check output src and dest tags
+ if ((ds2_src1_tag != 'd1) || (ds2_src2_tag != 'd34) || (ds2_dest_tag != 'd35))
+ fail('d2);
+
+
+ // Rename a branch which does not write a register to allocate new checkpoint
+ set_rename_inputs(3, 4, 4, 1'b0, 1'b1); // Read r3, r4; no write; is branch
+ @(negedge clk);
+ clear_rename_inputs;
+
+ // Check output src tags and checkpoint ptr
+ if ((ds2_src1_tag != 'd34) || (ds2_src2_tag != 'd35) || (ds2_dest_tag != 'd36) ||
+ (ds2_chkpt_ptr != 'd0))
+ fail('d3);
+
+
+ // Rename two more instructions to overwrite r3 and r4, then recover from checkpoint
+ // Note: the rename inputs are not cleared between these two instructions.
+ set_rename_inputs(1, 2, 3, 1'b1, 1'b0); // Read r1, r2; write r3; not branch
+ @(negedge clk);
+ // Check tag and chkpt_ptr outputs
+ if ((ds2_src1_tag != 'd1) || (ds2_src2_tag != 'd2) || (ds2_dest_tag != 'd36) || (ds2_dest_tag_old != 'd34) || (ds2_chkpt_ptr != 'd1))
+ fail('d4);
+
+ set_rename_inputs(1, 3, 4, 1'b1, 1'b0); // Read r1, r3; write r4; not branch
+ @(negedge clk);
+ clear_rename_inputs;
+ // Check tag and chkpt_ptr outputs
+ if ((ds2_src1_tag != 'd1) || (ds2_src2_tag != 'd36) || (ds2_dest_tag_old != 'd35) || (ds2_chkpt_ptr != 'd1))
+ fail('d5);
+
+ // Retire in-flight instructions, then recover checkpoint from branch misprediction
+ // Argument order: pipe_flush, dest_write, chkpt_free, chkpt_ptr, fl_head_ptr, dest_idx, dest_tag, dest_tag_old
+ set_retire_inputs(1'b0, 1'b1, 1'b0, 0, 'd35, 'd3, 'd34, 'd3);
+ @(negedge clk);
+ set_retire_inputs(1'b0, 1'b1, 1'b0, 0, 'd36, 'd4, 'd35, 'd4);
+ @(negedge clk);
+ set_retire_inputs(1'b1, 1'b0, 1'b0, 0, 'd37, 'd4, 'd36, 'd35); // Branch flush, don't free checkpoint
+ @(negedge clk);
+ clear_retire_inputs;
+
+ // Now rename instruction that reads r3 and r4
+ set_rename_inputs(3, 4, 4, 1'b1, 1'b0); // Read r3, r4; write r4; not branch
+ @(negedge clk);
+ clear_rename_inputs;
+ // Check output tags and chkpt_ptr
+ if ((ds2_src1_tag != 'd34) || (ds2_src2_tag != 'd35) || (ds2_dest_tag != 'd37) || (ds2_dest_tag_old != 'd35) || (ds2_chkpt_ptr != 'd0))
+ fail('d6);
+
+
+
+ // Let clock run for a few cycles before finishing
+ repeat (5) @(negedge clk);
+ $display("Finished!");
+ $finish;
+ end
+
+
+ // Task to easily set all rename inputs
+ // Always asserts ds1_valid; the remaining DS1 inputs take the given values.
+ task set_rename_inputs;
+ input [`REG_IDX_SZ-1:0] src1_idx, src2_idx, dest_idx;
+ input dest_wr;
+ input type_br;
+
+ begin
+ ds1_valid = 1'b1;
+ ds1_src1_idx = src1_idx;
+ ds1_src2_idx = src2_idx;
+ ds1_dest_idx = dest_idx;
+ ds1_dest_wr = dest_wr;
+ ds1_type_br = type_br;
+ end
+ endtask
+
+ // Deassert the rename inputs.
+ // Note: ds1_dest_idx is not touched here and keeps its previous value.
+ task clear_rename_inputs;
+ begin
+ ds1_valid = 1'b0;
+ ds1_src1_idx = 0;
+ ds1_src2_idx = 0;
+ ds1_dest_wr = 1'b0;
+ ds1_type_br = 1'b0;
+ end
+ endtask
+
+ // Task to set all retire/flush inputs at once.
+ // Always asserts rob_ds_ret_valid; the other rob_* inputs take the given values.
+ task set_retire_inputs;
+ input pipe_flush, dest_write, chkpt_free;
+ input [`CHKPT_PTR_SZ-1:0] chkpt_ptr;
+ input [`FL_PTR_SZ-1:0] fl_head_ptr;
+ input [`REG_IDX_SZ-1:0] dest_idx;
+ input [`TAG_SZ-1:0] dest_tag, dest_tag_old;
+
+ begin
+ rob_ds_ret_valid = 1'b1;
+ rob_pipe_flush = pipe_flush;
+ rob_ds_ret_dest_write = dest_write;
+ rob_ds_ret_chkpt_free = chkpt_free;
+ rob_ds_chkpt_ptr = chkpt_ptr;
+ rob_ds_fl_head_ptr = fl_head_ptr;
+ rob_ds_ret_idx = dest_idx;
+ rob_ds_ret_tag = dest_tag;
+ rob_ds_ret_tag_old = dest_tag_old;
+ end
+ endtask
+
+ // Return every retire/flush input to zero.
+ task clear_retire_inputs;
+ begin
+ rob_ds_ret_valid = 1'b0;
+ rob_pipe_flush = 1'b0;
+ rob_ds_ret_dest_write = 1'b0;
+ rob_ds_ret_chkpt_free = 1'b0;
+ rob_ds_chkpt_ptr = 0;
+ rob_ds_fl_head_ptr = 0;
+ rob_ds_ret_idx = 0;
+ rob_ds_ret_tag = 0;
+ rob_ds_ret_tag_old = 0;
+ end
+ endtask
+
+ // Report the failing test number and simulation time, let the clock run
+ // three more negative edges, then end the simulation.
+ task fail;
+ input integer test_num;
+ begin
+ $display("ERROR: Failed on test %0d at time %0d", test_num, $time);
+ repeat(3) @(negedge clk);
+ $finish;
+ end
+ endtask
+endmodule
Index: trunk/Makefile
===================================================================
--- trunk/Makefile (nonexistent)
+++ trunk/Makefile (revision 2)
@@ -0,0 +1,28 @@
+# Some useful constants
+SRC_LIST = ./rtl/sources.list
+TB = tb/test_map_table.v
+
+# Simulator used for testing is Icarus Verilog
+# -DWAVE_DUMP makes the testbench write wave.vcd; to build without waves,
+# override it on the command line, e.g. `make ICARUS_OPTS=`
+INCLUDE_CMD = -I ./rtl
+ICARUS_OPTS = -DWAVE_DUMP
+ICARUS_CMD = iverilog
+
+MAP_TABLE_SRC = rtl/map_table.v \
+	rtl/free_list.v \
+	rtl/dp_sram.v \
+	rtl/ooops_defs.v \
+	rtl/ooops_lib.v
+MAP_TABLE_TB = tb/test_map_table.v
+
+# None of these targets names a file its recipe creates (the output is
+# always sim.exe), so declare them phony: a stray file called `sim`,
+# `map_table` or `clean` must not stop the recipe from running.
+.PHONY: all sim map_table clean
+
+all: sim
+
+# Main command to compile simulation model
+sim:
+	$(ICARUS_CMD) $(ICARUS_OPTS) $(INCLUDE_CMD) -f $(SRC_LIST) $(TB) -o sim.exe
+
+# Stand-alone map table simulation: only the map table and what it depends on
+map_table: $(MAP_TABLE_SRC) $(MAP_TABLE_TB)
+	$(ICARUS_CMD) $(ICARUS_OPTS) $(INCLUDE_CMD) $(MAP_TABLE_SRC) $(MAP_TABLE_TB) -o sim.exe
+
+# $(RM) is `rm -f`, so cleaning an already-clean tree is not an error
+clean:
+	$(RM) ./*.exe