OpenCores
URL https://opencores.org/ocsvn/thor/thor/trunk

Subversion Repositories thor

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /thor/trunk
    from Rev 57 to Rev 58
    Reverse comparison

Rev 57 → Rev 58

/FT64v5/rtl/common/FT64_cache.v
38,11 → 38,12
module FT64_L1_icache_mem(rst, clk, wr, en, lineno, i, o, ov, invall, invline);
parameter pLines = 64;
parameter pLineWidth = 288;
localparam pLNMSB = pLines==128 ? 6 : 5;
input rst;
input clk;
input wr;
input [8:0] en;
input [5:0] lineno;
input [pLNMSB:0] lineno;
input [pLineWidth-1:0] i;
output [pLineWidth-1:0] o;
output [8:0] ov;
191,29 → 192,32
// -----------------------------------------------------------------------------
 
module FT64_L1_icache_cmptag4way(rst, clk, nxt, wr, adr, lineno, hit);
parameter pLines = 64;
localparam pLNMSB = pLines==128 ? 6 : 5;
localparam pMSB = pLines==128 ? 9 : 8;
input rst;
input clk;
input nxt;
input wr;
input [37:0] adr;
output reg [5:0] lineno;
output reg [pLNMSB:0] lineno;
output hit;
 
(* ram_style="distributed" *)
reg [32:0] mem0 [0:15];
reg [32:0] mem1 [0:15];
reg [32:0] mem2 [0:15];
reg [32:0] mem3 [0:15];
reg [32:0] mem0 [0:pLines/4-1];
reg [32:0] mem1 [0:pLines/4-1];
reg [32:0] mem2 [0:pLines/4-1];
reg [32:0] mem3 [0:pLines/4-1];
reg [37:0] rradr;
integer n;
initial begin
for (n = 0; n < 16; n = n + 1)
begin
mem0[n] = 0;
mem1[n] = 0;
mem2[n] = 0;
mem3[n] = 0;
end
for (n = 0; n < pLines/4; n = n + 1)
begin
mem0[n] = 0;
mem1[n] = 0;
mem2[n] = 0;
mem3[n] = 0;
end
end
 
wire [21:0] lfsro;
225,24 → 229,24
else begin
if (wr) begin
case(lfsro[1:0])
2'b00: begin mem0[adr[8:5]] <= adr[37:5]; wlineno <= {2'b00,adr[8:5]}; end
2'b01: begin mem1[adr[8:5]] <= adr[37:5]; wlineno <= {2'b01,adr[8:5]}; end
2'b10: begin mem2[adr[8:5]] <= adr[37:5]; wlineno <= {2'b10,adr[8:5]}; end
2'b11: begin mem3[adr[8:5]] <= adr[37:5]; wlineno <= {2'b11,adr[8:5]}; end
2'b00: begin mem0[adr[pMSB:5]] <= adr[37:5]; wlineno <= {2'b00,adr[pMSB:5]}; end
2'b01: begin mem1[adr[pMSB:5]] <= adr[37:5]; wlineno <= {2'b01,adr[pMSB:5]}; end
2'b10: begin mem2[adr[pMSB:5]] <= adr[37:5]; wlineno <= {2'b10,adr[pMSB:5]}; end
2'b11: begin mem3[adr[pMSB:5]] <= adr[37:5]; wlineno <= {2'b11,adr[pMSB:5]}; end
endcase
end
end
 
wire hit0 = mem0[adr[8:5]]==adr[37:5];
wire hit1 = mem1[adr[8:5]]==adr[37:5];
wire hit2 = mem2[adr[8:5]]==adr[37:5];
wire hit3 = mem3[adr[8:5]]==adr[37:5];
wire hit0 = mem0[adr[pMSB:5]]==adr[37:5];
wire hit1 = mem1[adr[pMSB:5]]==adr[37:5];
wire hit2 = mem2[adr[pMSB:5]]==adr[37:5];
wire hit3 = mem3[adr[pMSB:5]]==adr[37:5];
always @*
//if (wr2) lineno = wlineno;
if (hit0) lineno = {2'b00,adr[8:5]};
else if (hit1) lineno = {2'b01,adr[8:5]};
else if (hit2) lineno = {2'b10,adr[8:5]};
else lineno = {2'b11,adr[8:5]};
if (hit0) lineno = {2'b00,adr[pMSB:5]};
else if (hit1) lineno = {2'b01,adr[pMSB:5]};
else if (hit2) lineno = {2'b10,adr[pMSB:5]};
else lineno = {2'b11,adr[pMSB:5]};
assign hit = hit0|hit1|hit2|hit3;
endmodule
 
303,8 → 307,11
// -----------------------------------------------------------------------------
 
module FT64_L1_icache(rst, clk, nxt, wr, wr_ack, en, wadr, adr, i, o, hit, invall, invline);
parameter pSize = 2;
parameter CAMTAGS = 1'b0; // 32 way
parameter FOURWAY = 1'b1;
localparam pLines = pSize==4 ? 128 : 64;
localparam pLNMSB = pSize==4 ? 6 : 5;
input rst;
input clk;
input nxt;
322,8 → 329,8
wire [287:0] ic;
reg [287:0] i1, i2;
wire [8:0] lv; // line valid
wire [5:0] lineno;
wire [5:0] wlineno;
wire [pLNMSB:0] lineno;
wire [pLNMSB:0] wlineno;
wire taghit;
reg wr1,wr2;
reg [8:0] en1, en2;
351,7 → 358,7
generate begin : tags
if (FOURWAY) begin
 
FT64_L1_icache_mem u1
FT64_L1_icache_mem #(.pLines(pLines)) u1
(
.rst(rst),
.clk(clk),
365,7 → 372,7
.invline(invline1)
);
 
FT64_L1_icache_cmptag4way u3
FT64_L1_icache_cmptag4way #(.pLines(pLines)) u3
(
.rst(rst),
.clk(clk),
/FT64v5/rtl/common/FT64_config.vh
27,21 → 27,42
`define SUPPORT_VECTOR 1'b1
//`define SUPPORT_DCI 1'b1 // dynamically compressed instructions
//`define DEBUG_LOGIC 1'b1
`define L1_ICACHE_SIZE 4 // 2 or 4 for 2 or 4 kB
 
// One way to tweak the size of the core a little bit is to limit the number
// of address bits processed. The test system for instance has only 512MB of
// memory, so the address size is limited to 32 bits.
`define AMSB 31
`define ABITS `AMSB:0
`define QBITS 3:0
 
 
`define QBITS 3:0 // bitfield representing a queue entry index
`define QENTRIES 10 // changing this still requires changing code in FT64.
`define XBITS 7:0
 
//`define SUPPORT_DBG 1'b1
 
// Issue logic is not really required for every possible distance from
// the head of the queue. Later queue entries tend to depend on prior
// ones and hence may not be ready to be issued. Also note that
// instruction decode takes a cycle making the last entry or two in the
// queue not ready to be issued. Commenting out this line will limit
// much of the issue logic to the first six queue slots relative to the
// head of the queue.
`define FULL_ISSUE_LOGIC 1'b1
 
// The WAYS config define affects things like the number of ports on the
// register file, the number of ports on the instruction cache, and how
// many entries are contained in the fetch buffers. It also indirectly
// affects how many instructions are queued.
`define WAYS 2 // number of ways parallel (1-3 3 not working yet)
`define NUM_IDU 2 // number of instruction decode units (1-3)
`define NUM_ALU 2 // number of ALU's (1-2)
`define NUM_MEM 2 // number of memory queues (1-3)
`define NUM_FPU 2 // number of floating-point units (0-2)
// Note that even with just a single commit bus, multiple instructions may
// commit if they do not target any registers. Up to three instructions may
// commit even with just a single bus.
`define NUM_CMT 2 // number of commit busses (1-2)
// Comment out the following to remove FCU enhancements (branch predictor, BTB, RSB)
`define FCU_ENH 1
/FT64v5/rtl/common/FT64_iexpander.v
58,7 → 58,7
default:
begin
expand[47:32] = 16'h0000;
expand[31:18] = {{6{cinstr[11]}},cinstr[11:8],cinstr[5]};
expand[31:18] = {{9{cinstr[11]}},cinstr[11:8],cinstr[5]};
expand[17:13] = cinstr[4:0];
expand[12:8] = cinstr[4:0];
expand[7:6] = 2'b10;
/FT64v5/rtl/twoway/FT64.v
844,7 → 844,8
reg [`XBITS] dramC_exc;
 
wire outstanding_stores;
reg [63:0] I; // instruction count
reg [63:0] I; // instruction count
reg [63:0] CC; // commit count
 
reg commit0_v;
reg [4:0] commit0_id;
856,15 → 857,20
reg [RBIT:0] commit1_tgt;
reg [7:0] commit1_we = 8'h00;
reg [63:0] commit1_bus;
reg commit2_v;
reg [4:0] commit2_id;
reg [RBIT:0] commit2_tgt;
reg [7:0] commit2_we = 8'h00;
reg [63:0] commit2_bus;
 
reg [4:0] bstate;
parameter BIDLE = 5'd0;
parameter B1 = 5'd1;
parameter B2 = 5'd2;
parameter B3 = 5'd3;
parameter B4 = 5'd4;
parameter B5 = 5'd5;
parameter B6 = 5'd6;
parameter B_DCacheStoreAck = 5'd1;
parameter B_DCacheLoadStart = 5'd2;
parameter B_DCacheLoadStb = 5'd3;
parameter B_DCacheLoadWait1 = 5'd4;
parameter B_DCacheLoadWait2 = 5'd5;
parameter B_DCacheLoadResetBusy = 5'd6;
parameter B7 = 5'd7;
parameter B8 = 5'd8;
parameter B9 = 5'd9;
881,9 → 887,10
parameter B2a = 5'd20;
parameter B2b = 5'd21;
parameter B2c = 5'd22;
parameter B2d = 5'd23;
parameter B_DCacheLoadAck = 5'd23;
parameter B20 = 5'd24;
parameter B21 = 5'd25;
parameter B_DCacheLoadWait3 = 5'd26;
reg [1:0] bwhich;
reg [3:0] icstate,picstate;
parameter IDLE = 4'd0;
1040,7 → 1047,7
end
endgenerate
 
FT64_L1_icache uic0
FT64_L1_icache #(.pSize(`L1_ICACHE_SIZE)) uic0
(
.rst(rst),
.clk(clk),
1057,7 → 1064,7
);
generate begin : gICacheInst
if (`WAYS > 1) begin
FT64_L1_icache uic1
FT64_L1_icache #(.pSize(`L1_ICACHE_SIZE)) uic1
(
.rst(rst),
.clk(clk),
1077,7 → 1084,7
assign ihit1 = 1'b1;
end
if (`WAYS > 2) begin
FT64_L1_icache uic2
FT64_L1_icache #(.pSize(`L1_ICACHE_SIZE)) uic2
(
.rst(rst),
.clk(clk),
1293,6 → 1300,7
.en(bpe),
.xisBranch0(iqentry_br[head0] & commit0_v),
.xisBranch1(iqentry_br[head1] & commit1_v),
.xisBranch2(iqentry_br[head2] & commit2_v),
.pcA(fetchbufA_pc),
.pcB(fetchbufB_pc),
.pcC(fetchbufC_pc),
1301,8 → 1309,10
.pcF(fetchbufF_pc),
.xpc0(iqentry_pc[head0]),
.xpc1(iqentry_pc[head1]),
.xpc2(iqentry_pc[head2]),
.takb0(commit0_v & iqentry_takb[head0]),
.takb1(commit1_v & iqentry_takb[head1]),
.takb2(commit2_v & iqentry_takb[head2]),
.predict_takenA(predict_takenA),
.predict_takenB(predict_takenB),
.predict_takenC(predict_takenC),
1329,6 → 1339,7
.en(bpe),
.xisBranch0(iqentry_br[head0] & commit0_v),
.xisBranch1(iqentry_br[head1] & commit1_v),
.xisBranch2(iqentry_br[head2] & commit2_v),
.pcA(fetchbufA_pc),
.pcB(fetchbufB_pc),
.pcC(fetchbufC_pc),
1337,8 → 1348,10
.pcF(32'd0),
.xpc0(iqentry_pc[head0]),
.xpc1(iqentry_pc[head1]),
.xpc2(iqentry_pc[head2]),
.takb0(commit0_v & iqentry_takb[head0]),
.takb1(commit1_v & iqentry_takb[head1]),
.takb2(commit2_v & iqentry_takb[head2]),
.predict_takenA(predict_takenA),
.predict_takenB(predict_takenB),
.predict_takenC(predict_takenC),
1362,7 → 1375,8
.clk(fcu_clk),
.en(bpe),
.xisBranch0(iqentry_br[head0] & commit0_v),
.xisBranch1(1'b0),
.xisBranch1(iqentry_br[head1] & commit1_v),
.xisBranch2(iqentry_br[head2] & commit2_v),
.pcA(fetchbufA_pc),
.pcB(fetchbufB_pc),
.pcC(32'd0),
1370,9 → 1384,11
.pcE(32'd0),
.pcF(32'd0),
.xpc0(iqentry_pc[head0]),
.xpc1(32'd0),
.xpc1(iqentry_pc[head1]),
.xpc2(iqentry_pc[head2]),
.takb0(commit0_v & iqentry_takb[head0]),
.takb1(1'b0),
.takb1(commit1_v & iqentry_takb[head1]),
.takb2(commit2_v & iqentry_takb[head2]),
.predict_takenA(predict_takenA),
.predict_takenB(predict_takenB),
.predict_takenC(),
1713,11 → 1729,11
(
.rst(rst),
.wclk(clk),
.wr((bstate==B2d && ack_i)||((bstate==B1||(bstate==B19 && isStore)) && whit0)),
.wr((bstate==B_DCacheLoadAck && ack_i)||((bstate==B_DCacheStoreAck||(bstate==B19 && isStore)) && whit0)),
.sel(sel_o),
.wadr({pcr[5:0],adr_o}),
.whit(whit0),
.i(bstate==B2d ? dat_i : dat_o),
.i(bstate==B_DCacheLoadAck ? dat_i : dat_o),
.rclk(clk),
.rdsize(dram0_memsize),
.radr({pcr[5:0],dram0_addr}),
1732,11 → 1748,11
(
.rst(rst),
.wclk(clk),
.wr((bstate==B2d && ack_i)||((bstate==B1||(bstate==B19 && isStore)) && whit1)),
.wr((bstate==B_DCacheLoadAck && ack_i)||((bstate==B_DCacheStoreAck||(bstate==B19 && isStore)) && whit1)),
.sel(sel_o),
.wadr({pcr[5:0],adr_o}),
.whit(whit1),
.i(bstate==B2d ? dat_i : dat_o),
.i(bstate==B_DCacheLoadAck ? dat_i : dat_o),
.rclk(clk),
.rdsize(dram1_memsize),
.radr({pcr[5:0],dram1_addr}),
1751,11 → 1767,11
(
.rst(rst),
.wclk(clk),
.wr((bstate==B2d && ack_i)||((bstate==B1||(bstate==B19 && isStore)) && whit2)),
.wr((bstate==B_DCacheLoadAck && ack_i)||((bstate==B_DCacheStoreAck||(bstate==B19 && isStore)) && whit2)),
.sel(sel_o),
.wadr({pcr[5:0],adr_o}),
.whit(whit2),
.i(bstate==B2d ? dat_i : dat_o),
.i(bstate==B_DCacheLoadAck ? dat_i : dat_o),
.rclk(clk),
.rdsize(dram2_memsize),
.radr({pcr[5:0],dram2_addr}),
5573,15 → 5589,36
commit1_tgt <= iqentry_tgt[head1];
commit1_we <= iqentry_we[head1];
commit1_bus <= iqentry_res[head1];
// Need to set the commit1 and commit2 valid bits for the branch predictor.
if (`NUM_CMT > 2) begin
end
else begin
commit2_v <= ({iqentry_v[head0], iqentry_cmt[head0]} != 2'b10
&& {iqentry_v[head1], iqentry_cmt[head1]} != 2'b10
&& {iqentry_v[head2], iqentry_br[head2], iqentry_cmt[head2]}==3'b111
&& iqentry_tgt[head2][4:0]==5'd0 && ~|panic); // watch out for dbnz and ibne
commit2_tgt <= 12'h000;
commit2_we <= 8'h00;
end
end
else begin
commit1_v <= ({iqentry_v[head0], iqentry_cmt[head0]} != 2'b10
&& {iqentry_v[head1], iqentry_br[head1], iqentry_cmt[head1]}==3'b111
&& iqentry_tgt[head1][4:0]==5'd0 && ~|panic); // watch out for dbnz and ibne
commit1_tgt <= 12'h000;
commit1_we <= 8'h00;
commit2_v <= ({iqentry_v[head0], iqentry_cmt[head0]} != 2'b10
&& {iqentry_v[head1], iqentry_cmt[head1]} != 2'b10
&& {iqentry_v[head2], iqentry_br[head2], iqentry_cmt[head2]}==3'b111
&& iqentry_tgt[head2][4:0]==5'd0 && ~|panic); // watch out for dbnz and ibne
commit2_tgt <= 12'h000;
commit2_we <= 8'h00;
end
end
assign int_commit = (commit0_v && iqentry_irq[head0]) ||
(commit0_v && commit1_v && iqentry_irq[head1] && `NUM_CMT > 1);
assign int_commit = (commit0_v && iqentry_irq[head0])
|| (commit0_v && commit1_v && iqentry_irq[head1] && `NUM_CMT > 1)
|| (commit0_v && commit1_v && commit2_v && iqentry_irq[head2] && `NUM_CMT > 2);
 
// Detect if a given register will become valid during the current cycle.
// We want a signal that is active during the current clock cycle for the read
5611,7 → 5648,10
|| (branchmiss && branchmiss_thrd == iqentry_thrd[commit0_id[`QBITS]] && iqentry_source[ commit0_id[`QBITS] ]));
if (commit1_v && n=={commit1_tgt[7:0]} && `NUM_CMT > 1)
regIsValid[n] = regIsValid[n] | (rf_source[ {commit1_tgt[7:0]} ] == commit1_id
|| (branchmiss && branchmiss_thrd == iqentry_thrd[commit0_id[`QBITS]] && iqentry_source[ commit1_id[`QBITS] ]));
|| (branchmiss && branchmiss_thrd == iqentry_thrd[commit1_id[`QBITS]] && iqentry_source[ commit1_id[`QBITS] ]));
if (commit2_v && n=={commit2_tgt[7:0]} && `NUM_CMT > 2)
regIsValid[n] = regIsValid[n] | (rf_source[ {commit2_tgt[7:0]} ] == commit2_id
|| (branchmiss && branchmiss_thrd == iqentry_thrd[commit2_id[`QBITS]] && iqentry_source[ commit2_id[`QBITS] ]));
end
regIsValid[0] = `VAL;
regIsValid[32] = `VAL;
5963,6 → 6003,7
dramB_v <= 0;
dramC_v <= 0;
I <= 0;
CC <= 0;
icstate <= IDLE;
bstate <= BIDLE;
tick <= 64'd0;
6143,6 → 6184,22
if (commit1_tgt[5:0]==6'd30 && commit1_bus==64'd0)
$display("FP <= 0");
end
if (commit2_v && `NUM_CMT > 2) begin
if (!rf_v[ {commit2_tgt[7:0]} ]) begin
if ({commit2_tgt[7:0]}=={commit1_tgt[7:0]} && {commit2_tgt[7:0]}=={commit0_tgt[7:0]})
rf_v[ {commit2_tgt[7:0]} ] <= regIsValid[{commit0_tgt[7:0]}] | regIsValid[{commit1_tgt[7:0]}] | regIsValid[{commit2_tgt[7:0]}];
else if ({commit2_tgt[7:0]}=={commit0_tgt[7:0]})
rf_v[ {commit2_tgt[7:0]} ] <= regIsValid[{commit0_tgt[7:0]}] | regIsValid[{commit2_tgt[7:0]}];
else if ({commit2_tgt[7:0]}=={commit1_tgt[7:0]})
rf_v[ {commit2_tgt[7:0]} ] <= regIsValid[{commit1_tgt[7:0]}] | regIsValid[{commit2_tgt[7:0]}];
else
rf_v[ {commit2_tgt[7:0]} ] <= regIsValid[{commit2_tgt[7:0]}];//rf_source[ commit1_tgt[4:0] ] == commit1_id || (branchmiss && iqentry_source[ commit1_id[`QBITS] ]);
end
if (commit2_tgt[5:0] != 6'd0) $display("r%d <- %h v[%d]<-%d", commit2_tgt, commit2_bus, regIsValid[commit2_tgt[5:0]],
rf_source[ {commit2_tgt[7:0]} ] == commit2_id || (branchmiss && iqentry_source[ commit2_id[`QBITS] ]));
if (commit2_tgt[5:0]==6'd30 && commit2_bus==64'd0)
$display("FP <= 0");
end
rf_v[0] <= 1;
 
//
6655,6 → 6712,8
setargs(n,commit0_id,commit0_v,commit0_bus);
if (`NUM_CMT > 1)
setargs(n,commit1_id,commit1_v,commit1_bus);
if (`NUM_CMT > 2)
setargs(n,commit2_id,commit2_v,commit2_bus);
 
setinsn(n[`QBITS],id1_ido,id1_available&id1_vo,id1_bus);
if (`NUM_IDU > 1)
7171,6 → 7230,8
oddball_commit(commit0_v, head0);
if (`NUM_CMT > 1)
oddball_commit(commit1_v, head1);
if (`NUM_CMT > 2)
oddball_commit(commit2_v, head2);
//if (`NUM_CMT > 2)
// oddball_commit(commit2_v, head2);
 
7496,7 → 7557,7
ol_o <= wb_ol[0];
wbo_id <= wb_id[0];
isStore <= TRUE;
bstate <= wb_rmw[0] ? B12 : B1;
bstate <= wb_rmw[0] ? B12 : B_DCacheStoreAck;
end
begin
for (j = 1; j < `WB_DEPTH; j = j + 1) begin
7620,7 → 7681,7
dat_o <= fnDato(dram0_instr,dram0_data);
ol_o <= dram0_ol;
isStore <= TRUE;
bstate <= B1;
bstate <= B_DCacheStoreAck;
`else
if (wbptr<`WB_DEPTH-1) begin
dram0 <= `DRAMREQ_READY;
7663,7 → 7724,7
dat_o <= fnDato(dram1_instr,dram1_data);
ol_o <= dram1_ol;
isStore <= TRUE;
bstate <= B1;
bstate <= B_DCacheStoreAck;
`else
if (wbptr<`WB_DEPTH-1) begin
dram1 <= `DRAMREQ_READY;
7706,7 → 7767,7
dat_o <= fnDato(dram2_instr,dram2_data);
ol_o <= dram2_ol;
isStore <= TRUE;
bstate <= B1;
bstate <= B_DCacheStoreAck;
`else
if (wbptr<`WB_DEPTH-1) begin
dram2 <= `DRAMREQ_READY;
7742,7 → 7803,7
dram0 <= `DRAMSLOT_HASBUS;
bwhich <= 2'b00;
preload <= dram0_preload;
bstate <= B2;
bstate <= B_DCacheLoadStart;
end
end
else if (~|wb_v && mem2_available && !dram1_unc && dram1==`DRAMSLOT_REQBUS && dram1_load && `NUM_MEM > 1) begin
7760,7 → 7821,7
dram1 <= `DRAMSLOT_HASBUS;
bwhich <= 2'b01;
preload <= dram1_preload;
bstate <= B2;
bstate <= B_DCacheLoadStart;
end
end
else if (~|wb_v && mem3_available && !dram2_unc && dram2==`DRAMSLOT_REQBUS && dram2_load && `NUM_MEM > 2) begin
7778,7 → 7839,7
dram2 <= `DRAMSLOT_HASBUS;
preload <= dram2_preload;
bwhich <= 2'b10;
bstate <= B2;
bstate <= B_DCacheLoadStart;
end
end
else if (~|wb_v && mem1_available && dram0_unc && dram0==`DRAMSLOT_BUSY && dram0_load) begin
7868,7 → 7929,7
// Terminal state for a store operation.
// Note that if only a single memory channel is selected, bwhich will be a
// constant 0. This should cause the extra code to be removed.
B1:
B_DCacheStoreAck:
if (acki|err_i) begin
isStore <= `TRUE;
cyc_o <= `LOW;
7898,7 → 7959,7
end
`else
case(bwhich)
2'd0: if (mem1_available) begin
2'd0: begin
dram0 <= `DRAMREQ_READY;
iqentry_exc[dram0_id[`QBITS]] <= wrv_i|err_i ? `FLT_DWF : `FLT_NONE;
if (err_i|wrv_i) iqentry_a1[dram0_id[`QBITS]] <= adr_o;
7927,7 → 7988,7
`endif
bstate <= B19;
end
B2:
B_DCacheLoadStart:
begin
dccnt <= 2'd0;
case(bwhich)
7939,7 → 8000,7
sel_o <= fnSelect(dram0_instr,dram0_addr);
adr_o <= {dram0_addr[31:5],5'b0};
ol_o <= dram0_ol;
bstate <= B2d;
bstate <= B_DCacheLoadAck;
end
2'd1: if (`NUM_MEM > 1) begin
cti_o <= 3'b001;
7949,7 → 8010,7
sel_o <= fnSelect(dram1_instr,dram1_addr);
adr_o <= {dram1_addr[31:5],5'b0};
ol_o <= dram1_ol;
bstate <= B2d;
bstate <= B_DCacheLoadAck;
end
2'd2: if (`NUM_MEM > 2) begin
cti_o <= 3'b001;
7959,13 → 8020,13
sel_o <= fnSelect(dram2_instr,dram2_addr);
adr_o <= {dram2_addr[31:5],5'b0};
ol_o <= dram2_ol;
bstate <= B2d;
bstate <= B_DCacheLoadAck;
end
default: if (~acki) bstate <= BIDLE;
endcase
end
// Data cache load terminal state
B2d:
B_DCacheLoadAck:
if (ack_i|err_i) begin
errq <= errq | err_i;
rdvq <= rdvq | rdv_i;
7987,7 → 8048,7
endcase
dccnt <= dccnt + 2'd1;
adr_o[4:3] <= adr_o[4:3] + 2'd1;
bstate <= B2d;
bstate <= B_DCacheLoadAck;
if (dccnt==2'd2)
cti_o <= 3'b111;
if (dccnt==2'd3) begin
7996,22 → 8057,25
cyc_o <= `LOW;
stb_o <= `LOW;
sel_o <= 8'h00;
bstate <= B4;
bstate <= B_DCacheLoadWait1;
end
end
B3: begin
stb_o <= `HIGH;
bstate <= B2d;
end
B4: bstate <= B5;
B5: bstate <= B6;
B6: begin
case(bwhich)
2'd0: dram0 <= `DRAMSLOT_BUSY; // causes retest of dhit
2'd1: dram1 <= `DRAMSLOT_BUSY;
2'd2: dram2 <= `DRAMSLOT_BUSY;
default: ;
endcase
B_DCacheLoadStb:
begin
stb_o <= `HIGH;
bstate <= B_DCacheLoadAck;
end
B_DCacheLoadWait1: bstate <= B_DCacheLoadWait2;
B_DCacheLoadWait2: bstate <= B_DCacheLoadResetBusy;
//B_DCacheLoadWait3: bstate <= B_DCacheLoadResetBusy;
B_DCacheLoadResetBusy: begin
// There could be more than one memory cycle active. We reset the state
// of all the machines to retest for a hit because otherwise sequential
// loading of memory will cause successive machines to miss, resulting in
// multiple dcache loads that aren't needed.
if (dram0 != `DRAMSLOT_AVAIL && dram0_addr[31:5]==adr_o[31:5]) dram0 <= `DRAMSLOT_BUSY; // causes retest of dhit
if (dram1 != `DRAMSLOT_AVAIL && dram1_addr[31:5]==adr_o[31:5]) dram1 <= `DRAMSLOT_BUSY;
if (dram2 != `DRAMSLOT_AVAIL && dram2_addr[31:5]==adr_o[31:5]) dram2 <= `DRAMSLOT_BUSY;
if (~ack_i) bstate <= BIDLE;
end
 
8155,7 → 8219,7
stb_o <= `HIGH;
we_o <= `HIGH;
dat_o <= fnDato(rmw_instr,rmw_res);
bstate <= B1;
bstate <= B_DCacheStoreAck;
end
B21:
if (~ack_i) begin
8381,7 → 8445,7
$display("Commit");
$display("0: %c %h %o %d #", commit0_v?"v":" ", commit0_bus, commit0_id, commit0_tgt[4:0]);
$display("1: %c %h %o %d #", commit1_v?"v":" ", commit1_bus, commit1_id, commit1_tgt[4:0]);
$display("instructions committed: %d ticks: %d ", I, tick);
$display("instructions committed: %d valid committed: %d ticks: %d ", CC, I, tick);
$display("Write merges: %d", wb_merges);
`endif // SIM
 
8653,8 → 8717,9
head7 <= (head7 + amt) % QENTRIES;
head8 <= (head8 + amt) % QENTRIES;
head9 <= (head9 + amt) % QENTRIES;
I <= I + amt;
CC <= CC + amt;
if (amt==3'd3) begin
I = I + iqentry_v[head0] + iqentry_v[head1] + iqentry_v[head2];
iqentry_agen[head0] <= `INV;
iqentry_agen[head1] <= `INV;
iqentry_agen[head2] <= `INV;
8669,6 → 8734,7
iqentry_alu[head2] <= `FALSE;
end
else if (amt==3'd2) begin
I = I + iqentry_v[head0] + iqentry_v[head1];
iqentry_agen[head0] <= `INV;
iqentry_agen[head1] <= `INV;
iqentry_mem[head0] <= `FALSE;
8678,6 → 8744,7
iqentry_alu[head0] <= `FALSE;
iqentry_alu[head1] <= `FALSE;
end else if (amt==3'd1) begin
I = I + iqentry_v[head0];
iqentry_agen[head0] <= `INV;
iqentry_mem[head0] <= `FALSE;
iqentry_iv[head0] <= `INV;
/FT64v5/rtl/twoway/FT64_BranchPredicator.v
25,8 → 25,8
//=============================================================================
//
module FT64_BranchPredictor(rst, clk, en,
xisBranch0, xisBranch1,
pcA, pcB, pcC, pcD, pcE, pcF, xpc0, xpc1, takb0, takb1,
xisBranch0, xisBranch1, xisBranch2,
pcA, pcB, pcC, pcD, pcE, pcF, xpc0, xpc1, xpc2, takb0, takb1, takb2,
predict_takenA, predict_takenB, predict_takenC, predict_takenD,
predict_takenE, predict_takenF);
parameter DBW=32;
35,6 → 35,7
input en;
input xisBranch0;
input xisBranch1;
input xisBranch2;
input [DBW-1:0] pcA;
input [DBW-1:0] pcB;
input [DBW-1:0] pcC;
43,8 → 44,10
input [DBW-1:0] pcF;
input [DBW-1:0] xpc0;
input [DBW-1:0] xpc1;
input [DBW-1:0] xpc2;
input takb0;
input takb1;
input takb2;
output predict_takenA;
output predict_takenB;
output predict_takenC;
67,13 → 70,13
for (n = 0; n < 512; n = n + 1)
branch_history_table[n] = 3;
end
wire [8:0] bht_wa = {pc[8:2],gbl_branch_hist[2:1]}; // write address
wire [8:0] bht_raA = {pcA[8:2],gbl_branch_hist[2:1]}; // read address (IF stage)
wire [8:0] bht_raB = {pcB[8:2],gbl_branch_hist[2:1]}; // read address (IF stage)
wire [8:0] bht_raC = {pcC[8:2],gbl_branch_hist[2:1]}; // read address (IF stage)
wire [8:0] bht_raD = {pcD[8:2],gbl_branch_hist[2:1]}; // read address (IF stage)
wire [8:0] bht_raE = {pcE[8:2],gbl_branch_hist[2:1]}; // read address (IF stage)
wire [8:0] bht_raF = {pcF[8:2],gbl_branch_hist[2:1]}; // read address (IF stage)
wire [8:0] bht_wa = {pc[7:1],gbl_branch_hist[2:1]}; // write address
wire [8:0] bht_raA = {pcA[7:1],gbl_branch_hist[2:1]}; // read address (IF stage)
wire [8:0] bht_raB = {pcB[7:1],gbl_branch_hist[2:1]}; // read address (IF stage)
wire [8:0] bht_raC = {pcC[7:1],gbl_branch_hist[2:1]}; // read address (IF stage)
wire [8:0] bht_raD = {pcD[7:1],gbl_branch_hist[2:1]}; // read address (IF stage)
wire [8:0] bht_raE = {pcE[7:1],gbl_branch_hist[2:1]}; // read address (IF stage)
wire [8:0] bht_raF = {pcF[7:1],gbl_branch_hist[2:1]}; // read address (IF stage)
wire [1:0] bht_xbits = branch_history_table[bht_wa];
wire [1:0] bht_ibitsA = branch_history_table[bht_raA];
wire [1:0] bht_ibitsB = branch_history_table[bht_raB];
92,19 → 95,49
if (rst)
pcstail <= 5'd0;
else begin
if (xisBranch0 & xisBranch1) begin
case({xisBranch0,xisBranch1,xisBranch2})
3'b000: ;
3'b001:
begin
pcs[pcstail] <= {xpc2[31:1],takb2};
pcstail <= pcstail + 5'd1;
end
3'b010:
begin
pcs[pcstail] <= {xpc1[31:1],takb1};
pcstail <= pcstail + 5'd1;
end
3'b011:
begin
pcs[pcstail] <= {xpc1[31:1],takb1};
pcs[pcstail+1] <= {xpc2[31:1],takb2};
pcstail <= pcstail + 5'd2;
end
3'b100:
begin
pcs[pcstail] <= {xpc0[31:1],takb0};
pcstail <= pcstail + 5'd1;
end
3'b101:
begin
pcs[pcstail] <= {xpc0[31:1],takb0};
pcs[pcstail+1] <= {xpc2[31:1],takb2};
pcstail <= pcstail + 5'd2;
end
3'b110:
begin
pcs[pcstail] <= {xpc0[31:1],takb0};
pcs[pcstail+1] <= {xpc1[31:1],takb1};
pcstail <= pcstail + 5'd2;
end
else if (xisBranch0) begin
end
3'b111:
begin
pcs[pcstail] <= {xpc0[31:1],takb0};
pcstail <= pcstail + 5'd1;
end
else if (xisBranch1) begin
pcs[pcstail] <= {xpc1[31:1],takb1};
pcstail <= pcstail + 5'd1;
end
pcs[pcstail+1] <= {xpc1[31:1],takb1};
pcs[pcstail+2] <= {xpc2[31:1],takb2};
pcstail <= pcstail + 5'd3;
end
endcase
end
 
always @(posedge clk)
147,11 → 180,11
if (rst)
gbl_branch_hist <= 3'b000;
else begin
if (en) begin
if (wrhist) begin
gbl_branch_hist <= {gbl_branch_hist[1:0],takb};
branch_history_table[bht_wa] <= xbits_new;
end
if (en) begin
if (wrhist) begin
gbl_branch_hist <= {gbl_branch_hist[1:0],takb};
branch_history_table[bht_wa] <= xbits_new;
end
end
end
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.