URL
https://opencores.org/ocsvn/thor/thor/trunk
Subversion Repositories thor
Compare Revisions
- This comparison shows the changes necessary to convert path
/thor/trunk
- from Rev 57 to Rev 58
- ↔ Reverse comparison
Rev 57 → Rev 58
/FT64v5/rtl/common/FT64_cache.v
38,11 → 38,12
module FT64_L1_icache_mem(rst, clk, wr, en, lineno, i, o, ov, invall, invline); |
parameter pLines = 64; |
parameter pLineWidth = 288; |
localparam pLNMSB = pLines==128 ? 6 : 5; |
input rst; |
input clk; |
input wr; |
input [8:0] en; |
input [5:0] lineno; |
input [pLNMSB:0] lineno; |
input [pLineWidth-1:0] i; |
output [pLineWidth-1:0] o; |
output [8:0] ov; |
191,29 → 192,32
// ----------------------------------------------------------------------------- |
|
module FT64_L1_icache_cmptag4way(rst, clk, nxt, wr, adr, lineno, hit); |
parameter pLines = 64; |
localparam pLNMSB = pLines==128 ? 6 : 5; |
localparam pMSB = pLines==128 ? 9 : 8; |
input rst; |
input clk; |
input nxt; |
input wr; |
input [37:0] adr; |
output reg [5:0] lineno; |
output reg [pLNMSB:0] lineno; |
output hit; |
|
(* ram_style="distributed" *) |
reg [32:0] mem0 [0:15]; |
reg [32:0] mem1 [0:15]; |
reg [32:0] mem2 [0:15]; |
reg [32:0] mem3 [0:15]; |
reg [32:0] mem0 [0:pLines/4-1]; |
reg [32:0] mem1 [0:pLines/4-1]; |
reg [32:0] mem2 [0:pLines/4-1]; |
reg [32:0] mem3 [0:pLines/4-1]; |
reg [37:0] rradr; |
integer n; |
initial begin |
for (n = 0; n < 16; n = n + 1) |
begin |
mem0[n] = 0; |
mem1[n] = 0; |
mem2[n] = 0; |
mem3[n] = 0; |
end |
for (n = 0; n < pLines/4; n = n + 1) |
begin |
mem0[n] = 0; |
mem1[n] = 0; |
mem2[n] = 0; |
mem3[n] = 0; |
end |
end |
|
wire [21:0] lfsro; |
225,24 → 229,24
else begin |
if (wr) begin |
case(lfsro[1:0]) |
2'b00: begin mem0[adr[8:5]] <= adr[37:5]; wlineno <= {2'b00,adr[8:5]}; end |
2'b01: begin mem1[adr[8:5]] <= adr[37:5]; wlineno <= {2'b01,adr[8:5]}; end |
2'b10: begin mem2[adr[8:5]] <= adr[37:5]; wlineno <= {2'b10,adr[8:5]}; end |
2'b11: begin mem3[adr[8:5]] <= adr[37:5]; wlineno <= {2'b11,adr[8:5]}; end |
2'b00: begin mem0[adr[pMSB:5]] <= adr[37:5]; wlineno <= {2'b00,adr[pMSB:5]}; end |
2'b01: begin mem1[adr[pMSB:5]] <= adr[37:5]; wlineno <= {2'b01,adr[pMSB:5]}; end |
2'b10: begin mem2[adr[pMSB:5]] <= adr[37:5]; wlineno <= {2'b10,adr[pMSB:5]}; end |
2'b11: begin mem3[adr[pMSB:5]] <= adr[37:5]; wlineno <= {2'b11,adr[pMSB:5]}; end |
endcase |
end |
end |
|
wire hit0 = mem0[adr[8:5]]==adr[37:5]; |
wire hit1 = mem1[adr[8:5]]==adr[37:5]; |
wire hit2 = mem2[adr[8:5]]==adr[37:5]; |
wire hit3 = mem3[adr[8:5]]==adr[37:5]; |
wire hit0 = mem0[adr[pMSB:5]]==adr[37:5]; |
wire hit1 = mem1[adr[pMSB:5]]==adr[37:5]; |
wire hit2 = mem2[adr[pMSB:5]]==adr[37:5]; |
wire hit3 = mem3[adr[pMSB:5]]==adr[37:5]; |
always @* |
//if (wr2) lineno = wlineno; |
if (hit0) lineno = {2'b00,adr[8:5]}; |
else if (hit1) lineno = {2'b01,adr[8:5]}; |
else if (hit2) lineno = {2'b10,adr[8:5]}; |
else lineno = {2'b11,adr[8:5]}; |
if (hit0) lineno = {2'b00,adr[pMSB:5]}; |
else if (hit1) lineno = {2'b01,adr[pMSB:5]}; |
else if (hit2) lineno = {2'b10,adr[pMSB:5]}; |
else lineno = {2'b11,adr[pMSB:5]}; |
assign hit = hit0|hit1|hit2|hit3; |
endmodule |
|
303,8 → 307,11
// ----------------------------------------------------------------------------- |
|
module FT64_L1_icache(rst, clk, nxt, wr, wr_ack, en, wadr, adr, i, o, hit, invall, invline); |
parameter pSize = 2; |
parameter CAMTAGS = 1'b0; // 32 way |
parameter FOURWAY = 1'b1; |
localparam pLines = pSize==4 ? 128 : 64; |
localparam pLNMSB = pSize==4 ? 6 : 5; |
input rst; |
input clk; |
input nxt; |
322,8 → 329,8
wire [287:0] ic; |
reg [287:0] i1, i2; |
wire [8:0] lv; // line valid |
wire [5:0] lineno; |
wire [5:0] wlineno; |
wire [pLNMSB:0] lineno; |
wire [pLNMSB:0] wlineno; |
wire taghit; |
reg wr1,wr2; |
reg [8:0] en1, en2; |
351,7 → 358,7
generate begin : tags |
if (FOURWAY) begin |
|
FT64_L1_icache_mem u1 |
FT64_L1_icache_mem #(.pLines(pLines)) u1 |
( |
.rst(rst), |
.clk(clk), |
365,7 → 372,7
.invline(invline1) |
); |
|
FT64_L1_icache_cmptag4way u3 |
FT64_L1_icache_cmptag4way #(.pLines(pLines)) u3 |
( |
.rst(rst), |
.clk(clk), |
/FT64v5/rtl/common/FT64_config.vh
27,21 → 27,42
`define SUPPORT_VECTOR 1'b1 |
//`define SUPPORT_DCI 1'b1 // dynamically compressed instructions |
//`define DEBUG_LOGIC 1'b1 |
`define L1_ICACHE_SIZE 4 // 2 or 4 for 2 or 4 kB |
|
// One way to tweak the size of the core a little bit is to limit the number |
// of address bits processed. The test system for instance has only 512MB of |
// memory, so the address size is limited to 32 bits. |
`define AMSB 31 |
`define ABITS `AMSB:0 |
`define QBITS 3:0 |
|
|
`define QBITS 3:0 // bitfield representing a queue entry index |
`define QENTRIES 10 // changing this still requires changing code in FT64. |
`define XBITS 7:0 |
|
//`define SUPPORT_DBG 1'b1 |
|
// Issue logic is not really required for every possible distance from |
// the head of the queue. Later queue entries tend to depend on prior |
// ones and hence may not be ready to be issued. Also note that |
// instruction decode takes a cycle making the last entry or two in the |
// queue not ready to be issued. Commenting out this line will limit |
// much of the issue logic to the first six queue slots relative to the |
// head of the queue. |
`define FULL_ISSUE_LOGIC 1'b1 |
|
// The WAYS config define affects things like the number of ports on the |
// register file, the number of ports on the instruction cache, and how |
// many entries are contained in the fetch buffers. It also indirectly |
// affects how many instructions are queued. |
`define WAYS 2 // number of ways parallel (1-3; 3 not working yet) |
`define NUM_IDU 2 // number of instruction decode units (1-3) |
`define NUM_ALU 2 // number of ALU's (1-2) |
`define NUM_MEM 2 // number of memory queues (1-3) |
`define NUM_FPU 2 // number of floating-point units (0-2) |
// Note that even with just a single commit bus, multiple instructions may |
// commit if they do not target any registers. Up to three instructions may |
// commit even with just a single bus. |
`define NUM_CMT 2 // number of commit busses (1-2) |
// Comment out the following to remove FCU enhancements (branch predictor, BTB, RSB) |
`define FCU_ENH 1 |
/FT64v5/rtl/common/FT64_iexpander.v
58,7 → 58,7
default: |
begin |
expand[47:32] = 16'h0000; |
expand[31:18] = {{6{cinstr[11]}},cinstr[11:8],cinstr[5]}; |
expand[31:18] = {{9{cinstr[11]}},cinstr[11:8],cinstr[5]}; |
expand[17:13] = cinstr[4:0]; |
expand[12:8] = cinstr[4:0]; |
expand[7:6] = 2'b10; |
/FT64v5/rtl/twoway/FT64.v
844,7 → 844,8
reg [`XBITS] dramC_exc; |
|
wire outstanding_stores; |
reg [63:0] I; // instruction count |
reg [63:0] I; // instruction count |
reg [63:0] CC; // commit count |
|
reg commit0_v; |
reg [4:0] commit0_id; |
856,15 → 857,20
reg [RBIT:0] commit1_tgt; |
reg [7:0] commit1_we = 8'h00; |
reg [63:0] commit1_bus; |
reg commit2_v; |
reg [4:0] commit2_id; |
reg [RBIT:0] commit2_tgt; |
reg [7:0] commit2_we = 8'h00; |
reg [63:0] commit2_bus; |
|
reg [4:0] bstate; |
parameter BIDLE = 5'd0; |
parameter B1 = 5'd1; |
parameter B2 = 5'd2; |
parameter B3 = 5'd3; |
parameter B4 = 5'd4; |
parameter B5 = 5'd5; |
parameter B6 = 5'd6; |
parameter B_DCacheStoreAck = 5'd1; |
parameter B_DCacheLoadStart = 5'd2; |
parameter B_DCacheLoadStb = 5'd3; |
parameter B_DCacheLoadWait1 = 5'd4; |
parameter B_DCacheLoadWait2 = 5'd5; |
parameter B_DCacheLoadResetBusy = 5'd6; |
parameter B7 = 5'd7; |
parameter B8 = 5'd8; |
parameter B9 = 5'd9; |
881,9 → 887,10
parameter B2a = 5'd20; |
parameter B2b = 5'd21; |
parameter B2c = 5'd22; |
parameter B2d = 5'd23; |
parameter B_DCacheLoadAck = 5'd23; |
parameter B20 = 5'd24; |
parameter B21 = 5'd25; |
parameter B_DCacheLoadWait3 = 5'd26; |
reg [1:0] bwhich; |
reg [3:0] icstate,picstate; |
parameter IDLE = 4'd0; |
1040,7 → 1047,7
end |
endgenerate |
|
FT64_L1_icache uic0 |
FT64_L1_icache #(.pSize(`L1_ICACHE_SIZE)) uic0 |
( |
.rst(rst), |
.clk(clk), |
1057,7 → 1064,7
); |
generate begin : gICacheInst |
if (`WAYS > 1) begin |
FT64_L1_icache uic1 |
FT64_L1_icache #(.pSize(`L1_ICACHE_SIZE)) uic1 |
( |
.rst(rst), |
.clk(clk), |
1077,7 → 1084,7
assign ihit1 = 1'b1; |
end |
if (`WAYS > 2) begin |
FT64_L1_icache uic2 |
FT64_L1_icache #(.pSize(`L1_ICACHE_SIZE)) uic2 |
( |
.rst(rst), |
.clk(clk), |
1293,6 → 1300,7
.en(bpe), |
.xisBranch0(iqentry_br[head0] & commit0_v), |
.xisBranch1(iqentry_br[head1] & commit1_v), |
.xisBranch2(iqentry_br[head2] & commit2_v), |
.pcA(fetchbufA_pc), |
.pcB(fetchbufB_pc), |
.pcC(fetchbufC_pc), |
1301,8 → 1309,10
.pcF(fetchbufF_pc), |
.xpc0(iqentry_pc[head0]), |
.xpc1(iqentry_pc[head1]), |
.xpc2(iqentry_pc[head2]), |
.takb0(commit0_v & iqentry_takb[head0]), |
.takb1(commit1_v & iqentry_takb[head1]), |
.takb2(commit2_v & iqentry_takb[head2]), |
.predict_takenA(predict_takenA), |
.predict_takenB(predict_takenB), |
.predict_takenC(predict_takenC), |
1329,6 → 1339,7
.en(bpe), |
.xisBranch0(iqentry_br[head0] & commit0_v), |
.xisBranch1(iqentry_br[head1] & commit1_v), |
.xisBranch2(iqentry_br[head2] & commit2_v), |
.pcA(fetchbufA_pc), |
.pcB(fetchbufB_pc), |
.pcC(fetchbufC_pc), |
1337,8 → 1348,10
.pcF(32'd0), |
.xpc0(iqentry_pc[head0]), |
.xpc1(iqentry_pc[head1]), |
.xpc2(iqentry_pc[head2]), |
.takb0(commit0_v & iqentry_takb[head0]), |
.takb1(commit1_v & iqentry_takb[head1]), |
.takb2(commit2_v & iqentry_takb[head2]), |
.predict_takenA(predict_takenA), |
.predict_takenB(predict_takenB), |
.predict_takenC(predict_takenC), |
1362,7 → 1375,8
.clk(fcu_clk), |
.en(bpe), |
.xisBranch0(iqentry_br[head0] & commit0_v), |
.xisBranch1(1'b0), |
.xisBranch1(iqentry_br[head1] & commit1_v), |
.xisBranch2(iqentry_br[head2] & commit2_v), |
.pcA(fetchbufA_pc), |
.pcB(fetchbufB_pc), |
.pcC(32'd0), |
1370,9 → 1384,11
.pcE(32'd0), |
.pcF(32'd0), |
.xpc0(iqentry_pc[head0]), |
.xpc1(32'd0), |
.xpc1(iqentry_pc[head1]), |
.xpc2(iqentry_pc[head2]), |
.takb0(commit0_v & iqentry_takb[head0]), |
.takb1(1'b0), |
.takb1(commit1_v & iqentry_takb[head1]), |
.takb2(commit2_v & iqentry_takb[head2]), |
.predict_takenA(predict_takenA), |
.predict_takenB(predict_takenB), |
.predict_takenC(), |
1713,11 → 1729,11
( |
.rst(rst), |
.wclk(clk), |
.wr((bstate==B2d && ack_i)||((bstate==B1||(bstate==B19 && isStore)) && whit0)), |
.wr((bstate==B_DCacheLoadAck && ack_i)||((bstate==B_DCacheStoreAck||(bstate==B19 && isStore)) && whit0)), |
.sel(sel_o), |
.wadr({pcr[5:0],adr_o}), |
.whit(whit0), |
.i(bstate==B2d ? dat_i : dat_o), |
.i(bstate==B_DCacheLoadAck ? dat_i : dat_o), |
.rclk(clk), |
.rdsize(dram0_memsize), |
.radr({pcr[5:0],dram0_addr}), |
1732,11 → 1748,11
( |
.rst(rst), |
.wclk(clk), |
.wr((bstate==B2d && ack_i)||((bstate==B1||(bstate==B19 && isStore)) && whit1)), |
.wr((bstate==B_DCacheLoadAck && ack_i)||((bstate==B_DCacheStoreAck||(bstate==B19 && isStore)) && whit1)), |
.sel(sel_o), |
.wadr({pcr[5:0],adr_o}), |
.whit(whit1), |
.i(bstate==B2d ? dat_i : dat_o), |
.i(bstate==B_DCacheLoadAck ? dat_i : dat_o), |
.rclk(clk), |
.rdsize(dram1_memsize), |
.radr({pcr[5:0],dram1_addr}), |
1751,11 → 1767,11
( |
.rst(rst), |
.wclk(clk), |
.wr((bstate==B2d && ack_i)||((bstate==B1||(bstate==B19 && isStore)) && whit2)), |
.wr((bstate==B_DCacheLoadAck && ack_i)||((bstate==B_DCacheStoreAck||(bstate==B19 && isStore)) && whit2)), |
.sel(sel_o), |
.wadr({pcr[5:0],adr_o}), |
.whit(whit2), |
.i(bstate==B2d ? dat_i : dat_o), |
.i(bstate==B_DCacheLoadAck ? dat_i : dat_o), |
.rclk(clk), |
.rdsize(dram2_memsize), |
.radr({pcr[5:0],dram2_addr}), |
5573,15 → 5589,36
commit1_tgt <= iqentry_tgt[head1]; |
commit1_we <= iqentry_we[head1]; |
commit1_bus <= iqentry_res[head1]; |
// Need to set the commit1 and commit2 valid bits for the branch predictor. |
if (`NUM_CMT > 2) begin |
end |
else begin |
commit2_v <= ({iqentry_v[head0], iqentry_cmt[head0]} != 2'b10 |
&& {iqentry_v[head1], iqentry_cmt[head1]} != 2'b10 |
&& {iqentry_v[head2], iqentry_br[head2], iqentry_cmt[head2]}==3'b111 |
&& iqentry_tgt[head2][4:0]==5'd0 && ~|panic); // watch out for dbnz and ibne |
commit2_tgt <= 12'h000; |
commit2_we <= 8'h00; |
end |
end |
else begin |
commit1_v <= ({iqentry_v[head0], iqentry_cmt[head0]} != 2'b10 |
&& {iqentry_v[head1], iqentry_br[head1], iqentry_cmt[head1]}==3'b111 |
&& iqentry_tgt[head1][4:0]==5'd0 && ~|panic); // watch out for dbnz and ibne |
commit1_tgt <= 12'h000; |
commit1_we <= 8'h00; |
commit2_v <= ({iqentry_v[head0], iqentry_cmt[head0]} != 2'b10 |
&& {iqentry_v[head1], iqentry_cmt[head1]} != 2'b10 |
&& {iqentry_v[head2], iqentry_br[head2], iqentry_cmt[head2]}==3'b111 |
&& iqentry_tgt[head2][4:0]==5'd0 && ~|panic); // watch out for dbnz and ibne |
commit2_tgt <= 12'h000; |
commit2_we <= 8'h00; |
end |
end |
|
assign int_commit = (commit0_v && iqentry_irq[head0]) || |
(commit0_v && commit1_v && iqentry_irq[head1] && `NUM_CMT > 1); |
assign int_commit = (commit0_v && iqentry_irq[head0]) |
|| (commit0_v && commit1_v && iqentry_irq[head1] && `NUM_CMT > 1) |
|| (commit0_v && commit1_v && commit2_v && iqentry_irq[head2] && `NUM_CMT > 2); |
|
// Detect if a given register will become valid during the current cycle. |
// We want a signal that is active during the current clock cycle for the read |
5611,7 → 5648,10
|| (branchmiss && branchmiss_thrd == iqentry_thrd[commit0_id[`QBITS]] && iqentry_source[ commit0_id[`QBITS] ])); |
if (commit1_v && n=={commit1_tgt[7:0]} && `NUM_CMT > 1) |
regIsValid[n] = regIsValid[n] | (rf_source[ {commit1_tgt[7:0]} ] == commit1_id |
|| (branchmiss && branchmiss_thrd == iqentry_thrd[commit0_id[`QBITS]] && iqentry_source[ commit1_id[`QBITS] ])); |
|| (branchmiss && branchmiss_thrd == iqentry_thrd[commit1_id[`QBITS]] && iqentry_source[ commit1_id[`QBITS] ])); |
if (commit2_v && n=={commit2_tgt[7:0]} && `NUM_CMT > 2) |
regIsValid[n] = regIsValid[n] | (rf_source[ {commit2_tgt[7:0]} ] == commit2_id |
|| (branchmiss && branchmiss_thrd == iqentry_thrd[commit2_id[`QBITS]] && iqentry_source[ commit2_id[`QBITS] ])); |
end |
regIsValid[0] = `VAL; |
regIsValid[32] = `VAL; |
5963,6 → 6003,7
dramB_v <= 0; |
dramC_v <= 0; |
I <= 0; |
CC <= 0; |
icstate <= IDLE; |
bstate <= BIDLE; |
tick <= 64'd0; |
6143,6 → 6184,22
if (commit1_tgt[5:0]==6'd30 && commit1_bus==64'd0) |
$display("FP <= 0"); |
end |
if (commit2_v && `NUM_CMT > 2) begin |
if (!rf_v[ {commit2_tgt[7:0]} ]) begin |
if ({commit2_tgt[7:0]}=={commit1_tgt[7:0]} && {commit2_tgt[7:0]}=={commit0_tgt[7:0]}) |
rf_v[ {commit2_tgt[7:0]} ] <= regIsValid[{commit0_tgt[7:0]}] | regIsValid[{commit1_tgt[7:0]}] | regIsValid[{commit2_tgt[7:0]}]; |
else if ({commit2_tgt[7:0]}=={commit0_tgt[7:0]}) |
rf_v[ {commit2_tgt[7:0]} ] <= regIsValid[{commit0_tgt[7:0]}] | regIsValid[{commit2_tgt[7:0]}]; |
else if ({commit2_tgt[7:0]}=={commit1_tgt[7:0]}) |
rf_v[ {commit2_tgt[7:0]} ] <= regIsValid[{commit1_tgt[7:0]}] | regIsValid[{commit2_tgt[7:0]}]; |
else |
rf_v[ {commit2_tgt[7:0]} ] <= regIsValid[{commit2_tgt[7:0]}];//rf_source[ commit1_tgt[4:0] ] == commit1_id || (branchmiss && iqentry_source[ commit1_id[`QBITS] ]); |
end |
if (commit2_tgt[5:0] != 6'd0) $display("r%d <- %h v[%d]<-%d", commit2_tgt, commit2_bus, regIsValid[commit2_tgt[5:0]], |
rf_source[ {commit2_tgt[7:0]} ] == commit2_id || (branchmiss && iqentry_source[ commit2_id[`QBITS] ])); |
if (commit2_tgt[5:0]==6'd30 && commit2_bus==64'd0) |
$display("FP <= 0"); |
end |
rf_v[0] <= 1; |
|
// |
6655,6 → 6712,8
setargs(n,commit0_id,commit0_v,commit0_bus); |
if (`NUM_CMT > 1) |
setargs(n,commit1_id,commit1_v,commit1_bus); |
if (`NUM_CMT > 2) |
setargs(n,commit2_id,commit2_v,commit2_bus); |
|
setinsn(n[`QBITS],id1_ido,id1_available&id1_vo,id1_bus); |
if (`NUM_IDU > 1) |
7171,6 → 7230,8
oddball_commit(commit0_v, head0); |
if (`NUM_CMT > 1) |
oddball_commit(commit1_v, head1); |
if (`NUM_CMT > 2) |
oddball_commit(commit2_v, head2); |
//if (`NUM_CMT > 2) |
// oddball_commit(commit2_v, head2); |
|
7496,7 → 7557,7
ol_o <= wb_ol[0]; |
wbo_id <= wb_id[0]; |
isStore <= TRUE; |
bstate <= wb_rmw[0] ? B12 : B1; |
bstate <= wb_rmw[0] ? B12 : B_DCacheStoreAck; |
end |
begin |
for (j = 1; j < `WB_DEPTH; j = j + 1) begin |
7620,7 → 7681,7
dat_o <= fnDato(dram0_instr,dram0_data); |
ol_o <= dram0_ol; |
isStore <= TRUE; |
bstate <= B1; |
bstate <= B_DCacheStoreAck; |
`else |
if (wbptr<`WB_DEPTH-1) begin |
dram0 <= `DRAMREQ_READY; |
7663,7 → 7724,7
dat_o <= fnDato(dram1_instr,dram1_data); |
ol_o <= dram1_ol; |
isStore <= TRUE; |
bstate <= B1; |
bstate <= B_DCacheStoreAck; |
`else |
if (wbptr<`WB_DEPTH-1) begin |
dram1 <= `DRAMREQ_READY; |
7706,7 → 7767,7
dat_o <= fnDato(dram2_instr,dram2_data); |
ol_o <= dram2_ol; |
isStore <= TRUE; |
bstate <= B1; |
bstate <= B_DCacheStoreAck; |
`else |
if (wbptr<`WB_DEPTH-1) begin |
dram2 <= `DRAMREQ_READY; |
7742,7 → 7803,7
dram0 <= `DRAMSLOT_HASBUS; |
bwhich <= 2'b00; |
preload <= dram0_preload; |
bstate <= B2; |
bstate <= B_DCacheLoadStart; |
end |
end |
else if (~|wb_v && mem2_available && !dram1_unc && dram1==`DRAMSLOT_REQBUS && dram1_load && `NUM_MEM > 1) begin |
7760,7 → 7821,7
dram1 <= `DRAMSLOT_HASBUS; |
bwhich <= 2'b01; |
preload <= dram1_preload; |
bstate <= B2; |
bstate <= B_DCacheLoadStart; |
end |
end |
else if (~|wb_v && mem3_available && !dram2_unc && dram2==`DRAMSLOT_REQBUS && dram2_load && `NUM_MEM > 2) begin |
7778,7 → 7839,7
dram2 <= `DRAMSLOT_HASBUS; |
preload <= dram2_preload; |
bwhich <= 2'b10; |
bstate <= B2; |
bstate <= B_DCacheLoadStart; |
end |
end |
else if (~|wb_v && mem1_available && dram0_unc && dram0==`DRAMSLOT_BUSY && dram0_load) begin |
7868,7 → 7929,7
// Terminal state for a store operation. |
// Note that if only a single memory channel is selected, bwhich will be a |
// constant 0. This should cause the extra code to be removed. |
B1: |
B_DCacheStoreAck: |
if (acki|err_i) begin |
isStore <= `TRUE; |
cyc_o <= `LOW; |
7898,7 → 7959,7
end |
`else |
case(bwhich) |
2'd0: if (mem1_available) begin |
2'd0: begin |
dram0 <= `DRAMREQ_READY; |
iqentry_exc[dram0_id[`QBITS]] <= wrv_i|err_i ? `FLT_DWF : `FLT_NONE; |
if (err_i|wrv_i) iqentry_a1[dram0_id[`QBITS]] <= adr_o; |
7927,7 → 7988,7
`endif |
bstate <= B19; |
end |
B2: |
B_DCacheLoadStart: |
begin |
dccnt <= 2'd0; |
case(bwhich) |
7939,7 → 8000,7
sel_o <= fnSelect(dram0_instr,dram0_addr); |
adr_o <= {dram0_addr[31:5],5'b0}; |
ol_o <= dram0_ol; |
bstate <= B2d; |
bstate <= B_DCacheLoadAck; |
end |
2'd1: if (`NUM_MEM > 1) begin |
cti_o <= 3'b001; |
7949,7 → 8010,7
sel_o <= fnSelect(dram1_instr,dram1_addr); |
adr_o <= {dram1_addr[31:5],5'b0}; |
ol_o <= dram1_ol; |
bstate <= B2d; |
bstate <= B_DCacheLoadAck; |
end |
2'd2: if (`NUM_MEM > 2) begin |
cti_o <= 3'b001; |
7959,13 → 8020,13
sel_o <= fnSelect(dram2_instr,dram2_addr); |
adr_o <= {dram2_addr[31:5],5'b0}; |
ol_o <= dram2_ol; |
bstate <= B2d; |
bstate <= B_DCacheLoadAck; |
end |
default: if (~acki) bstate <= BIDLE; |
endcase |
end |
// Data cache load terminal state |
B2d: |
B_DCacheLoadAck: |
if (ack_i|err_i) begin |
errq <= errq | err_i; |
rdvq <= rdvq | rdv_i; |
7987,7 → 8048,7
endcase |
dccnt <= dccnt + 2'd1; |
adr_o[4:3] <= adr_o[4:3] + 2'd1; |
bstate <= B2d; |
bstate <= B_DCacheLoadAck; |
if (dccnt==2'd2) |
cti_o <= 3'b111; |
if (dccnt==2'd3) begin |
7996,22 → 8057,25
cyc_o <= `LOW; |
stb_o <= `LOW; |
sel_o <= 8'h00; |
bstate <= B4; |
bstate <= B_DCacheLoadWait1; |
end |
end |
B3: begin |
stb_o <= `HIGH; |
bstate <= B2d; |
end |
B4: bstate <= B5; |
B5: bstate <= B6; |
B6: begin |
case(bwhich) |
2'd0: dram0 <= `DRAMSLOT_BUSY; // causes retest of dhit |
2'd1: dram1 <= `DRAMSLOT_BUSY; |
2'd2: dram2 <= `DRAMSLOT_BUSY; |
default: ; |
endcase |
B_DCacheLoadStb: |
begin |
stb_o <= `HIGH; |
bstate <= B_DCacheLoadAck; |
end |
B_DCacheLoadWait1: bstate <= B_DCacheLoadWait2; |
B_DCacheLoadWait2: bstate <= B_DCacheLoadResetBusy; |
//B_DCacheLoadWait3: bstate <= B_DCacheLoadResetBusy; |
B_DCacheLoadResetBusy: begin |
// There could be more than one memory cycle active. We reset the state |
// of all the machines to retest for a hit because otherwise sequential |
// loading of memory will cause successive machines to miss resulting in |
// multiple dcache loads that aren't needed. |
if (dram0 != `DRAMSLOT_AVAIL && dram0_addr[31:5]==adr_o[31:5]) dram0 <= `DRAMSLOT_BUSY; // causes retest of dhit |
if (dram1 != `DRAMSLOT_AVAIL && dram1_addr[31:5]==adr_o[31:5]) dram1 <= `DRAMSLOT_BUSY; |
if (dram2 != `DRAMSLOT_AVAIL && dram2_addr[31:5]==adr_o[31:5]) dram2 <= `DRAMSLOT_BUSY; |
if (~ack_i) bstate <= BIDLE; |
end |
|
8155,7 → 8219,7
stb_o <= `HIGH; |
we_o <= `HIGH; |
dat_o <= fnDato(rmw_instr,rmw_res); |
bstate <= B1; |
bstate <= B_DCacheStoreAck; |
end |
B21: |
if (~ack_i) begin |
8381,7 → 8445,7
$display("Commit"); |
$display("0: %c %h %o %d #", commit0_v?"v":" ", commit0_bus, commit0_id, commit0_tgt[4:0]); |
$display("1: %c %h %o %d #", commit1_v?"v":" ", commit1_bus, commit1_id, commit1_tgt[4:0]); |
$display("instructions committed: %d ticks: %d ", I, tick); |
$display("instructions committed: %d valid committed: %d ticks: %d ", CC, I, tick); |
$display("Write merges: %d", wb_merges); |
`endif // SIM |
|
8653,8 → 8717,9
head7 <= (head7 + amt) % QENTRIES; |
head8 <= (head8 + amt) % QENTRIES; |
head9 <= (head9 + amt) % QENTRIES; |
I <= I + amt; |
CC <= CC + amt; |
if (amt==3'd3) begin |
I = I + iqentry_v[head0] + iqentry_v[head1] + iqentry_v[head2]; |
iqentry_agen[head0] <= `INV; |
iqentry_agen[head1] <= `INV; |
iqentry_agen[head2] <= `INV; |
8669,6 → 8734,7
iqentry_alu[head2] <= `FALSE; |
end |
else if (amt==3'd2) begin |
I = I + iqentry_v[head0] + iqentry_v[head1]; |
iqentry_agen[head0] <= `INV; |
iqentry_agen[head1] <= `INV; |
iqentry_mem[head0] <= `FALSE; |
8678,6 → 8744,7
iqentry_alu[head0] <= `FALSE; |
iqentry_alu[head1] <= `FALSE; |
end else if (amt==3'd1) begin |
I = I + iqentry_v[head0]; |
iqentry_agen[head0] <= `INV; |
iqentry_mem[head0] <= `FALSE; |
iqentry_iv[head0] <= `INV; |
/FT64v5/rtl/twoway/FT64_BranchPredicator.v
25,8 → 25,8
//============================================================================= |
// |
module FT64_BranchPredictor(rst, clk, en, |
xisBranch0, xisBranch1, |
pcA, pcB, pcC, pcD, pcE, pcF, xpc0, xpc1, takb0, takb1, |
xisBranch0, xisBranch1, xisBranch2, |
pcA, pcB, pcC, pcD, pcE, pcF, xpc0, xpc1, xpc2, takb0, takb1, takb2, |
predict_takenA, predict_takenB, predict_takenC, predict_takenD, |
predict_takenE, predict_takenF); |
parameter DBW=32; |
35,6 → 35,7
input en; |
input xisBranch0; |
input xisBranch1; |
input xisBranch2; |
input [DBW-1:0] pcA; |
input [DBW-1:0] pcB; |
input [DBW-1:0] pcC; |
43,8 → 44,10
input [DBW-1:0] pcF; |
input [DBW-1:0] xpc0; |
input [DBW-1:0] xpc1; |
input [DBW-1:0] xpc2; |
input takb0; |
input takb1; |
input takb2; |
output predict_takenA; |
output predict_takenB; |
output predict_takenC; |
67,13 → 70,13
for (n = 0; n < 512; n = n + 1) |
branch_history_table[n] = 3; |
end |
wire [8:0] bht_wa = {pc[8:2],gbl_branch_hist[2:1]}; // write address |
wire [8:0] bht_raA = {pcA[8:2],gbl_branch_hist[2:1]}; // read address (IF stage) |
wire [8:0] bht_raB = {pcB[8:2],gbl_branch_hist[2:1]}; // read address (IF stage) |
wire [8:0] bht_raC = {pcC[8:2],gbl_branch_hist[2:1]}; // read address (IF stage) |
wire [8:0] bht_raD = {pcD[8:2],gbl_branch_hist[2:1]}; // read address (IF stage) |
wire [8:0] bht_raE = {pcE[8:2],gbl_branch_hist[2:1]}; // read address (IF stage) |
wire [8:0] bht_raF = {pcF[8:2],gbl_branch_hist[2:1]}; // read address (IF stage) |
wire [8:0] bht_wa = {pc[7:1],gbl_branch_hist[2:1]}; // write address |
wire [8:0] bht_raA = {pcA[7:1],gbl_branch_hist[2:1]}; // read address (IF stage) |
wire [8:0] bht_raB = {pcB[7:1],gbl_branch_hist[2:1]}; // read address (IF stage) |
wire [8:0] bht_raC = {pcC[7:1],gbl_branch_hist[2:1]}; // read address (IF stage) |
wire [8:0] bht_raD = {pcD[7:1],gbl_branch_hist[2:1]}; // read address (IF stage) |
wire [8:0] bht_raE = {pcE[7:1],gbl_branch_hist[2:1]}; // read address (IF stage) |
wire [8:0] bht_raF = {pcF[7:1],gbl_branch_hist[2:1]}; // read address (IF stage) |
wire [1:0] bht_xbits = branch_history_table[bht_wa]; |
wire [1:0] bht_ibitsA = branch_history_table[bht_raA]; |
wire [1:0] bht_ibitsB = branch_history_table[bht_raB]; |
92,19 → 95,49
if (rst) |
pcstail <= 5'd0; |
else begin |
if (xisBranch0 & xisBranch1) begin |
case({xisBranch0,xisBranch1,xisBranch2}) |
3'b000: ; |
3'b001: |
begin |
pcs[pcstail] <= {xpc2[31:1],takb2}; |
pcstail <= pcstail + 5'd1; |
end |
3'b010: |
begin |
pcs[pcstail] <= {xpc1[31:1],takb1}; |
pcstail <= pcstail + 5'd1; |
end |
3'b011: |
begin |
pcs[pcstail] <= {xpc1[31:1],takb1}; |
pcs[pcstail+1] <= {xpc2[31:1],takb2}; |
pcstail <= pcstail + 5'd2; |
end |
3'b100: |
begin |
pcs[pcstail] <= {xpc0[31:1],takb0}; |
pcstail <= pcstail + 5'd1; |
end |
3'b101: |
begin |
pcs[pcstail] <= {xpc0[31:1],takb0}; |
pcs[pcstail+1] <= {xpc2[31:1],takb2}; |
pcstail <= pcstail + 5'd2; |
end |
3'b110: |
begin |
pcs[pcstail] <= {xpc0[31:1],takb0}; |
pcs[pcstail+1] <= {xpc1[31:1],takb1}; |
pcstail <= pcstail + 5'd2; |
end |
else if (xisBranch0) begin |
end |
3'b111: |
begin |
pcs[pcstail] <= {xpc0[31:1],takb0}; |
pcstail <= pcstail + 5'd1; |
end |
else if (xisBranch1) begin |
pcs[pcstail] <= {xpc1[31:1],takb1}; |
pcstail <= pcstail + 5'd1; |
end |
pcs[pcstail+1] <= {xpc1[31:1],takb1}; |
pcs[pcstail+2] <= {xpc2[31:1],takb2}; |
pcstail <= pcstail + 5'd3; |
end |
endcase |
end |
|
always @(posedge clk) |
147,11 → 180,11
if (rst) |
gbl_branch_hist <= 3'b000; |
else begin |
if (en) begin |
if (wrhist) begin |
gbl_branch_hist <= {gbl_branch_hist[1:0],takb}; |
branch_history_table[bht_wa] <= xbits_new; |
end |
if (en) begin |
if (wrhist) begin |
gbl_branch_hist <= {gbl_branch_hist[1:0],takb}; |
branch_history_table[bht_wa] <= xbits_new; |
end |
end |
end |
|