OpenCores
URL https://opencores.org/ocsvn/thor/thor/trunk

Subversion Repositories thor

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /thor/trunk/FT64v7/rtl/twoway
    from Rev 61 to Rev 60
    Reverse comparison

Rev 61 → Rev 60

/FT64.v
1,6 → 1,6
// ============================================================================
// __
// \\__/ o\ (C) 2017-2019 Robert Finch, Waterloo
// \\__/ o\ (C) 2017-2018 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
42,7 → 42,7
`include "FT64_defines.vh"
 
module FT64(hartid, rst, clk_i, clk4x, tm_clk_i, irq_i, vec_i, bte_o, cti_o, bok_i, cyc_o, stb_o, ack_i, err_i, we_o, sel_o, adr_o, dat_o, dat_i,
ol_o, pcr_o, pcr2_o, pkeys_o, icl_o, sr_o, cr_o, rbi_i, signal_i, exc_o);
ol_o, pcr_o, pcr2_o, pkeys_o, icl_o, sr_o, cr_o, rbi_i, signal_i);
input [63:0] hartid;
input rst;
input clk_i;
71,8 → 71,6
output reg sr_o;
input rbi_i;
input [31:0] signal_i;
(* mark_debug="true" *)
output [7:0] exc_o;
 
parameter TM_CLKFREQ = 20000000;
parameter QENTRIES = `QENTRIES;
110,7 → 108,6
parameter wyde = 3'd1;
parameter tetra = 3'd2;
parameter octa = 3'd3;
parameter hexi = 3'd4;
// IQ states
parameter IQS_INVALID = 3'd0;
parameter IQS_QUEUED = 3'd1;
182,8 → 179,151
*/
`endif
 
`ifdef SUPPORT_PREDICATION
reg [3:0] pregs [0:1023];
`endif
 
reg [63:0] wbrcd;
wire [5:0] brgs;
`ifdef SUPPORT_SEGMENTATION
reg [23:0] currentCSSelector;
reg [63:0] zs_base [0:63];
reg [63:0] ds_base [0:63];
reg [63:0] es_base [0:63];
reg [63:0] fs_base [0:63];
reg [63:0] gs_base [0:63];
reg [63:0] hs_base [0:63];
reg [63:0] ss_base [0:63];
reg [63:0] cs_base [0:63];
reg [63:0] zsx_base;
reg [63:0] dsx_base;
reg [63:0] esx_base;
reg [63:0] fsx_base;
reg [63:0] gsx_base;
reg [63:0] hsx_base;
reg [63:0] ssx_base;
reg [63:0] csx_base;
reg [63:0] zs_lb [0:63];
reg [63:0] ds_lb [0:63];
reg [63:0] es_lb [0:63];
reg [63:0] fs_lb [0:63];
reg [63:0] gs_lb [0:63];
reg [63:0] hs_lb [0:63];
reg [63:0] ss_lb [0:63];
reg [63:0] cs_lb [0:63];
reg [63:0] zslb;
reg [63:0] dslb;
reg [63:0] eslb;
reg [63:0] fslb;
reg [63:0] gslb;
reg [63:0] hslb;
reg [63:0] sslb;
reg [63:0] cslb;
reg [63:0] zs_ub [0:63];
reg [63:0] ds_ub [0:63];
reg [63:0] es_ub [0:63];
reg [63:0] fs_ub [0:63];
reg [63:0] gs_ub [0:63];
reg [63:0] hs_ub [0:63];
reg [63:0] ss_ub [0:63];
reg [63:0] cs_ub [0:63];
reg [63:0] zsub;
reg [63:0] dsub;
reg [63:0] esub;
reg [63:0] fsub;
reg [63:0] gsub;
reg [63:0] hsub;
reg [63:0] ssub;
reg [63:0] csub;
reg [23:0] zs_sel [0:63];
reg [23:0] ds_sel [0:63];
reg [23:0] es_sel [0:63];
reg [23:0] fs_sel [0:63];
reg [23:0] gs_sel [0:63];
reg [23:0] hs_sel [0:63];
reg [23:0] ss_sel [0:63];
reg [23:0] cs_sel [0:63];
reg [15:0] zs_acr [0:63];
reg [15:0] ds_acr [0:63];
reg [15:0] es_acr [0:63];
reg [15:0] fs_acr [0:63];
reg [15:0] gs_acr [0:63];
reg [15:0] hs_acr [0:63];
reg [15:0] ss_acr [0:63];
reg [15:0] cs_acr [0:63];
// Power-up contents of the 64-entry segment descriptor tables.
// Every entry of every segment gets a zero base, a zero lower bound, an
// all-ones upper bound and a zero selector. The access-rights word is
// 16'h8000 except for ds (16'h9200), ss (16'h9600) and cs (16'h9A00).
initial begin
	for (n = 0; n < 64; n = n + 1) begin
		// zs
		zs_base[n] <= 64'h0;
		zs_lb[n]   <= 64'h0;
		zs_ub[n]   <= {64{1'b1}};
		zs_sel[n]  <= 24'h0;
		zs_acr[n]  <= 16'h8000;
		// ds
		ds_base[n] <= 64'h0;
		ds_lb[n]   <= 64'h0;
		ds_ub[n]   <= {64{1'b1}};
		ds_sel[n]  <= 24'h0;
		ds_acr[n]  <= 16'h9200;
		// es
		es_base[n] <= 64'h0;
		es_lb[n]   <= 64'h0;
		es_ub[n]   <= {64{1'b1}};
		es_sel[n]  <= 24'h0;
		es_acr[n]  <= 16'h8000;
		// fs
		fs_base[n] <= 64'h0;
		fs_lb[n]   <= 64'h0;
		fs_ub[n]   <= {64{1'b1}};
		fs_sel[n]  <= 24'h0;
		fs_acr[n]  <= 16'h8000;
		// gs
		gs_base[n] <= 64'h0;
		gs_lb[n]   <= 64'h0;
		gs_ub[n]   <= {64{1'b1}};
		gs_sel[n]  <= 24'h0;
		gs_acr[n]  <= 16'h8000;
		// hs
		hs_base[n] <= 64'h0;
		hs_lb[n]   <= 64'h0;
		hs_ub[n]   <= {64{1'b1}};
		hs_sel[n]  <= 24'h0;
		hs_acr[n]  <= 16'h8000;
		// ss
		ss_base[n] <= 64'h0;
		ss_lb[n]   <= 64'h0;
		ss_ub[n]   <= {64{1'b1}};
		ss_sel[n]  <= 24'h0;
		ss_acr[n]  <= 16'h9600;
		// cs
		cs_base[n] <= 64'h0;
		cs_lb[n]   <= 64'h0;
		cs_ub[n]   <= {64{1'b1}};
		cs_sel[n]  <= 24'h0;
		cs_acr[n]  <= 16'h9A00;
	end
end
// Registered read of the segment tables: on each rising edge of clk_i the
// entry selected by brgs is copied from every table into the corresponding
// working register (base, lower bound, upper bound), and the code segment
// selector for that entry is captured in currentCSSelector.
always @(posedge clk_i)
begin
	// zs
	zsx_base <= zs_base[brgs];
	zslb     <= zs_lb[brgs];
	zsub     <= zs_ub[brgs];
	// ds
	dsx_base <= ds_base[brgs];
	dslb     <= ds_lb[brgs];
	dsub     <= ds_ub[brgs];
	// es
	esx_base <= es_base[brgs];
	eslb     <= es_lb[brgs];
	esub     <= es_ub[brgs];
	// fs
	fsx_base <= fs_base[brgs];
	fslb     <= fs_lb[brgs];
	fsub     <= fs_ub[brgs];
	// gs
	gsx_base <= gs_base[brgs];
	gslb     <= gs_lb[brgs];
	gsub     <= gs_ub[brgs];
	// hs
	hsx_base <= hs_base[brgs];
	hslb     <= hs_lb[brgs];
	hsub     <= hs_ub[brgs];
	// ss
	ssx_base <= ss_base[brgs];
	sslb     <= ss_lb[brgs];
	ssub     <= ss_ub[brgs];
	// cs
	csx_base <= cs_base[brgs];
	cslb     <= cs_lb[brgs];
	csub     <= cs_ub[brgs];
	currentCSSelector <= cs_sel[brgs];
end
`endif
`ifdef SUPPORT_BBMS
reg [15:0] thrd_handle [0:63];
reg [63:0] prg_base [0:63];
284,6 → 424,11
wire [`ABITS] pc0a;
wire [`ABITS] pc1a;
wire [`ABITS] pc2a;
`ifdef SUPPORT_SEGMENTATION
wire [`ABITS] pc0 = (pc0a[47:40]==8'hFF||ol==2'b00) ? pc0a : {csx_base[50:0],13'd0} + pc0a[47:0];
wire [`ABITS] pc1 = (pc1a[47:40]==8'hFF||ol==2'b00) ? pc1a : {csx_base[50:0],13'd0} + pc1a[47:0];
wire [`ABITS] pc2 = (pc2a[47:40]==8'hFF||ol==2'b00) ? pc2a : {csx_base[50:0],13'd0} + pc2a[47:0];
`else
`ifdef SUPPORT_BBMS
wire [`ABITS] pc0 = (pc0a[47:40]==8'hFF||ol==2'b00) ? pc0a : {pb[50:0],13'd0} + pc0a[47:0];
wire [`ABITS] pc1 = (pc1a[47:40]==8'hFF||ol==2'b00) ? pc1a : {pb[50:0],13'd0} + pc1a[47:0];
293,6 → 438,7
wire [`ABITS] pc1 = pc1a;
wire [`ABITS] pc2 = pc2a;
`endif
`endif
 
reg excmiss;
reg [`ABITS] excmisspc;
318,7 → 464,11
wire wbm = cr0[34];
wire sple = cr0[35]; // speculative load enable
wire ctgtxe = cr0[33];
`ifdef SUPPORT_PREDICATION
wire pred_on = cr0[36]; // predicated execution mode on
`else
wire pred_on = 1'b0;
`endif
reg [63:0] pmr;
wire id1_available = pmr[0];
wire id2_available = pmr[1];
339,11 → 489,9
wire thread_en = 1'b0;
`endif
wire vechain = cr0[18];
// Performance CSR's
reg [39:0] iq_ctr;
reg [39:0] irq_ctr; // count of number of interrupts
reg [39:0] bm_ctr; // branch miss counter
reg [39:0] br_ctr; // branch counter
reg [39:0] icl_ctr; // instruction cache load counter
 
reg [7:0] fcu_timeout;
354,7 → 502,6
assign pcr_o = pcr;
assign pcr2_o = pcr2;
reg [63:0] aec;
(* mark_debug = "true" *)
reg [15:0] cause[0:15];
`ifdef SUPPORT_SMT
reg [`ABITS] epc [0:NTHREAD];
532,7 → 679,7
// the queue plus an extra count for skipping on branch misses. In this case
// that would be four bits minimum (count 0 to 8).
wire [63:0] rdat0,rdat1,rdat2;
reg [127:0] xdati;
reg [63:0] xdati;
 
reg canq1, canq2, canq3;
(* mark_debug = "true" *)
569,7 → 716,6
reg [QENTRIES-1:0] iqentry_oddball = 8'h00; // writes to register file
reg [QENTRIES-1:0] iqentry_load; // is a memory load instruction
reg [QENTRIES-1:0] iqentry_loadv; // is a volatile memory load instruction
reg [QENTRIES-1:0] iqentry_loadseg;
reg [QENTRIES-1:0] iqentry_store; // is a memory store instruction
reg [QENTRIES-1:0] iqentry_preload; // is a memory preload instruction
reg [QENTRIES-1:0] iqentry_ldcmp;
599,10 → 745,6
reg [QENTRIES-1:0] iqentry_prfw = 1'b0;
reg [7:0] iqentry_we [0:QENTRIES-1]; // enable strobe
reg [63:0] iqentry_res [0:QENTRIES-1]; // instruction result
reg [63:0] iqentry_seg_base [0:QENTRIES-1]; //
reg [63:0] iqentry_seg_lb [0:QENTRIES-1]; //
reg [63:0] iqentry_seg_ub [0:QENTRIES-1]; //
reg [63:0] iqentry_seg_acr [0:QENTRIES-1]; //
reg [63:0] iqentry_ares [0:QENTRIES-1]; // alternate instruction result
reg [47:0] iqentry_instr[0:QENTRIES-1]; // instruction opcode
reg [2:0] iqentry_insln[0:QENTRIES-1]; // instruction length
661,6 → 803,13
reg [PREGS-1:1] iqentry_livetarget [0:QENTRIES-1];
reg [PREGS-1:1] iqentry_latestID [0:QENTRIES-1];
reg [PREGS-1:1] iqentry_cumulative [0:QENTRIES-1];
`ifdef SUPPORT_PREDICATION
reg [QENTRIES-1:0] iqentry_psource = {QENTRIES{1'b0}};
reg [15:0] plivetarget;
reg [15:0] iqentry_plivetarget [0:QENTRIES-1];
reg [15:0] iqentry_platestID [0:QENTRIES-1];
reg [15:0] iqentry_pcumulative [0:QENTRIES-1];
`endif
wire [PREGS-1:1] iq_out [0:QENTRIES-1];
 
reg [`QBITS] tail0;
703,6 → 852,14
wire fetchbuf2_thrd;
wire fetchbuf2_mem;
wire fetchbuf2_rfw;
`ifdef SUPPORT_PREDICATION
wire fetchbuf0_prfw;
wire [7:0] fetchbuf0_pbyte;
wire fetchbuf1_prfw;
wire [7:0] fetchbuf1_pbyte;
wire fetchbuf2_prfw;
wire [7:0] fetchbuf2_pbyte;
`endif
wire [47:0] fetchbufA_instr;
wire [`ABITS] fetchbufA_pc;
wire fetchbufA_v;
897,9 → 1054,6
reg [63:0] fcu_argI; // only used by BEQ
reg [63:0] fcu_argT;
reg [63:0] fcu_argT2;
reg [63:0] fcu_epc;
reg [23:0] fcu_ecs; // excepted code segment
reg [23:0] fcu_rs; // return selector
reg [`ABITS] fcu_pc;
reg [`ABITS] fcu_nextpc;
reg [`ABITS] fcu_brdisp;
953,10 → 1107,10
reg dram0_preload;
reg [RBIT:0] dram0_tgt;
reg [`QBITSP1] dram0_id;
reg [`XBITS] dram0_exc;
reg dram0_unc;
reg [2:0] dram0_memsize;
reg dram0_load; // is a load operation
reg dram0_loadseg;
reg dram0_store;
reg [1:0] dram0_ol;
reg [63:0] dram1_data;
966,10 → 1120,10
reg dram1_preload;
reg [RBIT:0] dram1_tgt;
reg [`QBITSP1] dram1_id;
reg [`XBITS] dram1_exc;
reg dram1_unc;
reg [2:0] dram1_memsize;
reg dram1_load;
reg dram1_loadseg;
reg dram1_store;
reg [1:0] dram1_ol;
reg [63:0] dram2_data;
979,10 → 1133,10
reg dram2_preload;
reg [RBIT:0] dram2_tgt;
reg [`QBITSP1] dram2_id;
reg [`XBITS] dram2_exc;
reg dram2_unc;
reg [2:0] dram2_memsize;
reg dram2_load;
reg dram2_loadseg;
reg dram2_store;
reg [1:0] dram2_ol;
 
989,12 → 1143,15
reg dramA_v;
reg [`QBITSP1] dramA_id;
reg [63:0] dramA_bus;
reg [`XBITS] dramA_exc;
reg dramB_v;
reg [`QBITSP1] dramB_id;
reg [63:0] dramB_bus;
reg [`XBITS] dramB_exc;
reg dramC_v;
reg [`QBITSP1] dramC_id;
reg [63:0] dramC_bus;
reg [`XBITS] dramC_exc;
 
wire outstanding_stores;
reg [63:0] I; // instruction count
1037,7 → 1194,7
parameter B16 = 5'd16;
parameter B17 = 5'd17;
parameter B18 = 5'd18;
parameter B_LSNAck = 5'd19;
parameter B19 = 5'd19;
parameter B2a = 5'd20;
parameter B2b = 5'd21;
parameter B2c = 5'd22;
1045,14 → 1202,6
parameter B20 = 5'd24;
parameter B21 = 5'd25;
parameter B_DCacheLoadWait3 = 5'd26;
parameter B_LoadDesc = 5'd27;
parameter B_LoadDescStb = 5'd28;
parameter B_WaitSeg = 5'd29;
parameter B_DLoadNack = 5'd30;
parameter SEG_IDLE = 2'd0;
parameter SEG_CHK = 2'd1;
parameter SEG_UPD = 2'd2;
parameter SEG_DONE = 2'd3;
reg [1:0] bwhich;
reg [3:0] icstate,picstate;
parameter IDLE = 4'd0;
1077,15 → 1226,13
always @*
phit <= ihit&&icstate==IDLE;
reg [2:0] iccnt;
(* mark_debug="true" *)
reg icack;
reg L1_wr0,L1_wr1,L1_wr2;
reg L1_invline;
wire [1:0] ic0_fault,ic1_fault,ic2_fault;
reg [9:0] L1_en;
reg [8:0] L1_en;
reg [71:0] L1_adr, L2_adr;
reg [305:0] L1_dati;
wire [305:0] L2_dato;
reg [297:0] L2_rdat;
wire [297:0] L2_dato;
reg L2_xsel;
 
generate begin : gRegfileInst
1224,7 → 1371,7
.en(L1_en),
.adr((icstate==IDLE||icstate==IC_Next) ? {pcr[7:0],pc0} : L1_adr),
.wadr(L1_adr),
.i(L1_dati),
.i(L2_rdat),
.o(insn0a),
.fault(ic0_fault),
.hit(ihit0),
1243,7 → 1390,7
.en(L1_en),
.adr((icstate==IDLE||icstate==IC_Next) ? (thread_en ? {pcr[7:0],pc1}: {pcr[7:0],pc0plus6} ): L1_adr),
.wadr(L1_adr),
.i(L1_dati),
.i(L2_rdat),
.o(insn1b),
.fault(ic1_fault),
.hit(ihit1),
1265,7 → 1412,7
.en(L1_en),
.adr((icstate==IDLE||icstate==IC_Next) ? (thread_en ? {pcr[7:0],pc2} : {pcr[7:0],pc0plus12}) : L1_adr),
.wadr(L1_adr),
.i(L1_dati),
.i(L2_rdat),
.o(insn2b),
.fault(ic2_fault),
.hit(ihit2),
1848,7 → 1995,7
wire freezePC = (irq_i > im) && !int_commit;
always @*
if (freezePC) begin
insn0 <= {32'h00,6'd0,5'd0,irq_i,1'b0,vec_i,2'b00,`BRK};
insn0 <= {8'h00,6'd0,5'd0,irq_i,1'b0,vec_i,2'b00,`BRK};
end
else if (phit) begin
// if (insn0a[`INSTRUCTION_OP]==`BRK && insn0a[25:21]==5'd0 && insn0a[`INSTRUCTION_L2]==2'b00)
1855,21 → 2002,10
// insn0 <= {6'd1,5'd0,4'b0,1'b0,`FLT_PRIV,2'b00,`BRK};
// else
insn0 <= insn0a;
if (insn0a[15:0]==16'hFF00) begin // BRK #255
if (~|irq_i)
insn0 <= {8'h00,`NOP_INSN};
else
insn0[20:0] <= {irq_i,1'b0,vec_i,2'b00,`BRK};
end
else if (ic0_fault[1])
insn0 <= {32'h00,6'd0,5'd0,4'h0,1'b0,`FLT_IBE,2'b00,`BRK};
else if (ic0_fault[0])
insn0 <= {32'h00,6'd0,5'd0,4'h0,1'b0,`FLT_EXF,2'b00,`BRK};
end
else begin
insn0 <= {8'h00,`NOP_INSN};
end
 
generate begin : gInsnMux
if (`WAYS > 1) begin
always @*
1881,16 → 2017,6
// insn1 <= {6'd1,5'd0,4'b0,1'b0,`FLT_PRIV,2'b00,`BRK};
// else
insn1 <= insn1a;
if (insn1a[15:0]==16'hFF00) begin
if (~|irq_i)
insn1 <= {8'h00,`NOP_INSN};
else
insn1[20:0] <= {irq_i,1'b0,vec_i,2'b00,`BRK};
end
else if (ic1_fault[1])
insn1 <= {32'h00,6'd0,5'd0,4'h0,1'b0,`FLT_IBE,2'b00,`BRK};
else if (ic1_fault[0])
insn1 <= {32'h00,6'd0,5'd0,4'h0,1'b0,`FLT_EXF,2'b00,`BRK};
end
else begin
insn1 <= {8'h00,`NOP_INSN};
1905,16 → 2031,6
// insn2 <= {6'd1,5'd0,4'b0,1'b0,`FLT_PRIV,2'b00,`BRK};
// else
insn2 <= insn2a;
if (insn2a[15:0]==16'hFF00) begin
if (~|irq_i)
insn2 <= {8'h00,`NOP_INSN};
else
insn2[20:0] <= {irq_i,1'b0,vec_i,2'b00,`BRK};
end
else if (ic2_fault[1])
insn2 <= {32'h00,6'd0,5'd0,4'h0,1'b0,`FLT_IBE,2'b00,`BRK};
else if (ic2_fault[0])
insn2 <= {32'h00,6'd0,5'd0,4'h0,1'b0,`FLT_EXF,2'b00,`BRK};
end
else
insn2 <= `NOP_INSN;
1923,9 → 2039,9
endgenerate
 
wire [63:0] dc0_out, dc1_out, dc2_out;
assign rdat0 = dram0_unc ? xdati[63:0] : dc0_out;
assign rdat1 = dram1_unc ? xdati[63:0] : dc1_out;
assign rdat2 = dram2_unc ? xdati[63:0] : dc2_out;
assign rdat0 = dram0_unc ? xdati : dc0_out;
assign rdat1 = dram1_unc ? xdati : dc1_out;
assign rdat2 = dram2_unc ? xdati : dc2_out;
 
reg preload;
reg [1:0] dccnt;
1960,9 → 2076,9
assign dhit2 = dhit2a && !wb_hit2;
wire whit0, whit1, whit2;
 
wire wr_dcache0 = (bstate==B_DCacheLoadAck && ack_i)||(((bstate==B_StoreAck && StoreAck1) || (bstate==B_LSNAck && isStore)) && whit0);
wire wr_dcache1 = (bstate==B_DCacheLoadAck && ack_i)||(((bstate==B_StoreAck && StoreAck1) || (bstate==B_LSNAck && isStore)) && whit1);
wire wr_dcache2 = (bstate==B_DCacheLoadAck && ack_i)||(((bstate==B_StoreAck && StoreAck1) || (bstate==B_LSNAck && isStore)) && whit2);
wire wr_dcache0 = (bstate==B_DCacheLoadAck && ack_i)||(((bstate==B_StoreAck && StoreAck1) || (bstate==B19 && isStore)) && whit0);
wire wr_dcache1 = (bstate==B_DCacheLoadAck && ack_i)||(((bstate==B_StoreAck && StoreAck1) || (bstate==B19 && isStore)) && whit1);
wire wr_dcache2 = (bstate==B_DCacheLoadAck && ack_i)||(((bstate==B_StoreAck && StoreAck1) || (bstate==B19 && isStore)) && whit2);
 
FT64_dcache udc0
(
2219,13 → 2335,11
`BRK: fnRt = 12'd0;
`REX: fnRt = 12'd0;
`CHK: fnRt = 12'd0;
//`EXEC: fnRt = 12'd0;
`EXEC: fnRt = 12'd0;
`Bcc: fnRt = 12'd0;
`BLcc: fnRt = 12'd0;
`BBc: fnRt = 12'd0;
`NOP: fnRt = 12'd0;
`BEQI: fnRt = 12'd0;
`BNEI: fnRt = 12'd0;
`SB,`Sx,`SWC,`CACHE:
fnRt = 12'd0;
`JMP: fnRt = 12'd0;
2453,13 → 2567,11
`BRK: fnRt = 12'd0;
`REX: fnRt = 12'd0;
`CHK: fnRt = 12'd0;
//`EXEC: fnRt = 12'd0;
`EXEC: fnRt = 12'd0;
`Bcc: fnRt = 12'd0;
`BLcc: fnRt = 12'd0;
`BBc: fnRt = 12'd0;
`NOP: fnRt = 12'd0;
`BEQI: fnRt = 12'd0;
`BNEI: fnRt = 12'd0;
`SB,`Sx,`SWC,`CACHE:
fnRt = 12'd0;
`JMP: fnRt = 12'd0;
2482,6 → 2594,7
`CMP: fnWe = 8'h00;
default: fnWe = 8'hFF;
endcase
`CMPI: fnWe = 8'h00;
default: fnWe = 8'hFF;
endcase
/*
2531,10 → 2644,8
casez(isn[`INSTRUCTION_OP])
`BRK: Source1Valid = isn[16] ? isn[`INSTRUCTION_RA]==5'd0 : TRUE;
`Bcc: Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`BLcc: Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`BBc: Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`BEQI: Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`BNEI: Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`CHK: Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`RR: case(isn[`INSTRUCTION_S2])
`SHIFT31: Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
2544,7 → 2655,6
endcase
`MEMNDX:Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`ADDI: Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`SEQI: Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`SLTI: Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`SLTUI: Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`SGTI: Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
2571,7 → 2681,6
`Sx: Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`SWC: Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`SV: Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`PUSHC: Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`INC: Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`CAS: Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`CACHE: Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
2593,17 → 2702,16
casez(isn[`INSTRUCTION_OP])
`BRK: Source2Valid = TRUE;
`Bcc: Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
`BLcc: Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
`BBc: Source2Valid = TRUE;
`BEQI: Source2Valid = TRUE;
`BNEI: Source2Valid = TRUE;
`CHK: Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
`R2: casez(isn[`INSTRUCTION_S2])
`TLB: Source2Valid = TRUE;
`R1: Source2Valid = TRUE;
`MOV: Source2Valid = TRUE;
`SHIFT31: Source2Valid = TRUE;
`SHIFT63: Source2Valid = TRUE;
`SHIFTR: Source2Valid = isn[25] ? 1'b1 : isn[`INSTRUCTION_RB]==5'd0;
`SHIFT31: Source2Valid = isn[25] ? 1'b1 : isn[`INSTRUCTION_RB]==5'd0;
`SHIFT63: Source2Valid = isn[25] ? 1'b1 : isn[`INSTRUCTION_RB]==5'd0;
`LVX,`SVX: Source2Valid = FALSE;
default: Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
endcase
2624,7 → 2732,6
endcase
end
`ADDI: Source2Valid = TRUE;
`SEQI: Source2Valid = TRUE;
`SLTI: Source2Valid = TRUE;
`SLTUI: Source2Valid = TRUE;
`SGTI: Source2Valid = TRUE;
2646,7 → 2753,6
`SB: Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
`Sx: Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
`SWC: Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
`PUSHC: Source2Valid = TRUE;
`CAS: Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
`JAL: Source2Valid = TRUE;
`RET: Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
2715,6 → 2821,50
endcase
endfunction
 
// Predication support: derives the "valid" flag for the target-register (Rt)
// operand from the instruction word alone.
//   isn     - 48-bit instruction word
//   returns - TRUE for the opcodes given a constant TRUE below; for the
//             remaining cases TRUE only when the Rt field is zero (BITFIELD
//             is additionally TRUE whenever isn[32] is clear).
function SourceTValid;
input [47:0] isn;
begin
	case(isn[`INSTRUCTION_OP])
	`BRK:		SourceTValid = TRUE;
	`Bcc:		SourceTValid = TRUE;
	`BBc:		SourceTValid = TRUE;
	`BEQI:		SourceTValid = TRUE;
	// Every function code under this opcode yields the same answer.
	`IVECTOR:	SourceTValid = TRUE;
	`CHK:		SourceTValid = isn[`INSTRUCTION_RT]==5'd0;
	`R2:
		begin
			if (isn[`INSTRUCTION_L2]==2'b01) begin
				// Function code taken from bits 47:42.
				case(isn[47:42])
				`CMOVEZ,`CMOVNZ:
					SourceTValid = isn[`INSTRUCTION_RT]==5'd0;
				default:
					SourceTValid = TRUE;
				endcase
			end
			else begin
				case(isn[`INSTRUCTION_S2])
				`MAJ:		SourceTValid = isn[`INSTRUCTION_RT]==5'd0;
				default:	SourceTValid = TRUE;
				endcase
			end
		end
	`MEMNDX:
		begin
			if (!isn[31]) begin
				// The listed indexed operations depend on the Rt field.
				case({isn[31:28],isn[22:21]})
				`CACHEX,
				`LVBX,`LVBUX,`LVCX,`LVCUX,`LVHX,`LVHUX,`LVWX,
				`LBX,`LBUX,`LCX,`LCUX,`LHX,`LHUX,`LWX,`LWRX:
					SourceTValid = isn[`INSTRUCTION_RT]==5'd0;
				default:
					SourceTValid = TRUE;
				endcase
			end
			else begin
				SourceTValid = TRUE;
			end
		end
	`SB:		SourceTValid = TRUE;
	`Sx:		SourceTValid = TRUE;
	`SWC:		SourceTValid = TRUE;
	`CAS:		SourceTValid = TRUE;
	`BITFIELD:	SourceTValid = isn[`INSTRUCTION_RT]==5'd0 || isn[32]==1'b0;
	default:	SourceTValid = isn[`INSTRUCTION_RT]==5'd0;
	endcase
end
endfunction
 
// Used to indicate to the queue logic that the instruction needs to be
// recycled to the queue VL number of times.
2807,9 → 2957,25
endcase
endfunction
 
function IsMem;
// Returns TRUE when the instruction word is a compare: either the `CMPI
// opcode, or an `R2 opcode whose L2 field is 2'b00 and whose bits 31:26
// select `CMP. Everything else returns FALSE.
//   isn - 48-bit instruction word
function IsCmp;
input [47:0] isn;
begin
	// Start from "not a compare" and override for the two matching forms.
	IsCmp = FALSE;
	case(isn[`INSTRUCTION_OP])
	`R2:
		if (isn[`INSTRUCTION_L2]==2'b00) begin
			case(isn[31:26])
			`CMP:		IsCmp = TRUE;
			default:	;
			endcase
		end
	`CMPI:		IsCmp = TRUE;
	default:	;
	endcase
end
endfunction
 
function [0:0] IsMem;
input [47:0] isn;
case(isn[`INSTRUCTION_OP])
`MEMNDX: IsMem = TRUE;
`AMO: IsMem = TRUE;
`LB: IsMem = TRUE;
2822,7 → 2988,6
`SB: IsMem = TRUE;
`Sx: IsMem = TRUE;
`SWC: IsMem = TRUE;
`PUSHC: IsMem = TRUE;
`CAS: IsMem = TRUE;
`LVx: IsMem = TRUE;
`LVxU: IsMem = TRUE;
2963,10 → 3128,8
input [47:0] isn;
casez(isn[`INSTRUCTION_OP])
`Bcc: IsBranch = TRUE;
`BLcc: IsBranch = TRUE;
`BBc: IsBranch = TRUE;
`BEQI: IsBranch = TRUE;
`BNEI: IsBranch = TRUE;
`CHK: IsBranch = TRUE;
default: IsBranch = FALSE;
endcase
2996,10 → 3159,8
default: IsFlowCtrl = FALSE;
endcase
`Bcc: IsFlowCtrl = TRUE;
`BLcc: IsFlowCtrl = TRUE;
`BBc: IsFlowCtrl = TRUE;
`BEQI: IsFlowCtrl = TRUE;
`BNEI: IsFlowCtrl = TRUE;
`CHK: IsFlowCtrl = TRUE;
`JAL: IsFlowCtrl = TRUE;
`JMP: IsFlowCtrl = TRUE;
3067,11 → 3228,6
endcase
endfunction
 
// Returns 1 when the opcode field of the instruction word equals `RET.
//   isn - 48-bit instruction word
function IsRet;
input [47:0] isn;
IsRet = isn[`INSTRUCTION_OP]==`RET;
endfunction
 
function IsRFW;
input [47:0] isn;
input [5:0] vqei;
3094,7 → 3250,6
endcase
`ADD: IsRFW = TRUE;
`SUB: IsRFW = TRUE;
`SEQ: IsRFW = TRUE;
`SLT: IsRFW = TRUE;
`SLTU: IsRFW = TRUE;
`SLE: IsRFW = TRUE;
3192,7 → 3347,6
`BBc: IsRFW = FALSE;
`BITFIELD: IsRFW = TRUE;
`ADDI: IsRFW = TRUE;
`SEQI: IsRFW = TRUE;
`SLTI: IsRFW = TRUE;
`SLTUI: IsRFW = TRUE;
`SGTI: IsRFW = TRUE;
3218,7 → 3372,6
`LV: IsRFW = TRUE;
`LVx: IsRFW = TRUE;
`LVxU: IsRFW = TRUE;
`PUSHC: IsRFW = TRUE;
`CAS: IsRFW = TRUE;
`AMO: IsRFW = TRUE;
`CSRRW: IsRFW = TRUE;
3325,6 → 3478,14
endcase
endfunction
 
// Returns TRUE when the opcode field of the instruction word is `EXEC,
// FALSE for every other opcode.
//   isn - 48-bit instruction word
function IsExec;
input [47:0] isn;
begin
	// Default result, overridden only for the `EXEC opcode.
	IsExec = FALSE;
	case(isn[`INSTRUCTION_OP])
	`EXEC:		IsExec = TRUE;
	default:	;
	endcase
end
endfunction
 
function [7:0] fnSelect;
input [47:0] ins;
input [`ABITS] adr;
3442,7 → 3603,6
endcase
default: fnSelect = 8'h00;
endcase
`PUSHC,
`INC,
`LWR,`SWC,`CAS: fnSelect = 8'hFF;
`LV,`SV: fnSelect = 8'hFF;
3707,17 → 3867,26
// FETCH
// ---------------------------------------------------------------------------
//
assign fetchbuf0_mem = IsMem(fetchbuf0_instr) & ~IsRet(fetchbuf0_instr);// & IsLoad(fetchbuf0_instr);
assign fetchbuf0_mem = IsMem(fetchbuf0_instr);// & IsLoad(fetchbuf0_instr);
assign fetchbuf0_rfw = IsRFW(fetchbuf0_instr,vqe0,vl,fetchbuf0_thrd);
`ifdef SUPPORT_PREDICATION
assign fetchbuf0_prfw = IsCmp(fetchbuf0_instr);
`endif
 
generate begin: gFetchbufDec
if (`WAYS > 1) begin
assign fetchbuf1_mem = IsMem(fetchbuf1_instr) & ~IsRet(fetchbuf1_instr);// & IsLoad(fetchbuf1_instr);
assign fetchbuf1_mem = IsMem(fetchbuf1_instr);// & IsLoad(fetchbuf1_instr);
assign fetchbuf1_rfw = IsRFW(fetchbuf1_instr,vqe1,vl,fetchbuf1_thrd);
`ifdef SUPPORT_PREDICATION
assign fetchbuf1_prfw = IsCmp(fetchbuf1_instr);
`endif
end
if (`WAYS > 2) begin
assign fetchbuf2_mem = IsMem(fetchbuf2_instr) & ~IsRet(fetchbuf2_instr);// & IsLoad(fetchbuf2_instr);
assign fetchbuf2_mem = IsMem(fetchbuf2_instr);// & IsLoad(fetchbuf2_instr);
assign fetchbuf2_rfw = IsRFW(fetchbuf2_instr,vqe2,vl,fetchbuf2_thrd);
`ifdef SUPPORT_PREDICATION
assign fetchbuf2_prfw = IsCmp(fetchbuf2_instr);
`endif
end
end
endgenerate
3797,9 → 3966,9
.fetchbuf0_insln(fetchbuf0_insln),
.fetchbuf1_insln(fetchbuf1_insln),
.fetchbuf2_insln(fetchbuf2_insln),
.codebuf0(codebuf[insn0[13:8]]),
.codebuf1(codebuf[insn1[13:8]]),
.codebuf2(codebuf[insn2[13:8]]),
.codebuf0(codebuf[insn0[21:16]]),
.codebuf1(codebuf[insn1[21:16]]),
.codebuf2(codebuf[insn2[21:16]]),
.btgtA(btgtA),
.btgtB(btgtB),
.btgtC(btgtC),
3874,8 → 4043,8
.fetchbuf1_v(fetchbuf1_v),
.fetchbuf0_insln(fetchbuf0_insln),
.fetchbuf1_insln(fetchbuf1_insln),
.codebuf0(codebuf[insn0[13:8]]),
.codebuf1(codebuf[insn1[13:8]]),
.codebuf0(codebuf[insn0[21:16]]),
.codebuf1(codebuf[insn1[21:16]]),
.btgtA(btgtA),
.btgtB(btgtB),
.btgtC(btgtC),
3931,7 → 4100,7
.fetchbuf0_v(fetchbuf0_v),
.fetchbuf0_insln(fetchbuf0_insln),
.fetchbuf0_pbyte(fetchbuf0_pbyte),
.codebuf0(codebuf[insn0[13:8]]),
.codebuf0(codebuf[insn0[21:16]]),
.btgtA(btgtA),
.btgtB(btgtB),
.nop_fetchbuf(nop_fetchbuf),
3945,8 → 4114,6
end
endgenerate
 
// Stores might raise an exception, so we don't want the heads to advance if a
// subsequent instruction is a store, even though there's no target register.
wire cmt_head1 = (!iqentry_rfw[heads[1]] && !iqentry_oddball[heads[1]] && ~|iqentry_exc[heads[1]]);
wire cmt_head2 = (!iqentry_rfw[heads[2]] && !iqentry_oddball[heads[2]] && ~|iqentry_exc[heads[2]]);
 
4095,8 → 4262,25
 
// Per-queue-entry live target vector. The entry's iq_out[n] vector is passed
// through only when the entry is valid, is not being stomped, and belongs to
// the same thread as the branch miss; otherwise the result is all zeros.
always @*
for (n = 0; n < QENTRIES; n = n + 1)
`ifdef SUPPORT_PREDICATION
// With predication enabled, entries flagged in iqentry_cmp are masked out here.
iqentry_livetarget[n] = {PREGS {iqentry_v[n]}} & {PREGS {~iqentry_stomp[n] && iqentry_thrd[n]==branchmiss_thrd}} & iq_out[n] & ~{PREGS{iqentry_cmp[n]}};
`else
iqentry_livetarget[n] = {PREGS {iqentry_v[n]}} & {PREGS {~iqentry_stomp[n] && iqentry_thrd[n]==branchmiss_thrd}} & iq_out[n];
`endif
 
`ifdef SUPPORT_PREDICATION
// plivetarget[j] for j = 1..15 is the OR, across all queue entries, of bit j
// of that entry's iqentry_plivetarget vector.
// NOTE(review): bit 0 of plivetarget is never assigned by this loop - confirm
// that this is intentional.
always @*
for (j = 1; j < 16; j = j + 1) begin
plivetarget[j] = 1'b0;
for (n = 0; n < QENTRIES; n = n + 1)
plivetarget[j] = plivetarget[j] | iqentry_plivetarget[n][j];
end

// Per-entry live target vector for entries flagged in iqentry_cmp: iq_out[n]
// is passed through only when the entry is valid, not stomped, on the same
// thread as the branch miss, and has iqentry_cmp set; otherwise all zeros.
// NOTE(review): iq_out is declared [PREGS-1:1] while this vector is 16 bits
// wide - confirm the intended bit alignment/truncation.
always @*
for (n = 0; n < QENTRIES; n = n + 1)
iqentry_plivetarget[n] = {16 {iqentry_v[n]}} & {16 {~iqentry_stomp[n] && iqentry_thrd[n]==branchmiss_thrd}} & iq_out[n] & {16{iqentry_cmp[n]}};
`endif
 
//
// BRANCH-MISS LOGIC: latestID
//
4123,6 → 4307,29
for (n = 0; n < QENTRIES; n = n + 1)
iqentry_source[n] = | iqentry_latestID[n];
 
`ifdef SUPPORT_PREDICATION
// iqentry_pcumulative[n]: OR of iqentry_plivetarget over the queue entries
// from n up to and including missid, walking the queue circularly (indices
// taken modulo QENTRIES). j scans the QENTRIES positions starting at n; the
// position that equals missid triggers the accumulation over k = n..j.
always @*
for (n = 0; n < QENTRIES; n = n + 1) begin
iqentry_pcumulative[n] = 1'b0;
for (j = n; j < n + QENTRIES; j = j + 1) begin
if (missid==(j % QENTRIES))
for (k = n; k <= j; k = k + 1)
iqentry_pcumulative[n] = iqentry_pcumulative[n] | iqentry_plivetarget[k % QENTRIES];
end
end

// iqentry_platestID[n]: the entry's own plivetarget vector when the entry is
// the missed one (missid) or when none of its bits also appear in the
// cumulative vector of the next entry; all zeros otherwise.
always @*
for (n = 0; n < QENTRIES; n = n + 1)
iqentry_platestID[n] = (missid == n || ((iqentry_plivetarget[n] & iqentry_pcumulative[(n+1)%QENTRIES]) == {16{1'b0}}))
? iqentry_plivetarget[n]
: {16{1'b0}};

// iqentry_psource[n]: set when any bit of iqentry_platestID[n] is set.
always @*
for (n = 0; n < QENTRIES; n = n + 1)
iqentry_psource[n] = | iqentry_platestID[n];

`endif
 
reg vqueued2;
assign Ra0 = fnRa(fetchbuf0_instr,vqe0,vl,fetchbuf0_thrd) | {fetchbuf0_thrd,7'b0};
assign Rb0 = fnRb(fetchbuf0_instr,1'b0,vqe0,rfoa0[5:0],rfoa1[5:0],fetchbuf0_thrd) | {fetchbuf0_thrd,7'b0};
4469,10 → 4676,10
always @*
begin
for (n = 0; n < QENTRIES; n = n + 1) begin
iqentry_v[n] = iqentry_state[n] != IQS_INVALID;
iqentry_done[n] = iqentry_state[n]==IQS_DONE || iqentry_state[n]==IQS_CMT;
iqentry_out[n] = iqentry_state[n]==IQS_OUT;
iqentry_agen[n] = iqentry_state[n]==IQS_AGEN;
iqentry_v[n] <= iqentry_state[n] != IQS_INVALID;
iqentry_done[n] <= iqentry_state[n]==IQS_DONE || iqentry_state[n]==IQS_CMT;
iqentry_out[n] <= iqentry_state[n]==IQS_OUT;
iqentry_agen[n] <= iqentry_state[n]==IQS_AGEN;
end
end
 
5282,7 → 5489,7
last_issue1 = QENTRIES;
last_issue2 = QENTRIES;
for (n = 0; n < QENTRIES; n = n + 1)
if (~iqentry_stomp[heads[n]] && iqentry_memissue[heads[n]] && !iqentry_done[heads[n]] && iqentry_v[heads[n]]) begin
if (~iqentry_stomp[heads[n]] && iqentry_memissue[heads[n]]) begin
if (mem1_available && dram0 == `DRAMSLOT_AVAIL) begin
last_issue0 = heads[n];
end
5558,6 → 5765,9
`CSR_SBU: csr_r <= sbu;
`CSR_ENU: csr_r <= en;
`endif
`ifdef SUPPORT_PREDICATION
`CSR_PREGS: read_pregs(csr_r);
`endif
`CSR_Q_CTR: csr_r <= iq_ctr;
`CSR_BM_CTR: csr_r <= bm_ctr;
`CSR_ICL_CTR: csr_r <= icl_ctr;
5644,6 → 5854,33
.exv_o(exv_i),
.wrv_o(wrv_i),
.rdv_o(rdv_i)
`ifdef SUPPORT_SEGMENTATION
,
.zs_base(zsx_base),
.ds_base(dsx_base),
.es_base(esx_base),
.fs_base(fsx_base),
.gs_base(gsx_base),
.hs_base(hsx_base),
.ss_base(ssx_base),
.cs_base(csx_base),
.zsub(zsub),
.dsub(dsub),
.esub(esub),
.fsub(fsub),
.gsub(gsub),
.hsub(hsub),
.ssub(ssub),
.csub(csub),
.zslb(zslb),
.dslb(dslb),
.eslb(eslb),
.fslb(fslb),
.gslb(gslb),
.hslb(hslb),
.sslb(sslb),
.cslb(cslb)
`endif
`ifdef SUPPORT_BBMS
.pb(dl==2'b00 ? 64'd0 : pb),
.cbl(cbl),
5698,6 → 5935,33
.exv_o(),
.wrv_o(),
.rdv_o()
`ifdef SUPPORT_SEGMENTATION
,
.zs_base(zsx_base),
.ds_base(dsx_base),
.es_base(esx_base),
.fs_base(fsx_base),
.gs_base(gsx_base),
.hs_base(hsx_base),
.ss_base(ssx_base),
.cs_base(csx_base),
.zsub(zsub),
.dsub(dsub),
.esub(esub),
.fsub(fsub),
.gsub(gsub),
.hsub(hsub),
.ssub(ssub),
.csub(csub),
.zslb(zslb),
.dslb(dslb),
.eslb(eslb),
.fslb(fslb),
.gslb(gslb),
.hslb(hslb),
.sslb(sslb),
.cslb(cslb)
`endif
`ifdef SUPPORT_BBMS
.pb(dl==2'b00 ? 64'd0 : pb),
.cbl(cbl),
5836,6 → 6100,10
begin
fcu_exc <= `FLT_NONE;
casez(fcu_instr[`INSTRUCTION_OP])
`ifdef SUPPORT_SEGMENTATION
`LDCS: fcu_exc <= fcu_instr[31:8] != fcu_pc[63:40] ? `FLT_CS : `FLT_NONE;
`RET: fcu_exc <= fcu_argB[63:40] != fcu_pc[63:40] ? `FLT_RET : `FLT_NONE;
`endif
`ifdef SUPPORT_BBMS
`LFCS: fcu_exc <= currentCSSelector != fcu_instr[31:8] ? `FLT_CS : `FLT_NONE;
`RET: fcu_exc <= fcu_argB[63:40] != currentCSSelector ? `FLT_RET : `FLT_NONE;
5849,8 → 6117,6
`OL_USER: fcu_exc <= `FLT_PRIV;
default: ;
endcase
// Could have long branches exceptioning and unimplemented in the fetch stage.
// `BBc: fcu_exc <= fcu_instr[6] ? `FLT_BRN : `FLT_NONE;
default: fcu_exc <= `FLT_NONE;
endcase
end
5882,7 → 6148,7
always @*
begin
case(fcu_instr[`INSTRUCTION_OP])
`R2: fcu_misspc = fcu_epc; // RTI (we don't bother fully decoding this as it's the only R2)
`R2: fcu_misspc = fcu_argB; // RTI (we don't bother fully decoding this as it's the only R2)
`RET: fcu_misspc = fcu_argB;
`REX: fcu_misspc = fcu_bus;
`BRK: fcu_misspc = {tvec[0][AMSB:8], 1'b0, olm, 5'h0};
5889,7 → 6155,7
`JAL: fcu_misspc = fcu_argA + fcu_argI;
//`CHK: fcu_misspc = fcu_nextpc + fcu_argI; // Handled as an instruction exception
// Default: branch
default: fcu_misspc = fcu_pt ? fcu_nextpc : {fcu_pc[AMSB:32],fcu_pc[31:0] + fcu_brdisp[31:0]};
default: fcu_misspc = fcu_takb ? {fcu_pc[31:8] + fcu_brdisp[31:8],fcu_brdisp[7:0]} : fcu_nextpc;
endcase
fcu_misspc[0] = 1'b0;
end
6198,7 → 6464,7
end
end
else
queuedNop <= TRUE;
queuedNop <= TRUE;
end
else if (fetchbuf1_v && fetchbuf1_thrd != branchmiss_thrd) begin
if (fetchbuf1_instr[`INSTRUCTION_OP]!=`NOP) begin
6245,9 → 6511,9
end
 
wire writing_wb =
(mem1_available && dram0==`DRAMSLOT_BUSY && dram0_store && wbptr<`WB_DEPTH-1)
|| (mem2_available && dram1==`DRAMSLOT_BUSY && dram1_store && `NUM_MEM > 1 && wbptr<`WB_DEPTH-1)
|| (mem3_available && dram2==`DRAMSLOT_BUSY && dram2_store && `NUM_MEM > 2 && wbptr<`WB_DEPTH-1)
(mem1_available && dram0==`DRAMSLOT_BUSY && dram0_store && !iqentry_stomp[dram0_id[`QBITS]] && wbptr<`WB_DEPTH-1)
|| (mem2_available && dram1==`DRAMSLOT_BUSY && dram1_store && !iqentry_stomp[dram1_id[`QBITS]] && `NUM_MEM > 1 && wbptr<`WB_DEPTH-1)
|| (mem3_available && dram2==`DRAMSLOT_BUSY && dram2_store && !iqentry_stomp[dram2_id[`QBITS]] && `NUM_MEM > 2 && wbptr<`WB_DEPTH-1)
;
 
// Monster clock domain.
6273,10 → 6539,10
wire [63:0] ralu1_bus = |alu1_exc ? {4{lfsro}} : alu1_bus;
wire [63:0] rfpu1_bus = |fpu1_exc ? {4{lfsro}} : fpu1_bus;
wire [63:0] rfpu2_bus = |fpu2_exc ? {4{lfsro}} : fpu2_bus;
wire [63:0] rfcu_bus = |fcu_exc ? {4{lfsro}} : fcu_bus;
wire [63:0] rdramA_bus = dramA_bus;
wire [63:0] rdramB_bus = dramB_bus;
wire [63:0] rdramC_bus = dramC_bus;
wire [63:0] rfcu_bus = |fcu_exc ? {4{lfsro}} : fcu_bus;
wire [63:0] rdramA_bus = |dramA_exc ? {4{lfsro}} : dramA_bus;
wire [63:0] rdramB_bus = |dramB_exc ? {4{lfsro}} : dramB_bus;
wire [63:0] rdramC_bus = |dramC_exc ? {4{lfsro}} : dramC_bus;
 
always @(posedge clk)
if (rst) begin
6348,6 → 6614,10
iqentry_a1_s[n] <= 5'd0;
iqentry_a2_s[n] <= 5'd0;
iqentry_a3_s[n] <= 5'd0;
`ifdef SUPPORT_PREDICATION
iqentry_aT[n] <= 64'd0;
iqentry_aT_s[n] <= 1'd0;
`endif
iqentry_canex[n] <= FALSE;
end
bwhich <= 2'b00;
6364,7 → 6634,6
dram1_id <= 1'b0;
dram2_id <= 1'b0;
L1_adr <= RSTPC;
L2_adr <= RSTPC;
invic <= FALSE;
tail0 <= 3'd0;
tail1 <= 3'd1;
6402,7 → 6671,6
`endif
fcu_dataready <= 0;
fcu_instr <= `NOP_INSN;
fcu_call <= 1'b0;
dramA_v <= 0;
dramB_v <= 0;
dramC_v <= 0;
6423,7 → 6691,6
cr_o <= `LOW;
vadr <= RSTPC;
icl_o <= `LOW; // instruction cache load
L1_dati <= 306'd0;
cr0 <= 64'd0;
cr0[13:8] <= 6'd0; // select compressed instruction group #0
cr0[30] <= TRUE; // enable data caching
6487,7 → 6754,6
iq_ctr <= 40'd0;
icl_ctr <= 40'd0;
bm_ctr <= 40'd0;
br_ctr <= 40'd0;
irq_ctr <= 40'd0;
cmt_timer <= 9'd0;
StoreAck1 <= `FALSE;
7066,40 → 7332,43
fcu_dataready <= `TRUE;
end
 
// If the return segment is not the same as the current code segment then a
// segment load is triggered via the memory unit by setting the iq state to
// AGEN. Otherwise the state is set to CMT which will cause a bypass of the
// segment load from memory.
 
if (fcu_v) begin
fcu_done <= `TRUE;
iqentry_ma [ fcu_id[`QBITS] ] <= fcu_misspc;
iqentry_res [ fcu_id[`QBITS] ] <= rfcu_bus;
iqentry_exc [ fcu_id[`QBITS] ] <= fcu_exc;
// iqentry_done[ fcu_id[`QBITS] ] <= `TRUE;
// iqentry_out [ fcu_id[`QBITS] ] <= `INV;
iqentry_state[fcu_id[`QBITS] ] <= IQS_CMT;
// takb is looked at only for branches to update the predictor. Here it is
// unconditionally set, the value will be ignored if it's not a branch.
iqentry_takb[ fcu_id[`QBITS] ] <= fcu_takb;
br_ctr <= br_ctr + fcu_branch;
fcu_dataready <= `INV;
end
 
// dramX_v only set on a load
if (dramA_v && iqentry_v[ dramA_id[`QBITS] ]) begin
if (mem1_available && dramA_v && iqentry_v[ dramA_id[`QBITS] ]) begin
iqentry_res [ dramA_id[`QBITS] ] <= rdramA_bus;
iqentry_exc [ dramA_id[`QBITS] ] <= dramA_exc;
// iqentry_done[ dramA_id[`QBITS] ] <= `VAL;
// iqentry_out [ dramA_id[`QBITS] ] <= `INV;
iqentry_state[dramA_id[`QBITS] ] <= IQS_CMT;
iqentry_aq [ dramA_id[`QBITS] ] <= `INV;
end
if (`NUM_MEM > 1 && dramB_v && iqentry_v[ dramB_id[`QBITS] ]) begin
if (mem2_available && `NUM_MEM > 1 && dramB_v && iqentry_v[ dramB_id[`QBITS] ]) begin
iqentry_res [ dramB_id[`QBITS] ] <= rdramB_bus;
iqentry_exc [ dramB_id[`QBITS] ] <= dramB_exc;
// iqentry_done[ dramB_id[`QBITS] ] <= `VAL;
iqentry_state[dramB_id[`QBITS] ] <= IQS_CMT;
// iqentry_out [ dramB_id[`QBITS] ] <= `INV;
iqentry_aq [ dramB_id[`QBITS] ] <= `INV;
end
if (`NUM_MEM > 2 && dramC_v && iqentry_v[ dramC_id[`QBITS] ]) begin
if (mem3_available && `NUM_MEM > 2 && dramC_v && iqentry_v[ dramC_id[`QBITS] ]) begin
iqentry_res [ dramC_id[`QBITS] ] <= rdramC_bus;
iqentry_exc [ dramC_id[`QBITS] ] <= dramC_exc;
// iqentry_done[ dramC_id[`QBITS] ] <= `VAL;
iqentry_state[dramC_id[`QBITS] ] <= IQS_CMT;
// iqentry_out [ dramC_id[`QBITS] ] <= `INV;
iqentry_aq [ dramC_id[`QBITS] ] <= `INV;
// if (iqentry_lptr[dram2_id[`QBITS]])
// wbrcd[pcr[5:0]] <= 1'b1;
7386,6 → 7655,21
`else
iqentry_a3[n];
`endif
`ifdef SUPPORT_PREDICATION
fpu1_pred <= iqentry_p_v[n] ? iqentry_pred[n] :
`ifdef FU_BYPASS
(iqentry_p_s[n] == alu0_id) ? alu0nyb[iqentry_preg[n]] :
(iqentry_p_s[n] == alu1_id) ? alu1nyb[iqentry_preg[n]] :
`endif
4'h0;
fpu1_argT <=
`ifdef FU_BYPASS
iqentry_aT_v[n] ? iqentry_aT[n]
: (iqentry_aT_s[n] == alu0_id) ? ralu0_bus : ralu1_bus;
`else
iqentry_aT[n];
`endif
`endif
fpu1_argI <= iqentry_a0[n];
fpu1_dataready <= `VAL;
fpu1_ld <= TRUE;
7426,6 → 7710,21
`else
iqentry_a3[n];
`endif
`ifdef SUPPORT_PREDICATION
fpu2_pred <= iqentry_p_v[n] ? iqentry_pred[n] :
`ifdef FU_BYPASS
(iqentry_p_s[n] == alu0_id) ? alu0nyb[iqentry_preg[n]] :
(iqentry_p_s[n] == alu1_id) ? alu1nyb[iqentry_preg[n]] :
`endif
4'h0;
fpu2_argT <=
`ifdef FU_BYPASS
iqentry_aT_v[n] ? iqentry_aT[n]
: (iqentry_aT_s[n] == alu0_id) ? ralu0_bus : ralu1_bus;
`else
iqentry_aT[n];
`endif
`endif
fpu2_argI <= iqentry_a0[n];
fpu2_dataready <= `VAL;
fpu2_ld <= TRUE;
7443,8 → 7742,8
fcu_pc <= iqentry_pc[n];
fcu_nextpc <= iqentry_pc[n] + iqentry_insln[n];
fcu_pt <= iqentry_pt[n];
fcu_brdisp <= iqentry_instr[n][6] ? {{36{iqentry_instr[n][47]}},iqentry_instr[n][47:23],iqentry_instr[n][17:16],1'b0}
: {{52{iqentry_instr[n][31]}},iqentry_instr[n][31:23],iqentry_instr[n][17:16],1'b0};
fcu_brdisp <= iqentry_instr[n][6] ? {{37{iqentry_instr[n][47]}},iqentry_instr[n][47:23],iqentry_instr[n][17:16]}
: {{53{iqentry_instr[n][31]}},iqentry_instr[n][31:23],iqentry_instr[n][17:16]};
fcu_branch <= iqentry_br[n];
fcu_call <= IsCall(iqentry_instr[n])|iqentry_jal[n];
fcu_jal <= iqentry_jal[n];
7457,14 → 7756,11
: (iqentry_a1_s[n] == fpu1_id && `NUM_FPU > 0) ? rfpu1_bus
: ralu1_bus;
`ifdef SUPPORT_SMT
// fcu_argB <= iqentry_rti[n] ? epc0[iqentry_thrd[n]]
fcu_epc <= epc0[iqentry_thrd[n]];
fcu_argB <= iqentry_rti[n] ? epc0[iqentry_thrd[n]]
`else
fcu_epc <= epc0;
// fcu_argB <= iqentry_rti[n] ? epc0
fcu_argB <= iqentry_rti[n] ? epc0
`endif
fcu_argB <=
(iqentry_a2_v[n] ? iqentry_a2[n]
: (iqentry_a2_v[n] ? iqentry_a2[n]
: (iqentry_a2_s[n] == alu0_id) ? ralu0_bus
: (iqentry_a2_s[n] == fpu1_id && `NUM_FPU > 0) ? rfpu1_bus
: ralu1_bus);
7475,6 → 7771,21
: ralu1_bus[47:0]);
fcu_argC <= iqentry_a3_v[n] ? iqentry_a3[n]
: (iqentry_a3_s[n] == alu0_id) ? ralu0_bus : ralu1_bus;
`ifdef SUPPORT_PREDICATION
fcu_pred <= iqentry_p_v[n] ? iqentry_pred[n] :
`ifdef FU_BYPASS
(iqentry_p_s[n] == alu0_id) ? alu0nyb[iqentry_preg[n]] :
(iqentry_p_s[n] == alu1_id) ? alu1nyb[iqentry_preg[n]] :
`endif
4'h0;
fcu_argT <=
`ifdef FU_BYPASS
iqentry_aT_v[n] ? iqentry_aT[n]
: (iqentry_aT_s[n] == alu0_id) ? ralu0_bus : ralu1_bus;
`else
iqentry_aT[n];
`endif
`endif
fcu_argI <= iqentry_a0[n];
fcu_thrd <= iqentry_thrd[n];
fcu_dataready <= !IsWait(iqentry_instr[n]);
7516,21 → 7827,23
dram2 <= `DRAMSLOT_AVAIL;
 
// grab requests that have finished and put them on the dram_bus
// If stomping on the instruction don't place the value on the argument
// bus to be loaded.
 
if (dram0 == `DRAMREQ_READY && dram0_load) begin
dramA_v <= !iqentry_stomp[dram0_id[`QBITS]];
dramA_v <= `VAL;//!iqentry_stomp[dram0_id[`QBITS]];
dramA_id <= dram0_id;
dramA_exc <= dram0_exc;
dramA_bus <= fnDatiAlign(dram0_instr,dram0_addr,rdat0);
end
if (dram1 == `DRAMREQ_READY && dram1_load && `NUM_MEM > 1) begin
dramB_v <= !iqentry_stomp[dram1_id[`QBITS]];
dramB_v <= `VAL;//!iqentry_stomp[dram1_id[`QBITS]];
dramB_id <= dram1_id;
dramB_exc <= dram1_exc;
dramB_bus <= fnDatiAlign(dram1_instr,dram1_addr,rdat1);
end
if (dram2 == `DRAMREQ_READY && dram2_load && `NUM_MEM > 2) begin
dramC_v <= !iqentry_stomp[dram2_id[`QBITS]];
dramC_v <= `VAL;//!iqentry_stomp[dram2_id[`QBITS]];
dramC_id <= dram2_id;
dramC_exc <= dram2_exc;
dramC_bus <= fnDatiAlign(dram2_instr,dram2_addr,rdat2);
end
 
7544,12 → 7857,13
//
// determine if the instructions ready to issue can, in fact, issue.
// "ready" means that the instruction has valid operands but has not gone yet
for (n = 0; n < QENTRIES; n = n + 1)
if (memissue[n])
iqentry_memissue[n] <= `VAL;
//iqentry_memissue <= memissue;
iqentry_memissue <= memissue;
missue_count <= issue_count;
 
if (dram0 == `DRAMSLOT_AVAIL) dram0_exc <= `FLT_NONE;
if (dram1 == `DRAMSLOT_AVAIL) dram1_exc <= `FLT_NONE;
if (dram2 == `DRAMSLOT_AVAIL) dram2_exc <= `FLT_NONE;
 
for (n = 0; n < QENTRIES; n = n + 1)
if (iqentry_v[n] && iqentry_stomp[n]) begin
iqentry_iv[n] <= `INV;
7561,40 → 7875,32
// iqentry_out[n] <= `INV;
// iqentry_done[n] <= `INV;
// iqentry_cmt[n] <= `INV;
if (dram0_id[`QBITS] == n[`QBITS]) begin
if (dram0==`DRAMSLOT_HASBUS)
wb_nack();
dram0_load <= `FALSE;
dram0_store <= `FALSE;
dram0_rmw <= `FALSE;
dram0 <= `DRAMSLOT_AVAIL;
end
if (dram1_id[`QBITS] == n[`QBITS]) begin
if (dram1==`DRAMSLOT_HASBUS)
wb_nack();
dram1_load <= `FALSE;
dram1_store <= `FALSE;
dram1_rmw <= `FALSE;
dram1 <= `DRAMSLOT_AVAIL;
end
if (dram2_id[`QBITS] == n[`QBITS]) begin
if (dram2==`DRAMSLOT_HASBUS)
wb_nack();
dram2_load <= `FALSE;
dram2_store <= `FALSE;
dram2_rmw <= `FALSE;
dram2 <= `DRAMSLOT_AVAIL;
end
end
 
// A store can't be stomped on, because a store won't issue unless there are
// no instructions that could change the flow of execution before it. Meaning
// stomp would never be true for a store.
// A load could be stomped on, but the memory access is allowed to complete
// to ensure the bus acknowledge doesn't get out of sync.
/*
if (iqentry_stomp[dram0_id[`QBITS]]) begin
if (dram0==`DRAMSLOT_HASBUS)
wb_nack();
dram0_load <= `FALSE;
dram0_store <= `FALSE;
dram0_rmw <= `FALSE;
dram0 <= `DRAMSLOT_AVAIL;
end
if (iqentry_stomp[dram1_id[`QBITS]]) begin
if (dram1==`DRAMSLOT_HASBUS)
wb_nack();
dram1_load <= `FALSE;
dram1_store <= `FALSE;
dram1_rmw <= `FALSE;
dram1 <= `DRAMSLOT_AVAIL;
end
if (iqentry_stomp[dram2_id[`QBITS]]) begin
if (dram2==`DRAMSLOT_HASBUS)
wb_nack();
dram2_load <= `FALSE;
dram2_store <= `FALSE;
dram2_rmw <= `FALSE;
dram2 <= `DRAMSLOT_AVAIL;
end
*/
 
if (last_issue0 < QENTRIES)
tDram0Issue(last_issue0);
if (last_issue1 < QENTRIES)
7614,7 → 7920,7
else
cmt_timer <= 12'd0;
 
if (cmt_timer==12'd1000 && icstate==IDLE) begin
if (cmt_timer==12'd1000) begin
iqentry_state[heads[0]] <= IQS_CMT;
iqentry_exc[heads[0]] <= `FLT_CMT;
cmt_timer <= 12'd0;
7749,7 → 8055,6
L1_wr0 <= FALSE;
L1_wr1 <= FALSE;
L1_wr2 <= FALSE;
L1_en <= 10'h000;
L1_invline <= FALSE;
icnxt <= FALSE;
L2_nxt <= FALSE;
7814,30 → 8119,34
// L2 cache.
IC_WaitL2:
if (ihitL2 && picstate==IC3a) begin
L1_en <= 10'h3FF;
L1_en <= 9'h1FF;
L1_wr0 <= TRUE;
L1_wr1 <= TRUE && `WAYS > 1;
L1_wr2 <= TRUE && `WAYS > 2;
// L1_adr <= L2_adr;
// L1_dati is loaded during an L2 icache load operation
// if (picstate==IC3a)
L1_dati <= L2_dato;
L1_adr <= L2_adr;
L2_rdat <= L2_dato;
icstate <= IC5;
end
else if (bstate!=B_ICacheNack)
;
else begin
L1_en <= 10'h3FF;
L1_en <= 9'h1FF;
L1_wr0 <= TRUE;
L1_wr1 <= TRUE && `WAYS > 1;
L1_wr2 <= TRUE && `WAYS > 2;
// L1_adr <= L2_adr;
// L1_dati set below while loading cache line
//L1_dati <= L2_dato;
L1_adr <= L2_adr;
// L2_rdat set below while loading cache line
//L2_rdat <= L2_dato;
icstate <= IC5;
end
 
IC5: icstate <= IC6;
IC5:
begin
L1_en <= 9'h000;
L1_wr0 <= FALSE;
L1_wr1 <= FALSE;
L1_wr2 <= FALSE;
icstate <= IC6;
end
IC6: icstate <= IC7;
IC7: icstate <= IC_Next;
IC_Next:
7848,7 → 8157,7
default: icstate <= IDLE;
endcase
 
if (dram0_load)
if (mem1_available && dram0_load)
case(dram0)
`DRAMSLOT_AVAIL: ;
`DRAMSLOT_BUSY:
7873,22 → 8182,22
// dram0_load <= `FALSE;
// end
3'd4:
// if (iqentry_v[dram0_id[`QBITS]] && !iqentry_stomp[dram0_id[`QBITS]]) begin
if (iqentry_v[dram0_id[`QBITS]] && !iqentry_stomp[dram0_id[`QBITS]]) begin
if (dhit0)
dram0 <= `DRAMREQ_READY;
else
dram0 <= `DRAMSLOT_REQBUS;
// end
// else begin
// dram0 <= `DRAMSLOT_AVAIL;
// dram0_load <= `FALSE;
// end
end
else begin
dram0 <= `DRAMSLOT_AVAIL;
dram0_load <= `FALSE;
end
`DRAMSLOT_REQBUS: ;
`DRAMSLOT_HASBUS: ;
`DRAMREQ_READY: dram0 <= `DRAMSLOT_AVAIL;
endcase
 
if (dram1_load && `NUM_MEM > 1)
if (mem2_available && dram1_load && `NUM_MEM > 1)
case(dram1)
`DRAMSLOT_AVAIL: ;
`DRAMSLOT_BUSY:
7898,22 → 8207,22
3'd3:
dram1 <= dram1 + 3'd1;
3'd4:
// if (iqentry_v[dram1_id[`QBITS]] && !iqentry_stomp[dram1_id[`QBITS]]) begin
if (iqentry_v[dram1_id[`QBITS]] && !iqentry_stomp[dram1_id[`QBITS]]) begin
if (dhit1)
dram1 <= `DRAMREQ_READY;
else
dram1 <= `DRAMSLOT_REQBUS;
// end
/* else begin
end
else begin
dram1 <= `DRAMSLOT_AVAIL;
dram1_load <= `FALSE;
end*/
end
`DRAMSLOT_REQBUS: ;
`DRAMSLOT_HASBUS: ;
`DRAMREQ_READY: dram1 <= `DRAMSLOT_AVAIL;
endcase
 
if (dram2_load && `NUM_MEM > 2)
if (mem3_available && dram2_load && `NUM_MEM > 2)
case(dram2)
`DRAMSLOT_AVAIL: ;
`DRAMSLOT_BUSY:
7923,16 → 8232,16
3'd3:
dram2 <= dram2 + 3'd1;
3'd4:
// if (iqentry_v[dram2_id[`QBITS]] && !iqentry_stomp[dram2_id[`QBITS]]) begin
if (iqentry_v[dram2_id[`QBITS]] && !iqentry_stomp[dram2_id[`QBITS]]) begin
if (dhit2)
dram2 <= `DRAMREQ_READY;
else
dram2 <= `DRAMSLOT_REQBUS;
// end
/* else begin
end
else begin
dram2 <= `DRAMSLOT_AVAIL;
dram2_load <= `FALSE;
end*/
end
`DRAMSLOT_REQBUS: ;
`DRAMSLOT_HASBUS: ;
`DRAMREQ_READY: dram2 <= `DRAMSLOT_AVAIL;
7956,16 → 8265,20
end
if (mem2_available && `NUM_MEM > 1 && dram1 == `DRAMSLOT_BUSY && dram1_store) begin
if ((alu0_v && (dram1_id[`QBITS] == alu0_id[`QBITS])) || (alu1_v && (dram1_id[`QBITS] == alu1_id[`QBITS]))) panic <= `PANIC_MEMORYRACE;
// iqentry_done[ dram1_id[`QBITS] ] <= `VAL;
// iqentry_out[ dram1_id[`QBITS] ] <= `INV;
iqentry_state[ dram1_id[`QBITS] ] <= IQS_DONE;
end
if (mem3_available && `NUM_MEM > 2 && dram2 == `DRAMSLOT_BUSY && dram2_store) begin
if ((alu0_v && (dram2_id[`QBITS] == alu0_id[`QBITS])) || (alu1_v && (dram2_id[`QBITS] == alu1_id[`QBITS]))) panic <= `PANIC_MEMORYRACE;
// iqentry_done[ dram2_id[`QBITS] ] <= `VAL;
// iqentry_out[ dram2_id[`QBITS] ] <= `INV;
iqentry_state[ dram2_id[`QBITS] ] <= IQS_DONE;
end
`endif
 
`ifdef HAS_WB
if (dram0==`DRAMSLOT_BUSY && dram0_store) begin
if (mem1_available && dram0==`DRAMSLOT_BUSY && dram0_store && !iqentry_stomp[dram0_id[`QBITS]]) begin
if (wbptr<`WB_DEPTH-1) begin
dram0 <= `DRAMSLOT_AVAIL;
dram0_instr[`INSTRUCTION_OP] <= `NOP;
7982,7 → 8295,7
iqentry_state[ dram0_id[`QBITS] ] <= IQS_DONE;
end
end
else if (dram1==`DRAMSLOT_BUSY && dram1_store && `NUM_MEM > 1) begin
else if (mem2_available && dram1==`DRAMSLOT_BUSY && dram1_store && !iqentry_stomp[dram1_id[`QBITS]] && `NUM_MEM > 1) begin
if (wbptr<`WB_DEPTH-1) begin
dram1 <= `DRAMSLOT_AVAIL;
dram1_instr[`INSTRUCTION_OP] <= `NOP;
7997,7 → 8310,7
iqentry_state[ dram1_id[`QBITS] ] <= IQS_DONE;
end
end
else if (dram2==`DRAMSLOT_BUSY && dram2_store && `NUM_MEM > 2) begin
else if (mem3_available && dram2==`DRAMSLOT_BUSY && dram2_store && !iqentry_stomp[dram2_id[`QBITS]] && `NUM_MEM > 2) begin
if (wbptr<`WB_DEPTH-1) begin
dram2 <= `DRAMSLOT_AVAIL;
dram2_instr[`INSTRUCTION_OP] <= `NOP;
8058,13 → 8371,13
end
 
`endif
if (~|wb_v && dram0==`DRAMSLOT_BUSY && dram0_rmw) begin
if (~|wb_v && mem1_available && dram0==`DRAMSLOT_BUSY && dram0_rmw) begin
`ifdef SUPPORT_DBG
if (dbg_smatch0|dbg_lmatch0) begin
dramA_v <= `TRUE;
dramA_id <= dram0_id;
dramA_exc <= `FLT_DBG;
dramA_bus <= 64'h0;
iqentry_exc[dram0_id[`QBITS]] <= `FLT_DBG;
dram0 <= `DRAMSLOT_AVAIL;
end
else
8086,13 → 8399,13
bstate <= B12;
end
end
else if (~|wb_v && dram1==`DRAMSLOT_BUSY && dram1_rmw && `NUM_MEM > 1) begin
else if (~|wb_v && mem2_available && dram1==`DRAMSLOT_BUSY && dram1_rmw && `NUM_MEM > 1) begin
`ifdef SUPPORT_DBG
if (dbg_smatch1|dbg_lmatch1) begin
dramB_v <= `TRUE;
dramB_id <= dram1_id;
dramB_exc <= `FLT_DBG;
dramB_bus <= 64'h0;
iqentry_exc[dram1_id[`QBITS]] <= `FLT_DBG;
dram1 <= `DRAMSLOT_AVAIL;
end
else
8114,13 → 8427,13
bstate <= B12;
end
end
else if (~|wb_v && dram2==`DRAMSLOT_BUSY && dram2_rmw && `NUM_MEM > 2) begin
else if (~|wb_v && mem3_available && dram2==`DRAMSLOT_BUSY && dram2_rmw && `NUM_MEM > 2) begin
`ifdef SUPPORT_DBG
if (dbg_smatch2|dbg_lmatch2) begin
dramC_v <= `TRUE;
dramC_id <= dram2_id;
dramC_exc <= `FLT_DBG;
dramC_bus <= 64'h0;
iqentry_exc[dram2_id[`QBITS]] <= `FLT_DBG;
dram2 <= `DRAMSLOT_AVAIL;
end
else
8144,13 → 8457,13
end
`ifndef HAS_WB
// Check write buffer enable ?
else if (dram0==`DRAMSLOT_BUSY && dram0_store) begin
else if (mem1_available && dram0==`DRAMSLOT_BUSY && dram0_store) begin
`ifdef SUPPORT_DBG
if (dbg_smatch0) begin
dramA_v <= `TRUE;
dramA_id <= dram0_id;
dramA_exc <= `FLT_DBG;
dramA_bus <= 64'h0;
iqentry_exc[dram0_id[`QBITS]] <= `FLT_DBG;
dram0 <= `DRAMSLOT_AVAIL;
end
else
8162,7 → 8475,6
dram0_instr[`INSTRUCTION_OP] <= `NOP;
cyc <= `HIGH;
stb_o <= `HIGH;
we <= `HIGH;
sel_o <= fnSelect(dram0_instr,dram0_addr);
vadr <= dram0_addr;
dat_o <= fnDato(dram0_instr,dram0_data);
8173,13 → 8485,13
// cr_o <= IsSWC(dram0_instr);
end
end
else if (dram1==`DRAMSLOT_BUSY && dram1_store && `NUM_MEM > 1) begin
else if (mem2_available && dram1==`DRAMSLOT_BUSY && dram1_store && `NUM_MEM > 1) begin
`ifdef SUPPORT_DBG
if (dbg_smatch1) begin
dramB_v <= `TRUE;
dramB_id <= dram1_id;
dramB_exc <= `FLT_DBG;
dramB_bus <= 64'h0;
iqentry_exc[dram1_id[`QBITS]] <= `FLT_DBG;
dram1 <= `DRAMSLOT_AVAIL;
end
else
8191,7 → 8503,6
dram1_instr[`INSTRUCTION_OP] <= `NOP;
cyc <= `HIGH;
stb_o <= `HIGH;
we <= `HIGH;
sel_o <= fnSelect(dram1_instr,dram1_addr);
vadr <= dram1_addr;
dat_o <= fnDato(dram1_instr,dram1_data);
8202,13 → 8513,13
// cr_o <= IsSWC(dram0_instr);
end
end
else if (dram2==`DRAMSLOT_BUSY && dram2_store && `NUM_MEM > 2) begin
else if (mem3_available && dram2==`DRAMSLOT_BUSY && dram2_store && `NUM_MEM > 2) begin
`ifdef SUPPORT_DBG
if (dbg_smatch2) begin
dramC_v <= `TRUE;
dramC_id <= dram2_id;
dramC_exc <= `FLT_DBG;
dramC_bus <= 64'h0;
iqentry_exc[dram2_id[`QBITS]] <= `FLT_DBG;
dram2 <= `DRAMSLOT_AVAIL;
end
else
8220,7 → 8531,6
dram2_instr[`INSTRUCTION_OP] <= `NOP;
cyc <= `HIGH;
stb_o <= `HIGH;
we <= `HIGH;
sel_o <= fnSelect(dram2_instr,dram2_addr);
vadr <= dram2_addr;
dat_o <= fnDato(dram2_instr,dram2_data);
8233,13 → 8543,13
end
`endif
// Check for read misses on the data cache
else if (~|wb_v && !dram0_unc && dram0==`DRAMSLOT_REQBUS && dram0_load) begin
else if (~|wb_v && mem1_available && !dram0_unc && dram0==`DRAMSLOT_REQBUS && dram0_load) begin
`ifdef SUPPORT_DBG
if (dbg_lmatch0) begin
dramA_v <= `TRUE;
dramA_id <= dram0_id;
dramA_exc <= `FLT_DBG;
dramA_bus <= 64'h0;
iqentry_exc[dram0_id[`QBITS]] <= `FLT_DBG;
dram0 <= `DRAMSLOT_AVAIL;
end
else
8251,13 → 8561,13
bstate <= B_DCacheLoadStart;
end
end
else if (~|wb_v && !dram1_unc && dram1==`DRAMSLOT_REQBUS && dram1_load && `NUM_MEM > 1) begin
else if (~|wb_v && mem2_available && !dram1_unc && dram1==`DRAMSLOT_REQBUS && dram1_load && `NUM_MEM > 1) begin
`ifdef SUPPORT_DBG
if (dbg_lmatch1) begin
dramB_v <= `TRUE;
dramB_id <= dram1_id;
dramB_exc <= `FLT_DBG;
dramB_bus <= 64'h0;
iqentry_exc[dram1_id[`QBITS]] <= `FLT_DBG;
dram1 <= `DRAMSLOT_AVAIL;
end
else
8269,13 → 8579,13
bstate <= B_DCacheLoadStart;
end
end
else if (~|wb_v && !dram2_unc && dram2==`DRAMSLOT_REQBUS && dram2_load && `NUM_MEM > 2) begin
else if (~|wb_v && mem3_available && !dram2_unc && dram2==`DRAMSLOT_REQBUS && dram2_load && `NUM_MEM > 2) begin
`ifdef SUPPORT_DBG
if (dbg_lmatch2) begin
dramC_v <= `TRUE;
dramC_id <= dram2_id;
dramC_exc <= `FLT_DBG;
dramC_bus <= 64'h0;
iqentry_exc[dram2_id[`QBITS]] <= `FLT_DBG;
dram2 <= `DRAMSLOT_AVAIL;
end
else
8287,13 → 8597,13
bstate <= B_DCacheLoadStart;
end
end
else if (~|wb_v && dram0_unc && dram0==`DRAMSLOT_BUSY && dram0_load) begin
else if (~|wb_v && mem1_available && dram0_unc && dram0==`DRAMSLOT_BUSY && dram0_load) begin
`ifdef SUPPORT_DBG
if (dbg_lmatch0) begin
dramA_v <= `TRUE;
dramA_id <= dram0_id;
dramA_exc <= `FLT_DBG;
dramA_bus <= 64'h0;
iqentry_exc[dram0_id[`QBITS]] <= `FLT_DBG;
dram0 <= `DRAMSLOT_AVAIL;
end
else
8306,17 → 8616,16
vadr <= {dram0_addr[AMSB:3],3'b0};
sr_o <= IsLWR(dram0_instr);
ol_o <= dram0_ol;
dccnt <= 2'd0;
bstate <= B_DLoadAck;
end
end
else if (~|wb_v && dram1_unc && dram1==`DRAMSLOT_BUSY && dram1_load && `NUM_MEM > 1) begin
else if (~|wb_v && mem2_available && dram1_unc && dram1==`DRAMSLOT_BUSY && dram1_load && `NUM_MEM > 1) begin
`ifdef SUPPORT_DBG
if (dbg_lmatch1) begin
dramB_v <= `TRUE;
dramB_id <= dram1_id;
dramB_exc <= `FLT_DBG;
dramB_bus <= 64'h0;
iqentry_exc[dram1_id[`QBITS]] <= `FLT_DBG;
dram1 <= `DRAMSLOT_AVAIL;
end
else
8329,17 → 8638,16
vadr <= {dram1_addr[AMSB:3],3'b0};
sr_o <= IsLWR(dram1_instr);
ol_o <= dram1_ol;
dccnt <= 2'd0;
bstate <= B_DLoadAck;
end
end
else if (~|wb_v && dram2_unc && dram2==`DRAMSLOT_BUSY && dram2_load && `NUM_MEM > 2) begin
else if (~|wb_v && mem3_available && dram2_unc && dram2==`DRAMSLOT_BUSY && dram2_load && `NUM_MEM > 2) begin
`ifdef SUPPORT_DBG
if (dbg_lmatch2) begin
dramC_v <= `TRUE;
dramC_id <= dram2_id;
dramC_exc <= `FLT_DBG;
dramC_bus <= 64'h0;
iqentry_exc[dram2_id[`QBITS]] <= `FLT_DBG;
dram2 <= 2'd0;
end
else
8352,13 → 8660,11
vadr <= {dram2_addr[AMSB:3],3'b0};
sr_o <= IsLWR(dram2_instr);
ol_o <= dram2_ol;
dccnt <= 2'd0;
bstate <= B_DLoadAck;
end
end
// Check for L2 cache miss
else if (~|wb_v && !ihitL2 && !acki)
begin
else if (~|wb_v && !ihitL2 && !acki) begin
cti_o <= 3'b001;
bte_o <= 2'b00;//2'b01; // 4 beat burst wrap
cyc <= `HIGH;
8366,14 → 8672,10
sel_o <= 8'hFF;
icl_o <= `HIGH;
iccnt <= 3'd0;
icack <= 1'b0;
// adr_o <= icwhich ? {pc0[31:5],5'b0} : {pc1[31:5],5'b0};
// L2_adr <= icwhich ? {pc0[31:5],5'b0} : {pc1[31:5],5'b0};
vadr <= {pcr[7:0],L1_adr[AMSB:5],5'h0};
`ifdef SUPPORT_SMT
`else
ol_o <= ol;//???
`endif
ol_o <= ol[0];
L2_adr <= {pcr[7:0],L1_adr[AMSB:5],5'h0};
L2_xsel <= 1'b0;
bstate <= B_ICacheAck;
8406,7 → 8708,7
wb_v <= 1'b0; // Invalidate write buffer if there is a problem with the store
wb_en <= `FALSE; // and disable write buffer
end
iqentry_state[n] <= IQS_CMT;
iqentry_state[n] <= IQS_CMT;
iqentry_aq[n] <= `INV;
end
end
8436,7 → 8738,7
default: ;
endcase
`endif
bstate <= B_LSNAck;
bstate <= B19;
end
end
 
8476,7 → 8778,7
 
// Data cache load terminal state
B_DCacheLoadAck:
if (acki|err_i|tlb_miss|rdv_i) begin
if (ack_i|err_i|tlb_miss|rdv_i) begin
if (!bok_i) begin
stb_o <= `LOW;
bstate <= B_DCacheLoadStb;
8485,13 → 8787,20
rdvq <= rdvq | rdv_i;
if (!preload) // A preload instruction ignores any error
case(bwhich)
2'd0: iqentry_exc[dram0_id[`QBITS]] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DBE : rdv_i ? `FLT_DRF : `FLT_NONE;
2'd1: iqentry_exc[dram1_id[`QBITS]] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DBE : rdv_i ? `FLT_DRF : `FLT_NONE;
2'd2: iqentry_exc[dram2_id[`QBITS]] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DBE : rdv_i ? `FLT_DRF : `FLT_NONE;
2'd0: if (err_i|rdv_i|tlb_miss) begin
iqentry_exc[dram0_id[`QBITS]] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DBE : `FLT_DRF;
end
2'd1: if ((err_i|rdv_i|tlb_miss) && `NUM_MEM > 1) begin
iqentry_exc[dram1_id[`QBITS]] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DBE : `FLT_DRF;
end
2'd2: if ((err_i|rdv_i|tlb_miss) && `NUM_MEM > 2) begin
iqentry_exc[dram2_id[`QBITS]] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DBE : `FLT_DRF;
end
default: ;
endcase
dccnt <= dccnt + 2'd1;
vadr[4:3] <= vadr[4:3] + 2'd1;
bstate <= B_DCacheLoadAck;
if (dccnt==2'd2)
cti_o <= 3'b111;
if (dccnt==2'd3) begin
8499,6 → 8808,7
bstate <= B_DCacheLoadWait1;
end
end
 
B_DCacheLoadStb:
begin
stb_o <= `HIGH;
8507,46 → 8817,44
B_DCacheLoadWait1: bstate <= B_DCacheLoadWait2;
B_DCacheLoadWait2: bstate <= B_DCacheLoadResetBusy;
//B_DCacheLoadWait3: bstate <= B_DCacheLoadResetBusy;
// There could be more than one memory cycle active. We reset the state
// of all the machines to retest for a hit because otherwise sequential
// loading of memory will cause successive machines to miss resulting in
// multiple dcache loads that aren't needed.
B_DCacheLoadResetBusy:
begin
B_DCacheLoadResetBusy: begin
// There could be more than one memory cycle active. We reset the state
// of all the machines to retest for a hit because otherwise sequential
// loading of memory will cause successive machines to miss resulting in
// multiple dcache loads that aren't needed.
if (dram0 != `DRAMSLOT_AVAIL && dram0_addr[AMSB:5]==vadr[AMSB:5]) dram0 <= `DRAMSLOT_BUSY; // causes retest of dhit
if (dram1 != `DRAMSLOT_AVAIL && dram1_addr[AMSB:5]==vadr[AMSB:5]) dram1 <= `DRAMSLOT_BUSY;
if (dram2 != `DRAMSLOT_AVAIL && dram2_addr[AMSB:5]==vadr[AMSB:5]) dram2 <= `DRAMSLOT_BUSY;
bstate <= BIDLE;
end
if (~ack_i) bstate <= BIDLE;
end
 
// Ack state for instruction cache load
// Once the first ack is received in burst mode, further acks are not necessary
// as the core counts the number of data items. Occasionally missing acks were
// causing a problem.
B_ICacheAck:
if (acki|err_i|tlb_miss|exv_i|icack) begin
if (ack_i|err_i|tlb_miss|exv_i) begin
if (!bok_i) begin
stb_o <= `LOW;
bstate <= B_ICacheNack2;
end
else
icack <= 1'b1;
errq <= errq | err_i;
exvq <= exvq | exv_i;
// L1_en <= 9'h3 << {L2_xsel,L2_adr[4:3],1'b0};
// L1_wr0 <= `TRUE;
// L1_wr1 <= `TRUE;
// L1_adr <= L2_adr;
if (tlb_miss) begin
L1_dati <= {19{`INSN_FLT_TLB}};
L2_rdat <= {18{`INSN_FLT_TLB}};
wb_nack();
icl_o <= `LOW;
bstate <= B_ICacheNack;
end
else if (exv_i) begin
L1_dati <= {19{`INSN_FLT_EXF}};
L2_rdat <= {18{`INSN_FLT_EXF}};
wb_nack();
icl_o <= `LOW;
bstate <= B_ICacheNack;
end
else if (err_i) begin
L1_dati <= {19{`INSN_FLT_IBE}};
L2_rdat <= {18{`INSN_FLT_IBE}};
wb_nack();
icl_o <= `LOW;
bstate <= B_ICacheNack;
8553,14 → 8861,16
end
else
case(iccnt)
3'd0: L1_dati[63:0] <= dat_i;
3'd1: L1_dati[127:64] <= dat_i;
3'd2: L1_dati[191:128] <= dat_i;
3'd3: L1_dati[255:192] <= dat_i;
3'd4: L1_dati[305:256] <= {2'b00,dat_i[47:0]};
default: L1_dati <= L1_dati;
3'd0: L2_rdat[63:0] <= dat_i;
3'd1: L2_rdat[127:64] <= dat_i;
3'd2: L2_rdat[191:128] <= dat_i;
3'd3: L2_rdat[255:192] <= dat_i;
3'd4: L2_rdat[297:256] <= {2'b00,dat_i[39:0]};
default: ;
endcase
//L2_rdat <= {dat_i[31:0],{4{dat_i}}};
iccnt <= iccnt + 3'd1;
//stb_o <= `LOW;
if (iccnt==3'd3)
cti_o <= 3'b111;
if (iccnt==3'd4) begin
8582,17 → 8892,19
end
B_ICacheNack:
begin
L1_wr0 <= `FALSE;
L1_wr1 <= `FALSE;
L1_wr2 <= `FALSE;
L1_en <= 9'h1FF;
L2_xsel <= 1'b0;
if (~acki) begin
if (~ack_i) begin
icl_ctr <= icl_ctr + 40'd1;
bstate <= BIDLE;
L2_nxt <= TRUE;
vadr <= 32'hCCCCCCC8;
end
end
 
B12:
if (acki|err_i|tlb_miss|rdv_i) begin
if (ack_i|err_i|tlb_miss|rdv_i) begin
if (isCAS) begin
iqentry_res [ casid[`QBITS] ] <= (dat_i == cas);
iqentry_exc [ casid[`QBITS] ] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DRF : rdv_i ? `FLT_DRF : `FLT_NONE;
8616,7 → 8928,7
2'b10: dram2 <= `DRAMREQ_READY;
default: ;
endcase
bstate <= B_LSNAck;
bstate <= B19;
end
end
else if (isRMW) begin
8644,31 → 8956,13
 
// Regular load
B_DLoadAck:
if (acki|err_i|tlb_miss|rdv_i) begin
if (ack_i|err_i|tlb_miss|rdv_i) begin
wb_nack();
sr_o <= `LOW;
case(dccnt)
2'd0: xdati[63:0] <= dat_i;
2'd1: xdati[127:64] <= dat_i;
endcase
xdati <= dat_i;
case(bwhich)
2'b00: begin
if (dram0_memsize==hexi) begin
if (dccnt==2'd1) begin
dram0 <= `DRAMREQ_READY;
iqentry_seg_base[dram0_id[`QBITS]] <= xdati[63:0];
iqentry_seg_acr[dram0_id[`QBITS]] <= dat_i;
end
else begin
dccnt <= dccnt + 2'd1;
cyc <= `HIGH;
sel_o <= 8'hFF;
vadr <= vadr + 64'd8;
bstate <= B_DLoadNack;
end
end
else
dram0 <= `DRAMREQ_READY;
dram0 <= `DRAMREQ_READY;
iqentry_exc [ dram0_id[`QBITS] ] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DRF : rdv_i ? `FLT_DRF : `FLT_NONE;
end
2'b01: if (`NUM_MEM > 1) begin
8681,13 → 8975,8
end
default: ;
endcase
bstate <= B_LSNAck;
bstate <= B19;
end
B_DLoadNack:
if (~acki) begin
stb_o <= `HIGH;
bstate <= B_DLoadAck;
end
 
// Three cycles to determine if there's a cache hit during a store.
B16: begin
8699,15 → 8988,15
endcase
end
B17: bstate <= B18;
B18: bstate <= B_LSNAck;
B_LSNAck:
begin
bstate <= BIDLE;
StoreAck1 <= `FALSE;
isStore <= `FALSE;
end
B18: bstate <= B19;
B19: if (~acki) begin
sel_o <= 8'h00;
bstate <= BIDLE;
StoreAck1 <= `FALSE;
isStore <= `FALSE;
end
B20:
if (~acki) begin
if (~ack_i) begin
stb_o <= `HIGH;
we <= `HIGH;
dat_o <= fnDato(rmw_instr,rmw_res);
8714,7 → 9003,7
bstate <= B_StoreAck;
end
B21:
if (~acki) begin
if (~ack_i) begin
stb_o <= `HIGH;
bstate <= B12;
end
8721,40 → 9010,39
default: bstate <= BIDLE;
endcase
 
 
if (!branchmiss) begin
case({fetchbuf0_v, fetchbuf1_v})
2'b00: ;
2'b01:
if (canq1) begin
tail0 <= (tail0+2'd1) % QENTRIES;
tail1 <= (tail1+2'd1) % QENTRIES;
end
2'b10:
if (canq1) begin
tail0 <= (tail0+2'd1) % QENTRIES;
tail1 <= (tail1+2'd1) % QENTRIES;
end
2'b11:
if (canq1) begin
if (IsBranch(fetchbuf0_instr) && predict_taken0 && fetchbuf0_thrd==fetchbuf1_thrd) begin
tail0 <= (tail0+2'd1) % QENTRIES;
tail1 <= (tail1+2'd1) % QENTRIES;
end
else begin
case({fetchbuf0_v, fetchbuf1_v})
2'b00: ;
2'b01:
if (canq1) begin
tail0 <= (tail0+2'd1) % QENTRIES;
tail1 <= (tail1+2'd1) % QENTRIES;
end
2'b10:
if (canq1) begin
tail0 <= (tail0+2'd1) % QENTRIES;
tail1 <= (tail1+2'd1) % QENTRIES;
end
2'b11:
if (canq1) begin
if (IsBranch(fetchbuf0_instr) && predict_taken0 && fetchbuf0_thrd==fetchbuf1_thrd) begin
tail0 <= (tail0+2'd1) % QENTRIES;
tail1 <= (tail1+2'd1) % QENTRIES;
end
else begin
if (vqe0 < vl || !IsVector(fetchbuf0_instr)) begin
if (canq2) begin
tail0 <= (tail0 + 3'd2) % QENTRIES;
tail1 <= (tail1 + 3'd2) % QENTRIES;
end
else begin // queued1 will be true
tail0 <= (tail0+2'd1) % QENTRIES;
tail1 <= (tail1+2'd1) % QENTRIES;
end
end
end
end
endcase
if (canq2) begin
tail0 <= (tail0 + 3'd2) % QENTRIES;
tail1 <= (tail1 + 3'd2) % QENTRIES;
end
else begin // queued1 will be true
tail0 <= (tail0+2'd1) % QENTRIES;
tail1 <= (tail1+2'd1) % QENTRIES;
end
end
end
end
endcase
end
else if (!thread_en) begin // if branchmiss
for (n = QENTRIES-1; n >= 0; n = n - 1)
8842,14 → 9130,7
(i[`QBITS]==heads[0])?"C":".",
(i[`QBITS]==tail0)?"Q":".",
i[`QBITS],
iqentry_state[i]==IQS_INVALID ? "-" :
iqentry_state[i]==IQS_QUEUED ? "Q" :
iqentry_state[i]==IQS_OUT ? "O" :
iqentry_state[i]==IQS_AGEN ? "A" :
iqentry_state[i]==IQS_MEM ? "M" :
iqentry_state[i]==IQS_DONE ? "D" :
iqentry_state[i]==IQS_CMT ? "C" : "?",
// iqentry_v[i] ? "v" : "-",
iqentry_v[i] ? "v" : "-",
iqentry_iv[i] ? "I" : "-",
iqentry_done[i]?"d":"-",
iqentry_out[i]?"o":"-",
8880,11 → 9161,11
$display("%d %h %h %c%h %o #",
dram2, dram2_addr, dram2_data, (IsFlowCtrl(dram2_instr) ? 98 : (IsMem(dram2_instr)) ? 109 : 97),
dram2_instr, dram2_id);
$display("%d %h %o #", dramA_v, dramA_bus, dramA_id);
$display("%d %h %o %h #", dramA_v, dramA_bus, dramA_id, dramA_exc);
if (`NUM_MEM > 1)
$display("%d %h %o #", dramB_v, dramB_bus, dramB_id);
$display("%d %h %o %h #", dramB_v, dramB_bus, dramB_id, dramB_exc);
if (`NUM_MEM > 2)
$display("%d %h %o #", dramC_v, dramC_bus, dramC_id);
$display("%d %h %o %h #", dramC_v, dramC_bus, dramC_id, dramC_exc);
$display("ALU");
$display("%d %h %h %h %c%h %o %h #",
alu0_dataready, alu0_argI, alu0_argA, alu0_argB,
9111,7 → 9392,6
end
end
*/
assign exc_o = iqentry_exc[heads[0]][7:0];
 
// Update the write buffer.
task wb_update;
9273,7 → 9553,6
iqentry_canex[nn] <= bus[`IB_CANEX];
iqentry_loadv[nn] <= bus[`IB_LOADV];
iqentry_load [nn] <= bus[`IB_LOAD];
iqentry_loadseg[nn]<= bus[`IB_LOADSEG];
iqentry_preload[nn]<= bus[`IB_PRELOAD];
iqentry_store[nn] <= bus[`IB_STORE];
iqentry_push [nn] <= bus[`IB_PUSH];
9293,6 → 9572,9
iqentry_sync [nn] <= bus[`IB_SYNC];
iqentry_fsync[nn] <= bus[`IB_FSYNC];
iqentry_rfw [nn] <= bus[`IB_RFW];
`ifdef SUPPORT_PREDICATION
iqentry_prfw [nn] <= bus[`IB_PRFW];
`endif
iqentry_we [nn] <= bus[`IB_WE];
end
endtask
9403,7 → 9685,7
input [`SNBITS] seqnum;
input [5:0] venno;
begin
iqentry_exc[tail] <= `FLT_NONE;
iqentry_exc[tail] <= `FLT_NONE;
`ifdef SUPPORT_DBG
if (dbg_imatchA)
iqentry_exc[tail] <= `FLT_DBG;
9420,7 → 9702,6
iqentry_insln[tail] <= fetchbuf0_insln;
iqentry_fc [tail] <= `INV;
iqentry_mem [tail] <= `INV;
iqentry_memissue[tail] <= `INV;
iqentry_alu [tail] <= `INV;
iqentry_fpu [tail] <= `INV;
iqentry_load [tail] <= `INV;
9487,7 → 9768,6
iqentry_insln[tail] <= fetchbuf1_insln;
iqentry_fc [tail] <= `INV;
iqentry_mem [tail] <= `INV;
iqentry_memissue[tail] <= `INV;
iqentry_alu [tail] <= `INV;
iqentry_fpu [tail] <= `INV;
iqentry_load [tail] <= `INV;
9542,71 → 9822,6
end
endtask
 
// ----------------------------------------------------------------------------
// exc
//
// Records the processor state for an exception taken by the instruction at
// queue entry 'head' and requests a redirect of instruction fetch.
//
// Inputs:
//   head    - index of the issue queue entry raising the exception
//   thread  - thread selector; only used to index per-thread state when
//             SUPPORT_SMT is defined (ignored otherwise)
//   causecd - 8-bit cause code, zero extended into the cause register
//
// Effects (all non-blocking assignments):
//   - excmiss is set and excmisspc is formed from tvec[0][AMSB:8], a zero
//     bit, the current 'ol' value and five zero bits.
//   - badaddr / bad_instr capture iqentry_ma[head] / iqentry_instr[head].
//   - epc0 receives iqentry_pc[head]; epc0..epc7 move down to epc1..epc8.
//   - im, ol, dl, cpl and `EXC_RGS are shifted into the low end of
//     im_stack, ol_stack, dl_stack, pl_stack, rs_stack and brs_stack.
//   - mstatus[5:4] and mstatus[13:6] are cleared, mstatus[19:14] <= `EXC_RGS.
//   - wb_en is set, sema[0] is cleared and ve_hold captures the vqe/vqet
//     values.
//   - With SUPPORT_DBG, dbg_ctrl[63] is shifted into dbg_ctrl[62:55] and
//     then cleared.
// ----------------------------------------------------------------------------
task exc;
input [`QBITS] head;
input thread;
input [7:0] causecd;
begin
	excmiss <= TRUE;
`ifdef SUPPORT_SMT
	excmisspc <= {tvec[3'd0][AMSB:8],1'b0,ol[thread],5'h00};
	excthrd <= iqentry_thrd[head];
	badaddr[{thread,2'd0}] <= iqentry_ma[head];
	bad_instr[{thread,2'd0}] <= iqentry_instr[head];
	// Push the faulting PC; older exception PCs each move one slot deeper.
	epc0[thread] <= iqentry_pc[head];
	epc1[thread] <= epc0[thread];
	epc2[thread] <= epc1[thread];
	epc3[thread] <= epc2[thread];
	epc4[thread] <= epc3[thread];
	epc5[thread] <= epc4[thread];
	epc6[thread] <= epc5[thread];
	epc7[thread] <= epc6[thread];
	epc8[thread] <= epc7[thread];
	// Shift the current mode values into the low end of each stack.
	im_stack[thread] <= {im_stack[thread][27:0],im};
	ol_stack[thread] <= {ol_stack[thread][13:0],ol[thread]};
	dl_stack[thread] <= {dl_stack[thread][13:0],dl[thread]};
	pl_stack[thread] <= {pl_stack[thread][55:0],cpl[thread]};
	rs_stack[thread] <= {rs_stack[thread][59:0],`EXC_RGS};
	brs_stack[thread] <= {brs_stack[thread][59:0],`EXC_RGS};
	cause[{thread,2'd0}] <= {8'd0,causecd};
	mstatus[thread][5:4] <= 2'd0;
	mstatus[thread][13:6] <= 8'h00;
	mstatus[thread][19:14] <= `EXC_RGS;
`else
	excmisspc <= {tvec[3'd0][AMSB:8],1'b0,ol,5'h00};
	excthrd <= 1'b0;
	badaddr[{1'b0,2'd0}] <= iqentry_ma[head];
	bad_instr[3'd0] <= iqentry_instr[head];
	// Push the faulting PC; older exception PCs each move one slot deeper.
	epc0 <= iqentry_pc[head];
	epc1 <= epc0;
	epc2 <= epc1;
	epc3 <= epc2;
	epc4 <= epc3;
	epc5 <= epc4;
	epc6 <= epc5;
	epc7 <= epc6;
	epc8 <= epc7;
	// Shift the current mode values into the low end of each stack.
	im_stack <= {im_stack[27:0],im};
	ol_stack <= {ol_stack[13:0],ol};
	dl_stack <= {dl_stack[13:0],dl};
	pl_stack <= {pl_stack[55:0],cpl};
	rs_stack <= {rs_stack[59:0],`EXC_RGS};
	// brs_stack must shift its own history (matches the SUPPORT_SMT branch);
	// sourcing rs_stack here would overwrite brs_stack with rs_stack's contents.
	brs_stack <= {brs_stack[59:0],`EXC_RGS};
	cause[3'd0] <= {8'd0,causecd};
	mstatus[5:4] <= 2'd0;
	mstatus[13:6] <= 8'h00;
	mstatus[19:14] <= `EXC_RGS;
`endif
	wb_en <= `TRUE;
	sema[0] <= 1'b0;
	ve_hold <= {vqet1,10'd0,vqe1,10'd0,vqet0,10'd0,vqe0};
`ifdef SUPPORT_DBG
	// Shift dbg_ctrl[63] into the 8-bit history field, then clear it.
	dbg_ctrl[62:55] <= {dbg_ctrl[61:55],dbg_ctrl[63]};
	dbg_ctrl[63] <= FALSE;
`endif
end
endtask
 
// This task takes care of commits for things other than the register file.
task oddball_commit;
input v;
9617,10 → 9832,70
thread = iqentry_thrd[head];
if (v) begin
if (|iqentry_exc[head]) begin
exc(head,thread,iqentry_exc[head]);
excmiss <= TRUE;
`ifdef SUPPORT_SMT
excmisspc <= {tvec[3'd0][AMSB:8],1'b0,ol[thread],5'h00};
excthrd <= iqentry_thrd[head];
badaddr[{thread,2'd0}] <= iqentry_ma[head];
bad_instr[{thread,2'd0}] <= iqentry_instr[head];
epc0[thread] <= iqentry_pc[head];
epc1[thread] <= epc0[thread];
epc2[thread] <= epc1[thread];
epc3[thread] <= epc2[thread];
epc4[thread] <= epc3[thread];
epc5[thread] <= epc4[thread];
epc6[thread] <= epc5[thread];
epc7[thread] <= epc6[thread];
epc8[thread] <= epc7[thread];
im_stack[thread] <= {im_stack[thread][27:0],im};
ol_stack[thread] <= {ol_stack[thread][13:0],ol[thread]};
dl_stack[thread] <= {dl_stack[thread][13:0],dl[thread]};
pl_stack[thread] <= {pl_stack[thread][55:0],cpl[thread]};
rs_stack[thread] <= {rs_stack[thread][59:0],`EXC_RGS};
brs_stack[thread] <= {brs_stack[thread][59:0],`EXC_RGS};
cause[{thread,2'd0}] <= {8'd0,iqentry_exc[head]};
mstatus[thread][5:4] <= 2'd0;
mstatus[thread][13:6] <= 8'h00;
mstatus[thread][19:14] <= `EXC_RGS;
`else
excmisspc <= {tvec[3'd0][AMSB:8],1'b0,ol,5'h00};
excthrd <= 1'b0;
badaddr[{1'b0,2'd0}] <= iqentry_ma[head];
bad_instr[3'd0] <= iqentry_instr[head];
epc0 <= iqentry_pc[head];
epc1 <= epc0;
epc2 <= epc1;
epc3 <= epc2;
epc4 <= epc3;
epc5 <= epc4;
epc6 <= epc5;
epc7 <= epc6;
epc8 <= epc7;
im_stack <= {im_stack[27:0],im};
ol_stack <= {ol_stack[13:0],ol};
dl_stack <= {dl_stack[13:0],dl};
pl_stack <= {pl_stack[55:0],cpl};
rs_stack <= {rs_stack[59:0],`EXC_RGS};
brs_stack <= {rs_stack[59:0],`EXC_RGS};
cause[3'd0] <= {8'd0,iqentry_exc[head]};
mstatus[5:4] <= 2'd0;
mstatus[13:6] <= 8'h00;
mstatus[19:14] <= `EXC_RGS;
`endif
wb_en <= `TRUE;
sema[0] <= 1'b0;
ve_hold <= {vqet1,10'd0,vqe1,10'd0,vqet0,10'd0,vqe0};
`ifdef SUPPORT_DBG
dbg_ctrl[62:55] <= {dbg_ctrl[61:55],dbg_ctrl[63]};
dbg_ctrl[63] <= FALSE;
`endif
end
else
case(iqentry_instr[head][`INSTRUCTION_OP])
`ifdef SUPPORT_PREDICATION
`CMPI: pregs[{rgs,iqentry_tgt[head][3:0]}] <= which==2'd1 ? cmt1nyb[iqentry_tgt[head][3:0]] : cmt0nyb[iqentry_tgt[head][3:0]];//commit_bus[3:0];
`endif
 
`BRK:
// BRK is treated as a nop unless it's a software interrupt or a
// hardware interrupt at a higher priority than the current priority.
9716,6 → 9991,9
endcase
`R2:
case(iqentry_instr[head][`INSTRUCTION_S2])
`ifdef SUPPORT_PREDICATION
`CMP: pregs[{rgs,iqentry_tgt[head][3:0]}] <= which==2'd1 ? cmt1nyb[iqentry_tgt[head][3:0]] : cmt0nyb[iqentry_tgt[head][3:0]];//commit_bus[3:0];
`endif
`R1: case(iqentry_instr[head][20:16])
`CHAIN_OFF: cr0[18] <= 1'b0;
`CHAIN_ON: cr0[18] <= 1'b1;
9734,10 → 10012,9
`endif
`RTI: begin
excmiss <= TRUE;
`ifdef SUPPORT_SMT
excmisspc <= epc0[thread];
excthrd <= thread;
excmisspc <= iqentry_ma[head];
`ifdef SUPPORT_SMT
// excmisspc <= epc0[thread];
mstatus[thread][3:0] <= im_stack[thread][3:0];
mstatus[thread][5:4] <= ol_stack[thread][1:0];
mstatus[thread][21:20] <= dl_stack[thread][1:0];
9759,7 → 10036,8
epc7[thread] <= epc8[thread];
epc8[thread] <= {tvec[0][AMSB:8], 1'b0, ol[thread], 5'h0};
`else
// excmisspc <= epc0;
excmisspc <= epc0;
excthrd <= thread;
mstatus[3:0] <= im_stack[3:0];
mstatus[5:4] <= ol_stack[1:0];
mstatus[21:20] <= dl_stack[1:0];
9993,6 → 10271,9
`CSR_SBU: dat <= sbu;
`CSR_ENU: dat <= en;
`endif
`ifdef SUPPORT_PREDICATION
`CSR_PREGS: read_pregs(dat);
`endif
`CSR_Q_CTR: dat <= iq_ctr;
`CSR_BM_CTR: dat <= bm_ctr;
`CSR_ICL_CTR: dat <= icl_ctr;
10121,6 → 10402,9
`CSR_SBU: su_barrier[brgs] <= dat;
`CSR_ENU: en_barrier[brgs] <= dat;
`endif
`ifdef SUPPORT_PREDICATION
`CSR_PREGS: write_pregs(dat);
`endif
`CSR_TIME: begin
ld_time <= 6'h3f;
wc_time_dat <= dat;
10178,28 → 10462,26
task tDram0Issue;
input [`QBITSP1] n;
begin
if (iqentry_state[n]==IQS_AGEN) begin
// dramA_v <= `INV;
dram0 <= `DRAMSLOT_BUSY;
dram0_id <= { 1'b1, n[`QBITS] };
dram0_instr <= iqentry_instr[n];
dram0_rmw <= iqentry_rmw[n];
dram0_preload <= iqentry_preload[n];
dram0_tgt <= iqentry_tgt[n];
if (iqentry_imm[n] & iqentry_push[n])
dram0_data <= iqentry_a0[n];
else
dram0_data <= iqentry_a2[n];
dram0_addr <= iqentry_ma[n];
dram0_unc <= iqentry_ma[n][31:20]==12'hFFD || !dce || iqentry_loadv[n];
dram0_memsize <= iqentry_memsz[n];
dram0_load <= iqentry_load[n];
dram0_loadseg <= iqentry_loadseg[n];
dram0_store <= iqentry_store[n];
dram0 <= `DRAMSLOT_BUSY;
dram0_id <= { 1'b1, n[`QBITS] };
dram0_instr <= iqentry_instr[n];
dram0_rmw <= iqentry_rmw[n];
dram0_preload <= iqentry_preload[n];
dram0_tgt <= iqentry_tgt[n];
dram0_data <= iqentry_a2[n];
dram0_addr <= iqentry_ma[n];
// if (ol[iqentry_thrd[n]]==`OL_USER)
// dram0_seg <= (iqentry_Ra[n]==5'd30 || iqentry_Ra[n]==5'd31) ? {ss[iqentry_thrd[n]],13'd0} : {ds[iqentry_thrd[n]],13'd0};
// else
dram0_unc <= iqentry_ma[n][31:20]==12'hFFD || !dce || iqentry_loadv[n];
dram0_memsize <= iqentry_memsz[n];
dram0_load <= iqentry_load[n];
dram0_store <= iqentry_store[n];
`ifdef SUPPORT_SMT
dram0_ol <= (iqentry_Ra[n][4:0]==5'd31 || iqentry_Ra[n][4:0]==5'd30) ? ol[iqentry_thrd[n]] : dl[iqentry_thrd[n]];
dram0_ol <= (iqentry_Ra[n][4:0]==5'd31 || iqentry_Ra[n][4:0]==5'd30) ? ol[iqentry_thrd[n]] : dl[iqentry_thrd[n]];
`else
dram0_ol <= (iqentry_Ra[n][4:0]==5'd31 || iqentry_Ra[n][4:0]==5'd30) ? ol : dl;
dram0_ol <= (iqentry_Ra[n][4:0]==5'd31 || iqentry_Ra[n][4:0]==5'd30) ? ol : dl;
`endif
// Once the memory op is issued reset the a1_v flag.
// This will cause the a1 bus to look for new data from memory (a1_s is pointed to a memory bus)
10206,8 → 10488,7
// This is used for the load and compare instructions.
// must reset the a1 source too.
//iqentry_a1_v[n] <= `INV;
iqentry_state[n] <= IQS_MEM;
end
iqentry_state[n] <= IQS_MEM;
end
endtask
 
10214,8 → 10495,7
task tDram1Issue;
input [`QBITSP1] n;
begin
if (iqentry_state[n]==IQS_AGEN) begin
// dramB_v <= `INV;
dramB_v <= `INV;
dram1 <= `DRAMSLOT_BUSY;
dram1_id <= { 1'b1, n[`QBITS] };
dram1_instr <= iqentry_instr[n];
10222,10 → 10502,7
dram1_rmw <= iqentry_rmw[n];
dram1_preload <= iqentry_preload[n];
dram1_tgt <= iqentry_tgt[n];
if (iqentry_imm[n] & iqentry_push[n])
dram1_data <= iqentry_a0[n];
else
dram1_data <= iqentry_a2[n];
dram1_data <= iqentry_a2[n];
dram1_addr <= iqentry_ma[n];
// if (ol[iqentry_thrd[n]]==`OL_USER)
// dram1_seg <= (iqentry_Ra[n]==5'd30 || iqentry_Ra[n]==5'd31) ? {ss[iqentry_thrd[n]],13'd0} : {ds[iqentry_thrd[n]],13'd0};
10233,7 → 10510,6
dram1_unc <= iqentry_ma[n][31:20]==12'hFFD || !dce || iqentry_loadv[n];
dram1_memsize <= iqentry_memsz[n];
dram1_load <= iqentry_load[n];
dram1_loadseg <= iqentry_loadseg[n];
dram1_store <= iqentry_store[n];
`ifdef SUPPORT_SMT
dram1_ol <= (iqentry_Ra[n][4:0]==5'd31 || iqentry_Ra[n][4:0]==5'd30) ? ol[iqentry_thrd[n]] : dl[iqentry_thrd[n]];
10242,7 → 10518,6
`endif
//iqentry_a1_v[n] <= `INV;
iqentry_state[n] <= IQS_MEM;
end
end
endtask
 
10249,8 → 10524,7
task tDram2Issue;
input [`QBITSP1] n;
begin
if (iqentry_state[n]==IQS_AGEN) begin
// dramC_v <= `INV;
dramC_v <= `INV;
dram2 <= `DRAMSLOT_BUSY;
dram2_id <= { 1'b1, n[`QBITS] };
dram2_instr <= iqentry_instr[n];
10257,10 → 10531,7
dram2_rmw <= iqentry_rmw[n];
dram2_preload <= iqentry_preload[n];
dram2_tgt <= iqentry_tgt[n];
if (iqentry_imm[n] & iqentry_push[n])
dram2_data <= iqentry_a0[n];
else
dram2_data <= iqentry_a2[n];
dram2_data <= iqentry_a2[n];
dram2_addr <= iqentry_ma[n];
// if (ol[iqentry_thrd[n]]==`OL_USER)
// dram2_seg <= (iqentry_Ra[n]==5'd30 || iqentry_Ra[n]==5'd31) ? {ss[iqentry_thrd[n]],13'd0} : {ds[iqentry_thrd[n]],13'd0};
10268,7 → 10539,6
dram2_unc <= iqentry_ma[n][31:20]==12'hFFD || !dce || iqentry_loadv[n];
dram2_memsize <= iqentry_memsz[n];
dram2_load <= iqentry_load[n];
dram2_loadseg <= iqentry_loadseg[n];
dram2_store <= iqentry_store[n];
`ifdef SUPPORT_SMT
dram2_ol <= (iqentry_Ra[n][4:0]==5'd31 || iqentry_Ra[n][4:0]==5'd30) ? ol[iqentry_thrd[n]] : dl[iqentry_thrd[n]];
10277,7 → 10547,6
`endif
//iqentry_a1_v[n] <= `INV;
iqentry_state[n] <= IQS_MEM;
end
end
endtask
 
10289,7 → 10558,6
stb_o <= `LOW;
we <= `LOW;
sel_o <= 8'h00;
// vadr <= 32'hCCCCCCCC;
end
endtask
 
/FT64_fetchbuf.v
56,7 → 56,6
nop_fetchbuf,
take_branch0, take_branch1,
stompedRets,
pred_on,
panic
);
parameter AMSB = `AMSB;
129,7 → 128,6
output take_branch0;
output take_branch1;
input [3:0] stompedRets;
input pred_on;
output reg [3:0] panic;
integer n;
 
185,9 → 183,9
else
`endif
case(ins[7:6])
2'd0: fnInsLength = 3'd4|pred_on;
2'd1: fnInsLength = 3'd6|pred_on;
default: fnInsLength = 3'd2|pred_on;
2'd0: fnInsLength = 3'd4;
2'd1: fnInsLength = 3'd6;
default: fnInsLength = 3'd2;
endcase
endfunction
 
195,10 → 193,10
wire [2:0] fetchbufB_inslen;
wire [2:0] fetchbufC_inslen;
wire [2:0] fetchbufD_inslen;
FT64_InsLength uilA (fetchbufA_instr, fetchbufA_inslen, pred_on);
FT64_InsLength uilB (fetchbufB_instr, fetchbufB_inslen, pred_on);
FT64_InsLength uilC (fetchbufC_instr, fetchbufC_inslen, pred_on);
FT64_InsLength uilD (fetchbufD_instr, fetchbufD_inslen, pred_on);
FT64_InsLength uilA (fetchbufA_instr, fetchbufA_inslen);
FT64_InsLength uilB (fetchbufB_instr, fetchbufB_inslen);
FT64_InsLength uilC (fetchbufC_instr, fetchbufC_inslen);
FT64_InsLength uilD (fetchbufD_instr, fetchbufD_inslen);
 
wire [47:0] xinsn0;
wire [47:0] xinsn1;
/FT64_fetchbuf_x1.v
1,6 → 1,6
// ============================================================================
// __
// \\__/ o\ (C) 2018-2019 Robert Finch, Waterloo
// \\__/ o\ (C) 2018 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
120,10 → 120,8
input [47:0] isn;
casex(isn[`INSTRUCTION_OP])
`Bcc: IsBranch = TRUE;
`BLcc: IsBranch = TRUE;
`BBc: IsBranch = TRUE;
`BEQI: IsBranch = TRUE;
`BNEI: IsBranch = TRUE;
`BCHK: IsBranch = TRUE;
default: IsBranch = FALSE;
endcase
136,12 → 134,12
 
function IsJmp;
input [47:0] isn;
IsJmp = isn[`INSTRUCTION_OP]==`JMP && isn[7]==1'b0;
IsJmp = isn[`INSTRUCTION_OP]==`JMP;
endfunction
 
function IsCall;
input [47:0] isn;
IsCall = isn[`INSTRUCTION_OP]==`CALL && isn[7]==1'b0;
IsCall = isn[`INSTRUCTION_OP]==`CALL;
endfunction
 
function IsRet;
159,25 → 157,6
IsRTI = isn[`INSTRUCTION_OP]==`R2 && isn[`INSTRUCTION_S2]==`RTI;
endfunction
 
// IsExec
// Returns TRUE only when all four conditions hold for the 48-bit instruction:
//   - isn[7:6] is 2'b00,
//   - the major opcode field (`INSTRUCTION_OP) is `R2,
//   - the S2 function field (`INSTRUCTION_S2) is `R1,
//   - isn[22:18] equals `EXEC.
// Any other encoding yields FALSE.
function IsExec;
input [47:0] isn;
begin
	// Start from "not an EXEC"; only the single fully-matching decode path
	// below overrides this default.
	IsExec = FALSE;
	if (isn[7:6]==2'b00)
		case(isn[`INSTRUCTION_OP])
		`R2:
			case(isn[`INSTRUCTION_S2])
			`R1:
				case(isn[22:18])
				`EXEC:		IsExec = TRUE;
				default:	;	// other R1 sub-function: keep FALSE
				endcase
			default:	;		// other R2 function: keep FALSE
			endcase
		default:	;			// other major opcode: keep FALSE
		endcase
end
endfunction
 
function [2:0] fnInsLength;
input [47:0] ins;
`ifdef SUPPORT_DCI
238,11 → 217,7
`RET: branch_pcA = retpc0;
`JMP,`CALL:
begin
`ifdef JMP40
branch_pcA[39:0] = fetchbufA_instr[6] ? {fetchbufA_instr[47:8]} : {fetchbufA_pc[39:24],fetchbufA_instr[31:8]};
`else
branch_pcA[39:0] = {fetchbufA_pc[39:24],fetchbufA_instr[31:8]};
`endif
branch_pcA[63:40] = fetchbufA_pc[63:40];
end
`R2: branch_pcA = btgtA; // RTI
249,8 → 224,9
`BRK,`JAL: branch_pcA = btgtA;
default:
begin
branch_pcA[31:0] = fetchbufA_pc[31:0] +
((fetchbufA_instr[7:6]==2'b01) ? {{4{fetchbufA_instr[47]}},fetchbufA_instr[47:23],fetchbufA_instr[17:16],1'b0} : {{20{fetchbufA_instr[31]}},fetchbufA_instr[31:23],fetchbufA_instr[17:16],1'b0});
branch_pcA[31:8] = fetchbufA_pc[31:8] +
((fetchbufA_instr[7:6]==2'b01) ? {{5{fetchbufA_instr[47]}},fetchbufA_instr[47:29]} : {{21{fetchbufA_instr[31]}},fetchbufA_instr[31:29]});
branch_pcA[7:0] = {fetchbufA_instr[28:23],fetchbufA_instr[17:16]};
branch_pcA[63:32] = fetchbufA_pc[63:32];
end
endcase
262,11 → 238,7
`RET: branch_pcB = retpc0;
`JMP,`CALL:
begin
`ifdef JMP40
branch_pcB[39:0] = fetchbufB_instr[6] ? {fetchbufB_instr[47:8]} : {fetchbufB_pc[39:24],fetchbufB_instr[31:8]};
`else
branch_pcB[39:0] = {fetchbufB_pc[39:24],fetchbufB_instr[31:8]};
`endif
branch_pcB[63:40] = fetchbufB_pc[63:40];
end
`R2: branch_pcB = btgtB; // RTI
273,8 → 245,9
`BRK,`JAL: branch_pcB = btgtB;
default:
begin
branch_pcB[31:0] = fetchbufB_pc[31:0] +
((fetchbufB_instr[7:6]==2'b01) ? {{4{fetchbufB_instr[47]}},fetchbufB_instr[47:23],fetchbufB_instr[17:16],1'b0} : {{20{fetchbufB_instr[31]}},fetchbufB_instr[31:23],fetchbufB_instr[17:16],1'b0});
branch_pcB[31:8] = fetchbufB_pc[31:8] +
((fetchbufB_instr[7:6]==2'b01) ? {{5{fetchbufB_instr[47]}},fetchbufB_instr[47:29]} : {{21{fetchbufB_instr[31]}},fetchbufB_instr[31:29]});
branch_pcB[7:0] = {fetchbufB_instr[28:23],fetchbufB_instr[17:16]};
branch_pcB[63:32] = fetchbufB_pc[63:32];
end
endcase
497,8 → 470,8
insln0 <= 3'd2 | pred_on;
else
`endif
if (IsExec(insn0))
insln0 <= fnInsLength(codebuf0); //???? should be 4?
if (insn0[7:6]==2'b00 && insn0[`INSTRUCTION_OP]==`EXEC)
insln0 <= fnInsLength(codebuf0);
else
insln0 <= fnInsLength(insn0);
end
513,9 → 486,9
cinsn0 <= expand0;
else
`endif
if (IsExec(insn0) && !pred_on)
if (insn0[7:6]==2'b00 && insn0[`INSTRUCTION_OP]==`EXEC && !pred_on)
cinsn0 <= codebuf0;
else if (IsExec(insn0[55:8]) && pred_on)
else if (insn0[15:14]==2'b00 && insn0[`INSTRUCTION_OP]==`EXEC && pred_on)
cinsn0 <= codebuf0;
else if (insn0[15] & pred_on)
cinsn0 <= {xinsn0,insn0[7:0]};

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.