OpenCores
URL https://opencores.org/ocsvn/thor/thor/trunk

Subversion Repositories thor

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /thor/trunk
    from Rev 48 to Rev 49
    Reverse comparison

Rev 48 → Rev 49

/FT64v5/rtl/common/FT64_RSB.v
56,23 → 56,30
parameter RSTPC = 32'hFFFC0100;
integer n;
reg [AMSB:0] ras [0:DEPTH-1];
reg [4:0] rasp;
reg [3:0] rasp;
assign pc = ras[rasp];
reg [47:0] lasti0, lasti1;
 
always @(posedge clk)
if (rst) begin
for (n = 0; n < 32; n = n + 1)
ras[n] <= RSTPC;
rasp <= 5'd0;
lasti0 <= `NOP_INSN;
lasti1 <= `NOP_INSN;
for (n = 0; n < DEPTH; n = n + 1)
ras[n] <= RSTPC;
rasp <= 4'd0;
end
else begin
if (fetchbuf0_v && fetchbuf1_v && (queued1 || queued2)) begin
// Make sure the instruction changed between clock cycles.
lasti0 <= fetchbuf0_instr;
lasti1 <= fetchbuf1_instr;
if (fetchbuf0_instr != lasti0 || fetchbuf1_instr != lasti1) begin
case(fetchbuf0_instr[`INSTRUCTION_OP])
`JAL:
begin
// JAL LR,xxxx assume call
if (fetchbuf0_instr[`INSTRUCTION_RB]==regLR) begin
ras[((rasp-6'd1)&(DEPTH-1))] <= fetchbuf0_pc + 32'd4;
ras[((rasp-6'd1)&(DEPTH-1))] <= fetchbuf0_pc + (fetchbuf0_instr[6] ? 32'd6 : 32'd4);
rasp <= rasp - 4'd1;
end
// JAL r0,[r29] assume a ret
83,7 → 90,7
end
`CALL:
begin
ras[((rasp-6'd1)&(DEPTH-1))] <= fetchbuf0_pc + 32'd4;
ras[((rasp-6'd1)&(DEPTH-1))] <= fetchbuf0_pc + (fetchbuf0_instr[6] ? 32'd6 : 32'd4);
rasp <= rasp - 4'd1;
end
`RET: begin
92,12 → 99,15
end
default: ;
endcase
end
end
else if (fetchbuf1_v && queued1)
lasti1 <= fetchbuf1_instr;
if (fetchbuf1_instr != lasti1) begin
case(fetchbuf1_instr[`INSTRUCTION_OP])
`JAL:
if (fetchbuf1_instr[`INSTRUCTION_RB]==regLR) begin
ras[((rasp-6'd1)&(DEPTH-1))] <= fetchbuf1_pc + 32'd4;
ras[((rasp-6'd1)&(DEPTH-1))] <= fetchbuf1_pc + (fetchbuf1_instr[6] ? 32'd6 : 32'd4);
rasp <= rasp - 4'd1;
end
else if (fetchbuf1_instr[`INSTRUCTION_RB]==5'd00 &&
106,7 → 116,7
end
`CALL:
begin
ras[((rasp-6'd1)&(DEPTH-1))] <= fetchbuf1_pc + 32'd4;
ras[((rasp-6'd1)&(DEPTH-1))] <= fetchbuf1_pc + (fetchbuf1_instr[6] ? 32'd6 : 32'd4);
rasp <= rasp - 4'd1;
end
`RET: begin
115,11 → 125,14
end
default: ;
endcase
end
else if (fetchbuf0_v && queued1)
lasti0 <= fetchbuf0_instr;
if (lasti0 != fetchbuf0_instr) begin
case(fetchbuf0_instr[`INSTRUCTION_OP])
`JAL:
if (fetchbuf0_instr[`INSTRUCTION_RB]==regLR) begin
ras[((rasp-6'd1)&(DEPTH-1))] <= fetchbuf0_pc + 32'd4;
ras[((rasp-6'd1)&(DEPTH-1))] <= fetchbuf0_pc + (fetchbuf0_instr[6] ? 32'd6 : 32'd4);
rasp <= rasp - 4'd1;
end
else if (fetchbuf0_instr[`INSTRUCTION_RB]==5'd00 &&
128,7 → 141,7
end
`CALL:
begin
ras[((rasp-6'd1)&(DEPTH-1))] <= fetchbuf0_pc + 32'd4;
ras[((rasp-6'd1)&(DEPTH-1))] <= fetchbuf0_pc + (fetchbuf0_instr[6] ? 32'd6 : 32'd4);
rasp <= rasp - 4'd1;
end
`RET: begin
137,6 → 150,7
end
default: ;
endcase
end
/*
if (stompedRets > 4'd0) begin
$display("Stomped Rets: %d", stompedRets);
/FT64v5/rtl/common/FT64_cache.v
37,18 → 37,19
 
module FT64_L1_icache_mem(rst, clk, wr, en, lineno, i, o, ov, invall, invline);
parameter pLines = 64;
parameter pLineWidth = 320;
parameter pLineWidth = 288;
input rst;
input clk;
input wr;
input [9:0] en;
input [8:0] en;
input [5:0] lineno;
input [pLineWidth-1:0] i;
output [pLineWidth-1:0] o;
output [9:0] ov;
output [8:0] ov;
input invall;
input invline;
 
(* ram_style="distributed" *)
reg [pLineWidth-1:0] mem [0:pLines-1];
reg [pLines-1:0] valid0;
reg [pLines-1:0] valid1;
59,7 → 60,6
reg [pLines-1:0] valid6;
reg [pLines-1:0] valid7;
reg [pLines-1:0] valid8;
reg [pLines-1:0] valid9;
 
always @(posedge clk)
if (wr & en[0]) mem[lineno][31:0] <= i[31:0];
80,8 → 80,6
always @(posedge clk)
if (wr & en[8]) mem[lineno][287:256] <= i[287:256];
always @(posedge clk)
if (wr & en[9]) mem[lineno][319:288] <= i[319:288];
always @(posedge clk)
if (rst) begin
valid0 <= 64'd0;
valid1 <= 64'd0;
92,7 → 90,6
valid6 <= 64'd0;
valid7 <= 64'd0;
valid8 <= 64'd0;
valid9 <= 64'd0;
end
else begin
if (invall) begin
105,7 → 102,6
valid6 <= 64'd0;
valid7 <= 64'd0;
valid8 <= 64'd0;
valid9 <= 64'd0;
end
else if (invline) begin
valid0[lineno] <= 1'b0;
117,7 → 113,6
valid6[lineno] <= 1'b0;
valid7[lineno] <= 1'b0;
valid8[lineno] <= 1'b0;
valid9[lineno] <= 1'b0;
end
else if (wr) begin
if (en[0]) valid0[lineno] <= 1'b1;
129,7 → 124,6
if (en[6]) valid6[lineno] <= 1'b1;
if (en[7]) valid7[lineno] <= 1'b1;
if (en[8]) valid8[lineno] <= 1'b1;
if (en[9]) valid9[lineno] <= 1'b1;
end
end
 
143,7 → 137,6
assign ov[6] = valid6[lineno];
assign ov[7] = valid7[lineno];
assign ov[8] = valid8[lineno];
assign ov[9] = valid9[lineno];
 
endmodule
 
206,6 → 199,7
output reg [5:0] lineno;
output hit;
 
(* ram_style="distributed" *)
reg [32:0] mem0 [0:15];
reg [32:0] mem1 [0:15];
reg [32:0] mem2 [0:15];
315,23 → 309,23
input clk;
input nxt;
input wr;
input [9:0] en;
input [8:0] en;
input [37:0] adr;
input [37:0] wadr;
input [319:0] i;
input [287:0] i;
output reg [47:0] o;
output hit;
input invall;
input invline;
 
wire [319:0] ic;
reg [319:0] i1, i2;
wire [9:0] lv; // line valid
wire [287:0] ic;
reg [287:0] i1, i2;
wire [8:0] lv; // line valid
wire [5:0] lineno;
wire [5:0] wlineno;
wire taghit;
reg wr1,wr2;
reg [9:0] en1, en2;
reg [8:0] en1, en2;
reg invline1, invline2;
 
// Must update the cache memory on the cycle after a write to the tag memory.
341,7 → 335,7
always @(posedge clk)
wr2 <= wr1;
always @(posedge clk)
i1 <= i;
i1 <= i[287:0];
always @(posedge clk)
i2 <= i1;
always @(posedge clk)
441,16 → 435,17
input [8:0] lineno;
input [2:0] sel;
input [63:0] i;
output [319:0] o;
output [287:0] o;
output reg ov;
input invall;
input invline;
 
(* ram_style="block" *)
reg [63:0] mem0 [0:511];
reg [63:0] mem1 [0:511];
reg [63:0] mem2 [0:511];
reg [63:0] mem3 [0:511];
reg [63:0] mem4 [0:511];
reg [31:0] mem4 [0:511];
reg [511:0] valid;
reg [8:0] rrcl;
 
475,7 → 470,7
3'd1: mem1[lineno] <= i;
3'd2: mem2[lineno] <= i;
3'd3: mem3[lineno] <= i;
3'd4: mem4[lineno] <= i;
3'd4: mem4[lineno] <= i[31:0];
endcase
end
end
511,7 → 506,7
input exv_i;
input [63:0] i;
input err_i;
output [319:0] o;
output [287:0] o;
output hit;
input invall;
input invline;
536,7 → 531,7
// An exception is forced to be stored in the event of an error loading
// the instruction line.
always @(posedge clk)
i1 <= err_i ? {2{16'd0,1'b0,`FLT_IBE,`BRK}} : exv_i ? {2{16'd0,1'b0,`FLT_EXF,`BRK}} : i;
i1 <= err_i ? {2{15'd0,1'b0,`FLT_IBE,2'b00,`BRK}} : exv_i ? {2{15'd0,1'b0,`FLT_EXF,2'b00,`BRK}} : i;
always @(posedge clk)
i2 <= i1;
 
606,6 → 601,7
output reg [8:0] lineno;
output hit;
 
(* ram_style="block" *)
reg [32:0] mem0 [0:127];
reg [32:0] mem1 [0:127];
reg [32:0] mem2 [0:127];
/FT64v5/rtl/common/FT64_config.vh
0,0 → 1,64
// ============================================================================
// __
// \\__/ o\ (C) 2017-2018 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
//
// FT64_config.vh
//
// This source file is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This source file is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// ============================================================================
//
// ----------------------------------------------------------------------------
// Core-wide configuration switches for the FT64 sources that include this
// header. Options described as "comment out to remove" are feature-tested by
// the including sources, so removing the define removes the feature.
// ----------------------------------------------------------------------------
// NOTE(review): SIM presumably selects simulation-only behaviour - confirm
// against the sources that test it.
`define SIM 1'b1
//`define SUPPORT_SMT 1'b1
//`define DEBUG_LOGIC 1'b1
 
// Address width: AMSB is the index of the most significant address bit and
// ABITS is the matching bit range (AMSB down to 0) for use in declarations.
`define AMSB 31
`define ABITS `AMSB:0
// NOTE(review): QBITS (three bits) looks like the index range for a queue of
// QENTRIES (eight) entries - keep the two in step if either one is changed.
`define QBITS 2:0
`define QENTRIES 8
// NOTE(review): XBITS appears to be the bit range of the exception code
// fields (eight bits wide) - confirm against the declarations that use it.
`define XBITS 7:0
 
//`define SUPPORT_DBG 1'b1
`define FULL_ISSUE_LOGIC 1'b1
 
// Counts of parallel resources built into the core; the legal range for each
// setting is given in parentheses.
`define WAYS 2 // number of ways parallel (not working yet)
`define NUM_IDU 2 // number of instruction decode units (1-3)
`define NUM_ALU 2 // number of ALUs (1-2)
`define NUM_MEM 2 // number of memory queues (1-3)
`define NUM_FPU 0 // number of floating-point units (0-2)
`define NUM_CMT 2 // number of commit busses (1-2)
// Comment out the following to remove FCU enhancements (branch predictor, BTB, RSB)
`define FCU_ENH 1
// Comment out the following to remove the bypassing logic on the functional units
`define FU_BYPASS 1
 
// These are the unit availability settings at reset (1'b1 = available).
// NOTE(review): FPU1_AVAIL is 1'b1 although NUM_FPU is 0 - confirm that this
// combination is intended.
`define ID1_AVAIL 1'b1
`define ID2_AVAIL 1'b1
`define ID3_AVAIL 1'b0
`define ALU0_AVAIL 1'b1
`define ALU1_AVAIL 1'b1
`define FPU1_AVAIL 1'b1
`define FPU2_AVAIL 1'b0
`define MEM1_AVAIL 1'b1
`define MEM2_AVAIL 1'b1
`define MEM3_AVAIL 1'b0
`define FCU_AVAIL 1'b1
 
// Comment out to remove the write buffer from the core.
`define HAS_WB 1'b1
`define WB_DEPTH 8 // must be one more than the desired depth
/FT64v5/rtl/common/FT64_defines.vh
22,9 → 22,6
//
// ============================================================================
//
`define SIM 1'b1
//`define SUPPORT_SMT 1'b1
//`define DEBUG_LOGIC 1'b1
`define HIGH 1'b1
`define LOW 1'b0
`define TRUE 1'b1
295,6 → 292,7
`define CSR_HARTID 11'h001
`define CSR_TICK 11'h002
`define CSR_PCR 11'h003
`define CSR_PMR 11'h005
`define CSR_CAUSE 11'h006
`define CSR_BADADR 11'h007
`define CSR_PCR2 11'h008
365,6 → 363,7
`define FLT_DBG 8'd33
`define FLT_TGT 8'd34
`define FLT_IADR 8'd36
`define FLT_UNIMP 8'd37
`define FLT_FLT 8'd38
`define FLT_CHK 8'd39
`define FLT_DBZ 8'd40
/FT64v5/rtl/common/FT64_idecoder.v
149,7 → 149,7
always @*
case(instr[`INSTRUCTION_OP])
`BRK: IsFlowCtrl <= TRUE;
`RR: case(instr[`INSTRUCTION_S2])
`R2: case(instr[`INSTRUCTION_S2])
`RTI: IsFlowCtrl <= TRUE;
default: IsFlowCtrl <= FALSE;
endcase
224,7 → 224,14
`BEQI: fnCanException = TRUE;
`CHK: fnCanException = TRUE;
default:
// Stores can still raise an exception if there is a write buffer, but we allow
// following stores to be issued by ignoring the fact that they can raise an
// exception, because the stores can be undone by invalidating the write buffer.
`ifdef HAS_WB
fnCanException = IsLoad(isn);
`else
fnCanException = IsMem(isn);
`endif
endcase
end
endfunction
/FT64v5/rtl/common/FT64_mpu.v
88,7 → 88,7
wire cyc,stb,we;
wire [31:0] adr;
reg [63:0] dati;
wire [2:0] irq;
wire [3:0] irq;
wire [6:0] cause;
wire mmu_ack;
wire [31:0] mmu_dato;
213,7 → 213,7
CardMemory ucrd1
(
.clk_i(clk_i),
.cs_i(cs_crd),
.cs_i(cs_crd & cyc_o & stb_o),
.ack_o(crd_ack),
.wr_i(we_o),
.adr_i(adr),
/FT64v5/rtl/common/FT64_pic.v
79,7 → 79,7
i8, i9, i10, i11, i12, i13, i14, i15,
i16, i17, i18, i19, i20, i21, i22, i23,
i24, i25, i26, i27, i28, i29, i30, i31,
output [2:0] irqo, // normally connected to the processor irq
output [3:0] irqo, // normally connected to the processor irq
input nmii, // nmi input connected to nmi requester
output nmio, // normally connected to the nmi of cpu
output [6:0] causeo
96,7 → 96,7
reg [31:0] rste;
reg [31:0] es;
reg [5:0] cause_base;
reg [2:0] irq [0:31];
reg [3:0] irq [0:31];
reg [6:0] cause [0:31];
 
wire cs = cyc_i && stb_i && adr_i[31:8]==pIOAddress[31:8];
128,7 → 128,7
6'b1xxxxx:
begin
cause[adr_i[6:2]] <= dat_i[6:0];
irq[adr_i[6:2]] <= dat_i[10:8];
irq[adr_i[6:2]] <= dat_i[11:8];
ie[adr_i[6:2]] <= dat_i[16];
es[adr_i[6:2]] <= dat_i[17];
end
144,7 → 144,7
if (cs)
casex (adr_i[7:2])
6'd0: dat_o <= {cause_base,3'd0} + irqenc;
6'b1xxxxx: dat_o <= {es[adr_i[6:2]],ie[adr_i[6:2]],5'b0,irq[adr_i[6:2]],1'b0,cause[adr_i[6:2]]};
6'b1xxxxx: dat_o <= {es[adr_i[6:2]],ie[adr_i[6:2]],4'b0,irq[adr_i[6:2]],1'b0,cause[adr_i[6:2]]};
default: dat_o <= ie;
endcase
else
151,7 → 151,7
dat_o <= 32'h0000;
end
 
assign irqo = (irqenc == 5'h0) ? 3'd0 : irq[irqenc];
assign irqo = (irqenc == 5'h0) ? 4'd0 : irq[irqenc];
assign causeo = (irqenc == 5'h0) ? 7'd0 : cause[irqenc];
assign nmio = nmii & ie[0];
 
/FT64v5/rtl/twoway/FT64.v
40,17 → 40,8
// Approx. 100,000 LUTs. 160,000 LC's.
// ============================================================================
//
`include "FT64_config.vh"
`include "FT64_defines.vh"
//`define SUPPORT_DBG 1'b1
//`define FULL_ISSUE_LOGIC 1'b1
`define QBITS 2:0
`define QENTRIES 8
`define ID1_AVAIL 1'b1
`define ID2_AVAIL 1'b1
`define ALU0_AVAIL 1'b1
`define ALU1_AVAIL 1'b1
`define FPU1_AVAIL 1'b1
`define FPU2_AVAIL 1'b0
 
module FT64(hartid, rst, clk_i, clk4x, tm_clk_i, irq_i, vec_i, bte_o, cti_o, cyc_o, stb_o, ack_i, err_i, we_o, sel_o, adr_o, dat_o, dat_i,
ol_o, pcr_o, pcr2_o, exv_i, rdv_i, wrv_i, icl_o, sr_o, cr_o, rbi_i, signal_i);
59,7 → 50,7
input clk_i;
input clk4x;
input tm_clk_i;
input [2:0] irq_i;
input [3:0] irq_i;
input [7:0] vec_i;
output reg [1:0] bte_o;
output reg [2:0] cti_o;
69,7 → 60,7
input err_i;
output reg we_o;
output reg [7:0] sel_o;
output reg [31:0] adr_o;
output reg [`ABITS] adr_o;
output reg [63:0] dat_o;
input [63:0] dat_i;
output reg [1:0] ol_o;
179,11 → 170,12
for (n = 0; n < AREGS; n = n + 1)
rf_source[n] = 5'd0;
end
wire [31:0] pc0;
wire [31:0] pc1;
wire [`ABITS] pc0;
wire [`ABITS] pc1;
wire [`ABITS] pc2;
 
reg excmiss;
reg [31:0] excmisspc;
reg [`ABITS] excmisspc;
reg excthrd;
reg exception_set;
reg rdvq; // accumulated read violation
197,6 → 189,7
reg [63:0] vm [0:7]; // vector mask registers
reg [1:0] m2;
 
reg [31:0] wb_merges;
// CSR's
reg [63:0] cr0;
wire snr = cr0[17]; // sequence number reset
203,6 → 196,18
wire dce = cr0[30]; // data cache enable
wire bpe = cr0[32]; // branch predictor enable
wire ctgtxe = cr0[33];
reg [63:0] pmr;
wire id1_available = pmr[0];
wire id2_available = pmr[1];
wire id3_available = pmr[2];
wire alu0_available = pmr[8];
wire alu1_available = pmr[9];
wire fpu1_available = pmr[16];
wire fpu2_available = pmr[17];
wire mem1_available = pmr[24];
wire mem2_available = pmr[25];
wire mem3_available = pmr[26];
wire fcu_available = pmr[32];
// Simply setting this flag to zero should strip out almost all the logic
// associated with SMT.
`ifdef SUPPORT_SMT
221,18 → 226,18
reg [63:0] aec;
reg [15:0] cause[0:15];
`ifdef SUPPORT_SMT
reg [31:0] epc [0:NTHREAD];
reg [31:0] epc0 [0:NTHREAD];
reg [31:0] epc1 [0:NTHREAD];
reg [31:0] epc2 [0:NTHREAD];
reg [31:0] epc3 [0:NTHREAD];
reg [31:0] epc4 [0:NTHREAD];
reg [31:0] epc5 [0:NTHREAD];
reg [31:0] epc6 [0:NTHREAD];
reg [31:0] epc7 [0:NTHREAD];
reg [31:0] epc8 [0:NTHREAD]; // exception pc and stack
reg [`ABITS] epc [0:NTHREAD];
reg [`ABITS] epc0 [0:NTHREAD];
reg [`ABITS] epc1 [0:NTHREAD];
reg [`ABITS] epc2 [0:NTHREAD];
reg [`ABITS] epc3 [0:NTHREAD];
reg [`ABITS] epc4 [0:NTHREAD];
reg [`ABITS] epc5 [0:NTHREAD];
reg [`ABITS] epc6 [0:NTHREAD];
reg [`ABITS] epc7 [0:NTHREAD];
reg [`ABITS] epc8 [0:NTHREAD]; // exception pc and stack
reg [63:0] mstatus [0:NTHREAD]; // machine status
wire [2:0] im = mstatus[0][2:0];
wire [3:0] im = mstatus[0][3:0];
wire [1:0] ol [0:NTHREAD];
wire [1:0] dl [0:NTHREAD];
assign ol[0] = mstatus[0][5:3]; // operating level
255,18 → 260,18
//assign ol_o = mprv ? ol_stack[0][2:0] : ol[0];
wire vca = mstatus[0][32]; // vector chaining active
`else
reg [31:0] epc ;
reg [31:0] epc0 ;
reg [31:0] epc1 ;
reg [31:0] epc2 ;
reg [31:0] epc3 ;
reg [31:0] epc4 ;
reg [31:0] epc5 ;
reg [31:0] epc6 ;
reg [31:0] epc7 ;
reg [31:0] epc8 ; // exception pc and stack
reg [`ABITS] epc ;
reg [`ABITS] epc0 ;
reg [`ABITS] epc1 ;
reg [`ABITS] epc2 ;
reg [`ABITS] epc3 ;
reg [`ABITS] epc4 ;
reg [`ABITS] epc5 ;
reg [`ABITS] epc6 ;
reg [`ABITS] epc7 ;
reg [`ABITS] epc8 ; // exception pc and stack
reg [63:0] mstatus ; // machine status
wire [2:0] im = mstatus[2:0];
wire [3:0] im = mstatus[3:0];
wire [1:0] ol ;
wire [1:0] dl;
assign ol = mstatus[5:3]; // operating level
286,8 → 291,8
wire vca = mstatus[32]; // vector chaining active
`endif
reg [63:0] tcb;
reg [31:0] badaddr[0:15];
reg [31:0] tvec[0:7];
reg [`ABITS] badaddr[0:15];
reg [`ABITS] tvec[0:7];
reg [63:0] sema;
reg [63:0] vm_sema;
reg [63:0] cas; // compare and swap
294,84 → 299,156
reg [63:0] ve_hold;
reg isCAS, isAMO, isInc, isSpt, isRMW;
reg [`QBITS] casid;
reg [31:0] sbl, sbu;
reg [`ABITS] sbl, sbu;
reg [4:0] regLR = 5'd29;
 
reg [2:0] fp_rm;
reg fp_inexe;
reg fp_dbzxe;
reg fp_underxe;
reg fp_overxe;
reg fp_invopxe;
reg fp_giopxe;
reg fp_nsfp = 1'b0;
reg fp_fractie;
reg fp_raz;
reg [2:0] fp1_rm;
reg fp1_inexe;
reg fp1_dbzxe;
reg fp1_underxe;
reg fp1_overxe;
reg fp1_invopxe;
reg fp1_giopxe;
reg fp1_nsfp = 1'b0;
reg fp1_fractie;
reg fp1_raz;
 
reg fp_neg;
reg fp_pos;
reg fp_zero;
reg fp_inf;
reg fp1_neg;
reg fp1_pos;
reg fp1_zero;
reg fp1_inf;
 
reg fp_inex; // inexact exception
reg fp_dbzx; // divide by zero exception
reg fp_underx; // underflow exception
reg fp_overx; // overflow exception
reg fp_giopx; // global invalid operation exception
reg fp_sx; // summary exception
reg fp_swtx; // software triggered exception
reg fp_gx;
reg fp_invopx;
reg fp1_inex; // inexact exception
reg fp1_dbzx; // divide by zero exception
reg fp1_underx; // underflow exception
reg fp1_overx; // overflow exception
reg fp1_giopx; // global invalid operation exception
reg fp1_sx; // summary exception
reg fp1_swtx; // software triggered exception
reg fp1_gx;
reg fp1_invopx;
 
reg fp_infzerox;
reg fp_zerozerox;
reg fp_subinfx;
reg fp_infdivx;
reg fp_NaNCmpx;
reg fp_cvtx;
reg fp_sqrtx;
reg fp_snanx;
reg fp1_infzerox;
reg fp1_zerozerox;
reg fp1_subinfx;
reg fp1_infdivx;
reg fp1_NaNCmpx;
reg fp1_cvtx;
reg fp1_sqrtx;
reg fp1_snanx;
 
wire [31:0] fp_status = {
reg [2:0] fp2_rm;
reg fp2_inexe;
reg fp2_dbzxe;
reg fp2_underxe;
reg fp2_overxe;
reg fp2_invopxe;
reg fp2_giopxe;
reg fp2_nsfp = 1'b0;
reg fp2_fractie;
reg fp2_raz;
 
fp_rm,
fp_inexe,
fp_dbzxe,
fp_underxe,
fp_overxe,
fp_invopxe,
fp_nsfp,
reg fp2_neg;
reg fp2_pos;
reg fp2_zero;
reg fp2_inf;
 
fp_fractie,
fp_raz,
reg fp2_inex; // inexact exception
reg fp2_dbzx; // divide by zero exception
reg fp2_underx; // underflow exception
reg fp2_overx; // overflow exception
reg fp2_giopx; // global invalid operation exception
reg fp2_sx; // summary exception
reg fp2_swtx; // software triggered exception
reg fp2_gx;
reg fp2_invopx;
 
reg fp2_infzerox;
reg fp2_zerozerox;
reg fp2_subinfx;
reg fp2_infdivx;
reg fp2_NaNCmpx;
reg fp2_cvtx;
reg fp2_sqrtx;
reg fp2_snanx;
 
wire [31:0] fp1_status = {
 
fp1_rm,
fp1_inexe,
fp1_dbzxe,
fp1_underxe,
fp1_overxe,
fp1_invopxe,
fp1_nsfp,
 
fp1_fractie,
fp1_raz,
1'b0,
fp_neg,
fp_pos,
fp_zero,
fp_inf,
fp1_neg,
fp1_pos,
fp1_zero,
fp1_inf,
 
fp_swtx,
fp_inex,
fp_dbzx,
fp_underx,
fp_overx,
fp_giopx,
fp_gx,
fp_sx,
fp1_swtx,
fp1_inex,
fp1_dbzx,
fp1_underx,
fp1_overx,
fp1_giopx,
fp1_gx,
fp1_sx,
fp_cvtx,
fp_sqrtx,
fp_NaNCmpx,
fp_infzerox,
fp_zerozerox,
fp_infdivx,
fp_subinfx,
fp_snanx
fp1_cvtx,
fp1_sqrtx,
fp1_NaNCmpx,
fp1_infzerox,
fp1_zerozerox,
fp1_infdivx,
fp1_subinfx,
fp1_snanx
};
 
reg [63:0] fpu_csr;
wire [5:0] fp_rgs = fpu_csr[37:32];
wire [31:0] fp2_status = {
 
fp2_rm,
fp2_inexe,
fp2_dbzxe,
fp2_underxe,
fp2_overxe,
fp2_invopxe,
fp2_nsfp,
 
fp2_fractie,
fp2_raz,
1'b0,
fp2_neg,
fp2_pos,
fp2_zero,
fp2_inf,
 
fp2_swtx,
fp2_inex,
fp2_dbzx,
fp2_underx,
fp2_overx,
fp2_giopx,
fp2_gx,
fp2_sx,
fp2_cvtx,
fp2_sqrtx,
fp2_NaNCmpx,
fp2_infzerox,
fp2_zerozerox,
fp2_infdivx,
fp2_subinfx,
fp2_snanx
};
 
reg [63:0] fpu1_csr;
wire [5:0] fp1_rgs = fpu1_csr[37:32];
 
//reg [25:0] m[0:8191];
reg [3:0] panic; // indexes the message structure
reg [128:0] message [0:15]; // indexed by panic
378,8 → 455,8
 
wire int_commit;
reg StatusHWI;
reg [47:0] insn0, insn1;
wire [47:0] insn0a, insn1a, insn1b;
reg [47:0] insn0, insn1, insn2;
wire [47:0] insn0a, insn1a, insn1b, insn2a, insn2b;
reg tgtq;
// Only need enough bits in the sequence number to cover the instructions in
// the queue plus an extra count for skipping on branch misses. In this case
415,23 → 492,23
reg [QENTRIES-1:0] iqentry_fpu; // floating point instruction
reg [QENTRIES-1:0] iqentry_fc; // flow control instruction
reg [QENTRIES-1:0] iqentry_canex = 8'h00; // true if it's an instruction that can exception
reg iqentry_load [0:QENTRIES-1]; // is a memory load instruction
reg iqentry_preload [0:QENTRIES-1]; // is a memory preload instruction
reg iqentry_ldcmp [0:QENTRIES-1];
reg iqentry_mem [0:QENTRIES-1]; // touches memory: 1 if LW/SW
reg iqentry_memndx [0:QENTRIES-1]; // indexed memory operation
reg iqentry_rmw [0:QENTRIES-1]; // memory RMW op
reg iqentry_memdb [0:QENTRIES-1];
reg iqentry_memsb [0:QENTRIES-1];
reg iqentry_rtop [0:QENTRIES-1];
reg [QENTRIES-1:0] iqentry_load; // is a memory load instruction
reg [QENTRIES-1:0] iqentry_preload; // is a memory preload instruction
reg [QENTRIES-1:0] iqentry_ldcmp;
reg [QENTRIES-1:0] iqentry_mem; // touches memory: 1 if LW/SW
reg [QENTRIES-1:0] iqentry_memndx; // indexed memory operation
reg [QENTRIES-1:0] iqentry_rmw; // memory RMW op
reg [QENTRIES-1:0] iqentry_memdb;
reg [QENTRIES-1:0] iqentry_memsb;
reg [QENTRIES-1:0] iqentry_rtop;
reg [QENTRIES-1:0] iqentry_sei;
reg [QENTRIES-1:0] iqentry_aq; // memory aquire
reg [QENTRIES-1:0] iqentry_rl; // memory release
reg iqentry_shft48[0:QENTRIES-1];
reg iqentry_jmp [0:QENTRIES-1]; // changes control flow: 1 if BEQ/JALR
reg [QENTRIES-1:0] iqentry_shft48;
reg [QENTRIES-1:0] iqentry_jmp; // changes control flow: 1 if BEQ/JALR
reg [QENTRIES-1:0] iqentry_br; // Bcc (for predictor)
reg iqentry_sync [0:QENTRIES-1]; // sync instruction
reg iqentry_fsync[0:QENTRIES-1];
reg [QENTRIES-1:0] iqentry_sync; // sync instruction
reg [QENTRIES-1:0] iqentry_fsync;
reg [QENTRIES-1:0] iqentry_rfw = 8'h00; // writes to register file
reg [7:0] iqentry_we [0:QENTRIES-1]; // enable strobe
reg [63:0] iqentry_res [0:QENTRIES-1]; // instruction result
451,7 → 528,7
reg [63:0] iqentry_a3 [0:QENTRIES-1]; // argument 3
reg iqentry_a3_v [0:QENTRIES-1]; // arg3 valid
reg [4:0] iqentry_a3_s [0:QENTRIES-1]; // arg3 source (iq entry # with top bit representing ALU/DRAM bus)
reg [31:0] iqentry_pc [0:QENTRIES-1]; // program counter for this instruction
reg [`ABITS] iqentry_pc [0:QENTRIES-1]; // program counter for this instruction
reg [RBIT:0] iqentry_Ra [0:QENTRIES-1];
reg [RBIT:0] iqentry_Rb [0:QENTRIES-1];
reg [RBIT:0] iqentry_Rc [0:QENTRIES-1];
477,12 → 554,14
reg [3:0] stompedOnRets;
reg [QENTRIES-1:0] iqentry_alu0_issue;
reg [QENTRIES-1:0] iqentry_alu1_issue;
reg [QENTRIES-1:0] iqentry_alu2_issue;
reg [QENTRIES-1:0] iqentry_id1issue;
reg [QENTRIES-1:0] iqentry_id2issue;
reg [QENTRIES-1:0] iqentry_id3issue;
reg [1:0] iqentry_mem_islot [0:QENTRIES-1];
reg [1:0] iqentry_fpu_islot [0:QENTRIES-1];
reg [QENTRIES-1:0] iqentry_fcu_issue;
reg [QENTRIES-1:0] iqentry_fpu_issue;
reg [QENTRIES-1:0] iqentry_fpu1_issue;
reg [QENTRIES-1:0] iqentry_fpu2_issue;
 
wire [PREGS-1:1] livetarget;
wire [PREGS-1:1] iqentry_0_livetarget;
534,10 → 613,11
 
reg [3:0] nop_fetchbuf;
wire fetchbuf; // determines which pair to read from & write to
wire [3:0] fb_panic;
 
wire [47:0] fetchbuf0_instr;
wire [2:0] fetchbuf0_insln;
wire [31:0] fetchbuf0_pc;
wire [`ABITS] fetchbuf0_pc;
wire fetchbuf0_v;
wire fetchbuf0_thrd;
wire fetchbuf0_mem;
546,7 → 626,7
wire fetchbuf0_rfw;
wire [47:0] fetchbuf1_instr;
wire [2:0] fetchbuf1_insln;
wire [31:0] fetchbuf1_pc;
wire [`ABITS] fetchbuf1_pc;
wire fetchbuf1_v;
wire fetchbuf1_thrd;
wire fetchbuf1_mem;
555,22 → 635,21
wire fetchbuf1_rfw;
 
wire [47:0] fetchbufA_instr;
wire [31:0] fetchbufA_pc;
wire [`ABITS] fetchbufA_pc;
wire fetchbufA_v;
wire [47:0] fetchbufB_instr;
wire [31:0] fetchbufB_pc;
wire [`ABITS] fetchbufB_pc;
wire fetchbufB_v;
wire [47:0] fetchbufC_instr;
wire [31:0] fetchbufC_pc;
wire [`ABITS] fetchbufC_pc;
wire fetchbufC_v;
wire [47:0] fetchbufD_instr;
wire [31:0] fetchbufD_pc;
wire [`ABITS] fetchbufD_pc;
wire fetchbufD_v;
 
//reg did_branchback0;
//reg did_branchback1;
 
reg id1_available;
reg id1_v;
reg [4:0] id1_id;
reg [47:0] id1_instr;
581,7 → 660,6
reg [4:0] id1_Rt;
wire [127:0] id1_bus;
 
reg id2_available;
reg id2_v;
reg [4:0] id2_id;
reg [47:0] id2_instr;
592,8 → 670,17
reg [4:0] id2_Rt;
wire [127:0] id2_bus;
 
reg id3_v;
reg [4:0] id3_id;
reg [47:0] id3_instr;
reg [5:0] id3_ven;
reg [7:0] id3_vl;
reg id3_thrd;
reg id3_pt;
reg [4:0] id3_Rt;
wire [127:0] id3_bus;
 
reg alu0_ld;
reg alu0_available;
reg alu0_dataready;
wire alu0_done;
wire alu0_idle;
609,17 → 696,16
reg [RBIT:0] alu0_tgt;
reg [5:0] alu0_ven;
reg alu0_thrd;
reg [31:0] alu0_pc;
reg [`ABITS] alu0_pc;
wire [63:0] alu0_bus;
wire [63:0] alu0b_bus;
wire [3:0] alu0_id;
wire [8:0] alu0_exc;
wire [`XBITS] alu0_exc;
wire alu0_v;
wire alu0_branchmiss;
wire [31:0] alu0_misspc;
wire [`ABITS] alu0_misspc;
 
reg alu1_ld;
reg alu1_available;
reg alu1_dataready;
wire alu1_done;
wire alu1_idle;
634,35 → 720,52
reg [63:0] alu1_argI; // only used by BEQ
reg [RBIT:0] alu1_tgt;
reg [5:0] alu1_ven;
reg [31:0] alu1_pc;
reg [`ABITS] alu1_pc;
reg alu1_thrd;
wire [63:0] alu1_bus;
wire [63:0] alu1b_bus;
wire [3:0] alu1_id;
wire [8:0] alu1_exc;
wire [`XBITS] alu1_exc;
wire alu1_v;
wire alu1_branchmiss;
wire [31:0] alu1_misspc;
wire [`ABITS] alu1_misspc;
 
reg fpu_ld;
reg fpu1_available = 1'b1;
reg fpu_dataready = 1'b1;
wire fpu_done = 1'b1;
wire fpu_idle;
reg [3:0] fpu_sourceid;
reg [47:0] fpu_instr;
reg [63:0] fpu_argA;
reg [63:0] fpu_argB;
reg [63:0] fpu_argC;
reg [63:0] fpu_argI; // only used by BEQ
reg [RBIT:0] fpu_tgt;
reg [31:0] fpu_pc;
wire [63:0] fpu_bus;
wire [3:0] fpu_id;
wire [8:0] fpu_exc = 9'h000;
wire fpu_v;
wire [31:0] fpu_status;
reg fpu1_ld;
reg fpu1_dataready = 1'b1;
wire fpu1_done = 1'b1;
wire fpu1_idle;
reg [3:0] fpu1_sourceid;
reg [47:0] fpu1_instr;
reg [63:0] fpu1_argA;
reg [63:0] fpu1_argB;
reg [63:0] fpu1_argC;
reg [63:0] fpu1_argI; // only used by BEQ
reg [RBIT:0] fpu1_tgt;
reg [`ABITS] fpu1_pc;
wire [63:0] fpu1_bus;
wire [3:0] fpu1_id;
wire [`XBITS] fpu1_exc = 9'h000;
wire fpu1_v;
wire [31:0] fpu1_status;
 
reg fpu2_ld;
reg fpu2_dataready = 1'b1;
wire fpu2_done = 1'b1;
wire fpu2_idle;
reg [3:0] fpu2_sourceid;
reg [47:0] fpu2_instr;
reg [63:0] fpu2_argA;
reg [63:0] fpu2_argB;
reg [63:0] fpu2_argC;
reg [63:0] fpu2_argI; // only used by BEQ
reg [RBIT:0] fpu2_tgt;
reg [`ABITS] fpu2_pc;
wire [63:0] fpu2_bus;
wire [3:0] fpu2_id;
wire [`XBITS] fpu2_exc = 9'h000;
wire fpu2_v;
wire [31:0] fpu2_status;
 
reg [63:0] waitctr;
reg fcu_ld;
reg fcu_dataready;
679,19 → 782,19
reg [63:0] fcu_argI; // only used by BEQ
reg [63:0] fcu_argT;
reg [63:0] fcu_argT2;
reg [31:0] fcu_retadr;
reg [`ABITS] fcu_retadr;
reg fcu_retadr_v;
reg [31:0] fcu_pc;
reg [31:0] fcu_nextpc;
reg [31:0] fcu_brdisp;
reg [`ABITS] fcu_pc;
reg [`ABITS] fcu_nextpc;
reg [`ABITS] fcu_brdisp;
wire [63:0] fcu_bus;
wire [3:0] fcu_id;
reg [8:0] fcu_exc;
reg [`XBITS] fcu_exc;
wire fcu_v;
reg fcu_thrd;
reg fcu_branchmiss;
reg fcu_clearbm;
reg [31:0] fcu_misspc;
reg [`ABITS] fcu_misspc;
 
reg [63:0] rmw_argA;
reg [63:0] rmw_argB;
699,9 → 802,20
wire [63:0] rmw_res;
reg [31:0] rmw_instr;
 
// write buffer
reg [63:0] wb_data [0:`WB_DEPTH-1];
reg [`ABITS] wb_addr [0:`WB_DEPTH-1];
reg [1:0] wb_ol [0:`WB_DEPTH-1];
reg [`WB_DEPTH-1:0] wb_v;
reg [`WB_DEPTH-1:0] wb_rmw;
reg [QENTRIES-1:0] wb_id [0:`WB_DEPTH-1];
reg [QENTRIES-1:0] wbo_id;
reg [7:0] wb_sel [0:`WB_DEPTH-1];
reg wb_en;
 
reg branchmiss = 1'b0;
reg branchmiss_thrd = 1'b0;
reg [31:0] misspc;
reg [`ABITS] misspc;
reg [`QBITS] missid;
 
wire take_branch;
715,7 → 829,7
reg [2:0] dram1; // state of the DRAM request (latency = 4; can have three in pipeline)
reg [2:0] dram2; // state of the DRAM request (latency = 4; can have three in pipeline)
reg [63:0] dram0_data;
reg [31:0] dram0_addr;
reg [`ABITS] dram0_addr;
reg [31:0] dram0_seg;
reg [47:0] dram0_instr;
reg dram0_rmw;
722,13 → 836,13
reg dram0_preload;
reg [RBIT:0] dram0_tgt;
reg [3:0] dram0_id;
reg [8:0] dram0_exc;
reg [`XBITS] dram0_exc;
reg dram0_unc;
reg [2:0] dram0_memsize;
reg dram0_load; // is a load operation
reg [1:0] dram0_ol;
reg [63:0] dram1_data;
reg [31:0] dram1_addr;
reg [`ABITS] dram1_addr;
reg [31:0] dram1_seg;
reg [47:0] dram1_instr;
reg dram1_rmw;
735,13 → 849,13
reg dram1_preload;
reg [RBIT:0] dram1_tgt;
reg [3:0] dram1_id;
reg [8:0] dram1_exc;
reg [`XBITS] dram1_exc;
reg dram1_unc;
reg [2:0] dram1_memsize;
reg dram1_load;
reg [1:0] dram1_ol;
reg [63:0] dram2_data;
reg [31:0] dram2_addr;
reg [`ABITS] dram2_addr;
reg [31:0] dram2_seg;
reg [47:0] dram2_instr;
reg dram2_rmw;
748,7 → 862,7
reg dram2_preload;
reg [RBIT:0] dram2_tgt;
reg [3:0] dram2_id;
reg [8:0] dram2_exc;
reg [`XBITS] dram2_exc;
reg dram2_unc;
reg [2:0] dram2_memsize;
reg dram2_load;
757,15 → 871,15
reg dramA_v;
reg [3:0] dramA_id;
reg [63:0] dramA_bus;
reg [8:0] dramA_exc;
reg [`XBITS] dramA_exc;
reg dramB_v;
reg [3:0] dramB_id;
reg [63:0] dramB_bus;
reg [8:0] dramB_exc;
reg [`XBITS] dramB_exc;
reg dramC_v;
reg [3:0] dramC_id;
reg [63:0] dramC_bus;
reg [8:0] dramC_exc;
reg [`XBITS] dramC_exc;
 
wire outstanding_stores;
reg [63:0] I; // instruction count
823,20 → 937,21
parameter IC10 = 4'd10;
parameter IC3a = 4'd11;
reg invic, invdc;
reg icwhich,icnxt,L2_nxt;
wire ihit0,ihit1,ihit2;
wire ihit = ihit0&ihit1;
reg [1:0] icwhich;
reg icnxt,L2_nxt;
wire ihit0,ihit1,ihit2,ihitL2;
wire ihit = ihit0&ihit1&ihit2;
reg phit;
wire threadx;
always @*
phit <= ihit&&icstate==IDLE;
reg [2:0] iccnt;
reg L1_wr0,L1_wr1;
reg L1_wr0,L1_wr1,L1_wr2;
reg L1_invline;
reg [9:0] L1_en;
reg [8:0] L1_en;
reg [37:0] L1_adr, L2_adr;
reg [319:0] L2_rdat;
wire [319:0] L2_dato;
reg [287:0] L2_rdat;
wire [287:0] L2_dato;
reg L2_xsel;
 
FT64_regfile2w6r_oc #(.RBIT(RBIT)) urf1
878,12 → 993,29
endcase
endfunction
 
wire [31:0] pc0plus6 = pc0 + 32'd6;
wire [`ABITS] pc0plus6 = pc0 + 32'd6;
wire [`ABITS] pc0plus12 = pc0 + 32'd12;
 
`ifdef SUPPORT_SMT
assign insn1a = insn1b;
generate begin : gInsnVar
if (`WAYS > 1) begin
assign insn1a = insn1b;
end
if (`WAYS > 2) begin
assign insn2a = insn2b;
end
end
endgenerate
`else
assign insn1a = {insn1b,insn0a} >> {fnInsLength(insn0a),3'b0};
generate begin : gInsnVar
if (`WAYS > 1) begin
assign insn1a = {insn1b,insn0a} >> {fnInsLength(insn0a),3'b0};
end
if (`WAYS > 2) begin
assign insn2a = {insn2b,insn1b,insn0a} >> {fnInsLength(insn0a) + fnInsLength(insn1a),3'b0};
end
end
endgenerate
`endif
 
FT64_L1_icache uic0
901,6 → 1033,8
.invall(invic),
.invline(L1_invline)
);
generate begin : gICacheInst
if (`WAYS > 1) begin
FT64_L1_icache uic1
(
.rst(rst),
920,6 → 1054,36
.invall(invic),
.invline(L1_invline)
);
end
else begin
assign ihit1 = 1'b1;
end
if (`WAYS > 2) begin
FT64_L1_icache uic2
(
.rst(rst),
.clk(clk),
.nxt(icnxt),
.wr(L1_wr2),
.en(L1_en),
`ifdef SUPPORT_SMT
.adr(icstate==IDLE||icstate==IC8 ? {pcr[5:0],pc2} : L1_adr),
`else
.adr(icstate==IDLE||icstate==IC8 ? {pcr[5:0],pc0plus12} : L1_adr),
`endif
.wadr(L1_adr),
.i(L2_rdat),
.o(insn2b),
.hit(ihit2),
.invall(invic),
.invline(L1_invline)
);
end
else
assign ihit2 = 1'b1;
end
endgenerate
 
FT64_L2_icache uic2
(
.rst(rst),
933,7 → 1097,7
.i(dat_i),
.err_i(errq),
.o(L2_dato),
.hit(ihit2),
.hit(ihitL2),
.invall(invic),
.invline()
);
950,7 → 1114,7
wire predict_takenC1;
wire predict_takenD1;
 
wire [31:0] btgtA, btgtB, btgtC, btgtD;
wire [`ABITS] btgtA, btgtB, btgtC, btgtD;
wire btbwr0 = iqentry_v[head0] && iqentry_done[head0] &&
(
iqentry_instr[head0][`INSTRUCTION_OP]==`JAL ||
962,49 → 1126,77
iqentry_instr[head1][`INSTRUCTION_OP]==`BRK ||
IsRTI(iqentry_instr[head1]));
 
`ifdef FCU_ENH
wire fcu_clk;
BUFGCE ufcuclk
(
.I(clk_i),
.CE(fcu_available),
.O(fcu_clk)
);
`endif
 
`ifdef FCU_ENH
FT64_BTB ubtb1
(
.rst(rst),
.wclk(clk),
.wr(btbwr0 | btbwr1),
.wadr(btbwr0 ? iqentry_pc[head0] : iqentry_pc[head1]),
.wdat(btbwr0 ? iqentry_a0[head0] : iqentry_a0[head1]),
.valid(btbwr0 ? iqentry_bt[head0] & iqentry_v[head0] : iqentry_bt[head1] & iqentry_v[head1]),
.rclk(~clk),
.pcA(fetchbufA_pc),
.btgtA(btgtA),
.pcB(fetchbufB_pc),
.btgtB(btgtB),
.pcC(fetchbufC_pc),
.btgtC(btgtC),
.pcD(fetchbufD_pc),
.btgtD(btgtD),
.npcA(BRKPC),
.npcB(BRKPC),
.npcC(BRKPC),
.npcD(BRKPC)
.rst(rst),
.wclk(fcu_clk),
.wr(btbwr0 | btbwr1),
.wadr(btbwr0 ? iqentry_pc[head0] : iqentry_pc[head1]),
.wdat(btbwr0 ? iqentry_a0[head0] : iqentry_a0[head1]),
.valid(btbwr0 ? iqentry_bt[head0] & iqentry_v[head0] : iqentry_bt[head1] & iqentry_v[head1]),
.rclk(~clk),
.pcA(fetchbufA_pc),
.btgtA(btgtA),
.pcB(fetchbufB_pc),
.btgtB(btgtB),
.pcC(fetchbufC_pc),
.btgtC(btgtC),
.pcD(fetchbufD_pc),
.btgtD(btgtD),
.npcA(BRKPC),
.npcB(BRKPC),
.npcC(BRKPC),
.npcD(BRKPC)
);
`else
// Branch targets are picked up by fetchbuf logic and need to be present.
// Without a target predictor they are just set to the reset address.
// This virtually guarantees a miss.
assign btgtA = RSTPC;
assign btgtB = RSTPC;
assign btgtC = RSTPC;
assign btgtD = RSTPC;
`endif
 
`ifdef FCU_ENH
FT64_BranchPredictor ubp1
(
.rst(rst),
.clk(clk),
.en(bpe),
.xisBranch0(iqentry_br[head0] & commit0_v),
.xisBranch1(iqentry_br[head1] & commit1_v),
.pcA(fetchbufA_pc),
.pcB(fetchbufB_pc),
.pcC(fetchbufC_pc),
.pcD(fetchbufD_pc),
.xpc0(iqentry_pc[head0]),
.xpc1(iqentry_pc[head1]),
.takb0(commit0_v & iqentry_res[head0][0]),
.takb1(commit1_v & iqentry_res[head1][0]),
.predict_takenA(predict_takenA),
.predict_takenB(predict_takenB),
.predict_takenC(predict_takenC),
.predict_takenD(predict_takenD)
.rst(rst),
.clk(fcu_clk),
.en(bpe),
.xisBranch0(iqentry_br[head0] & commit0_v),
.xisBranch1(iqentry_br[head1] & commit1_v),
.pcA(fetchbufA_pc),
.pcB(fetchbufB_pc),
.pcC(fetchbufC_pc),
.pcD(fetchbufD_pc),
.xpc0(iqentry_pc[head0]),
.xpc1(iqentry_pc[head1]),
.takb0(commit0_v & iqentry_res[head0][0]),
.takb1(commit1_v & iqentry_res[head1][0]),
.predict_takenA(predict_takenA),
.predict_takenB(predict_takenB),
.predict_takenC(predict_takenC),
.predict_takenD(predict_takenD)
);
`else
// Predict based on sign of displacement
assign predict_takenA = fetchbufA_instr[31];
assign predict_takenB = fetchbufB_instr[31];
assign predict_takenC = fetchbufC_instr[31];
assign predict_takenD = fetchbufD_instr[31];
`endif
 
//-----------------------------------------------------------------------------
// Debug
1249,24 → 1441,40
wire hirq = (irq_i > im) && ~int_commit;
always @*
if (hirq)
insn0 <= {8'd0,4'd0,irq_i,1'b0,vec_i,2'b00,`BRK};
insn0 <= {8'd0,3'd0,irq_i,1'b0,vec_i,2'b00,`BRK};
else if (phit) begin
if (insn0a[`INSTRUCTION_OP]==`BRK && insn0a[23:20]==4'd0)
insn0 <= {8'd1,4'd0,3'b0,1'b0,`FLT_PRIV,2'b00,`BRK};
if (insn0a[`INSTRUCTION_OP]==`BRK && insn0a[23:21]==3'd0 && insn0a[7:6]==2'b00)
insn0 <= {8'd1,3'd0,4'b0,1'b0,`FLT_PRIV,2'b00,`BRK};
else
insn0 <= insn0a;
end
else
insn0 <= `NOP_INSN;
generate begin : gInsnMux
if (`WAYS > 1) begin
always @*
if (phit) begin
if (insn1a[`INSTRUCTION_OP]==`BRK && insn1a[23:20]==4'd0)
insn1 <= {8'd1,4'd0,3'b0,1'b0,`FLT_PRIV,2'b00,`BRK};
if (insn1a[`INSTRUCTION_OP]==`BRK && insn1a[23:21]==3'd0 && insn1a[7:6]==2'b00)
insn1 <= {8'd1,3'd0,4'b0,1'b0,`FLT_PRIV,2'b00,`BRK};
else
insn1 <= insn1a;
end
else
insn1 <= `NOP_INSN;
end
if (`WAYS > 2) begin
	// Instruction mux for the third fetch way (insn2).
	//   phit == 0 : feed a NOP into the pipeline.
	//   phit == 1 : pass insn2a through, unless it is a BRK whose bits [23:21]
	//               and [7:6] are all zero, in which case it is replaced by a
	//               BRK carrying the `FLT_PRIV cause code.
	// Every field test must look at insn2a (the instruction being muxed).
	// The [23:21] test previously read insn1a, so the substitution for way 2
	// depended on whatever instruction happened to be in way 1.
	// Non-blocking assignment is kept to match the insn0 / insn1 muxes.
	always @*
		if (phit) begin
			if (insn2a[`INSTRUCTION_OP]==`BRK && insn2a[23:21]==3'd0 && insn2a[7:6]==2'b00)
				insn2 <= {8'd1,3'd0,4'b0,1'b0,`FLT_PRIV,2'b00,`BRK};
			else
				insn2 <= insn2a;
		end
		else
			insn2 <= `NOP_INSN;
end
end
endgenerate
 
wire [63:0] dc0_out, dc1_out, dc2_out;
assign rdat0 = dram0_unc ? xdati : dc0_out;
1278,7 → 1486,7
wire dhit0, dhit1, dhit2;
wire dhit00, dhit10, dhit20;
wire dhit01, dhit11, dhit21;
reg [31:0] dc_wadr;
reg [`ABITS] dc_wadr;
reg [63:0] dc_wdat;
reg isStore;
 
1298,6 → 1506,8
.hit0(dhit0),
.hit1()
);
generate begin : gDCacheInst
if (`NUM_MEM > 1) begin
FT64_dcache udc1
(
.rst(rst),
1314,6 → 1524,8
.hit0(dhit1),
.hit1()
);
end
if (`NUM_MEM > 2) begin
FT64_dcache udc2
(
.rst(rst),
1330,6 → 1542,9
.hit0(dhit2),
.hit1()
);
end
end
endgenerate
 
function [`QBITS] idp1;
input [`QBITS] id;
1473,8 → 1688,8
3'd2: fnRa = {rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
3'd3: fnRa = {rs_stack[thrd][5:0],1'b0,isn[`INSTRUCTION_RA]};
3'd4: fnRa = {rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
3'd5: fnRa = {fp_rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
3'd6: fnRa = {fp_rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
3'd5: fnRa = {fp1_rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
3'd6: fnRa = {fp1_rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
default:fnRa = {rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
endcase
`VMOV:
1484,7 → 1699,7
endcase
default: fnRa = {rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
endcase
`FLOAT: fnRa = {fp_rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
`FLOAT: fnRa = {fp1_rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
default: fnRa = {rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
endcase
endfunction
1519,7 → 1734,7
fnRb = {isn[25],isn[22]}==2'b00 ? {rgs[thrd],1'b0,isn[`INSTRUCTION_RB]} : {vqei,1'b1,isn[`INSTRUCTION_RB]};
default: fnRb = {vqei,1'b1,isn[`INSTRUCTION_RB]};
endcase
`FLOAT: fnRb = {fp_rgs[thrd],1'b0,isn[`INSTRUCTION_RB]};
`FLOAT: fnRb = {fp1_rgs[thrd],1'b0,isn[`INSTRUCTION_RB]};
default: fnRb = {rgs[thrd],1'b0,isn[`INSTRUCTION_RB]};
endcase
endfunction
1542,7 → 1757,7
`VSxx,`VSxxS,`VSxxU,`VSxxSU: fnRc = {6'h3F,1'b1,2'b0,isn[18:16]};
default: fnRc = {vqei,1'b1,isn[`INSTRUCTION_RC]};
endcase
`FLOAT: fnRc = {fp_rgs[thrd],1'b0,isn[`INSTRUCTION_RC]};
`FLOAT: fnRc = {fp1_rgs[thrd],1'b0,isn[`INSTRUCTION_RC]};
default: fnRc = {rgs[thrd],1'b0,isn[`INSTRUCTION_RC]};
endcase
endfunction
1578,9 → 1793,9
3'd1: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RB]};
3'd2: fnRt = {rs_stack[thrd][5:0],1'b0,isn[`INSTRUCTION_RB]};
3'd3: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RB]};
3'd4: fnRt = {fp_rgs[thrd],1'b0,isn[`INSTRUCTION_RB]};
3'd4: fnRt = {fp1_rgs[thrd],1'b0,isn[`INSTRUCTION_RB]};
3'd5: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RB]};
3'd6: fnRt = {fp_rgs[thrd],1'b0,isn[`INSTRUCTION_RB]};
3'd6: fnRt = {fp1_rgs[thrd],1'b0,isn[`INSTRUCTION_RB]};
default:fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RB]};
endcase
`VMOV:
1622,7 → 1837,7
`FTX,`FCX,`FEX,`FDX,`FRM:
fnRt = 12'd0;
`FSYNC: fnRt = 12'd0;
default: fnRt = {fp_rgs[thrd],1'b0,isn[`INSTRUCTION_RC]};
default: fnRt = {fp1_rgs[thrd],1'b0,isn[`INSTRUCTION_RC]};
endcase
`BRK: fnRt = 12'd0;
`REX: fnRt = 12'd0;
1675,8 → 1890,8
3'd2: fnRa = {rgs,1'b0,isn[`INSTRUCTION_RA]};
3'd3: fnRa = {rs_stack[5:0],1'b0,isn[`INSTRUCTION_RA]};
3'd4: fnRa = {rgs,1'b0,isn[`INSTRUCTION_RA]};
3'd5: fnRa = {fp_rgs,1'b0,isn[`INSTRUCTION_RA]};
3'd6: fnRa = {fp_rgs,1'b0,isn[`INSTRUCTION_RA]};
3'd5: fnRa = {fp1_rgs,1'b0,isn[`INSTRUCTION_RA]};
3'd6: fnRa = {fp1_rgs,1'b0,isn[`INSTRUCTION_RA]};
default:fnRa = {rgs,1'b0,isn[`INSTRUCTION_RA]};
endcase
`VMOV:
1686,7 → 1901,7
endcase
default: fnRa = {rgs,1'b0,isn[`INSTRUCTION_RA]};
endcase
`FLOAT: fnRa = {fp_rgs,1'b0,isn[`INSTRUCTION_RA]};
`FLOAT: fnRa = {fp1_rgs,1'b0,isn[`INSTRUCTION_RA]};
default: fnRa = {rgs,1'b0,isn[`INSTRUCTION_RA]};
endcase
endfunction
1721,7 → 1936,7
fnRb = {isn[25],isn[22]}==2'b00 ? {rgs,1'b0,isn[`INSTRUCTION_RB]} : {vqei,1'b1,isn[`INSTRUCTION_RB]};
default: fnRb = {vqei,1'b1,isn[`INSTRUCTION_RB]};
endcase
`FLOAT: fnRb = {fp_rgs,1'b0,isn[`INSTRUCTION_RB]};
`FLOAT: fnRb = {fp1_rgs,1'b0,isn[`INSTRUCTION_RB]};
default: fnRb = {rgs,1'b0,isn[`INSTRUCTION_RB]};
endcase
endfunction
1744,7 → 1959,7
`VSxx,`VSxxS,`VSxxU,`VSxxSU: fnRc = {6'h3F,1'b1,2'b0,isn[18:16]};
default: fnRc = {vqei,1'b1,isn[`INSTRUCTION_RC]};
endcase
`FLOAT: fnRc = {fp_rgs,1'b0,isn[`INSTRUCTION_RC]};
`FLOAT: fnRc = {fp1_rgs,1'b0,isn[`INSTRUCTION_RC]};
default: fnRc = {rgs,1'b0,isn[`INSTRUCTION_RC]};
endcase
endfunction
1798,9 → 2013,9
3'd1: fnRt = {rgs,1'b0,isn[`INSTRUCTION_RB]};
3'd2: fnRt = {rs_stack[5:0],1'b0,isn[`INSTRUCTION_RB]};
3'd3: fnRt = {rgs,1'b0,isn[`INSTRUCTION_RB]};
3'd4: fnRt = {fp_rgs,1'b0,isn[`INSTRUCTION_RB]};
3'd4: fnRt = {fp1_rgs,1'b0,isn[`INSTRUCTION_RB]};
3'd5: fnRt = {rgs,1'b0,isn[`INSTRUCTION_RB]};
3'd6: fnRt = {fp_rgs,1'b0,isn[`INSTRUCTION_RB]};
3'd6: fnRt = {fp1_rgs,1'b0,isn[`INSTRUCTION_RB]};
default:fnRt = {rgs,1'b0,isn[`INSTRUCTION_RB]};
endcase
`VMOV:
1842,7 → 2057,7
`FTX,`FCX,`FEX,`FDX,`FRM:
fnRt = 12'd0;
`FSYNC: fnRt = 12'd0;
default: fnRt = {fp_rgs,1'b0,isn[`INSTRUCTION_RC]};
default: fnRt = {fp1_rgs,1'b0,isn[`INSTRUCTION_RC]};
endcase
`BRK: fnRt = 12'd0;
`REX: fnRt = 12'd0;
2481,7 → 2696,7
input [47:0] isn;
casez(isn[`INSTRUCTION_OP])
`BRK: IsFlowCtrl = TRUE;
`RR: case(isn[`INSTRUCTION_S2])
`R2: case(isn[`INSTRUCTION_S2])
`RTI: IsFlowCtrl = TRUE;
default: IsFlowCtrl = FALSE;
endcase
2539,11 → 2754,6
IsFSync = (isn[`INSTRUCTION_OP]==`FLOAT && isn[`INSTRUCTION_L2]==2'b00 && isn[`INSTRUCTION_S2]==`FSYNC);
endfunction
 
function IsMemdb;
input [47:0] isn;
IsMemdb = (isn[`INSTRUCTION_OP]==`R2 && isn[`INSTRUCTION_L2]==2'b00 && isn[`INSTRUCTION_S2]==`R1 && isn[22:18]==`MEMDB);
endfunction
 
function IsMemsb;
input [47:0] isn;
IsMemsb = (isn[`INSTRUCTION_OP]==`RR && isn[`INSTRUCTION_L2]==2'b00 && isn[`INSTRUCTION_S2]==`R1 && isn[22:18]==`MEMSB);
2737,7 → 2947,7
 
function [7:0] fnSelect;
input [47:0] ins;
input [31:0] adr;
input [`ABITS] adr;
begin
case(ins[`INSTRUCTION_OP])
`MEMNDX:
2907,7 → 3117,7
*/
function [63:0] fnDati;
input [47:0] ins;
input [31:0] adr;
input [`ABITS] adr;
input [63:0] dat;
case(ins[`INSTRUCTION_OP])
`MEMNDX:
3141,7 → 3351,7
fnDato = dat;
`SB: fnDato = {8{dat[7:0]}};
`Sx:
case(isn[20:18])
casez(isn[20:18])
3'b100: fnDato = dat;
3'b?10: fnDato = {2{dat[31:0]}};
3'b??1: fnDato = {4{dat[15:0]}};
3233,70 → 3443,72
 
FT64_fetchbuf #(AMSB,RSTPC) ufb1
(
.rst(rst),
.clk4x(clk4x),
.clk(clk),
.cs_i(adr_o[31:16]==16'hFFFF),
.cyc_i(cyc_o),
.stb_i(stb_o),
.ack_o(dc_ack),
.we_i(we_o),
.adr_i(adr_o[15:0]),
.dat_i(dat_o[31:0]),
.hirq(hirq),
.regLR(regLR),
.thread_en(thread_en),
.insn0(insn0),
.insn1(insn1),
.phit(phit),
.threadx(threadx),
.branchmiss(branchmiss),
.misspc(misspc),
.branchmiss_thrd(branchmiss_thrd),
.predict_takenA(predict_takenA),
.predict_takenB(predict_takenB),
.predict_takenC(predict_takenC),
.predict_takenD(predict_takenD),
.predict_taken0(predict_taken0),
.predict_taken1(predict_taken1),
.queued1(queued1),
.queued2(queued2),
.queuedNop(queuedNop),
.pc0(pc0),
.pc1(pc1),
.fetchbuf(fetchbuf),
.fetchbufA_v(fetchbufA_v),
.fetchbufB_v(fetchbufB_v),
.fetchbufC_v(fetchbufC_v),
.fetchbufD_v(fetchbufD_v),
.fetchbufA_pc(fetchbufA_pc),
.fetchbufB_pc(fetchbufB_pc),
.fetchbufC_pc(fetchbufC_pc),
.fetchbufD_pc(fetchbufD_pc),
.fetchbufA_instr(fetchbufA_instr),
.fetchbufB_instr(fetchbufB_instr),
.fetchbufC_instr(fetchbufC_instr),
.fetchbufD_instr(fetchbufD_instr),
.fetchbuf0_instr(fetchbuf0_instr),
.fetchbuf1_instr(fetchbuf1_instr),
.fetchbuf0_thrd(fetchbuf0_thrd),
.fetchbuf1_thrd(fetchbuf1_thrd),
.fetchbuf0_pc(fetchbuf0_pc),
.fetchbuf1_pc(fetchbuf1_pc),
.fetchbuf0_v(fetchbuf0_v),
.fetchbuf1_v(fetchbuf1_v),
.fetchbuf0_insln(fetchbuf0_insln),
.fetchbuf1_insln(fetchbuf1_insln),
.codebuf0(codebuf[insn0[21:16]]),
.codebuf1(codebuf[insn1[21:16]]),
.btgtA(btgtA),
.btgtB(btgtB),
.btgtC(btgtC),
.btgtD(btgtD),
.nop_fetchbuf(nop_fetchbuf),
.take_branch0(take_branch0),
.take_branch1(take_branch1),
.stompedRets(stompedOnRets)
.rst(rst),
.clk4x(clk4x),
.clk(clk),
.fcu_clk(fcu_clk),
.cs_i(adr_o[31:16]==16'hFFFF),
.cyc_i(cyc_o),
.stb_i(stb_o),
.ack_o(dc_ack),
.we_i(we_o),
.adr_i(adr_o[15:0]),
.dat_i(dat_o[31:0]),
.hirq(hirq),
.regLR(regLR),
.thread_en(thread_en),
.insn0(insn0),
.insn1(insn1),
.phit(phit),
.threadx(threadx),
.branchmiss(branchmiss),
.misspc(misspc),
.branchmiss_thrd(branchmiss_thrd),
.predict_takenA(predict_takenA),
.predict_takenB(predict_takenB),
.predict_takenC(predict_takenC),
.predict_takenD(predict_takenD),
.predict_taken0(predict_taken0),
.predict_taken1(predict_taken1),
.queued1(queued1),
.queued2(queued2),
.queuedNop(queuedNop),
.pc0(pc0),
.pc1(pc1),
.fetchbuf(fetchbuf),
.fetchbufA_v(fetchbufA_v),
.fetchbufB_v(fetchbufB_v),
.fetchbufC_v(fetchbufC_v),
.fetchbufD_v(fetchbufD_v),
.fetchbufA_pc(fetchbufA_pc),
.fetchbufB_pc(fetchbufB_pc),
.fetchbufC_pc(fetchbufC_pc),
.fetchbufD_pc(fetchbufD_pc),
.fetchbufA_instr(fetchbufA_instr),
.fetchbufB_instr(fetchbufB_instr),
.fetchbufC_instr(fetchbufC_instr),
.fetchbufD_instr(fetchbufD_instr),
.fetchbuf0_instr(fetchbuf0_instr),
.fetchbuf1_instr(fetchbuf1_instr),
.fetchbuf0_thrd(fetchbuf0_thrd),
.fetchbuf1_thrd(fetchbuf1_thrd),
.fetchbuf0_pc(fetchbuf0_pc),
.fetchbuf1_pc(fetchbuf1_pc),
.fetchbuf0_v(fetchbuf0_v),
.fetchbuf1_v(fetchbuf1_v),
.fetchbuf0_insln(fetchbuf0_insln),
.fetchbuf1_insln(fetchbuf1_insln),
.codebuf0(codebuf[insn0[21:16]]),
.codebuf1(codebuf[insn1[21:16]]),
.btgtA(btgtA),
.btgtB(btgtB),
.btgtC(btgtC),
.btgtD(btgtD),
.nop_fetchbuf(nop_fetchbuf),
.take_branch0(take_branch0),
.take_branch1(take_branch1),
.stompedRets(stompedOnRets),
.panic(fb_panic)
);
 
 
3676,14 → 3888,14
? iqentry_7_livetarget
: {PREGS{1'b0}};
 
assign iqentry_source[0] = | iqentry_0_latestID,
iqentry_source[1] = | iqentry_1_latestID,
iqentry_source[2] = | iqentry_2_latestID,
iqentry_source[3] = | iqentry_3_latestID,
iqentry_source[4] = | iqentry_4_latestID,
iqentry_source[5] = | iqentry_5_latestID,
iqentry_source[6] = | iqentry_6_latestID,
iqentry_source[7] = | iqentry_7_latestID;
assign iqentry_source[0] = | iqentry_0_latestID,
iqentry_source[1] = | iqentry_1_latestID,
iqentry_source[2] = | iqentry_2_latestID,
iqentry_source[3] = | iqentry_3_latestID,
iqentry_source[4] = | iqentry_4_latestID,
iqentry_source[5] = | iqentry_5_latestID,
iqentry_source[6] = | iqentry_6_latestID,
iqentry_source[7] = | iqentry_7_latestID;
 
 
reg vqueued2;
3696,16 → 3908,16
assign Rc1 = fnRc(fetchbuf1_instr,vqueued2 ? vqe0 + 1 : vqe1,fetchbuf1_thrd) | {fetchbuf1_thrd,7'b0};
assign Rt1 = fnRt(fetchbuf1_instr,vqueued2 ? vqet0 + 1 : vqet1,vl,fetchbuf1_thrd) | {fetchbuf1_thrd,7'b0};
 
//
// additional logic for ISSUE
//
// for the moment, we look at ALU-input buffers to allow back-to-back issue of
// dependent instructions ... we do not, however, look ahead for DRAM requests
// that will become valid in the next cycle. instead, these have to propagate
// their results into the IQ entry directly, at which point it becomes issue-able
//
//
// additional logic for ISSUE
//
// for the moment, we look at ALU-input buffers to allow back-to-back issue of
// dependent instructions ... we do not, however, look ahead for DRAM requests
// that will become valid in the next cycle. instead, these have to propagate
// their results into the IQ entry directly, at which point it becomes issue-able
//
 
// note that, for all intents & purposes, iqentry_done == iqentry_agen ... no need to duplicate
// note that, for all intents & purposes, iqentry_done == iqentry_agen ... no need to duplicate
 
wire [QENTRIES-1:0] args_valid;
wire [QENTRIES-1:0] could_issue;
3716,16 → 3928,27
begin
assign args_valid[g] =
(iqentry_a1_v[g]
`ifdef FU_BYPASS
|| (iqentry_a1_s[g] == alu0_sourceid && alu0_dataready)
|| (iqentry_a1_s[g] == alu1_sourceid && alu1_dataready))
|| ((iqentry_a1_s[g] == alu1_sourceid && alu1_dataready) && (`NUM_ALU > 1))
|| ((iqentry_a1_s[g] == fpu1_sourceid && fpu1_dataready) && (`NUM_FPU > 0))
`endif
)
&& (iqentry_a2_v[g]
|| (iqentry_mem[g] & ~iqentry_agen[g] & ~iqentry_memndx[g]) // a2 needs to be valid for indexed instruction
`ifdef FU_BYPASS
|| (iqentry_a2_s[g] == alu0_sourceid && alu0_dataready)
|| (iqentry_a2_s[g] == alu1_sourceid && alu1_dataready))
|| ((iqentry_a2_s[g] == alu1_sourceid && alu1_dataready) && (`NUM_ALU > 1))
|| ((iqentry_a2_s[g] == fpu1_sourceid && fpu1_dataready) && (`NUM_FPU > 0))
`endif
)
&& (iqentry_a3_v[g]
// || (iqentry_mem[g] & ~iqentry_agen[g])
`ifdef FU_BYPASS
|| (iqentry_a3_s[g] == alu0_sourceid && alu0_dataready)
|| (iqentry_a3_s[g] == alu1_sourceid && alu1_dataready))
|| ((iqentry_a3_s[g] == alu1_sourceid && alu1_dataready) && (`NUM_ALU > 1))
`endif
)
;
 
assign could_issue[g] = iqentry_v[g] && !iqentry_done[g] && !iqentry_out[g]
3761,30 → 3984,47
 
always @*
begin
iqentry_id1issue = 8'h00;
iqentry_id2issue = 8'h00;
// aluissue is a task
iqentry_id1issue = {QENTRIES{1'b0}};
if (id1_available) begin
for (n = 0; n < QENTRIES; n = n + 1)
if (could_issueid[heads[n]] && iqentry_id1issue==8'h00)
if (could_issueid[heads[n]] && iqentry_id1issue=={QENTRIES{1'b0}})
iqentry_id1issue[heads[n]] = `TRUE;
end
 
if (id2_available) begin
for (n = 0; n < QENTRIES; n = n + 1)
if (could_issueid[heads[n]] && !iqentry_id1issue[heads[n]] && iqentry_id2issue==8'h00)
iqentry_id2issue[heads[n]] = `TRUE;
end
// Issue selection for the optional second and third instruction decode
// units. Each always block is only generated when `NUM_IDU is large enough.
// A block clears its one-hot issue vector, then (if its decoder is
// available) walks heads[0] .. heads[QENTRIES-1] in order and marks the first
// entry that has could_issueid set and has not already been claimed by a
// lower-numbered decoder. The "== {QENTRIES{1'b0}}" guard stops the scan from
// setting more than one bit.
// NOTE(review): heads[] is presumably ordered oldest-first - confirm.
// NOTE(review): when `NUM_IDU is 1 nothing in this block assigns
// iqentry_id2issue (and likewise iqentry_id3issue when `NUM_IDU < 3); confirm
// these vectors are unused or driven elsewhere in that configuration.
generate begin : gIDUIssue
if (`NUM_IDU > 1) begin
// Second decoder: skip entries already picked for decoder 1.
always @*
begin
iqentry_id2issue = {QENTRIES{1'b0}};
if (id2_available) begin
for (n = 0; n < QENTRIES; n = n + 1)
if (could_issueid[heads[n]] && !iqentry_id1issue[heads[n]] && iqentry_id2issue=={QENTRIES{1'b0}})
iqentry_id2issue[heads[n]] = `TRUE;
end
end
end
if (`NUM_IDU > 2) begin
// Third decoder: skip entries already picked for decoder 1 or 2.
always @*
begin
iqentry_id3issue = {QENTRIES{1'b0}};
if (id3_available) begin
for (n = 0; n < QENTRIES; n = n + 1)
if (could_issueid[heads[n]]
&& !iqentry_id1issue[heads[n]]
&& !iqentry_id2issue[heads[n]]
&& iqentry_id3issue=={QENTRIES{1'b0}})
iqentry_id3issue[heads[n]] = `TRUE;
end
end
end
end
endgenerate
 
always @*
begin
iqentry_alu0_issue = 8'h00;
iqentry_alu1_issue = 8'h00;
iqentry_alu0_issue = {QENTRIES{1'b0}};
iqentry_alu1_issue = {QENTRIES{1'b0}};
// aluissue is a task
// aluissue is a task
if (alu0_available & alu0_idle) begin
if (could_issue[head0] && iqentry_alu[head0]) begin
iqentry_alu0_issue[head0] = `TRUE;
3908,7 → 4148,7
`endif
end
 
if (alu1_available & alu1_idle) begin
if (alu1_available && alu1_idle && `NUM_ALU > 1) begin
if ((could_issue & ~iqentry_alu0_issue & ~iqentry_alu0) != 8'h00) begin
if (could_issue[head0] && iqentry_alu[head0]
&& !iqentry_alu0[head0] // alu0only
4047,23 → 4287,20
 
always @*
begin
iqentry_fpu_issue = 8'h00;
// fpuissue(fpu_idle,2'b00);
if (fpu_idle) begin
iqentry_fpu1_issue = {QENTRIES{1'b0}};
// fpu1issue(fpu1_idle,2'b00);
if (fpu1_idle && `NUM_FPU > 0) begin
if (could_issue[head0] && iqentry_fpu[head0]) begin
iqentry_fpu_issue[head0] = `TRUE;
iqentry_fpu_islot[head0] = 2'b00;
iqentry_fpu1_issue[head0] = `TRUE;
end
else if (could_issue[head1] && iqentry_fpu[head1])
begin
iqentry_fpu_issue[head1] = `TRUE;
iqentry_fpu_islot[head1] = 2'b00;
iqentry_fpu1_issue[head1] = `TRUE;
end
else if (could_issue[head2] && iqentry_fpu[head2]
&& (!(iqentry_v[head1] && (iqentry_sync[head1] || iqentry_fsync[head1])) || !iqentry_v[head0])
) begin
iqentry_fpu_issue[head2] = `TRUE;
iqentry_fpu_islot[head2] = 2'b00;
iqentry_fpu1_issue[head2] = `TRUE;
end
else if (could_issue[head3] && iqentry_fpu[head3]
&& (!(iqentry_v[head1] && (iqentry_sync[head1] || iqentry_fsync[head1])) || !iqentry_v[head0])
4072,8 → 4309,7
&& (!iqentry_v[head1]))
)
) begin
iqentry_fpu_issue[head3] = `TRUE;
iqentry_fpu_islot[head3] = 2'b00;
iqentry_fpu1_issue[head3] = `TRUE;
end
else if (could_issue[head4] && iqentry_fpu[head4]
&& (!(iqentry_v[head1] && (iqentry_sync[head1] || iqentry_fsync[head1])) || !iqentry_v[head0])
4087,8 → 4323,7
&& (!iqentry_v[head2]))
)
) begin
iqentry_fpu_issue[head4] = `TRUE;
iqentry_fpu_islot[head4] = 2'b00;
iqentry_fpu1_issue[head4] = `TRUE;
end
else if (could_issue[head5] && iqentry_fpu[head5]
&& (!(iqentry_v[head1] && (iqentry_sync[head1] || iqentry_fsync[head1])) || !iqentry_v[head0])
4108,8 → 4343,7
&& (!iqentry_v[head3]))
)
) begin
iqentry_fpu_issue[head5] = `TRUE;
iqentry_fpu_islot[head5] = 2'b00;
iqentry_fpu1_issue[head5] = `TRUE;
end
`ifdef FULL_ISSUE_LOGIC
else if (could_issue[head6] && iqentry_fpu[head6]
4137,8 → 4371,7
&& (!iqentry_v[head4]))
)
) begin
iqentry_fpu_issue[head6] = `TRUE;
iqentry_fpu_islot[head6] = 2'b00;
iqentry_fpu1_issue[head6] = `TRUE;
end
else if (could_issue[head7] && iqentry_fpu[head7]
&& (!(iqentry_v[head1] && (iqentry_sync[head1] || iqentry_fsync[head1])) || !iqentry_v[head0])
4174,13 → 4407,139
)
)
begin
iqentry_fpu_issue[head7] = `TRUE;
iqentry_fpu_islot[head7] = 2'b00;
iqentry_fpu1_issue[head7] = `TRUE;
end
`endif
end
end
 
// Issue selection for the second floating-point unit.
// iqentry_fpu2_issue is cleared, then - only if fpu2_idle is set and
// `NUM_FPU > 1 - at most one bit is set by the priority chain below.
// Candidates are examined in the order head0, head1, ... head5 (head6 and
// head7 as well when FULL_ISSUE_LOGIC is defined). A candidate headN is
// chosen when:
//   - could_issue[headN] and iqentry_fpu[headN] are set,
//   - it was not already selected in iqentry_fpu1_issue, and
//   - for N >= 2, every entry headK with 1 <= K < N is either not a valid
//     sync/fsync entry, or all entries head0 .. head(K-1) are invalid.
// Because this is an else-if chain, only the first matching head is issued.
always @*
begin
iqentry_fpu2_issue = {QENTRIES{1'b0}};
// fpu2issue(fpu2_idle,2'b00);
if (fpu2_idle && `NUM_FPU > 1) begin
// head0 / head1: no sync checks are applied to these two positions.
if (could_issue[head0] && iqentry_fpu[head0] && !iqentry_fpu1_issue[head0]) begin
iqentry_fpu2_issue[head0] = `TRUE;
end
else if (could_issue[head1] && iqentry_fpu[head1] && !iqentry_fpu1_issue[head1])
begin
iqentry_fpu2_issue[head1] = `TRUE;
end
// head2: blocked by a valid sync/fsync at head1 unless head0 is invalid.
else if (could_issue[head2] && iqentry_fpu[head2] && !iqentry_fpu1_issue[head2]
&& (!(iqentry_v[head1] && (iqentry_sync[head1] || iqentry_fsync[head1])) || !iqentry_v[head0])
) begin
iqentry_fpu2_issue[head2] = `TRUE;
end
// head3: same rule applied to head1 and head2.
else if (could_issue[head3] && iqentry_fpu[head3] && !iqentry_fpu1_issue[head3]
&& (!(iqentry_v[head1] && (iqentry_sync[head1] || iqentry_fsync[head1])) || !iqentry_v[head0])
&& (!(iqentry_v[head2] && (iqentry_sync[head2] || iqentry_fsync[head2])) ||
((!iqentry_v[head0])
&& (!iqentry_v[head1]))
)
) begin
iqentry_fpu2_issue[head3] = `TRUE;
end
// head4: same rule applied to head1 .. head3.
else if (could_issue[head4] && iqentry_fpu[head4] && !iqentry_fpu1_issue[head4]
&& (!(iqentry_v[head1] && (iqentry_sync[head1] || iqentry_fsync[head1])) || !iqentry_v[head0])
&& (!(iqentry_v[head2] && (iqentry_sync[head2] || iqentry_fsync[head2])) ||
((!iqentry_v[head0])
&& (!iqentry_v[head1]))
)
&& (!(iqentry_v[head3] && (iqentry_sync[head3] || iqentry_fsync[head3])) ||
((!iqentry_v[head0])
&& (!iqentry_v[head1])
&& (!iqentry_v[head2]))
)
) begin
iqentry_fpu2_issue[head4] = `TRUE;
end
// head5: same rule applied to head1 .. head4.
else if (could_issue[head5] && iqentry_fpu[head5] && !iqentry_fpu1_issue[head5]
&& (!(iqentry_v[head1] && (iqentry_sync[head1] || iqentry_fsync[head1])) || !iqentry_v[head0])
&& (!(iqentry_v[head2] && (iqentry_sync[head2] || iqentry_fsync[head2])) ||
((!iqentry_v[head0])
&& (!iqentry_v[head1]))
)
&& (!(iqentry_v[head3] && (iqentry_sync[head3] || iqentry_fsync[head3])) ||
((!iqentry_v[head0])
&& (!iqentry_v[head1])
&& (!iqentry_v[head2]))
)
&& (!(iqentry_v[head4] && (iqentry_sync[head4] || iqentry_fsync[head4])) ||
((!iqentry_v[head0])
&& (!iqentry_v[head1])
&& (!iqentry_v[head2])
&& (!iqentry_v[head3]))
)
) begin
iqentry_fpu2_issue[head5] = `TRUE;
end
// head6 / head7 are only considered when FULL_ISSUE_LOGIC is defined.
`ifdef FULL_ISSUE_LOGIC
else if (could_issue[head6] && iqentry_fpu[head6] && !iqentry_fpu1_issue[head6]
&& (!(iqentry_v[head1] && (iqentry_sync[head1] || iqentry_fsync[head1])) || !iqentry_v[head0])
&& (!(iqentry_v[head2] && (iqentry_sync[head2] || iqentry_fsync[head2])) ||
((!iqentry_v[head0])
&& (!iqentry_v[head1]))
)
&& (!(iqentry_v[head3] && (iqentry_sync[head3] || iqentry_fsync[head3])) ||
((!iqentry_v[head0])
&& (!iqentry_v[head1])
&& (!iqentry_v[head2]))
)
&& (!(iqentry_v[head4] && (iqentry_sync[head4] || iqentry_fsync[head4])) ||
((!iqentry_v[head0])
&& (!iqentry_v[head1])
&& (!iqentry_v[head2])
&& (!iqentry_v[head3]))
)
&& (!(iqentry_v[head5] && (iqentry_sync[head5] || iqentry_fsync[head5])) ||
((!iqentry_v[head0])
&& (!iqentry_v[head1])
&& (!iqentry_v[head2])
&& (!iqentry_v[head3])
&& (!iqentry_v[head4]))
)
) begin
iqentry_fpu2_issue[head6] = `TRUE;
end
else if (could_issue[head7] && iqentry_fpu[head7] && !iqentry_fpu1_issue[head7]
&& (!(iqentry_v[head1] && (iqentry_sync[head1] || iqentry_fsync[head1])) || !iqentry_v[head0])
&& (!(iqentry_v[head2] && (iqentry_sync[head2] || iqentry_fsync[head2])) ||
((!iqentry_v[head0])
&& (!iqentry_v[head1]))
)
&& (!(iqentry_v[head3] && (iqentry_sync[head3] || iqentry_fsync[head3])) ||
((!iqentry_v[head0])
&& (!iqentry_v[head1])
&& (!iqentry_v[head2]))
)
&& (!(iqentry_v[head4] && (iqentry_sync[head4] || iqentry_fsync[head4])) ||
((!iqentry_v[head0])
&& (!iqentry_v[head1])
&& (!iqentry_v[head2])
&& (!iqentry_v[head3]))
)
&& (!(iqentry_v[head5] && (iqentry_sync[head5] || iqentry_fsync[head5])) ||
((!iqentry_v[head0])
&& (!iqentry_v[head1])
&& (!iqentry_v[head2])
&& (!iqentry_v[head3])
&& (!iqentry_v[head4]))
)
&& (!(iqentry_v[head6] && (iqentry_sync[head6] || iqentry_fsync[head6])) ||
((!iqentry_v[head0])
&& (!iqentry_v[head1])
&& (!iqentry_v[head2])
&& (!iqentry_v[head3])
&& (!iqentry_v[head4])
&& (!iqentry_v[head5]))
)
)
begin
iqentry_fpu2_issue[head7] = `TRUE;
end
`endif
end
end
 
wire [QENTRIES-1:0] nextqd;
// Next queue id
4373,7 → 4732,7
// However, if the queue is full then issue anyway. A branch miss will likely occur.
always @*//(could_issue or head0 or head1 or head2 or head3 or head4 or head5 or head6 or head7)
begin
iqentry_fcu_issue = 8'h00;
iqentry_fcu_issue = {QENTRIES{1'b0}};
if (fcu_done) begin
if (could_issue[head0] && iqentry_fc[head0] && nextqd[head0]) begin
iqentry_fcu_issue[head0] = `TRUE;
4511,11 → 4870,12
issue_count = issue_count + 1;
 
memissue[ head1 ] = ~iqentry_stomp[head1] && iqentry_memready[ head1 ] // addr and data are valid
&& issue_count < `NUM_MEM
// ... and no preceding instruction is ready to go
//&& ~iqentry_memready[head0]
// ... and there is no address-overlap with any preceding instruction
&& (!iqentry_mem[head0] || (iqentry_agen[head0] & iqentry_out[head0])
|| (iqentry_a1_v[head0] && iqentry_a1[head1] != iqentry_a1[head0]))
|| (iqentry_a1_v[head0] && iqentry_a1[head1][AMSB:3] != iqentry_a1[head0][AMSB:3]))
// ... if a release, any prior memory ops must be done before this one
&& (iqentry_rl[head1] ? iqentry_done[head0] || !iqentry_v[head0] || !iqentry_mem[head0] : 1'b1)
// ... if a previous op has the acquire bit set
4528,13 → 4888,14
 
memissue[ head2 ] = ~iqentry_stomp[head2] && iqentry_memready[ head2 ] // addr and data are valid
// ... and no preceding instruction is ready to go
&& issue_count < `NUM_MEM
//&& ~iqentry_memready[head0]
//&& ~iqentry_memready[head1]
// ... and there is no address-overlap with any preceding instruction
&& (!iqentry_mem[head0] || (iqentry_agen[head0] & iqentry_out[head0])
|| (iqentry_a1_v[head0] && iqentry_a1[head2] != iqentry_a1[head0]))
|| (iqentry_a1_v[head0] && iqentry_a1[head2][AMSB:3] != iqentry_a1[head0][AMSB:3]))
&& (!iqentry_mem[head1] || (iqentry_agen[head1] & iqentry_out[head1])
|| (iqentry_a1_v[head1] && iqentry_a1[head2] != iqentry_a1[head1]))
|| (iqentry_a1_v[head1] && iqentry_a1[head2][AMSB:3] != iqentry_a1[head1][AMSB:3]))
// ... if a release, any prior memory ops must be done before this one
&& (iqentry_rl[head2] ? (iqentry_done[head0] || !iqentry_v[head0] || !iqentry_mem[head0])
&& (iqentry_done[head1] || !iqentry_v[head1] || !iqentry_mem[head1])
4543,8 → 4904,8
&& !(iqentry_aq[head0] && iqentry_v[head0])
&& !(iqentry_aq[head1] && iqentry_v[head1])
// ... and there isn't a barrier, or everything before the barrier is done or invalid
&& (!(iqentry_v[head1] && iqentry_memsb[head1]) || (iqentry_done[head0] || !iqentry_v[head0]))
&& (!(iqentry_v[head1] && iqentry_memdb[head1]) || (!iqentry_mem[head0] || iqentry_done[head0] || !iqentry_v[head0]))
&& (!(iqentry_iv[head1] && iqentry_memsb[head1]) || (iqentry_done[head0] || !iqentry_v[head0]))
&& (!(iqentry_iv[head1] && iqentry_memdb[head1]) || (!iqentry_mem[head0] || iqentry_done[head0] || !iqentry_v[head0]))
// ... and, if it is a SW, there is no chance of it being undone
&& (iqentry_load[head2] ||
!(iqentry_fc[head0]||iqentry_canex[head0])
4554,17 → 4915,17
memissue[ head3 ] = ~iqentry_stomp[head3] && iqentry_memready[ head3 ] // addr and data are valid
// ... and no preceding instruction is ready to go
&& issue_count < 3
&& issue_count < `NUM_MEM
//&& ~iqentry_memready[head0]
//&& ~iqentry_memready[head1]
//&& ~iqentry_memready[head2]
// ... and there is no address-overlap with any preceding instruction
&& (!iqentry_mem[head0] || (iqentry_agen[head0] & iqentry_out[head0])
|| (iqentry_a1_v[head0] && iqentry_a1[head3] != iqentry_a1[head0]))
|| (iqentry_a1_v[head0] && iqentry_a1[head3][AMSB:3] != iqentry_a1[head0][AMSB:3]))
&& (!iqentry_mem[head1] || (iqentry_agen[head1] & iqentry_out[head1])
|| (iqentry_a1_v[head1] && iqentry_a1[head3] != iqentry_a1[head1]))
|| (iqentry_a1_v[head1] && iqentry_a1[head3][AMSB:3] != iqentry_a1[head1][AMSB:3]))
&& (!iqentry_mem[head2] || (iqentry_agen[head2] & iqentry_out[head2])
|| (iqentry_a1_v[head2] && iqentry_a1[head3] != iqentry_a1[head2]))
|| (iqentry_a1_v[head2] && iqentry_a1[head3][AMSB:3] != iqentry_a1[head2][AMSB:3]))
// ... if a release, any prior memory ops must be done before this one
&& (iqentry_rl[head3] ? (iqentry_done[head0] || !iqentry_v[head0] || !iqentry_mem[head0])
&& (iqentry_done[head1] || !iqentry_v[head1] || !iqentry_mem[head1])
4575,13 → 4936,13
&& !(iqentry_aq[head1] && iqentry_v[head1])
&& !(iqentry_aq[head2] && iqentry_v[head2])
// ... and there isn't a barrier, or everything before the barrier is done or invalid
&& (!(iqentry_v[head1] && iqentry_memsb[head1]) || (iqentry_done[head0] || !iqentry_v[head0]))
&& (!(iqentry_v[head2] && iqentry_memsb[head2]) ||
&& (!(iqentry_iv[head1] && iqentry_memsb[head1]) || (iqentry_done[head0] || !iqentry_v[head0]))
&& (!(iqentry_iv[head2] && iqentry_memsb[head2]) ||
((iqentry_done[head0] || !iqentry_v[head0])
&& (iqentry_done[head1] || !iqentry_v[head1]))
)
&& (!(iqentry_v[head1] && iqentry_memdb[head1]) || (!iqentry_mem[head0] || iqentry_done[head0] || !iqentry_v[head0]))
&& (!(iqentry_v[head2] && iqentry_memdb[head2]) ||
&& (!(iqentry_iv[head1] && iqentry_memdb[head1]) || (!iqentry_mem[head0] || iqentry_done[head0] || !iqentry_v[head0]))
&& (!(iqentry_iv[head2] && iqentry_memdb[head2]) ||
((!iqentry_mem[head0] || iqentry_done[head0] || !iqentry_v[head0])
&& (!iqentry_mem[head1] || iqentry_done[head1] || !iqentry_v[head1]))
)
4595,7 → 4956,7
 
memissue[ head4 ] = ~iqentry_stomp[head4] && iqentry_memready[ head4 ] // addr and data are valid
// ... and no preceding instruction is ready to go
&& issue_count < 3
&& issue_count < `NUM_MEM
//&& ~iqentry_memready[head0]
//&& ~iqentry_memready[head1]
//&& ~iqentry_memready[head2]
4602,13 → 4963,13
//&& ~iqentry_memready[head3]
// ... and there is no address-overlap with any preceding instruction
&& (!iqentry_mem[head0] || (iqentry_agen[head0] & iqentry_out[head0])
|| (iqentry_a1_v[head0] && iqentry_a1[head4] != iqentry_a1[head0]))
|| (iqentry_a1_v[head0] && iqentry_a1[head4][AMSB:3] != iqentry_a1[head0][AMSB:3]))
&& (!iqentry_mem[head1] || (iqentry_agen[head1] & iqentry_out[head1])
|| (iqentry_a1_v[head1] && iqentry_a1[head4] != iqentry_a1[head1]))
|| (iqentry_a1_v[head1] && iqentry_a1[head4][AMSB:3] != iqentry_a1[head1][AMSB:3]))
&& (!iqentry_mem[head2] || (iqentry_agen[head2] & iqentry_out[head2])
|| (iqentry_a1_v[head2] && iqentry_a1[head4] != iqentry_a1[head2]))
|| (iqentry_a1_v[head2] && iqentry_a1[head4][AMSB:3] != iqentry_a1[head2][AMSB:3]))
&& (!iqentry_mem[head3] || (iqentry_agen[head3] & iqentry_out[head3])
|| (iqentry_a1_v[head3] && iqentry_a1[head4] != iqentry_a1[head3]))
|| (iqentry_a1_v[head3] && iqentry_a1[head4][AMSB:3] != iqentry_a1[head3][AMSB:3]))
// ... if a release, any prior memory ops must be done before this one
&& (iqentry_rl[head4] ? (iqentry_done[head0] || !iqentry_v[head0] || !iqentry_mem[head0])
&& (iqentry_done[head1] || !iqentry_v[head1] || !iqentry_mem[head1])
4621,22 → 4982,22
&& !(iqentry_aq[head2] && iqentry_v[head2])
&& !(iqentry_aq[head3] && iqentry_v[head3])
// ... and there isn't a barrier, or everything before the barrier is done or invalid
&& (!(iqentry_v[head1] && iqentry_memsb[head1]) || (iqentry_done[head0] || !iqentry_v[head0]))
&& (!(iqentry_v[head2] && iqentry_memsb[head2]) ||
&& (!(iqentry_iv[head1] && iqentry_memsb[head1]) || (iqentry_done[head0] || !iqentry_v[head0]))
&& (!(iqentry_iv[head2] && iqentry_memsb[head2]) ||
((iqentry_done[head0] || !iqentry_v[head0])
&& (iqentry_done[head1] || !iqentry_v[head1]))
)
&& (!(iqentry_v[head3] && iqentry_memsb[head3]) ||
&& (!(iqentry_iv[head3] && iqentry_memsb[head3]) ||
((iqentry_done[head0] || !iqentry_v[head0])
&& (iqentry_done[head1] || !iqentry_v[head1])
&& (iqentry_done[head2] || !iqentry_v[head2]))
)
&& (!(iqentry_v[head1] && iqentry_memdb[head1]) || (!iqentry_mem[head0] || iqentry_done[head0] || !iqentry_v[head0]))
&& (!(iqentry_v[head2] && iqentry_memdb[head2]) ||
&& (!(iqentry_iv[head2] && iqentry_memdb[head2]) ||
((!iqentry_mem[head0] || iqentry_done[head0] || !iqentry_v[head0])
&& (!iqentry_mem[head1] || iqentry_done[head1] || !iqentry_v[head1]))
)
&& (!(iqentry_v[head3] && iqentry_memdb[head3]) ||
&& (!(iqentry_iv[head3] && iqentry_memdb[head3]) ||
((!iqentry_mem[head0] || iqentry_done[head0] || !iqentry_v[head0])
&& (!iqentry_mem[head1] || iqentry_done[head1] || !iqentry_v[head1])
&& (!iqentry_mem[head2] || iqentry_done[head2] || !iqentry_v[head2]))
4652,7 → 5013,7
 
memissue[ head5 ] = ~iqentry_stomp[head5] && iqentry_memready[ head5 ] // addr and data are valid
// ... and no preceding instruction is ready to go
&& issue_count < 3
&& issue_count < `NUM_MEM
//&& ~iqentry_memready[head0]
//&& ~iqentry_memready[head1]
//&& ~iqentry_memready[head2]
4660,15 → 5021,15
//&& ~iqentry_memready[head4]
// ... and there is no address-overlap with any preceding instruction
&& (!iqentry_mem[head0] || (iqentry_agen[head0] & iqentry_out[head0])
|| (iqentry_a1_v[head0] && iqentry_a1[head5] != iqentry_a1[head0]))
|| (iqentry_a1_v[head0] && iqentry_a1[head5][AMSB:3] != iqentry_a1[head0][AMSB:3]))
&& (!iqentry_mem[head1] || (iqentry_agen[head1] & iqentry_out[head1])
|| (iqentry_a1_v[head1] && iqentry_a1[head5] != iqentry_a1[head1]))
|| (iqentry_a1_v[head1] && iqentry_a1[head5][AMSB:3] != iqentry_a1[head1][AMSB:3]))
&& (!iqentry_mem[head2] || (iqentry_agen[head2] & iqentry_out[head2])
|| (iqentry_a1_v[head2] && iqentry_a1[head5] != iqentry_a1[head2]))
|| (iqentry_a1_v[head2] && iqentry_a1[head5][AMSB:3] != iqentry_a1[head2][AMSB:3]))
&& (!iqentry_mem[head3] || (iqentry_agen[head3] & iqentry_out[head3])
|| (iqentry_a1_v[head3] && iqentry_a1[head5] != iqentry_a1[head3]))
|| (iqentry_a1_v[head3] && iqentry_a1[head5][AMSB:3] != iqentry_a1[head3][AMSB:3]))
&& (!iqentry_mem[head4] || (iqentry_agen[head4] & iqentry_out[head4])
|| (iqentry_a1_v[head4] && iqentry_a1[head5] != iqentry_a1[head4]))
|| (iqentry_a1_v[head4] && iqentry_a1[head5][AMSB:3] != iqentry_a1[head4][AMSB:3]))
// ... if a release, any prior memory ops must be done before this one
&& (iqentry_rl[head5] ? (iqentry_done[head0] || !iqentry_v[head0] || !iqentry_mem[head0])
&& (iqentry_done[head1] || !iqentry_v[head1] || !iqentry_mem[head1])
4683,33 → 5044,33
&& !(iqentry_aq[head3] && iqentry_v[head3])
&& !(iqentry_aq[head4] && iqentry_v[head4])
// ... and there isn't a barrier, or everything before the barrier is done or invalid
&& (!(iqentry_v[head1] && iqentry_memsb[head1]) || (iqentry_done[head0] || !iqentry_v[head0]))
&& (!(iqentry_v[head2] && iqentry_memsb[head2]) ||
&& (!(iqentry_iv[head1] && iqentry_memsb[head1]) || (iqentry_done[head0] || !iqentry_v[head0]))
&& (!(iqentry_iv[head2] && iqentry_memsb[head2]) ||
((iqentry_done[head0] || !iqentry_v[head0])
&& (iqentry_done[head1] || !iqentry_v[head1]))
)
&& (!(iqentry_v[head3] && iqentry_memsb[head3]) ||
&& (!(iqentry_iv[head3] && iqentry_memsb[head3]) ||
((iqentry_done[head0] || !iqentry_v[head0])
&& (iqentry_done[head1] || !iqentry_v[head1])
&& (iqentry_done[head2] || !iqentry_v[head2]))
)
&& (!(iqentry_v[head4] && iqentry_memsb[head4]) ||
&& (!(iqentry_iv[head4] && iqentry_memsb[head4]) ||
((iqentry_done[head0] || !iqentry_v[head0])
&& (iqentry_done[head1] || !iqentry_v[head1])
&& (iqentry_done[head2] || !iqentry_v[head2])
&& (iqentry_done[head3] || !iqentry_v[head3]))
)
&& (!(iqentry_v[head1] && iqentry_memdb[head1]) || (!iqentry_mem[head0] || iqentry_done[head0] || !iqentry_v[head0]))
&& (!(iqentry_v[head2] && iqentry_memdb[head2]) ||
&& (!(iqentry_iv[head1] && iqentry_memdb[head1]) || (!iqentry_mem[head0] || iqentry_done[head0] || !iqentry_v[head0]))
&& (!(iqentry_iv[head2] && iqentry_memdb[head2]) ||
((!iqentry_mem[head0] || iqentry_done[head0] || !iqentry_v[head0])
&& (!iqentry_mem[head1] || iqentry_done[head1] || !iqentry_v[head1]))
)
&& (!(iqentry_v[head3] && iqentry_memdb[head3]) ||
&& (!(iqentry_iv[head3] && iqentry_memdb[head3]) ||
((!iqentry_mem[head0] || iqentry_done[head0] || !iqentry_v[head0])
&& (!iqentry_mem[head1] || iqentry_done[head1] || !iqentry_v[head1])
&& (!iqentry_mem[head2] || iqentry_done[head2] || !iqentry_v[head2]))
)
&& (!(iqentry_v[head4] && iqentry_memdb[head4]) ||
&& (!(iqentry_iv[head4] && iqentry_memdb[head4]) ||
((!iqentry_mem[head0] || iqentry_done[head0] || !iqentry_v[head0])
&& (!iqentry_mem[head1] || iqentry_done[head1] || !iqentry_v[head1])
&& (!iqentry_mem[head2] || iqentry_done[head2] || !iqentry_v[head2])
4728,7 → 5089,7
`ifdef FULL_ISSUE_LOGIC
memissue[ head6 ] = ~iqentry_stomp[head6] && iqentry_memready[ head6 ] // addr and data are valid
// ... and no preceding instruction is ready to go
&& issue_count < 3
&& issue_count < `NUM_MEM
//&& ~iqentry_memready[head0]
//&& ~iqentry_memready[head1]
//&& ~iqentry_memready[head2]
4737,17 → 5098,17
//&& ~iqentry_memready[head5]
// ... and there is no address-overlap with any preceding instruction
&& (!iqentry_mem[head0] || (iqentry_agen[head0] & iqentry_out[head0])
|| (iqentry_a1_v[head0] && iqentry_a1[head6] != iqentry_a1[head0]))
|| (iqentry_a1_v[head0] && iqentry_a1[head6][AMSB:3] != iqentry_a1[head0][AMSB:3]))
&& (!iqentry_mem[head1] || (iqentry_agen[head1] & iqentry_out[head1])
|| (iqentry_a1_v[head1] && iqentry_a1[head6] != iqentry_a1[head1]))
|| (iqentry_a1_v[head1] && iqentry_a1[head6][AMSB:3] != iqentry_a1[head1][AMSB:3]))
&& (!iqentry_mem[head2] || (iqentry_agen[head2] & iqentry_out[head2])
|| (iqentry_a1_v[head2] && iqentry_a1[head6] != iqentry_a1[head2]))
|| (iqentry_a1_v[head2] && iqentry_a1[head6][AMSB:3] != iqentry_a1[head2][AMSB:3]))
&& (!iqentry_mem[head3] || (iqentry_agen[head3] & iqentry_out[head3])
|| (iqentry_a1_v[head3] && iqentry_a1[head6] != iqentry_a1[head3]))
|| (iqentry_a1_v[head3] && iqentry_a1[head6][AMSB:3] != iqentry_a1[head3][AMSB:3]))
&& (!iqentry_mem[head4] || (iqentry_agen[head4] & iqentry_out[head4])
|| (iqentry_a1_v[head4] && iqentry_a1[head6] != iqentry_a1[head4]))
|| (iqentry_a1_v[head4] && iqentry_a1[head6][AMSB:3] != iqentry_a1[head4][AMSB:3]))
&& (!iqentry_mem[head5] || (iqentry_agen[head5] & iqentry_out[head5])
|| (iqentry_a1_v[head5] && iqentry_a1[head6] != iqentry_a1[head5]))
|| (iqentry_a1_v[head5] && iqentry_a1[head6][AMSB:3] != iqentry_a1[head5][AMSB:3]))
&& (iqentry_rl[head6] ? (iqentry_done[head0] || !iqentry_v[head0] || !iqentry_mem[head0])
&& (iqentry_done[head1] || !iqentry_v[head1] || !iqentry_mem[head1])
&& (iqentry_done[head2] || !iqentry_v[head2] || !iqentry_mem[head2])
4763,23 → 5124,23
&& !(iqentry_aq[head4] && iqentry_v[head4])
&& !(iqentry_aq[head5] && iqentry_v[head5])
// ... and there isn't a barrier, or everything before the barrier is done or invalid
&& (!(iqentry_v[head1] && iqentry_memsb[head1]) || (iqentry_done[head0] || !iqentry_v[head0]))
&& (!(iqentry_v[head2] && iqentry_memsb[head2]) ||
&& (!(iqentry_iv[head1] && iqentry_memsb[head1]) || (iqentry_done[head0] || !iqentry_v[head0]))
&& (!(iqentry_iv[head2] && iqentry_memsb[head2]) ||
((iqentry_done[head0] || !iqentry_v[head0])
&& (iqentry_done[head1] || !iqentry_v[head1]))
)
&& (!(iqentry_v[head3] && iqentry_memsb[head3]) ||
&& (!(iqentry_iv[head3] && iqentry_memsb[head3]) ||
((iqentry_done[head0] || !iqentry_v[head0])
&& (iqentry_done[head1] || !iqentry_v[head1])
&& (iqentry_done[head2] || !iqentry_v[head2]))
)
&& (!(iqentry_v[head4] && iqentry_memsb[head4]) ||
&& (!(iqentry_iv[head4] && iqentry_memsb[head4]) ||
((iqentry_done[head0] || !iqentry_v[head0])
&& (iqentry_done[head1] || !iqentry_v[head1])
&& (iqentry_done[head2] || !iqentry_v[head2])
&& (iqentry_done[head3] || !iqentry_v[head3]))
)
&& (!(iqentry_v[head5] && iqentry_memsb[head5]) ||
&& (!(iqentry_iv[head5] && iqentry_memsb[head5]) ||
((iqentry_done[head0] || !iqentry_v[head0])
&& (iqentry_done[head1] || !iqentry_v[head1])
&& (iqentry_done[head2] || !iqentry_v[head2])
4786,23 → 5147,23
&& (iqentry_done[head3] || !iqentry_v[head3])
&& (iqentry_done[head4] || !iqentry_v[head4]))
)
&& (!(iqentry_v[head1] && iqentry_memdb[head1]) || (!iqentry_mem[head0] || iqentry_done[head0] || !iqentry_v[head0]))
&& (!(iqentry_v[head2] && iqentry_memdb[head2]) ||
&& (!(iqentry_iv[head1] && iqentry_memdb[head1]) || (!iqentry_mem[head0] || iqentry_done[head0] || !iqentry_v[head0]))
&& (!(iqentry_iv[head2] && iqentry_memdb[head2]) ||
((!iqentry_mem[head0] || iqentry_done[head0] || !iqentry_v[head0])
&& (!iqentry_mem[head1] || iqentry_done[head1] || !iqentry_v[head1]))
)
&& (!(iqentry_v[head3] && iqentry_memdb[head3]) ||
&& (!(iqentry_iv[head3] && iqentry_memdb[head3]) ||
((!iqentry_mem[head0] || iqentry_done[head0] || !iqentry_v[head0])
&& (!iqentry_mem[head1] || iqentry_done[head1] || !iqentry_v[head1])
&& (!iqentry_mem[head2] || iqentry_done[head2] || !iqentry_v[head2]))
)
&& (!(iqentry_v[head4] && iqentry_memdb[head4]) ||
&& (!(iqentry_iv[head4] && iqentry_memdb[head4]) ||
((!iqentry_mem[head0] || iqentry_done[head0] || !iqentry_v[head0])
&& (!iqentry_mem[head1] || iqentry_done[head1] || !iqentry_v[head1])
&& (!iqentry_mem[head2] || iqentry_done[head2] || !iqentry_v[head2])
&& (!iqentry_mem[head3] || iqentry_done[head3] || !iqentry_v[head3]))
)
&& (!(iqentry_v[head5] && iqentry_memdb[head5]) ||
&& (!(iqentry_iv[head5] && iqentry_memdb[head5]) ||
((!iqentry_mem[head0] || iqentry_done[head0] || !iqentry_v[head0])
&& (!iqentry_mem[head1] || iqentry_done[head1] || !iqentry_v[head1])
&& (!iqentry_mem[head2] || iqentry_done[head2] || !iqentry_v[head2])
4822,7 → 5183,7
 
memissue[ head7 ] = ~iqentry_stomp[head7] && iqentry_memready[ head7 ] // addr and data are valid
// ... and no preceding instruction is ready to go
&& issue_count < 3
&& issue_count < `NUM_MEM
//&& ~iqentry_memready[head0]
//&& ~iqentry_memready[head1]
//&& ~iqentry_memready[head2]
4832,19 → 5193,19
//&& ~iqentry_memready[head6]
// ... and there is no address-overlap with any preceding instruction
&& (!iqentry_mem[head0] || (iqentry_agen[head0] & iqentry_out[head0])
|| (iqentry_a1_v[head0] && iqentry_a1[head7] != iqentry_a1[head0]))
|| (iqentry_a1_v[head0] && iqentry_a1[head7][AMSB:3] != iqentry_a1[head0][AMSB:3]))
&& (!iqentry_mem[head1] || (iqentry_agen[head1] & iqentry_out[head1])
|| (iqentry_a1_v[head1] && iqentry_a1[head7] != iqentry_a1[head1]))
|| (iqentry_a1_v[head1] && iqentry_a1[head7][AMSB:3] != iqentry_a1[head1][AMSB:3]))
&& (!iqentry_mem[head2] || (iqentry_agen[head2] & iqentry_out[head2])
|| (iqentry_a1_v[head2] && iqentry_a1[head7] != iqentry_a1[head2]))
|| (iqentry_a1_v[head2] && iqentry_a1[head7][AMSB:3] != iqentry_a1[head2][AMSB:3]))
&& (!iqentry_mem[head3] || (iqentry_agen[head3] & iqentry_out[head3])
|| (iqentry_a1_v[head3] && iqentry_a1[head7] != iqentry_a1[head3]))
|| (iqentry_a1_v[head3] && iqentry_a1[head7][AMSB:3] != iqentry_a1[head3][AMSB:3]))
&& (!iqentry_mem[head4] || (iqentry_agen[head4] & iqentry_out[head4])
|| (iqentry_a1_v[head4] && iqentry_a1[head7] != iqentry_a1[head4]))
|| (iqentry_a1_v[head4] && iqentry_a1[head7][AMSB:3] != iqentry_a1[head4][AMSB:3]))
&& (!iqentry_mem[head5] || (iqentry_agen[head5] & iqentry_out[head5])
|| (iqentry_a1_v[head5] && iqentry_a1[head7] != iqentry_a1[head5]))
|| (iqentry_a1_v[head5] && iqentry_a1[head7][AMSB:3] != iqentry_a1[head5][AMSB:3]))
&& (!iqentry_mem[head6] || (iqentry_agen[head6] & iqentry_out[head6])
|| (iqentry_a1_v[head6] && iqentry_a1[head7] != iqentry_a1[head6]))
|| (iqentry_a1_v[head6] && iqentry_a1[head7][AMSB:3] != iqentry_a1[head6][AMSB:3]))
&& (iqentry_rl[head7] ? (iqentry_done[head0] || !iqentry_v[head0] || !iqentry_mem[head0])
&& (iqentry_done[head1] || !iqentry_v[head1] || !iqentry_mem[head1])
&& (iqentry_done[head2] || !iqentry_v[head2] || !iqentry_mem[head2])
4862,23 → 5223,23
&& !(iqentry_aq[head5] && iqentry_v[head5])
&& !(iqentry_aq[head6] && iqentry_v[head6])
// ... and there isn't a barrier, or everything before the barrier is done or invalid
&& (!(iqentry_v[head1] && iqentry_memsb[head1]) || (iqentry_done[head0] || !iqentry_v[head0]))
&& (!(iqentry_v[head2] && iqentry_memsb[head2]) ||
&& (!(iqentry_iv[head1] && iqentry_memsb[head1]) || (iqentry_done[head0] || !iqentry_v[head0]))
&& (!(iqentry_iv[head2] && iqentry_memsb[head2]) ||
((iqentry_done[head0] || !iqentry_v[head0])
&& (iqentry_done[head1] || !iqentry_v[head1]))
)
&& (!(iqentry_v[head3] && iqentry_memsb[head3]) ||
&& (!(iqentry_iv[head3] && iqentry_memsb[head3]) ||
((iqentry_done[head0] || !iqentry_v[head0])
&& (iqentry_done[head1] || !iqentry_v[head1])
&& (iqentry_done[head2] || !iqentry_v[head2]))
)
&& (!(iqentry_v[head4] && iqentry_memsb[head4]) ||
&& (!(iqentry_iv[head4] && iqentry_memsb[head4]) ||
((iqentry_done[head0] || !iqentry_v[head0])
&& (iqentry_done[head1] || !iqentry_v[head1])
&& (iqentry_done[head2] || !iqentry_v[head2])
&& (iqentry_done[head3] || !iqentry_v[head3]))
)
&& (!(iqentry_v[head5] && iqentry_memsb[head5]) ||
&& (!(iqentry_iv[head5] && iqentry_memsb[head5]) ||
((iqentry_done[head0] || !iqentry_v[head0])
&& (iqentry_done[head1] || !iqentry_v[head1])
&& (iqentry_done[head2] || !iqentry_v[head2])
4885,7 → 5246,7
&& (iqentry_done[head3] || !iqentry_v[head3])
&& (iqentry_done[head4] || !iqentry_v[head4]))
)
&& (!(iqentry_v[head6] && iqentry_memsb[head6]) ||
&& (!(iqentry_iv[head6] && iqentry_memsb[head6]) ||
((iqentry_done[head0] || !iqentry_v[head0])
&& (iqentry_done[head1] || !iqentry_v[head1])
&& (iqentry_done[head2] || !iqentry_v[head2])
4893,23 → 5254,23
&& (iqentry_done[head4] || !iqentry_v[head4])
&& (iqentry_done[head5] || !iqentry_v[head5]))
)
&& (!(iqentry_v[head1] && iqentry_memdb[head1]) || (!iqentry_mem[head0] || iqentry_done[head0] || !iqentry_v[head0]))
&& (!(iqentry_v[head2] && iqentry_memdb[head2]) ||
&& (!(iqentry_iv[head1] && iqentry_memdb[head1]) || (!iqentry_mem[head0] || iqentry_done[head0] || !iqentry_v[head0]))
&& (!(iqentry_iv[head2] && iqentry_memdb[head2]) ||
((!iqentry_mem[head0] || iqentry_done[head0] || !iqentry_v[head0])
&& (!iqentry_mem[head1] || iqentry_done[head1] || !iqentry_v[head1]))
)
&& (!(iqentry_v[head3] && iqentry_memdb[head3]) ||
&& (!(iqentry_iv[head3] && iqentry_memdb[head3]) ||
((!iqentry_mem[head0] || iqentry_done[head0] || !iqentry_v[head0])
&& (!iqentry_mem[head1] || iqentry_done[head1] || !iqentry_v[head1])
&& (!iqentry_mem[head2] || iqentry_done[head2] || !iqentry_v[head2]))
)
&& (!(iqentry_v[head4] && iqentry_memdb[head4]) ||
&& (!(iqentry_iv[head4] && iqentry_memdb[head4]) ||
((!iqentry_mem[head0] || iqentry_done[head0] || !iqentry_v[head0])
&& (!iqentry_mem[head1] || iqentry_done[head1] || !iqentry_v[head1])
&& (!iqentry_mem[head2] || iqentry_done[head2] || !iqentry_v[head2])
&& (!iqentry_mem[head3] || iqentry_done[head3] || !iqentry_v[head3]))
)
&& (!(iqentry_v[head5] && iqentry_memdb[head5]) ||
&& (!(iqentry_iv[head5] && iqentry_memdb[head5]) ||
((!iqentry_mem[head0] || iqentry_done[head0] || !iqentry_v[head0])
&& (!iqentry_mem[head1] || iqentry_done[head1] || !iqentry_v[head1])
&& (!iqentry_mem[head2] || iqentry_done[head2] || !iqentry_v[head2])
4916,7 → 5277,7
&& (!iqentry_mem[head3] || iqentry_done[head3] || !iqentry_v[head3])
&& (!iqentry_mem[head4] || iqentry_done[head4] || !iqentry_v[head4]))
)
&& (!(iqentry_v[head6] && iqentry_memdb[head6]) ||
&& (!(iqentry_iv[head6] && iqentry_memdb[head6]) ||
((!iqentry_mem[head0] || iqentry_done[head0] || !iqentry_v[head0])
&& (!iqentry_mem[head1] || iqentry_done[head1] || !iqentry_v[head1])
&& (!iqentry_mem[head2] || iqentry_done[head2] || !iqentry_v[head2])
4936,6 → 5297,37
`endif
end
 
// wbptr: combinational priority encode of the write-buffer valid flags.
// The result is one more than the index of the highest set bit among
// wb_v[6:0], or 0 when none of those bits is set. wb_v[7] is not examined.
// NOTE(review): the unrolled chain hard-codes seven tested entries and a
// 3-bit result; presumably `WB_DEPTH is 8 - confirm if the depth changes.
reg [2:0] wbptr;
always @*
begin
	// Generic loop form, kept for reference only (author's note follows).
	// Crashes sim
//	wbptr <= `WB_DEPTH-1;
//	if (wb_v==8'h0)
//		wbptr <= 3'd0;
//	else
//	begin
//		for (n = `WB_DEPTH-2; n >= 0; n = n - 1)
//			if (wb_v[n] && wbptr==`WB_DEPTH-1)
//				wbptr <= n + 1;
//	end
	// Blocking assignments are used because this block is purely
	// combinational: the value must be visible as soon as wb_v changes,
	// without an extra non-blocking update step in simulation.
	if (wb_v[6])
		wbptr = 3'd7;
	else if (wb_v[5])
		wbptr = 3'd6;
	else if (wb_v[4])
		wbptr = 3'd5;
	else if (wb_v[3])
		wbptr = 3'd4;
	else if (wb_v[2])
		wbptr = 3'd3;
	else if (wb_v[1])
		wbptr = 3'd2;
	else if (wb_v[0])
		wbptr = 3'd1;
	else
		wbptr = 3'd0;
end
 
// Stomp logic for branch miss.
 
FT64_stomp #(QENTRIES) ustmp1
4969,11 → 5361,12
stompedOnRets = stompedOnRets + 4'd1;
end
 
reg id1_vi, id2_vi;
wire [4:0] id1_ido, id2_ido;
wire id1_vo, id2_vo;
wire id1_clk, id2_clk;
reg id1_vi, id2_vi, id3_vi;
wire [4:0] id1_ido, id2_ido, id3_ido;
wire id1_vo, id2_vo, id3_vo;
wire id1_clk, id2_clk, id3_clk;
 
// Always at least one decoder
BUFGCE uclkb2
(
.I(clk_i),
4997,6 → 5390,8
.idv_o(id1_vo)
);
 
generate begin : gIDUInst
if (`NUM_IDU > 1) begin
BUFGCE uclkb3
(
.I(clk_i),
5019,7 → 5414,34
.id_o(id2_ido),
.idv_o(id2_vo)
);
end
// Optional third instruction decoder: elaborated only when `NUM_IDU > 2.
if (`NUM_IDU > 2) begin
// Clock buffer with enable: id3_clk is clk_i passed through a BUFGCE whose
// enable input is id3_available.
BUFGCE uclkb4
(
.I(clk_i),
.CE(id3_available),
.O(id3_clk)
);

// Decoder instance fed by the id3_* request signals; it returns the queue
// id on id3_ido, the decoded bus on id3_bus and a valid flag on id3_vo.
// NOTE(review): the instance is named uid2 although every connection is an
// id3_* signal - confirm the name is intended (uid3 would match the wiring).
FT64_idecoder uid2
(
.clk(id3_clk),
.idv_i(id3_vi),
.id_i(id3_id),
.instr(id3_instr),
.ven(id3_ven),
.vl(id3_vl),
.thrd(id3_thrd),
.predict_taken(id3_pt),
.Rt(id3_Rt),
.bus(id3_bus),
.id_o(id3_ido),
.idv_o(id3_vo)
);
end
end
endgenerate
 
//
// EXECUTE
//
5053,6 → 5475,8
.mem(alu0_mem),
.shift48(alu0_shft48)
);
generate begin : gAluInst
if (`NUM_ALU > 1) begin
FT64_alu #(.BIG(1'b0),.SUP_VECTOR(SUP_VECTOR)) ualu1 (
.rst(rst),
.clk(clk),
5080,31 → 5504,76
.mem(alu1_mem),
.shift48(alu1_shft48)
);
end
end
endgenerate
 
generate begin : gFPUInst
if (`NUM_FPU > 0) begin
wire fpu1_clk;
BUFGCE ufpc1
(
.I(clk_i),
.CE(fpu1_available),
.O(fpu1_clk)
);
fpUnit ufp1
(
.rst(rst),
.clk(clk),
.clk(fpu1_clk),
.clk4x(clk4x),
.ce(1'b1),
.ir(fpu_instr),
.ld(fpu_ld),
.a(fpu_argA),
.b(fpu_argB),
.imm(fpu_argI),
.o(fpu_bus),
.ir(fpu1_instr),
.ld(fpu1_ld),
.a(fpu1_argA),
.b(fpu1_argB),
.imm(fpu1_argI),
.o(fpu1_bus),
.csr_i(),
.status(fpu_status),
.status(fpu1_status),
.exception(),
.done(fpu_done)
.done(fpu1_done)
);
assign fpu_exc = |fpu_status[15:0] ? `FLT_FLT : `FLT_NONE;
end
// Optional second floating-point unit: elaborated only when `NUM_FPU > 1.
if (`NUM_FPU > 1) begin
	// Clock for this unit: clk_i passed through a BUFGCE whose enable input
	// is fpu2_available.
	wire fpu2_clk;
	BUFGCE ufpc2
	(
		.I(clk_i),
		.CE(fpu2_available),
		.O(fpu2_clk)
	);
	// Instance name is ufp2 (it was ufp1, duplicating the first FPU's
	// instance name) so the two units are distinguishable in the hierarchy,
	// matching the ufpc1/ufpc2 naming of their clock buffers.
	fpUnit ufp2
	(
		.rst(rst),
		.clk(fpu2_clk),
		.clk4x(clk4x),
		.ce(1'b1),				// clock enable tied high
		.ir(fpu2_instr),
		.ld(fpu2_ld),
		.a(fpu2_argA),
		.b(fpu2_argB),
		.imm(fpu2_argI),
		.o(fpu2_bus),
		.csr_i(),				// left unconnected
		.status(fpu2_status),
		.exception(),			// left unconnected
		.done(fpu2_done)
	);
end
end
endgenerate
 
assign fpu_exc = fpu1_available ? (|fpu1_status[15:0] ? `FLT_FLT : `FLT_NONE) : `FLT_UNIMP;
 
assign alu0_v = alu0_dataready,
alu1_v = alu1_dataready;
assign alu0_id = alu0_sourceid,
alu1_id = alu1_sourceid;
assign fpu_v = fpu_dataready;
assign fpu_id = fpu_sourceid;
assign fpu1_v = fpu1_dataready;
assign fpu1_id = fpu1_sourceid;
assign fpu2_v = fpu2_dataready;
assign fpu2_id = fpu2_sourceid;
 
`ifdef SUPPORT_SMT
wire [1:0] olm = ol[fcu_thrd];
5318,17 → 5787,23
commit0_tgt <= iqentry_tgt[head0];
commit0_we <= iqentry_we[head0];
commit0_bus <= iqentry_res[head0];
commit1_v <= ({iqentry_v[head0], iqentry_cmt[head0]} != 2'b10
&& {iqentry_v[head1], iqentry_cmt[head1]} == 2'b11
&& ~|panic);
commit1_id <= {iqentry_mem[head1], head1};
commit1_tgt <= iqentry_tgt[head1];
commit1_we <= iqentry_we[head1];
commit1_bus <= iqentry_res[head1];
// Second commit port. It is live only when `NUM_CMT > 1; otherwise every
// commit1_* signal is driven to an inactive value so that none of them is
// left unassigned (previously only commit1_tgt and commit1_we were driven
// in the single-commit configuration).
if (`NUM_CMT > 1) begin
	// head1 may commit when head0 is not a valid-but-uncommitted entry,
	// head1 itself is valid and marked committable, and no panic is flagged.
	commit1_v <= ({iqentry_v[head0], iqentry_cmt[head0]} != 2'b10
		&& {iqentry_v[head1], iqentry_cmt[head1]} == 2'b11
		&& ~|panic);
	commit1_id <= {iqentry_mem[head1], head1};
	commit1_tgt <= iqentry_tgt[head1];
	commit1_we <= iqentry_we[head1];
	commit1_bus <= iqentry_res[head1];
end
else begin
	commit1_v <= 1'b0;		// port never valid in this configuration
	commit1_id <= 'd0;
	commit1_tgt <= 12'h000;
	commit1_we <= 8'h00;
	commit1_bus <= 'd0;
end
end
assign int_commit = (commit0_v && IsIrq(iqentry_instr[head0])) ||
(commit0_v && commit1_v && IsIrq(iqentry_instr[head1]));
(commit0_v && commit1_v && IsIrq(iqentry_instr[head1]) && `NUM_CMT > 1);
 
// Detect if a given register will become valid during the current cycle.
// We want a signal that is active during the current clock cycle for the read
5356,7 → 5831,7
if (commit0_v && n=={commit0_tgt[7:0]})
regIsValid[n] = regIsValid[n] | (rf_source[ {commit0_tgt[7:0]} ] == commit0_id
|| (branchmiss && branchmiss_thrd == iqentry_thrd[commit0_id[`QBITS]] && iqentry_source[ commit0_id[`QBITS] ]));
if (commit1_v && n=={commit1_tgt[7:0]})
if (commit1_v && n=={commit1_tgt[7:0]} && `NUM_CMT > 1)
// Commit port 1 term: the register becomes valid if the committing entry is
// its current source, or on a branch miss in that entry's own thread. The
// thread is looked up with commit1_id (it was commit0_id, which appears to
// have been carried over from the commit0 statement above).
regIsValid[n] = regIsValid[n] | (rf_source[ {commit1_tgt[7:0]} ] == commit1_id
|| (branchmiss && branchmiss_thrd == iqentry_thrd[commit1_id[`QBITS]] && iqentry_source[ commit1_id[`QBITS] ]));
end
5558,13 → 6033,13
// Simulation doesn't like it if things are under separate always blocks.
// Synthesis doesn't like it if things are under the same always block.
 
always @(posedge clk)
begin
branchmiss <= excmiss|fcu_branchmiss;
misspc <= excmiss ? excmisspc : fcu_misspc;
missid <= excmiss ? (|iqentry_exc[head0] ? head0 : head1) : fcu_sourceid;
branchmiss_thrd <= excmiss ? excthrd : fcu_thrd;
end
//always @(posedge clk)
//begin
// branchmiss <= excmiss|fcu_branchmiss;
// misspc <= excmiss ? excmisspc : fcu_misspc;
// missid <= excmiss ? (|iqentry_exc[head0] ? head0 : head1) : fcu_sourceid;
// branchmiss_thrd <= excmiss ? excthrd : fcu_thrd;
//end
 
always @(posedge clk)
if (rst) begin
5593,8 → 6068,8
iqentry_alu0_issue[n] <= FALSE;
iqentry_alu1_issue[n] <= FALSE;
iqentry_fpu[n] <= FALSE;
iqentry_fpu_islot[n] <= 2'b00;
iqentry_fpu_issue[n] <= FALSE;
iqentry_fpu1_issue[n] <= FALSE;
iqentry_fpu2_issue[n] <= FALSE;
iqentry_fsync[n] <= FALSE;
iqentry_fc[n] <= FALSE;
iqentry_fcu_issue[n] <= FALSE;
5618,6 → 6093,8
iqentry_memndx[n] <= FALSE;
iqentry_memissue[n] <= FALSE;
iqentry_mem_islot[n] <= 3'd0;
iqentry_memdb[n] <= FALSE;
iqentry_memsb[n] <= FALSE;
iqentry_tgt[n] <= 6'd0;
iqentry_imm[n] <= 1'b0;
iqentry_a0[n] <= 64'd0;
5632,6 → 6109,7
iqentry_a3_s[n] <= 5'd0;
iqentry_canex[n] <= FALSE;
end
bwhich <= 2'b00;
dram0 <= `DRAMSLOT_AVAIL;
dram1 <= `DRAMSLOT_AVAIL;
dram2 <= `DRAMSLOT_AVAIL;
5654,11 → 6132,7
head6 <= 6;
head7 <= 7;
panic = `PANIC_NONE;
id1_available <= `ID1_AVAIL;
id2_available <= `ID2_AVAIL;
alu0_available <= `ALU0_AVAIL;
alu0_dataready <= 0;
alu1_available <= `ALU1_AVAIL;
alu1_dataready <= 0;
alu0_sourceid <= 5'd0;
alu1_sourceid <= 5'd0;
5688,7 → 6162,6
alu1_tgt <= 6'h00;
alu1_ven <= 6'd0;
`endif
fpu1_available <= `FPU1_AVAIL;
fcu_dataready <= 0;
fcu_instr <= `NOP_INSN;
fcu_retadr_v <= 0;
5720,7 → 6193,8
for (n = 0; n < PREGS; n = n + 1)
rf_v[n] <= `VAL;
tgtq <= FALSE;
fp_rm <= 3'd0; // round nearest even - default rounding mode
fp1_rm <= 3'd0; // round nearest even - default rounding mode
fp2_rm <= 3'd0;
waitctr <= 64'd0;
for (n = 0; n < 16; n = n + 1)
badaddr[n] <= 64'd0;
5740,8 → 6214,38
fcu_done <= `TRUE;
sema <= 64'h0;
tvec[0] <= RSTPC;
pmr <= 64'hFFFFFFFFFFFFFFFF;
pmr[0] <= `ID1_AVAIL;
pmr[1] <= `ID2_AVAIL;
pmr[2] <= `ID3_AVAIL;
pmr[8] <= `ALU0_AVAIL;
pmr[9] <= `ALU1_AVAIL;
pmr[16] <= `FPU1_AVAIL;
pmr[17] <= `FPU2_AVAIL;
pmr[24] <= `MEM1_AVAIL;
pmr[25] <= `MEM2_AVAIL;
pmr[26] <= `MEM3_AVAIL;
pmr[32] <= `FCU_AVAIL;
for (n = 0; n < `WB_DEPTH; n = n + 1) begin
wb_v[n] <= 1'b0;
wb_rmw[n] <= 1'b0;
wb_id[n] <= {QENTRIES{1'b0}};
end
wb_en <= `TRUE;
wbo_id <= {QENTRIES{1'b0}};
`ifdef SIM
wb_merges <= 32'd0;
`endif
end
else begin
if (|fb_panic)
panic <= fb_panic;
begin
branchmiss <= excmiss|fcu_branchmiss;
misspc <= excmiss ? excmisspc : fcu_misspc;
missid <= excmiss ? (|iqentry_exc[head0] ? head0 : head1) : fcu_sourceid;
branchmiss_thrd <= excmiss ? excthrd : fcu_thrd;
end
// The following signals only pulse
 
// Instruction decode output should only pulse once for a queue entry. We
5748,7 → 6252,12
// want the decode to be invalidated after a clock cycle so that it isn't
// inadvertently used to update the queue at a later point.
id1_vi <= `INV;
id2_vi <= `INV;
if (`NUM_IDU > 1)
id2_vi <= `INV;
if (`NUM_IDU > 2)
id3_vi <= `INV;
if (iqentry_v[nid] && iqentry_sn[nid] > iqentry_sn[fcu_id[`QBITS]])
fcu_dataready <= `INV;
ld_time <= {ld_time[4:0],1'b0};
wc_times <= wc_time;
rf_vra0 <= regIsValid[Ra0s];
5775,7 → 6284,8
tick <= tick + 64'd1;
alu0_ld <= FALSE;
alu1_ld <= FALSE;
fpu_ld <= FALSE;
fpu1_ld <= FALSE;
fpu2_ld <= FALSE;
fcu_ld <= FALSE;
fcu_retadr_v <= FALSE;
dramA_v <= FALSE;
5825,7 → 6335,7
if (commit0_tgt[5:0]==6'd30 && commit0_bus==64'd0)
$display("FP <= 0");
end
if (commit1_v) begin
if (commit1_v && `NUM_CMT > 1) begin
if (!rf_v[ {commit1_tgt[7:0]} ]) begin
if ({commit1_tgt[7:0]}=={commit0_tgt[7:0]})
rf_v[ {commit1_tgt[7:0]} ] <= regIsValid[{commit0_tgt[7:0]}] | regIsValid[{commit1_tgt[7:0]}];
5843,16 → 6353,16
end
rf_v[0] <= 1;
 
//
// ENQUEUE
//
// place up to two instructions from the fetch buffer into slots in the IQ.
// note: they are placed in-order, and they are expected to be executed
// 0, 1, or 2 of the fetch buffers may have valid data
// 0, 1, or 2 slots in the instruction queue may be available.
// if we notice that one of the instructions in the fetch buffer is a predicted branch,
// (set branchback/backpc and delete any instructions after it in fetchbuf)
//
//
// ENQUEUE
//
// place up to two instructions from the fetch buffer into slots in the IQ.
// note: they are placed in-order, and they are expected to be executed
// 0, 1, or 2 of the fetch buffers may have valid data
// 0, 1, or 2 slots in the instruction queue may be available.
// if we notice that one of the instructions in the fetch buffer is a predicted branch,
// (set branchback/backpc and delete any instructions after it in fetchbuf)
//
 
// enqueue fetchbuf0 and fetchbuf1, but only if there is room,
// and ignore fetchbuf1 if fetchbuf0 has a backwards branch in it.
6429,7 → 6939,7
alu0_dataready <= FALSE;
end
 
if (alu1_v) begin
if (alu1_v && `NUM_ALU > 1) begin
iqentry_tgt [ alu1_id[`QBITS] ] <= alu1_tgt;
iqentry_res [ alu1_id[`QBITS] ] <= alu1_bus;
iqentry_exc [ alu1_id[`QBITS] ] <= alu1_exc;
6440,18 → 6950,29
alu1_dataready <= FALSE;
end
 
if (fpu_v) begin
iqentry_res [ fpu_id[`QBITS] ] <= fpu_bus;
iqentry_a0 [ fpu_id[`QBITS] ] <= fpu_status;
iqentry_exc [ fpu_id[`QBITS] ] <= fpu_exc;
iqentry_done[ fpu_id[`QBITS] ] <= fpu_done;
iqentry_cmt[ fpu_id[`QBITS] ] <= fpu_done;
iqentry_out [ fpu_id[`QBITS] ] <= `INV;
if (fpu1_v) begin
iqentry_res [ fpu1_id[`QBITS] ] <= fpu1_bus;
iqentry_a0 [ fpu1_id[`QBITS] ] <= fpu1_status;
iqentry_exc [ fpu1_id[`QBITS] ] <= fpu1_exc;
iqentry_done[ fpu1_id[`QBITS] ] <= fpu1_done;
iqentry_cmt[ fpu1_id[`QBITS] ] <= fpu1_done;
iqentry_out [ fpu1_id[`QBITS] ] <= `INV;
//iqentry_agen[ fpu_id[`QBITS] ] <= `VAL; // RET
fpu_dataready <= FALSE;
fpu1_dataready <= FALSE;
end
 
if (fcu_wr) begin
if (fpu2_v && `NUM_FPU > 1) begin
iqentry_res [ fpu2_id[`QBITS] ] <= fpu2_bus;
iqentry_a0 [ fpu2_id[`QBITS] ] <= fpu2_status;
iqentry_exc [ fpu2_id[`QBITS] ] <= fpu2_exc;
iqentry_done[ fpu2_id[`QBITS] ] <= fpu2_done;
iqentry_cmt[ fpu2_id[`QBITS] ] <= fpu2_done;
iqentry_out [ fpu2_id[`QBITS] ] <= `INV;
//iqentry_agen[ fpu_id[`QBITS] ] <= `VAL; // RET
fpu2_dataready <= FALSE;
end
 
if (fcu_wr & ~fcu_done) begin
if (fcu_ld)
waitctr <= fcu_argA;
iqentry_res [ fcu_id[`QBITS] ] <= fcu_bus;
6495,7 → 7016,7
//iqentry_instr[fcu_id][`INSTRUCTION_OP] <= `NOP;
end
// if (dram_v && iqentry_v[ dram_id[`QBITS] ] && iqentry_mem[ dram_id[`QBITS] ] ) begin // if data for stomped instruction, ignore
if (dramA_v && iqentry_v[ dramA_id[`QBITS] ] && iqentry_load[ dramA_id[`QBITS] ]) begin // if data for stomped instruction, ignore
if (mem1_available && dramA_v && iqentry_v[ dramA_id[`QBITS] ] && iqentry_load[ dramA_id[`QBITS] ]) begin // if data for stomped instruction, ignore
iqentry_res [ dramA_id[`QBITS] ] <= dramA_bus;
iqentry_exc [ dramA_id[`QBITS] ] <= dramA_exc;
iqentry_done[ dramA_id[`QBITS] ] <= `VAL;
6505,7 → 7026,7
// if (iqentry_lptr[dram0_id[`QBITS]])
// wbrcd[pcr[5:0]] <= 1'b1;
end
if (dramB_v && iqentry_v[ dramB_id[`QBITS] ] && iqentry_load[ dramB_id[`QBITS] ]) begin // if data for stomped instruction, ignore
if (mem2_available && `NUM_MEM > 1 && dramB_v && iqentry_v[ dramB_id[`QBITS] ] && iqentry_load[ dramB_id[`QBITS] ]) begin // if data for stomped instruction, ignore
iqentry_res [ dramB_id[`QBITS] ] <= dramB_bus;
iqentry_exc [ dramB_id[`QBITS] ] <= dramB_exc;
iqentry_done[ dramB_id[`QBITS] ] <= `VAL;
6515,7 → 7036,7
// if (iqentry_lptr[dram1_id[`QBITS]])
// wbrcd[pcr[5:0]] <= 1'b1;
end
if (dramC_v && iqentry_v[ dramC_id[`QBITS] ] && iqentry_load[ dramC_id[`QBITS] ]) begin // if data for stomped instruction, ignore
if (mem3_available && `NUM_MEM > 2 && dramC_v && iqentry_v[ dramC_id[`QBITS] ] && iqentry_load[ dramC_id[`QBITS] ]) begin // if data for stomped instruction, ignore
iqentry_res [ dramC_id[`QBITS] ] <= dramC_bus;
iqentry_exc [ dramC_id[`QBITS] ] <= dramC_exc;
iqentry_done[ dramC_id[`QBITS] ] <= `VAL;
6529,17 → 7050,17
//
// set the IQ entry == DONE as soon as the SW is let loose to the memory system
//
if (dram0 == `DRAMSLOT_BUSY && IsStore(dram0_instr)) begin
if (mem1_available && dram0 == `DRAMSLOT_BUSY && IsStore(dram0_instr)) begin
if ((alu0_v && (dram0_id[`QBITS] == alu0_id[`QBITS])) || (alu1_v && (dram0_id[`QBITS] == alu1_id[`QBITS]))) panic <= `PANIC_MEMORYRACE;
iqentry_done[ dram0_id[`QBITS] ] <= `VAL;
iqentry_out[ dram0_id[`QBITS] ] <= `INV;
end
if (dram1 == `DRAMSLOT_BUSY && IsStore(dram1_instr)) begin
if (mem2_available && `NUM_MEM > 1 && dram1 == `DRAMSLOT_BUSY && IsStore(dram1_instr)) begin
if ((alu0_v && (dram1_id[`QBITS] == alu0_id[`QBITS])) || (alu1_v && (dram1_id[`QBITS] == alu1_id[`QBITS]))) panic <= `PANIC_MEMORYRACE;
iqentry_done[ dram1_id[`QBITS] ] <= `VAL;
iqentry_out[ dram1_id[`QBITS] ] <= `INV;
end
if (dram2 == `DRAMSLOT_BUSY && IsStore(dram2_instr)) begin
if (mem3_available && `NUM_MEM > 2 && dram2 == `DRAMSLOT_BUSY && IsStore(dram2_instr)) begin
if ((alu0_v && (dram2_id[`QBITS] == alu0_id[`QBITS])) || (alu1_v && (dram2_id[`QBITS] == alu1_id[`QBITS]))) panic <= `PANIC_MEMORYRACE;
iqentry_done[ dram2_id[`QBITS] ] <= `VAL;
iqentry_out[ dram2_id[`QBITS] ] <= `INV;
6558,18 → 7079,32
 
for (n = 0; n < QENTRIES; n = n + 1)
begin
setargs(n,{1'b0,fpu_id},fpu_v,fpu_bus);
if (`NUM_FPU > 0)
setargs(n,{1'b0,fpu1_id},fpu1_v,fpu1_bus);
if (`NUM_FPU > 1)
setargs(n,{1'b0,fpu2_id},fpu2_v,fpu2_bus);
 
setargs(n,{1'b0,alu0_id},alu0_v,alu0_bus);
setargs(n,{1'b0,alu1_id},alu1_v,alu1_bus);
if (`NUM_ALU > 1)
setargs(n,{1'b0,alu1_id},alu1_v,alu1_bus);
 
setargs(n,{1'b0,fcu_id},fcu_wr,fcu_bus);
 
setargs(n,{1'b0,dramA_id},dramA_v,dramA_bus);
setargs(n,{1'b0,dramB_id},dramB_v,dramB_bus);
setargs(n,{1'b0,dramC_id},dramC_v,dramC_bus);
if (`NUM_MEM > 1)
setargs(n,{1'b0,dramB_id},dramB_v,dramB_bus);
if (`NUM_MEM > 2)
setargs(n,{1'b0,dramC_id},dramC_v,dramC_bus);
 
setargs(n,commit0_id,commit0_v,commit0_bus);
setargs(n,commit1_id,commit1_v,commit1_bus);
if (`NUM_CMT > 1)
setargs(n,commit1_id,commit1_v,commit1_bus);
 
setinsn(n[`QBITS],id1_ido,id1_available&id1_vo,id1_bus);
setinsn(n[`QBITS],id2_ido,id2_available&id2_vo,id2_bus);
if (`NUM_IDU > 1)
setinsn(n[`QBITS],id2_ido,id2_available&id2_vo,id2_bus);
if (`NUM_IDU > 2)
setinsn(n[`QBITS],id3_ido,id3_available&id3_vo,id3_bus);
end
 
//
6587,8 → 7122,10
id1_id <= n[4:0];
id1_instr <= iqentry_rtop[n] ? (
iqentry_a3_v[n] ? iqentry_a3[n]
`ifdef FU_BYPASS
: (iqentry_a3_s[n] == alu0_id) ? alu0_bus
: (iqentry_a3_s[n] == alu1_id) ? alu1_bus
`endif
: `NOP_INSN)
: iqentry_instr[n];
id1_ven <= iqentry_ven[n];
6598,24 → 7135,50
id1_pt <= iqentry_pt[n];
end
end
if (`NUM_IDU > 1) begin
for (n = 0; n < QENTRIES; n = n + 1)
if (id2_available) begin
if (iqentry_id2issue[n] && !iqentry_iv[n] && !(iqentry_v[n] && iqentry_stomp[n])) begin
id2_vi <= `VAL;
id2_id <= n[4:0];
id2_instr <= iqentry_rtop[n] ? (
iqentry_a3_v[n] ? iqentry_a3[n]
: (iqentry_a3_s[n] == alu0_id) ? alu0_bus
: (iqentry_a3_s[n] == alu1_id) ? alu1_bus
: `NOP_INSN)
: iqentry_instr[n];
id2_ven <= iqentry_ven[n];
id2_vl <= iqentry_vl[n];
id2_thrd <= iqentry_thrd[n];
id2_Rt <= iqentry_tgt[n][4:0];
id2_pt <= iqentry_pt[n];
if (id2_available) begin
if (iqentry_id2issue[n] && !iqentry_iv[n] && !(iqentry_v[n] && iqentry_stomp[n])) begin
id2_vi <= `VAL;
id2_id <= n[4:0];
id2_instr <= iqentry_rtop[n] ? (
iqentry_a3_v[n] ? iqentry_a3[n]
`ifdef FU_BYPASS
: (iqentry_a3_s[n] == alu0_id) ? alu0_bus
: (iqentry_a3_s[n] == alu1_id) ? alu1_bus
`endif
: `NOP_INSN)
: iqentry_instr[n];
id2_ven <= iqentry_ven[n];
id2_vl <= iqentry_vl[n];
id2_thrd <= iqentry_thrd[n];
id2_Rt <= iqentry_tgt[n][4:0];
id2_pt <= iqentry_pt[n];
end
end
end
if (`NUM_IDU > 2) begin
for (n = 0; n < QENTRIES; n = n + 1)
if (id3_available) begin
if (iqentry_id3issue[n] && !iqentry_iv[n] && !(iqentry_v[n] && iqentry_stomp[n])) begin
id3_vi <= `VAL;
id3_id <= n[4:0];
id3_instr <= iqentry_rtop[n] ? (
iqentry_a3_v[n] ? iqentry_a3[n]
`ifdef FU_BYPASS
: (iqentry_a3_s[n] == alu0_id) ? alu0_bus
: (iqentry_a3_s[n] == alu1_id) ? alu1_bus
`endif
: `NOP_INSN)
: iqentry_instr[n];
id3_ven <= iqentry_ven[n];
id3_vl <= iqentry_vl[n];
id3_thrd <= iqentry_thrd[n];
id3_Rt <= iqentry_tgt[n][4:0];
id3_pt <= iqentry_pt[n];
end
end
end
 
for (n = 0; n < QENTRIES; n = n + 1)
if (iqentry_alu0_issue[n] && !(iqentry_v[n] && iqentry_stomp[n])) begin
6622,24 → 7185,49
if (alu0_available & alu0_done) begin
alu0_sourceid <= n[3:0];
alu0_instr <= iqentry_rtop[n] ? (
`ifdef FU_BYPASS
iqentry_a3_v[n] ? iqentry_a3[n]
: (iqentry_a3_s[n] == alu0_id) ? alu0_bus : alu1_bus)
: (iqentry_a3_s[n] == alu0_id) ? alu0_bus
: (iqentry_a3_s[n] == fpu1_id && `NUM_FPU > 0) ? fpu1_bus
: alu1_bus)
`else
iqentry_a3[n])
`endif
: iqentry_instr[n];
alu0_bt <= iqentry_bt[n];
alu0_mem <= iqentry_mem[n];
alu0_shft48 <= iqentry_shft48[n];
alu0_pc <= iqentry_pc[n];
alu0_argA <= iqentry_a1_v[n] ? iqentry_a1[n]
: (iqentry_a1_s[n] == alu0_id) ? alu0_bus : alu1_bus;
alu0_argA <=
`ifdef FU_BYPASS
iqentry_a1_v[n] ? iqentry_a1[n]
: (iqentry_a1_s[n] == alu0_id) ? alu0_bus
: (iqentry_a1_s[n] == fpu1_id && `NUM_FPU > 0) ? fpu1_bus
: alu1_bus;
`else
iqentry_a1[n];
`endif
alu0_argB <= iqentry_imm[n]
? iqentry_a0[n]
`ifdef FU_BYPASS
: (iqentry_a2_v[n] ? iqentry_a2[n]
: (iqentry_a2_s[n] == alu0_id) ? alu0_bus : alu1_bus);
alu0_argC <= iqentry_a3_v[n] ? iqentry_a3[n]
: (iqentry_a2_s[n] == alu0_id) ? alu0_bus
: (iqentry_a2_s[n] == fpu1_id && `NUM_FPU > 0) ? fpu1_bus
: alu1_bus);
`else
: iqentry_a2[n];
`endif
alu0_argC <=
`ifdef FU_BYPASS
iqentry_a3_v[n] ? iqentry_a3[n]
: (iqentry_a3_s[n] == alu0_id) ? alu0_bus : alu1_bus;
`else
iqentry_a3[n];
`endif
alu0_argI <= iqentry_a0[n];
alu0_tgt <= IsVeins(iqentry_instr[n]) ?
{6'h0,1'b1,iqentry_tgt[n][4:0]} | ((iqentry_a2_v[n] ? iqentry_a2[n][5:0]
{6'h0,1'b1,iqentry_tgt[n][4:0]} | ((
iqentry_a2_v[n] ? iqentry_a2[n][5:0]
: (iqentry_a2_s[n] == alu0_id) ? alu0_bus[5:0]
: (iqentry_a2_s[n] == alu1_id) ? alu1_bus[5:0]
: {4{16'h0000}})) << 6 :
6656,7 → 7244,7
end
end
end
 
if (`NUM_ALU > 1) begin
for (n = 0; n < QENTRIES; n = n + 1)
if (iqentry_alu1_issue[n] && !(iqentry_v[n] && iqentry_stomp[n])) begin
if (alu1_available && alu1_done) begin
6668,14 → 7256,32
alu1_mem <= iqentry_mem[n];
alu1_shft48 <= iqentry_shft48[n];
alu1_pc <= iqentry_pc[n];
alu1_argA <= iqentry_a1_v[n] ? iqentry_a1[n]
: (iqentry_a1_s[n] == alu0_id) ? alu0_bus : alu1_bus;
alu1_argA <=
`ifdef FU_BYPASS
iqentry_a1_v[n] ? iqentry_a1[n]
: (iqentry_a1_s[n] == alu0_id) ? alu0_bus
: (iqentry_a1_s[n] == fpu1_id && `NUM_FPU > 0) ? fpu1_bus
: alu1_bus;
`else
iqentry_a1[n];
`endif
alu1_argB <= iqentry_imm[n]
? iqentry_a0[n]
`ifdef FU_BYPASS
: (iqentry_a2_v[n] ? iqentry_a2[n]
: (iqentry_a2_s[n] == alu0_id) ? alu0_bus : alu1_bus);
alu1_argC <= iqentry_a3_v[n] ? iqentry_a3[n]
: (iqentry_a2_s[n] == alu0_id) ? alu0_bus
: (iqentry_a2_s[n] == fpu1_id && `NUM_FPU > 0) ? fpu1_bus
: alu1_bus);
`else
: iqentry_a2[n];
`endif
alu1_argC <=
`ifdef FU_BYPASS
iqentry_a3_v[n] ? iqentry_a3[n]
: (iqentry_a3_s[n] == alu0_id) ? alu0_bus : alu1_bus;
`else
iqentry_a3[n];
`endif
alu1_argI <= iqentry_a0[n];
alu1_tgt <= IsVeins(iqentry_instr[n]) ?
{6'h0,1'b1,iqentry_tgt[n][4:0]} | ((iqentry_a2_v[n] ? iqentry_a2[n][5:0]
6694,27 → 7300,85
end
end
end
end
 
for (n = 0; n < QENTRIES; n = n + 1)
if (iqentry_fpu_issue[n] && !(iqentry_v[n] && iqentry_stomp[n])) begin
if (fpu1_available & fpu_done) begin
fpu_sourceid <= n[3:0];
fpu_instr <= iqentry_instr[n];
fpu_pc <= iqentry_pc[n];
fpu_argA <= iqentry_a1_v[n] ? iqentry_a1[n]
: (iqentry_a1_s[n] == alu0_id) ? alu0_bus : alu1_bus;
fpu_argB <= (iqentry_a2_v[n] ? iqentry_a2[n]
: (iqentry_a2_s[n] == alu0_id) ? alu0_bus : alu1_bus);
fpu_argC <= iqentry_a3_v[n] ? iqentry_a3[n]
if (iqentry_fpu1_issue[n] && !(iqentry_v[n] && iqentry_stomp[n])) begin
if (fpu1_available & fpu1_done) begin
fpu1_sourceid <= n[3:0];
fpu1_instr <= iqentry_instr[n];
fpu1_pc <= iqentry_pc[n];
fpu1_argA <=
`ifdef FU_BYPASS
iqentry_a1_v[n] ? iqentry_a1[n]
: (iqentry_a1_s[n] == alu0_id) ? alu0_bus
: (iqentry_a1_s[n] == fpu1_id && `NUM_FPU > 0) ? fpu1_bus
: alu1_bus;
`else
iqentry_a1[n];
`endif
fpu1_argB <=
`ifdef FU_BYPASS
(iqentry_a2_v[n] ? iqentry_a2[n]
: (iqentry_a2_s[n] == alu0_id) ? alu0_bus
: (iqentry_a2_s[n] == fpu1_id && `NUM_FPU > 0) ? fpu1_bus
: alu1_bus);
`else
iqentry_a2[n];
`endif
fpu1_argC <=
`ifdef FU_BYPASS
iqentry_a3_v[n] ? iqentry_a3[n]
: (iqentry_a3_s[n] == alu0_id) ? alu0_bus : alu1_bus;
fpu_argI <= iqentry_a0[n];
fpu_dataready <= `VAL;
fpu_ld <= TRUE;
`else
iqentry_a3[n];
`endif
fpu1_argI <= iqentry_a0[n];
fpu1_dataready <= `VAL;
fpu1_ld <= TRUE;
iqentry_out[n] <= `VAL;
end
end
 
for (n = 0; n < QENTRIES; n = n + 1)
if (`NUM_FPU > 1 && iqentry_fpu2_issue[n] && !(iqentry_v[n] && iqentry_stomp[n])) begin
if (fpu2_available & fpu2_done) begin
fpu2_sourceid <= n[3:0];
fpu2_instr <= iqentry_instr[n];
fpu2_pc <= iqentry_pc[n];
fpu2_argA <=
`ifdef FU_BYPASS
iqentry_a1_v[n] ? iqentry_a1[n]
: (iqentry_a1_s[n] == alu0_id) ? alu0_bus
: (iqentry_a1_s[n] == fpu1_id && `NUM_FPU > 0) ? fpu1_bus
: alu1_bus;
`else
iqentry_a1[n];
`endif
fpu2_argB <=
`ifdef FU_BYPASS
(iqentry_a2_v[n] ? iqentry_a2[n]
: (iqentry_a2_s[n] == alu0_id) ? alu0_bus
: (iqentry_a2_s[n] == fpu1_id && `NUM_FPU > 0) ? fpu1_bus
: alu1_bus);
`else
iqentry_a2[n];
`endif
fpu2_argC <=
`ifdef FU_BYPASS
iqentry_a3_v[n] ? iqentry_a3[n]
: (iqentry_a3_s[n] == alu0_id) ? alu0_bus : alu1_bus;
`else
iqentry_a3[n];
`endif
fpu2_argI <= iqentry_a0[n];
fpu2_dataready <= `VAL;
fpu2_ld <= TRUE;
iqentry_out[n] <= `VAL;
end
end
 
for (n = 0; n < QENTRIES; n = n + 1)
if (iqentry_fcu_issue[n] && !(iqentry_v[n] && iqentry_stomp[n])) begin
if (fcu_done) begin
fcu_sourceid <= n[3:0];
6727,7 → 7391,9
fcu_bt <= iqentry_bt[n];
fcu_pc <= iqentry_pc[n];
fcu_argA <= iqentry_a1_v[n] ? iqentry_a1[n]
: (iqentry_a1_s[n] == alu0_id) ? alu0_bus : alu1_bus;
: (iqentry_a1_s[n] == alu0_id) ? alu0_bus
: (iqentry_a1_s[n] == fpu1_id && `NUM_FPU > 0) ? fpu1_bus
: alu1_bus;
`ifdef SUPPORT_SMT
fcu_argB <= IsRTI(iqentry_instr[n]) ? epc0[iqentry_thrd[n]]
`else
6734,7 → 7400,9
fcu_argB <= IsRTI(iqentry_instr[n]) ? epc0
`endif
: (iqentry_a2_v[n] ? iqentry_a2[n]
: (iqentry_a2_s[n] == alu0_id) ? alu0_bus : alu1_bus);
: (iqentry_a2_s[n] == alu0_id) ? alu0_bus
: (iqentry_a2_s[n] == fpu1_id && `NUM_FPU > 0) ? fpu1_bus
: alu1_bus);
waitctr <= iqentry_imm[n]
? iqentry_a0[n]
: (iqentry_a2_v[n] ? iqentry_a2[n]
6775,7 → 7443,7
 
//
// grab requests that have finished and put them on the dram_bus
if (dram0 == `DRAMREQ_READY) begin
if (mem1_available && dram0 == `DRAMREQ_READY) begin
dram0 <= `DRAMSLOT_AVAIL;
dramA_v <= dram0_load;
dramA_id <= dram0_id;
6785,7 → 7453,7
end
// else
// dramA_v <= `INV;
if (dram1 == `DRAMREQ_READY) begin
if (mem2_available && dram1 == `DRAMREQ_READY && `NUM_MEM > 1) begin
dram1 <= `DRAMSLOT_AVAIL;
dramB_v <= dram1_load;
dramB_id <= dram1_id;
6795,7 → 7463,7
end
// else
// dramB_v <= `INV;
if (dram2 == `DRAMREQ_READY) begin
if (mem3_available && dram2 == `DRAMREQ_READY && `NUM_MEM > 2) begin
dram2 <= `DRAMSLOT_AVAIL;
dramC_v <= dram2_load;
dramC_id <= dram2_id;
6831,8 → 7499,8
 
last_issue = 8;
for (n = 0; n < QENTRIES; n = n + 1)
if (~iqentry_stomp[n] && iqentry_memissue[n] && iqentry_agen[n] && ~iqentry_out[n]) begin
if (dram0 == `DRAMSLOT_AVAIL) begin
if (~iqentry_stomp[n] && iqentry_memissue[n] && iqentry_agen[n] && ~iqentry_out[n] && ~iqentry_done[n]) begin
if (mem1_available && dram0 == `DRAMSLOT_AVAIL) begin
dramA_v <= `INV;
dram0 <= `DRAMSLOT_BUSY;
dram0_id <= { 1'b1, n[`QBITS] };
6845,7 → 7513,6
// if (ol[iqentry_thrd[n]]==`OL_USER)
// dram0_seg <= (iqentry_Ra[n]==5'd30 || iqentry_Ra[n]==5'd31) ? {ss[iqentry_thrd[n]],13'd0} : {ds[iqentry_thrd[n]],13'd0};
// else
dram0_seg <= 64'd0;
dram0_unc <= iqentry_a1[n][31:20]==12'hFFD || !dce || IsVolatileLoad(iqentry_instr[n]);
dram0_memsize <= MemSize(iqentry_instr[n]);
dram0_load <= iqentry_load[n];
6860,8 → 7527,8
if (last_issue < 8)
iqentry_out[last_issue] <= `VAL;
for (n = 0; n < QENTRIES; n = n + 1)
if (~iqentry_stomp[n] && iqentry_memissue[n] && iqentry_agen[n] && ~iqentry_out[n]) begin
if (n < last_issue) begin
if (~iqentry_stomp[n] && iqentry_memissue[n] && iqentry_agen[n] && ~iqentry_out[n] && ~iqentry_done[n]) begin
if (mem2_available && n < last_issue && `NUM_MEM > 1) begin
if (dram1 == `DRAMSLOT_AVAIL) begin
dramB_v <= `INV;
dram1 <= `DRAMSLOT_BUSY;
6875,7 → 7542,6
// if (ol[iqentry_thrd[n]]==`OL_USER)
// dram1_seg <= (iqentry_Ra[n]==5'd30 || iqentry_Ra[n]==5'd31) ? {ss[iqentry_thrd[n]],13'd0} : {ds[iqentry_thrd[n]],13'd0};
// else
dram1_seg <= 64'd0;
dram1_unc <= iqentry_a1[n][31:20]==12'hFFD || !dce || IsVolatileLoad(iqentry_instr[n]);
dram1_memsize <= MemSize(iqentry_instr[n]);
dram1_load <= iqentry_load[n];
6888,8 → 7554,8
if (last_issue < 8)
iqentry_out[last_issue] <= `VAL;
for (n = 0; n < QENTRIES; n = n + 1)
if (~iqentry_stomp[n] && iqentry_memissue[n] && iqentry_agen[n] && ~iqentry_out[n]) begin
if (n < last_issue) begin
if (~iqentry_stomp[n] && iqentry_memissue[n] && iqentry_agen[n] && ~iqentry_out[n] && ~iqentry_done[n]) begin
if (mem3_available && n < last_issue && `NUM_MEM > 2) begin
if (dram2 == `DRAMSLOT_AVAIL) begin
dramC_v <= `INV;
dram2 <= `DRAMSLOT_BUSY;
6903,7 → 7569,6
// if (ol[iqentry_thrd[n]]==`OL_USER)
// dram2_seg <= (iqentry_Ra[n]==5'd30 || iqentry_Ra[n]==5'd31) ? {ss[iqentry_thrd[n]],13'd0} : {ds[iqentry_thrd[n]],13'd0};
// else
dram2_seg <= 64'd0;
dram2_unc <= iqentry_a1[n][31:20]==12'hFFD || !dce || IsVolatileLoad(iqentry_instr[n]);
dram2_memsize <= MemSize(iqentry_instr[n]);
dram2_load <= iqentry_load[n];
6927,88 → 7592,168
 
 
//
// COMMIT PHASE (dequeue only ... not register-file update)
//
// look at head0 and head1 and let 'em write to the register file if they are ready
//
//
// COMMIT PHASE (dequeue only ... not register-file update)
//
// look at head0 and head1 and let 'em write to the register file if they are ready
//
// always @(posedge clk) begin: commit_phase
 
oddball_commit(commit0_v, head0);
oddball_commit(commit1_v, head1);
oddball_commit(commit0_v, head0);
if (`NUM_CMT > 1)
oddball_commit(commit1_v, head1);
 
// Fetch and queue are limited to two instructions per cycle, so we might as
// well limit retiring to two instructions max to conserve logic.
//
if (~|panic)
casez ({ iqentry_v[head0],
iqentry_cmt[head0],
iqentry_v[head1],
iqentry_cmt[head1]})
casez ({ iqentry_v[head0],
iqentry_cmt[head0],
iqentry_v[head1],
iqentry_cmt[head1],
iqentry_v[head2],
iqentry_cmt[head2]})
 
// retire 3
4'b0?_0?:
if (head0 != tail0 && head1 != tail0) begin
6'b0?_0?_0?:
if (head0 != tail0 && head1 != tail0 && head2 != tail0) begin
head_inc(3);
end
else if (head0 != tail0 && head1 != tail0) begin
head_inc(2);
end
else if (head0 != tail0) begin
head_inc(1);
end
6'b0?_0?_10: ;
6'b0?_0?_11:
if (`NUM_CMT > 2 || iqentry_tgt[head2][4:0]==5'd0) begin
iqentry_v[head2] <= `INV;
head_inc(3);
end
else begin
head_inc(2);
end
 
// retire 1 (wait for regfile for head1)
4'b0?_10:
head_inc(1);
6'b0?_10_??:
head_inc(1);
 
// retire 2
4'b0?_11:
begin
iqentry_v[head1] <= `INV;
iqentry_iv[head1] <= `INV;
iqentry_alu[head1] <= `INV;
head_inc(2);
6'b0?_11_0?,
6'b0?_11_10:
if (`NUM_CMT > 1 || iqentry_tgt[head1]==12'd0) begin
iqentry_v[head1] <= `INV;
head_inc(2);
end
 
// retire 0 (stuck on head0)
4'b10_??: ;
// retire 1 or 2
4'b11_0?:
if (head1 != tail0) begin
else begin
head_inc(1);
end
6'b0?_11_11:
if (`NUM_CMT > 2 || (`NUM_CMT > 1 && iqentry_tgt[head2] == 12'd0)) begin
iqentry_v[head1] <= `INV;
iqentry_v[head2] <= `INV;
head_inc(3);
end
else if (`NUM_CMT > 1 || iqentry_tgt[head1]==12'd0) begin
iqentry_v[head1] <= `INV;
head_inc(2);
end
else
head_inc(1);
6'b10_??_??: ;
6'b11_0?_0?:
if (head1 != tail0 && head2 != tail0) begin
iqentry_v[head0] <= `INV;
iqentry_iv[head0] <= `INV;
iqentry_alu[head0] <= `INV;
head_inc(3);
end
else if (head1 != tail0) begin
iqentry_v[head0] <= `INV;
head_inc(2);
end
else begin
end
else begin
iqentry_v[head0] <= `INV;
iqentry_iv[head0] <= `INV;
iqentry_alu[head0] <= `INV;
head_inc(1);
end
6'b11_0?_10:
if (head1 != tail0) begin
iqentry_v[head0] <= `INV;
head_inc(2);
end
else begin
iqentry_v[head0] <= `INV;
head_inc(1);
end
6'b11_0?_11:
if (head1 != tail0) begin
if (`NUM_CMT > 2 || iqentry_tgt[head2]==12'd0) begin
iqentry_v[head0] <= `INV;
iqentry_v[head2] <= `INV;
head_inc(3);
end
else begin
iqentry_v[head0] <= `INV;
head_inc(2);
end
end
else begin
iqentry_v[head0] <= `INV;
head_inc(1);
end
6'b11_10_??:
begin
iqentry_v[head0] <= `INV;
head_inc(1);
end
6'b11_11_0?:
if (`NUM_CMT > 1 && head2 != tail0) begin
iqentry_v[head0] <= `INV;
iqentry_v[head1] <= `INV;
head_inc(3);
end
else if (iqentry_tgt[head1]== 12'd0 && head2 != tail0) begin
iqentry_v[head0] <= `INV;
iqentry_v[head1] <= `INV;
head_inc(3);
end
else if (`NUM_CMT > 1 || iqentry_tgt[head1]==12'd0) begin
iqentry_v[head0] <= `INV;
iqentry_v[head1] <= `INV;
head_inc(2);
end
else begin
iqentry_v[head0] <= `INV;
head_inc(1);
end
6'b11_11_10:
if (`NUM_CMT > 1 || iqentry_tgt[head1]==12'd0) begin
iqentry_v[head0] <= `INV;
iqentry_v[head1] <= `INV;
head_inc(2);
end
else begin
iqentry_v[head0] <= `INV;
head_inc(1);
end
6'b11_11_11:
if (`NUM_CMT > 2 || (`NUM_CMT > 1 && iqentry_tgt[head2]==12'd0)) begin
iqentry_v[head0] <= `INV;
iqentry_v[head1] <= `INV;
iqentry_v[head2] <= `INV;
head_inc(3);
end
 
// retire 1 (wait for regfile for head1)
4'b11_10:
begin
else if (`NUM_CMT > 1 || iqentry_tgt[head1]==12'd0) begin
iqentry_v[head0] <= `INV;
iqentry_iv[head0] <= `INV;
iqentry_alu[head0] <= `INV;
iqentry_v[head1] <= `INV;
head_inc(2);
end
else begin
iqentry_v[head0] <= `INV;
head_inc(1);
end
endcase
 
// retire 2
4'b11_11:
begin
iqentry_v[head0] <= `INV; // may conflict with STOMP, but since both are setting to 0, it is okay
iqentry_v[head1] <= `INV; // may conflict with STOMP, but since both are setting to 0, it is okay
iqentry_iv[head0] <= `INV;
iqentry_iv[head1] <= `INV;
iqentry_alu[head0] <= `INV;
iqentry_alu[head1] <= `INV;
head_inc(2);
end
endcase
 
 
rf_source[0] <= 0;
L1_wr0 <= FALSE;
L1_wr1 <= FALSE;
7032,11 → 7777,11
L1_adr <= {pcr[5:0],pc0[31:3],3'h0};
L2_adr <= {pcr[5:0],pc0[31:3],3'h0};
L1_invline <= TRUE;
icwhich <= 1'b0;
icwhich <= 2'b00;
iccnt <= 3'b00;
icstate <= IC2;
end
else if (!ihit1) begin
else if (!ihit1 && `WAYS > 1) begin
`ifdef SUPPORT_SMT
L1_adr <= {pcr[5:0],pc1[31:3],3'h0};
L2_adr <= {pcr[5:0],pc1[31:3],3'h0};
7045,10 → 7790,23
L2_adr <= {pcr[5:0],pc0plus6[31:3],3'h0};
`endif
L1_invline <= TRUE;
icwhich <= 1'b1;
icwhich <= 2'b01;
iccnt <= 3'b00;
icstate <= IC2;
end
else if (!ihit2 && `WAYS > 2) begin
`ifdef SUPPORT_SMT
L1_adr <= {pcr[5:0],pc2[31:3],3'h0};
L2_adr <= {pcr[5:0],pc2[31:3],3'h0};
`else
L1_adr <= {pcr[5:0],pc0plus12[31:3],3'h0};
L2_adr <= {pcr[5:0],pc0plus12[31:3],3'h0};
`endif
L1_invline <= TRUE;
icwhich <= 2'b10;
iccnt <= 3'b00;
icstate <= IC2;
end
end
IC2: icstate <= IC3;
IC3: icstate <= IC3a;
7060,10 → 7818,11
// The IC machine will stall in this state until the BIU has loaded the
// L2 cache.
IC4:
if (ihit2 && picstate==IC3a) begin
L1_en <= 10'h3FF;
L1_wr1 <= TRUE;
if (ihitL2 && picstate==IC3a) begin
L1_en <= 9'h1FF;
L1_wr0 <= TRUE;
L1_wr1 <= TRUE && `WAYS > 1;
L1_wr2 <= TRUE && `WAYS > 2;
L1_adr <= L2_adr;
L2_rdat <= L2_dato;
icstate <= IC5;
7071,9 → 7830,10
else if (bstate!=B9)
;
else begin
L1_en <= 10'h3FF;
L1_wr1 <= TRUE;
L1_en <= 9'h1FF;
L1_wr0 <= TRUE;
L1_wr1 <= TRUE && `WAYS > 1;
L1_wr2 <= TRUE && `WAYS > 2;
L1_adr <= L2_adr;
L2_rdat <= L2_dato;
icstate <= IC5;
7080,9 → 7840,10
end
IC5:
begin
L1_en <= 10'h000;
L1_en <= 9'h000;
L1_wr0 <= FALSE;
L1_wr1 <= FALSE;
L1_wr2 <= FALSE;
icstate <= IC6;
end
IC6: icstate <= IC7;
7094,7 → 7855,7
default: icstate <= IDLE;
endcase
 
if (dram0_load)
if (mem1_available && dram0_load)
case(dram0)
`DRAMSLOT_AVAIL: ;
`DRAMSLOT_BUSY: dram0 <= dram0 + !dram0_unc;
7106,7 → 7867,7
`DRAMREQ_READY: ;
endcase
 
if (dram1_load)
if (mem2_available && dram1_load && `NUM_MEM > 1)
case(dram1)
`DRAMSLOT_AVAIL: ;
`DRAMSLOT_BUSY: dram1 <= dram1 + !dram1_unc;
7118,7 → 7879,7
`DRAMREQ_READY: ;
endcase
 
if (dram2_load)
if (mem3_available && dram2_load && `NUM_MEM > 2)
case(dram2)
`DRAMSLOT_AVAIL: ;
`DRAMSLOT_BUSY: dram2 <= dram2 + !dram2_unc;
7139,18 → 7900,48
 
case(bstate)
BIDLE:
begin
isCAS <= FALSE;
isAMO <= FALSE;
isInc <= FALSE;
isSpt <= FALSE;
isRMW <= FALSE;
rdvq <= 1'b0;
errq <= 1'b0;
exvq <= 1'b0;
bwhich <= 2'b11;
preload <= FALSE;
if (dram0==`DRAMSLOT_BUSY && dram0_rmw) begin
begin
isCAS <= FALSE;
isAMO <= FALSE;
isInc <= FALSE;
isSpt <= FALSE;
isRMW <= FALSE;
rdvq <= 1'b0;
errq <= 1'b0;
exvq <= 1'b0;
bwhich <= 2'b00;
preload <= FALSE;
`ifdef HAS_WB
if (wb_v[0] & wb_en) begin
cyc_o <= `HIGH;
stb_o <= `HIGH;
we_o <= `HIGH;
sel_o <= wb_sel[0];
adr_o <= wb_addr[0];
dat_o <= wb_data[0];
ol_o <= wb_ol[0];
wbo_id <= wb_id[0];
bstate <= wb_rmw[0] ? B12 : B1;
end
begin
for (j = 1; j < `WB_DEPTH; j = j + 1) begin
wb_v[j-1] <= wb_v[j];
wb_id[j-1] <= wb_id[j];
wb_rmw[j-1] <= wb_rmw[j];
wb_sel[j-1] <= wb_sel[j];
wb_addr[j-1] <= wb_addr[j];
wb_data[j-1] <= wb_data[j];
wb_ol[j-1] <= wb_ol[j];
end
wb_v[`WB_DEPTH-1] <= `INV;
wb_rmw[`WB_DEPTH-1] <= `FALSE;
end
 
// if (|wb_v)
// ;
// else
`endif
if (~|wb_v && mem1_available && dram0==`DRAMSLOT_BUSY && dram0_rmw) begin
`ifdef SUPPORT_DBG
if (dbg_smatch0|dbg_lmatch0) begin
dramA_v <= `TRUE;
7162,7 → 7953,6
else
`endif
begin
dram0 <= `DRAMSLOT_HASBUS;
isRMW <= dram0_rmw;
isCAS <= IsCAS(dram0_instr);
isAMO <= IsAMO(dram0_instr);
7169,6 → 7959,7
isInc <= IsInc(dram0_instr);
casid <= dram0_id;
bwhich <= 2'b00;
dram0 <= `DRAMSLOT_HASBUS;
cyc_o <= `HIGH;
stb_o <= `HIGH;
sel_o <= fnSelect(dram0_instr,dram0_addr);
7178,7 → 7969,7
bstate <= B12;
end
end
else if (dram1==`DRAMSLOT_BUSY && dram1_rmw) begin
else if (~|wb_v && mem2_available && dram1==`DRAMSLOT_BUSY && dram1_rmw && `NUM_MEM > 1) begin
`ifdef SUPPORT_DBG
if (dbg_smatch1|dbg_lmatch1) begin
dramB_v <= `TRUE;
7190,7 → 7981,6
else
`endif
begin
dram1 <= `DRAMSLOT_HASBUS;
isRMW <= dram1_rmw;
isCAS <= IsCAS(dram1_instr);
isAMO <= IsAMO(dram1_instr);
7197,6 → 7987,7
isInc <= IsInc(dram1_instr);
casid <= dram1_id;
bwhich <= 2'b01;
dram1 <= `DRAMSLOT_HASBUS;
cyc_o <= `HIGH;
stb_o <= `HIGH;
sel_o <= fnSelect(dram1_instr,dram1_addr);
7206,7 → 7997,7
bstate <= B12;
end
end
else if (dram2==`DRAMSLOT_BUSY && dram2_rmw) begin
else if (~|wb_v && mem3_available && dram2==`DRAMSLOT_BUSY && dram2_rmw && `NUM_MEM > 2) begin
`ifdef SUPPORT_DBG
if (dbg_smatch2|dbg_lmatch2) begin
dramC_v <= `TRUE;
7218,7 → 8009,6
else
`endif
begin
dram2 <= `DRAMSLOT_HASBUS;
isRMW <= dram2_rmw;
isCAS <= IsCAS(dram2_instr);
isAMO <= IsAMO(dram2_instr);
7225,6 → 8015,7
isInc <= IsInc(dram2_instr);
casid <= dram2_id;
bwhich <= 2'b10;
dram2 <= `DRAMSLOT_HASBUS;
cyc_o <= `HIGH;
stb_o <= `HIGH;
sel_o <= fnSelect(dram2_instr,dram2_addr);
7234,7 → 8025,7
bstate <= B12;
end
end
else if (dram0==`DRAMSLOT_BUSY && IsStore(dram0_instr)) begin
else if (mem1_available && dram0==`DRAMSLOT_BUSY && IsStore(dram0_instr)) begin
`ifdef SUPPORT_DBG
if (dbg_smatch0) begin
dramA_v <= `TRUE;
7246,21 → 8037,37
else
`endif
begin
dram0 <= `DRAMSLOT_HASBUS;
dram0_instr[`INSTRUCTION_OP] <= `NOP;
bwhich <= 2'b00;
cyc_o <= `HIGH;
stb_o <= `HIGH;
we_o <= `HIGH;
bwhich <= 2'b00;
`ifndef HAS_WB
dram0 <= `DRAMSLOT_HASBUS;
dram0_instr[`INSTRUCTION_OP] <= `NOP;
cyc_o <= `HIGH;
stb_o <= `HIGH;
sel_o <= fnSelect(dram0_instr,dram0_addr);
adr_o <= dram0_addr;
dat_o <= fnDato(dram0_instr,dram0_data);
cr_o <= IsSWC(dram0_instr);
ol_o <= dram0_ol;
bstate <= B1;
`else
if (wbptr<`WB_DEPTH-1) begin
dram0 <= `DRAMREQ_READY;
dram0_instr[`INSTRUCTION_OP] <= `NOP;
wb_update(
dram0_id,
`FALSE,
fnSelect(dram0_instr,dram0_addr),
dram0_ol,
dram0_addr,
fnDato(dram0_instr,dram0_data)
);
iqentry_done[ dram0_id[`QBITS] ] <= `VAL;
iqentry_out[ dram0_id[`QBITS] ] <= `INV;
end
`endif
// cr_o <= IsSWC(dram0_instr);
end
end
else if (dram1==`DRAMSLOT_BUSY && IsStore(dram1_instr)) begin
else if (mem2_available && dram1==`DRAMSLOT_BUSY && IsStore(dram1_instr) && `NUM_MEM > 1) begin
`ifdef SUPPORT_DBG
if (dbg_smatch1) begin
dramB_v <= `TRUE;
7272,21 → 8079,37
else
`endif
begin
bwhich <= 2'b01;
`ifndef HAS_WB
dram1 <= `DRAMSLOT_HASBUS;
dram1_instr[`INSTRUCTION_OP] <= `NOP;
bwhich <= 2'b01;
cyc_o <= `HIGH;
stb_o <= `HIGH;
we_o <= `HIGH;
cyc_o <= `HIGH;
stb_o <= `HIGH;
sel_o <= fnSelect(dram1_instr,dram1_addr);
adr_o <= dram1_addr;
dat_o <= fnDato(dram1_instr,dram1_data);
cr_o <= IsSWC(dram1_instr);
ol_o <= dram1_ol;
bstate <= B1;
`else
if (wbptr<`WB_DEPTH-1) begin
dram1 <= `DRAMREQ_READY;
dram1_instr[`INSTRUCTION_OP] <= `NOP;
wb_update(
dram1_id,
`FALSE,
fnSelect(dram1_instr,dram1_addr),
dram1_ol,
dram1_addr,
fnDato(dram1_instr,dram1_data)
);
iqentry_done[ dram1_id[`QBITS] ] <= `VAL;
iqentry_out[ dram1_id[`QBITS] ] <= `INV;
end
`endif
// cr_o <= IsSWC(dram0_instr);
end
end
else if (dram2==`DRAMSLOT_BUSY && IsStore(dram2_instr)) begin
else if (mem3_available && dram2==`DRAMSLOT_BUSY && IsStore(dram2_instr) && `NUM_MEM > 2) begin
`ifdef SUPPORT_DBG
if (dbg_smatch2) begin
dramC_v <= `TRUE;
7298,22 → 8121,38
else
`endif
begin
bwhich <= 2'b10;
`ifndef HAS_WB
dram2 <= `DRAMSLOT_HASBUS;
dram2_instr[`INSTRUCTION_OP] <= `NOP;
bwhich <= 2'b10;
cyc_o <= `HIGH;
stb_o <= `HIGH;
we_o <= `HIGH;
cyc_o <= `HIGH;
stb_o <= `HIGH;
sel_o <= fnSelect(dram2_instr,dram2_addr);
adr_o <= dram2_addr;
dat_o <= fnDato(dram2_instr,dram2_data);
cr_o <= IsSWC(dram2_instr);
ol_o <= dram2_ol;
bstate <= B1;
`else
if (wbptr<`WB_DEPTH-1) begin
dram2 <= `DRAMREQ_READY;
dram2_instr[`INSTRUCTION_OP] <= `NOP;
wb_update(
dram2_id,
`FALSE,
fnSelect(dram2_instr,dram2_addr),
dram2_ol,
dram2_addr,
fnDato(dram2_instr,dram2_data)
);
iqentry_done[ dram2_id[`QBITS] ] <= `VAL;
iqentry_out[ dram2_id[`QBITS] ] <= `INV;
end
`endif
// cr_o <= IsSWC(dram0_instr);
end
end
// Check for read misses on the data cache
else if (!dram0_unc && dram0==`DRAMSLOT_REQBUS && dram0_load) begin
else if (mem1_available && !dram0_unc && dram0==`DRAMSLOT_REQBUS && dram0_load) begin
`ifdef SUPPORT_DBG
if (dbg_lmatch0) begin
dramA_v <= `TRUE;
7331,7 → 8170,7
bstate <= B2;
end
end
else if (!dram1_unc && dram1==`DRAMSLOT_REQBUS && dram1_load) begin
else if (~|wb_v && mem2_available && !dram1_unc && dram1==`DRAMSLOT_REQBUS && dram1_load && `NUM_MEM > 1) begin
`ifdef SUPPORT_DBG
if (dbg_lmatch1) begin
dramB_v <= `TRUE;
7349,7 → 8188,7
bstate <= B2;
end
end
else if (!dram2_unc && dram2==`DRAMSLOT_REQBUS && dram2_load) begin
else if (~|wb_v && mem3_available && !dram2_unc && dram2==`DRAMSLOT_REQBUS && dram2_load && `NUM_MEM > 2) begin
`ifdef SUPPORT_DBG
if (dbg_lmatch2) begin
dramC_v <= `TRUE;
7367,7 → 8206,7
bstate <= B2;
end
end
else if (dram0_unc && dram0==`DRAMSLOT_BUSY && dram0_load) begin
else if (~|wb_v && mem1_available && dram0_unc && dram0==`DRAMSLOT_BUSY && dram0_load) begin
`ifdef SUPPORT_DBG
if (dbg_lmatch0) begin
dramA_v <= `TRUE;
7389,7 → 8228,7
bstate <= B12;
end
end
else if (dram1_unc && dram1==`DRAMSLOT_BUSY && dram1_load) begin
else if (~|wb_v && mem2_available && dram1_unc && dram1==`DRAMSLOT_BUSY && dram1_load && `NUM_MEM > 1) begin
`ifdef SUPPORT_DBG
if (dbg_lmatch1) begin
dramB_v <= `TRUE;
7411,7 → 8250,7
bstate <= B12;
end
end
else if (dram2_unc && dram2==`DRAMSLOT_BUSY && dram2_load) begin
else if (~|wb_v && mem3_available && dram2_unc && dram2==`DRAMSLOT_BUSY && dram2_load && `NUM_MEM > 2) begin
`ifdef SUPPORT_DBG
if (dbg_lmatch2) begin
dramC_v <= `TRUE;
7434,13 → 8273,14
end
end
// Check for L2 cache miss
else if (!ihit2) begin
else if (~|wb_v && !ihitL2) begin
cti_o <= 3'b001;
bte_o <= 2'b01; // 4 beat burst wrap
bte_o <= 2'b00;//2'b01; // 4 beat burst wrap
cyc_o <= `HIGH;
stb_o <= `HIGH;
sel_o <= 8'hFF;
icl_o <= `HIGH;
iccnt <= 3'd0;
// adr_o <= icwhich ? {pc0[31:5],5'b0} : {pc1[31:5],5'b0};
// L2_adr <= icwhich ? {pc0[31:5],5'b0} : {pc1[31:5],5'b0};
adr_o <= {pcr[5:0],L1_adr[31:5],5'h0};
7451,6 → 8291,8
end
end
// Terminal state for a store operation.
// Note that if only a single memory channel is selected, bwhich will be a
// constant 0. This should cause the extra code to be removed.
B1:
if (acki|err_i) begin
isStore <= `TRUE;
7467,33 → 8309,48
// instruction should be surrounded by SYNC's.
if (cr_o)
sema[0] <= rbi_i;
`ifdef HAS_WB
for (n = 0; n < QENTRIES; n = n + 1) begin
if (wbo_id[n]) begin
iqentry_exc[n] <= wrv_i|err_i ? `FLT_DWF : `FLT_NONE;
if (err_i|wrv_i) begin
iqentry_a1[n] <= adr_o;
wb_v <= 8'h00; // Invalidate write buffer if there is a problem with the store
wb_en <= `FALSE; // and disable write buffer
end
iqentry_cmt[n] <= `VAL;
iqentry_aq[n] <= `INV;
end
end
`else
case(bwhich)
2'd0: begin
2'd0: if (mem1_available) begin
dram0 <= `DRAMREQ_READY;
iqentry_exc[dram0_id[`QBITS]] <= wrv_i|err_i ? `FLT_DWF : `FLT_NONE;
if (err_i|wrv_i) iqentry_a1[dram0_id[`QBITS]] <= adr_o;
iqentry_cmt[ dram0_id[`QBITS] ] <= `VAL;
iqentry_aq[ dram0_id[`QBITS] ] <= `INV;
iqentry_cmt[ dram0_id[`QBITS] ] <= `VAL;
iqentry_aq[ dram0_id[`QBITS] ] <= `INV;
//iqentry_out[ dram0_id[`QBITS] ] <= `INV;
end
2'd1: begin
2'd1: if (`NUM_MEM > 1) begin
dram1 <= `DRAMREQ_READY;
iqentry_exc[dram1_id[`QBITS]] <= wrv_i|err_i ? `FLT_DWF : `FLT_NONE;
if (err_i|wrv_i) iqentry_a1[dram1_id[`QBITS]] <= adr_o;
iqentry_cmt[ dram1_id[`QBITS] ] <= `VAL;
iqentry_aq[ dram1_id[`QBITS] ] <= `INV;
iqentry_cmt[ dram1_id[`QBITS] ] <= `VAL;
iqentry_aq[ dram1_id[`QBITS] ] <= `INV;
//iqentry_out[ dram1_id[`QBITS] ] <= `INV;
end
2'd2: begin
2'd2: if (`NUM_MEM > 2) begin
dram2 <= `DRAMREQ_READY;
iqentry_exc[dram2_id[`QBITS]] <= wrv_i|err_i ? `FLT_DWF : `FLT_NONE;
if (err_i|wrv_i) iqentry_a1[dram2_id[`QBITS]] <= adr_o;
iqentry_cmt[ dram2_id[`QBITS] ] <= `VAL;
iqentry_aq[ dram2_id[`QBITS] ] <= `INV;
iqentry_cmt[ dram2_id[`QBITS] ] <= `VAL;
iqentry_aq[ dram2_id[`QBITS] ] <= `INV;
//iqentry_out[ dram2_id[`QBITS] ] <= `INV;
end
default: ;
endcase
`endif
bstate <= B19;
end
B2:
7510,7 → 8367,7
ol_o <= dram0_ol;
bstate <= B2d;
end
2'd1: begin
2'd1: if (`NUM_MEM > 1) begin
cti_o <= 3'b001;
bte_o <= 2'b01;
cyc_o <= `HIGH;
7520,7 → 8377,7
ol_o <= dram1_ol;
bstate <= B2d;
end
2'd2: begin
2'd2: if (`NUM_MEM > 2) begin
cti_o <= 3'b001;
bte_o <= 2'b01;
cyc_o <= `HIGH;
7544,11 → 8401,11
iqentry_a1[dram0_id[`QBITS]] <= adr_o;
iqentry_exc[dram0_id[`QBITS]] <= err_i ? `FLT_DBE : `FLT_DRF;
end
2'd1: if (err_i|rdv_i) begin
2'd1: if ((err_i|rdv_i) && `NUM_MEM > 1) begin
iqentry_a1[dram1_id[`QBITS]] <= adr_o;
iqentry_exc[dram1_id[`QBITS]] <= err_i ? `FLT_DBE : `FLT_DRF;
end
2'd2: if (err_i|rdv_i) begin
2'd2: if ((err_i|rdv_i) && `NUM_MEM > 2) begin
iqentry_a1[dram2_id[`QBITS]] <= adr_o;
iqentry_exc[dram2_id[`QBITS]] <= err_i ? `FLT_DBE : `FLT_DRF;
end
7589,14 → 8446,14
if (ack_i|err_i) begin
errq <= errq | err_i;
exvq <= exvq | exv_i;
// L1_en <= 8'h3 << {L2_adr[4:3],1'b0};
// L1_en <= 9'h3 << {L2_xsel,L2_adr[4:3],1'b0};
// L1_wr0 <= `TRUE;
// L1_wr1 <= `TRUE;
// L1_adr <= L2_adr;
if (err_i)
L2_rdat <= {8{13'b0,3'd7,3'b0,`FLT_IBE,`BRK}};
L2_rdat <= {9{11'b0,4'd7,1'b0,`FLT_IBE,2'b00,`BRK}};
else
L2_rdat <= {4{dat_i}};
L2_rdat <= {dat_i[31:0],{4{dat_i}}};
iccnt <= iccnt + 3'd1;
//stb_o <= `LOW;
if (iccnt==3'd3)
7620,7 → 8477,8
begin
L1_wr0 <= `FALSE;
L1_wr1 <= `FALSE;
L1_en <= 8'hFF;
L1_wr2 <= `FALSE;
L1_en <= 9'h1FF;
L2_xsel <= 1'b0;
if (~ack_i) begin
bstate <= BIDLE;
7689,12 → 8547,12
iqentry_exc [ dram0_id[`QBITS] ] <= err_i ? `FLT_DRF : rdv_i ? `FLT_DRF : `FLT_NONE;
if (err_i|rdv_i) iqentry_a1[dram0_id[`QBITS]] <= adr_o;
end
2'b01: begin
2'b01: if (`NUM_MEM > 1) begin
dram1 <= `DRAMREQ_READY;
iqentry_exc [ dram1_id[`QBITS] ] <= err_i ? `FLT_DRF : rdv_i ? `FLT_DRF : `FLT_NONE;
if (err_i|rdv_i) iqentry_a1[dram1_id[`QBITS]] <= adr_o;
end
2'b10: begin
2'b10: if (`NUM_MEM > 2) begin
dram2 <= `DRAMREQ_READY;
iqentry_exc [ dram2_id[`QBITS] ] <= err_i ? `FLT_DRF : rdv_i ? `FLT_DRF : `FLT_NONE;
if (err_i|rdv_i) iqentry_a1[dram2_id[`QBITS]] <= adr_o;
7844,8 → 8702,9
);
end
`endif
`ifdef FCU_ENH
$display("Call Stack:");
for (n = 0; n < 32; n = n + 4)
for (n = 0; n < 16; n = n + 4)
$display("%c%d: %h %c%d: %h %c%d: %h %c%d: %h",
ufb1.ursb1.rasp==n+0 ?">" : " ", n[4:0]+0, ufb1.ursb1.ras[n+0],
ufb1.ursb1.rasp==n+1 ?">" : " ", n[4:0]+1, ufb1.ursb1.ras[n+1],
7853,7 → 8712,7
ufb1.ursb1.rasp==n+3 ?">" : " ", n[4:0]+3, ufb1.ursb1.ras[n+3]
);
$display("\n");
`endif
// $display("Return address stack:");
// for (n = 0; n < 16; n = n + 1)
// $display("%d %h", rasp+n[3:0], ras[rasp+n[3:0]]);
7895,14 → 8754,18
$display("%d %h %h %c%h %o #",
dram0, dram0_addr, dram0_data, (IsFlowCtrl(dram0_instr) ? 98 : (IsMem(dram0_instr)) ? 109 : 97),
dram0_instr, dram0_id);
if (`NUM_MEM > 1)
$display("%d %h %h %c%h %o #",
dram1, dram1_addr, dram1_data, (IsFlowCtrl(dram1_instr) ? 98 : (IsMem(dram1_instr)) ? 109 : 97),
dram1_instr, dram1_id);
if (`NUM_MEM > 2)
$display("%d %h %h %c%h %o #",
dram2, dram2_addr, dram2_data, (IsFlowCtrl(dram2_instr) ? 98 : (IsMem(dram2_instr)) ? 109 : 97),
dram2_instr, dram2_id);
$display("%d %h %o %h #", dramA_v, dramA_bus, dramA_id, dramA_exc);
if (`NUM_MEM > 1)
$display("%d %h %o %h #", dramB_v, dramB_bus, dramB_id, dramB_exc);
if (`NUM_MEM > 2)
$display("%d %h %o %h #", dramC_v, dramC_bus, dramC_id, dramC_exc);
$display("ALU");
$display("%d %h %h %h %c%h %d %o %h #",
7910,12 → 8773,13
(IsFlowCtrl(alu0_instr) ? 98 : IsMem(alu0_instr) ? 109 : 97),
alu0_instr, alu0_bt, alu0_sourceid, alu0_pc);
$display("%d %h %o 0 #", alu0_v, alu0_bus, alu0_id);
 
$display("%d %h %h %h %c%h %d %o %h #",
alu1_dataready, alu1_argI, alu1_argA, alu1_argB,
(IsFlowCtrl(alu1_instr) ? 98 : IsMem(alu1_instr) ? 109 : 97),
alu1_instr, alu1_bt, alu1_sourceid, alu1_pc);
$display("%d %h %o 0 #", alu1_v, alu1_bus, alu1_id);
if (`NUM_ALU > 1) begin
$display("%d %h %h %h %c%h %d %o %h #",
alu1_dataready, alu1_argI, alu1_argA, alu1_argB,
(IsFlowCtrl(alu1_instr) ? 98 : IsMem(alu1_instr) ? 109 : 97),
alu1_instr, alu1_bt, alu1_sourceid, alu1_pc);
$display("%d %h %o 0 #", alu1_v, alu1_bus, alu1_id);
end
$display("FCU");
$display("%d %h %h %h %h #", fcu_v, fcu_bus, fcu_argI, fcu_argA, fcu_argB);
$display("%c %h %h #", fcu_branchmiss?"m":" ", fcu_sourceid, fcu_misspc);
7923,6 → 8787,7
$display("0: %c %h %o %d #", commit0_v?"v":" ", commit0_bus, commit0_id, commit0_tgt[4:0]);
$display("1: %c %h %o %d #", commit1_v?"v":" ", commit1_bus, commit1_id, commit1_tgt[4:0]);
$display("instructions committed: %d ticks: %d ", I, tick);
$display("Write merges: %d", wb_merges);
 
//
// $display("\n\n\n\n\n\n\n\n");
8129,6 → 8994,39
seq_num <= seq_num + 5'd1;
end
*/
 
// wb_update: record a store in the write buffer, merging it into the entry
// just below wbptr when that entry targets the same 8-byte-aligned address.
//
//   id   - index recorded (as a single set bit) in the entry's wb_id mask;
//          presumably the issue-queue slot of the store - confirm against callers
//   rmw  - flag stored in wb_rmw; also part of the merge match
//   sel  - byte-lane select, one bit per byte of data
//   ol   - 2-bit value stored in wb_ol; also part of the merge match
//   addr - store address; only addr[AMSB:3] is compared / kept
//   data - 64-bit store data
//
// NOTE(review): this task never assigns wbptr, so advancing the pointer after
// a new entry is written appears to be left to the caller - confirm.
task wb_update;
input [`QBITS] id;
input rmw;
input [7:0] sel;
input [1:0] ol;
input [`ABITS] addr;
input [63:0] data;
begin
// Merge path: buffer is non-empty and the entry at wbptr-1 matches on
// aligned address, ol and rmw.
// NOTE(review): the match does not test wb_v[wbptr-1]; confirm an entry that
// is no longer valid cannot be selected for merging here.
if (wbptr > 0 && wb_addr[wbptr-1][AMSB:3]==addr[AMSB:3] && wb_ol[wbptr-1]==ol && wb_rmw[wbptr-1]==rmw) begin
// Accumulate the byte selects of both stores.
wb_sel[wbptr-1] <= wb_sel[wbptr-1] | sel;
// Overwrite only the byte lanes this store selects; the other lanes keep
// the data already held in the entry.
if (sel[0]) wb_data[wbptr-1][ 7: 0] <= data[ 7: 0];
if (sel[1]) wb_data[wbptr-1][15: 8] <= data[15: 8];
if (sel[2]) wb_data[wbptr-1][23:16] <= data[23:16];
if (sel[3]) wb_data[wbptr-1][31:24] <= data[31:24];
if (sel[4]) wb_data[wbptr-1][39:32] <= data[39:32];
if (sel[5]) wb_data[wbptr-1][47:40] <= data[47:40];
if (sel[6]) wb_data[wbptr-1][55:48] <= data[55:48];
if (sel[7]) wb_data[wbptr-1][63:56] <= data[63:56];
// Add this store's id bit to the set of ids covered by the merged entry.
wb_id[wbptr-1] <= wb_id[wbptr-1] | (16'd1 << id);
// Statistics counter: one more store folded into an existing entry.
wb_merges <= wb_merges + 32'd1;
end
// No merge possible: fill in the entry at wbptr from scratch.
else begin
// Entry validity follows wb_en, so the entry is written as not valid when
// wb_en is false.
wb_v[wbptr] <= wb_en;
wb_id[wbptr] <= (16'd1 << id);
wb_rmw[wbptr] <= rmw;
wb_ol[wbptr] <= ol;
wb_sel[wbptr] <= sel;
// Store the address with the low three bits cleared (8-byte aligned);
// the byte position within the word is carried by sel.
wb_addr[wbptr] <= {addr[AMSB:3],3'b0};
wb_data[wbptr] <= data;
end
end
endtask
// Increment the head pointers
// Also increments the instruction counter
// Used when instructions are committed.
8146,11 → 9044,34
head6 <= head6 + amt;
head7 <= head7 + amt;
I <= I + amt;
if (amt==3'd2) begin
if (amt==3'd3) begin
iqentry_agen[head0] <= `INV;
iqentry_agen[head1] <= `INV;
iqentry_agen[head2] <= `INV;
iqentry_mem[head0] <= `FALSE;
iqentry_mem[head1] <= `FALSE;
iqentry_mem[head2] <= `FALSE;
iqentry_iv[head0] <= `INV;
iqentry_iv[head1] <= `INV;
iqentry_iv[head2] <= `INV;
iqentry_alu[head0] <= `FALSE;
iqentry_alu[head1] <= `FALSE;
iqentry_alu[head2] <= `FALSE;
end
else if (amt==3'd2) begin
iqentry_agen[head0] <= `INV;
iqentry_agen[head1] <= `INV;
iqentry_mem[head0] <= `FALSE;
iqentry_mem[head1] <= `FALSE;
iqentry_iv[head0] <= `INV;
iqentry_iv[head1] <= `INV;
iqentry_alu[head0] <= `FALSE;
iqentry_alu[head1] <= `FALSE;
end else if (amt==3'd1) begin
iqentry_agen[head0] <= `INV;
iqentry_agen[head0] <= `INV;
iqentry_mem[head0] <= `FALSE;
iqentry_iv[head0] <= `INV;
iqentry_alu[head0] <= `FALSE;
end
end
endtask
8412,6 → 9333,7
mstatus[13:6] <= 8'h00;
mstatus[19:14] <= 6'd0;
`endif
wb_en <= `TRUE;
sema[0] <= 1'b0;
ve_hold <= {vqet1,10'd0,vqe1,10'd0,vqet0,10'd0,vqe0};
`ifdef SUPPORT_DBG
8570,8 → 9492,12
dbg_ctrl[63] <= dbg_ctrl[55];
`endif
end
default: ;
endcase
`MEMNDX:
case(iqentry_instr[head][`INSTRUCTION_S2])
`CACHEX:
case(iqentry_instr[head][20:16])
case(iqentry_instr[head][22:18])
5'h03: invic <= TRUE;
5'h10: cr0[30] <= FALSE;
5'h11: cr0[30] <= TRUE;
8601,7 → 9527,7
end
`endif
`CACHE:
case(iqentry_instr[head][15:11])
case(iqentry_instr[head][17:13])
5'h03: invic <= TRUE;
5'h10: cr0[30] <= FALSE;
5'h11: cr0[30] <= TRUE;
8609,37 → 9535,40
endcase
`FLOAT:
case(iqentry_instr[head][`INSTRUCTION_S2])
`FRM: fp_rm <= iqentry_res[head][2:0];
`FRM: begin
fp1_rm <= iqentry_res[head][2:0];
fp2_rm <= iqentry_res[head][2:0];
end
`FCX:
begin
fp_sx <= fp_sx & ~iqentry_res[head][5];
fp_inex <= fp_inex & ~iqentry_res[head][4];
fp_dbzx <= fp_dbzx & ~(iqentry_res[head][3]|iqentry_res[head][0]);
fp_underx <= fp_underx & ~iqentry_res[head][2];
fp_overx <= fp_overx & ~iqentry_res[head][1];
fp_giopx <= fp_giopx & ~iqentry_res[head][0];
fp_infdivx <= fp_infdivx & ~iqentry_res[head][0];
fp_zerozerox <= fp_zerozerox & ~iqentry_res[head][0];
fp_subinfx <= fp_subinfx & ~iqentry_res[head][0];
fp_infzerox <= fp_infzerox & ~iqentry_res[head][0];
fp_NaNCmpx <= fp_NaNCmpx & ~iqentry_res[head][0];
fp_swtx <= 1'b0;
fp1_sx <= fp1_sx & ~iqentry_res[head][5];
fp1_inex <= fp1_inex & ~iqentry_res[head][4];
fp1_dbzx <= fp1_dbzx & ~(iqentry_res[head][3]|iqentry_res[head][0]);
fp1_underx <= fp1_underx & ~iqentry_res[head][2];
fp1_overx <= fp1_overx & ~iqentry_res[head][1];
fp1_giopx <= fp1_giopx & ~iqentry_res[head][0];
fp1_infdivx <= fp1_infdivx & ~iqentry_res[head][0];
fp1_zerozerox <= fp1_zerozerox & ~iqentry_res[head][0];
fp1_subinfx <= fp1_subinfx & ~iqentry_res[head][0];
fp1_infzerox <= fp1_infzerox & ~iqentry_res[head][0];
fp1_NaNCmpx <= fp1_NaNCmpx & ~iqentry_res[head][0];
fp1_swtx <= 1'b0;
end
`FDX:
begin
fp_inexe <= fp_inexe & ~iqentry_res[head][4];
fp_dbzxe <= fp_dbzxe & ~iqentry_res[head][3];
fp_underxe <= fp_underxe & ~iqentry_res[head][2];
fp_overxe <= fp_overxe & ~iqentry_res[head][1];
fp_invopxe <= fp_invopxe & ~iqentry_res[head][0];
fp1_inexe <= fp1_inexe & ~iqentry_res[head][4];
fp1_dbzxe <= fp1_dbzxe & ~iqentry_res[head][3];
fp1_underxe <= fp1_underxe & ~iqentry_res[head][2];
fp1_overxe <= fp1_overxe & ~iqentry_res[head][1];
fp1_invopxe <= fp1_invopxe & ~iqentry_res[head][0];
end
`FEX:
begin
fp_inexe <= fp_inexe | iqentry_res[head][4];
fp_dbzxe <= fp_dbzxe | iqentry_res[head][3];
fp_underxe <= fp_underxe | iqentry_res[head][2];
fp_overxe <= fp_overxe | iqentry_res[head][1];
fp_invopxe <= fp_invopxe | iqentry_res[head][0];
fp1_inexe <= fp1_inexe | iqentry_res[head][4];
fp1_dbzxe <= fp1_dbzxe | iqentry_res[head][3];
fp1_underxe <= fp1_underxe | iqentry_res[head][2];
fp1_overxe <= fp1_overxe | iqentry_res[head][1];
fp1_invopxe <= fp1_invopxe | iqentry_res[head][0];
end
default:
begin
8647,30 → 9576,30
// 28 to 24 are exception enables
// 23 is nsfp
// 22 is a fractie
fp_fractie <= iqentry_a0[head][22];
fp_raz <= iqentry_a0[head][21];
fp1_fractie <= iqentry_a0[head][22];
fp1_raz <= iqentry_a0[head][21];
// 20 is a 0
fp_neg <= iqentry_a0[head][19];
fp_pos <= iqentry_a0[head][18];
fp_zero <= iqentry_a0[head][17];
fp_inf <= iqentry_a0[head][16];
fp1_neg <= iqentry_a0[head][19];
fp1_pos <= iqentry_a0[head][18];
fp1_zero <= iqentry_a0[head][17];
fp1_inf <= iqentry_a0[head][16];
// 15 swtx
// 14
fp_inex <= fp_inex | (fp_inexe & iqentry_a0[head][14]);
fp_dbzx <= fp_dbzx | (fp_dbzxe & iqentry_a0[head][13]);
fp_underx <= fp_underx | (fp_underxe & iqentry_a0[head][12]);
fp_overx <= fp_overx | (fp_overxe & iqentry_a0[head][11]);
fp1_inex <= fp1_inex | (fp1_inexe & iqentry_a0[head][14]);
fp1_dbzx <= fp1_dbzx | (fp1_dbzxe & iqentry_a0[head][13]);
fp1_underx <= fp1_underx | (fp1_underxe & iqentry_a0[head][12]);
fp1_overx <= fp1_overx | (fp1_overxe & iqentry_a0[head][11]);
//fp_giopx <= fp_giopx | (fp_giopxe & iqentry_res2[head][10]);
//fp_invopx <= fp_invopx | (fp_invopxe & iqentry_res2[head][24]);
//
fp_cvtx <= fp_cvtx | (fp_giopxe & iqentry_a0[head][7]);
fp_sqrtx <= fp_sqrtx | (fp_giopxe & iqentry_a0[head][6]);
fp_NaNCmpx <= fp_NaNCmpx | (fp_giopxe & iqentry_a0[head][5]);
fp_infzerox <= fp_infzerox | (fp_giopxe & iqentry_a0[head][4]);
fp_zerozerox <= fp_zerozerox | (fp_giopxe & iqentry_a0[head][3]);
fp_infdivx <= fp_infdivx | (fp_giopxe & iqentry_a0[head][2]);
fp_subinfx <= fp_subinfx | (fp_giopxe & iqentry_a0[head][1]);
fp_snanx <= fp_snanx | (fp_giopxe & iqentry_a0[head][0]);
fp1_cvtx <= fp1_cvtx | (fp1_giopxe & iqentry_a0[head][7]);
fp1_sqrtx <= fp1_sqrtx | (fp1_giopxe & iqentry_a0[head][6]);
fp1_NaNCmpx <= fp1_NaNCmpx | (fp1_giopxe & iqentry_a0[head][5]);
fp1_infzerox <= fp1_infzerox | (fp1_giopxe & iqentry_a0[head][4]);
fp1_zerozerox <= fp1_zerozerox | (fp1_giopxe & iqentry_a0[head][3]);
fp1_infdivx <= fp1_infdivx | (fp1_giopxe & iqentry_a0[head][2]);
fp1_subinfx <= fp1_subinfx | (fp1_giopxe & iqentry_a0[head][1]);
fp1_snanx <= fp1_snanx | (fp1_giopxe & iqentry_a0[head][0]);
 
end
endcase
8700,12 → 9629,13
`CSR_TICK: dat <= tick;
`CSR_PCR: dat <= pcr;
`CSR_PCR2: dat <= pcr2;
`CSR_PMR: dat <= pmr;
`CSR_WBRCD: dat <= wbrcd;
`CSR_SEMA: dat <= sema;
`CSR_SBL: dat <= sbl;
`CSR_SBU: dat <= sbu;
`CSR_TCB: dat <= tcb;
`CSR_FSTAT: dat <= {fp_rgs,fp_status};
`CSR_FSTAT: dat <= {fp1_rgs,fp1_status};
`ifdef SUPPORT_DBG
`CSR_DBAD0: dat <= dbg_adr0;
`CSR_DBAD1: dat <= dbg_adr1;
8789,12 → 9719,32
`CSR_CR0: cr0 <= dat;
`CSR_PCR: pcr <= dat[31:0];
`CSR_PCR2: pcr2 <= dat;
`CSR_PMR: case(`NUM_IDU)
0,1: pmr[0] <= 1'b1;
2:
begin
if (dat[1:0]==2'b00)
pmr[1:0] <= 2'b01;
else
pmr[1:0] <= dat[1:0];
pmr[63:2] <= dat[63:2];
end
3:
begin
if (dat[2:0]==3'b000)
pmr[2:0] <= 3'b001;
else
pmr[2:0] <= dat[2:0];
pmr[63:3] <= dat[63:3];
end
default: pmr[0] <= 1'b1;
endcase
`CSR_WBRCD: wbrcd <= dat;
`CSR_SEMA: sema <= dat;
`CSR_SBL: sbl <= dat[31:0];
`CSR_SBU: sbu <= dat[31:0];
`CSR_TCB: tcb <= dat;
`CSR_FSTAT: fpu_csr[37:32] <= dat[37:32];
`CSR_FSTAT: fpu1_csr[37:32] <= dat[37:32];
`CSR_BADADR: badaddr[{thread,csrno[13:11]}] <= dat;
`CSR_CAUSE: cause[{thread,csrno[13:11]}] <= dat[15:0];
`ifdef SUPPORT_DBG
8847,6 → 9797,7
`CSR_CR0: cr0 <= cr0 | dat;
`CSR_PCR: pcr[31:0] <= pcr[31:0] | dat[31:0];
`CSR_PCR2: pcr2 <= pcr2 | dat;
`CSR_PMR: pmr <= pmr | dat;
`CSR_WBRCD: wbrcd <= wbrcd | dat;
`ifdef SUPPORT_DBG
`CSR_DBCTRL: dbg_ctrl <= dbg_ctrl | dat;
8864,6 → 9815,13
`CSR_CR0: cr0 <= cr0 & ~dat;
`CSR_PCR: pcr <= pcr & ~dat;
`CSR_PCR2: pcr2 <= pcr2 & ~dat;
`CSR_PMR: begin
if (dat[1:0]==2'b11)
pmr[1:0] <= 2'b01;
else
pmr[1:0] <= pmr[1:0] & ~dat[1:0];
pmr[63:2] <= pmr[63:2] & ~dat[63:2];
end
`CSR_WBRCD: wbrcd <= wbrcd & ~dat;
`ifdef SUPPORT_DBG
`CSR_DBCTRL: dbg_ctrl <= dbg_ctrl & ~dat;
8882,21 → 9840,6
endtask
 
/*
function [63:0] assign_a0;
input [31:0] fb_instr;
begin
if (IsShifti(fb_instr)||IsVShifti(fb_instr)||IsSEI(fb_instr)||IsRTI(fb_instr))
assign_a0 = {58'd0,fb_instr[21:16]};
// else if (IsBranch(fb_instr))
// assign_a0 = {{51{fb_instr[`INSTRUCTION_SB]}},fb_instr[31:22],fb_instr[0],2'b00};
// else if (fb_instr[`INSTRUCTION_OP] == `CALL || fb_instr[`INSTRUCTION_OP] == `JMP)
// assign_a0 = {{36{fb_instr[31]}},fb_instr[31:6],2'd0};
else
assign_a0 = {{48{fb_instr[`INSTRUCTION_SB]}},fb_instr[31:16]};
end
endfunction
*/
/*
task aluissue;
input alu_idle;
input [QENTRIES-1:0] iq_alu0;
/FT64v5/rtl/twoway/FT64_fetchbuf.v
22,6 → 22,7
//
// ============================================================================
//
`include "FT64_config.vh"
`include "FT64_defines.vh"
 
// FETCH
31,7 → 32,7
// do nothing (kinda like alpha approach)
// Like to turn this into an independent module at some point.
//
module FT64_fetchbuf(rst, clk4x, clk,
module FT64_fetchbuf(rst, clk4x, clk, fcu_clk,
cs_i, cyc_i, stb_i, ack_o, we_i, adr_i, dat_i,
hirq, thread_en,
regLR,
53,9 → 54,10
btgtA, btgtB, btgtC, btgtD,
nop_fetchbuf,
take_branch0, take_branch1,
stompedRets
stompedRets,
panic
);
parameter AMSB = 31;
parameter AMSB = `AMSB;
parameter RSTPC = 32'hFFFC0100;
parameter TRUE = 1'b1;
parameter FALSE = 1'b0;
62,6 → 64,7
input rst;
input clk4x;
input clk;
input fcu_clk;
input cs_i;
input cyc_i;
input stb_i;
123,7 → 126,7
output take_branch0;
output take_branch1;
input [3:0] stompedRets;
 
output reg [3:0] panic;
integer n;
 
//`include "FT64_decode.vh"
279,10 → 282,11
*/
assign threadx = fetchbuf;
 
`ifdef FCU_ENH
FT64_RSB #(AMSB) ursb1
(
.rst(rst),
.clk(clk),
.clk(fcu_clk),
.regLR(regLR),
.queued1(queued1),
.queued2(queued2),
300,7 → 304,7
FT64_RSB #(AMSB) ursb2
(
.rst(rst),
.clk(clk),
.clk(fcu_clk),
.regLR(regLR),
.queued1(queued1),
.queued2(1'b0),
314,6 → 318,10
.stompedRet(stompedRet),
.pc(retpc1)
);
`else
assign retpc0 = RSTPC;
assign retpc1 = RSTPC;
`endif
 
wire peclk, neclk;
edge_det ued1 (.rst(rst), .clk(clk4x), .ce(1'b1), .i(clk), .pe(peclk), .ne(neclk), .ee());
329,6 → 337,7
fetchbufC_v <= 0;
fetchbufD_v <= 0;
fetchbuf <= 0;
panic <= `PANIC_NONE;
end
else begin
347,6 → 356,9
// for that thread is assigned the current fetchbuf pc.
// For the thread that misses the pc is simply assigned the misspc.
if (branchmiss) begin
$display("***********");
$display("Branch miss");
$display("***********");
if (branchmiss_thrd) begin
pc0 <= fetchbuf0_pc;
`ifdef SUPPORT_SMT
721,7 → 733,7
fetchbufB_v <= `INV;
fetchbuf <= ~fetchbuf;
end
default: ;
default: panic <= `PANIC_INVALIDIQSTATE;
endcase
else case ({fetchbufC_v, fetchbufD_v, (queued1|queuedNop), (queued2|queuedNop)})
4'b00_00 : ; // do nothing
758,7 → 770,7
fetchbufD_v <= `INV;
fetchbuf <= ~fetchbuf;
end
default: ;
default: panic <= `PANIC_INVALIDIQSTATE;
endcase
//
// get data iff the fetch buffers are empty
854,7 → 866,10
`ifdef SUPPORT_SMT
pc0 <= pc0 + fetchbuf0_insln;
`else
if (`WAYS > 1)
pc0 <= pc0 + fetchbuf0_insln + fetchbuf1_insln;
else
pc0 <= pc0 + fetchbuf0_insln;
`endif
end
endtask
862,13 → 877,11
task FetchB;
begin
fetchbufB_instr <= cinsn1;
fetchbufB_v <= `VAL;
fetchbufB_v <= `WAYS > 1;
fetchbufB_pc <= pc1;
`ifdef SUPPORT_SMT
fetchbufB_pc <= pc1;
if (phit)
pc1 <= pc1 + fetchbuf1_insln;
`else
fetchbufB_pc <= pc0 + fetchbuf0_insln;
`endif
end
endtask
890,7 → 903,10
`ifdef SUPPORT_SMT
pc0 <= pc0 + fetchbuf0_insln;
`else
if (`WAYS > 1)
pc0 <= pc0 + fetchbuf0_insln + fetchbuf1_insln;
else
pc0 <= pc0 + fetchbuf0_insln;
`endif
end
endtask
898,13 → 914,11
task FetchD;
begin
fetchbufD_instr <= cinsn1;
fetchbufD_v <= `VAL;
fetchbufD_v <= `WAYS > 1;
fetchbufD_pc <= pc1;
`ifdef SUPPORT_SMT
fetchbufD_pc <= pc1;
if (phit)
pc1 <= pc1 + fetchbuf1_insln;
`else
fetchbufD_pc <= pc0 + fetchbuf0_insln;
`endif
end
endtask
/FT64v5/rtl/twoway/FT64_regfile2w6r_oc.v
23,10 → 23,10
// Register file with two write ports and six read ports.
// ============================================================================
//
`define SIM
`include "FT64_config.vh"
 
module FT64_regfileRam_sim(clka, ena, wea, addra, dina, clkb, enb, addrb, doutb);
parameter WID=65;
parameter WID=64;
parameter RBIT = 11;
input clka;
input ena;

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.